mirror of https://github.com/grafana/loki
chore: Split indexing into distinct steps (#19987)
parent
2ce207fbdb
commit
044ded2e29
@ -0,0 +1,55 @@ |
||||
package index |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
|
||||
"github.com/bits-and-blooms/bloom/v3" |
||||
"github.com/prometheus/prometheus/model/labels" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/dataobj" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/sections/logs" |
||||
) |
||||
|
||||
type columnValuesCalculation struct { |
||||
columnBloomBuilders map[string]*bloom.BloomFilter |
||||
columnIndexes map[string]int64 |
||||
} |
||||
|
||||
func (c *columnValuesCalculation) Prepare(_ context.Context, _ *dataobj.Section, stats logs.Stats) error { |
||||
c.columnBloomBuilders = make(map[string]*bloom.BloomFilter) |
||||
c.columnIndexes = make(map[string]int64) |
||||
|
||||
for _, column := range stats.Columns { |
||||
logsType, _ := logs.ParseColumnType(column.Type) |
||||
if logsType != logs.ColumnTypeMetadata { |
||||
continue |
||||
} |
||||
c.columnBloomBuilders[column.Name] = bloom.NewWithEstimates(uint(column.Cardinality), 1.0/128.0) |
||||
c.columnIndexes[column.Name] = column.ColumnIndex |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (c *columnValuesCalculation) ProcessBatch(_ context.Context, _ *logsCalculationContext, batch []logs.Record) error { |
||||
for _, log := range batch { |
||||
log.Metadata.Range(func(md labels.Label) { |
||||
c.columnBloomBuilders[md.Name].Add([]byte(md.Value)) |
||||
}) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (c *columnValuesCalculation) Flush(_ context.Context, context *logsCalculationContext) error { |
||||
for columnName, bloom := range c.columnBloomBuilders { |
||||
bloomBytes, err := bloom.MarshalBinary() |
||||
if err != nil { |
||||
return fmt.Errorf("failed to marshal bloom filter: %w", err) |
||||
} |
||||
err = context.builder.AppendColumnIndex(context.tenantID, context.objectPath, context.sectionIdx, columnName, c.columnIndexes[columnName], bloomBytes) |
||||
if err != nil { |
||||
return fmt.Errorf("failed to append column index: %w", err) |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
@ -0,0 +1,29 @@ |
||||
package index |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/dataobj" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/sections/logs" |
||||
) |
||||
|
||||
// streamStatisticsCalculation reports every processed log record to the
// index builder via ObserveLogLine. It holds no state of its own.
type streamStatisticsCalculation struct{}
||||
|
||||
func (c *streamStatisticsCalculation) Prepare(_ context.Context, _ *dataobj.Section, _ logs.Stats) error { |
||||
return nil |
||||
} |
||||
|
||||
func (c *streamStatisticsCalculation) ProcessBatch(_ context.Context, context *logsCalculationContext, batch []logs.Record) error { |
||||
for _, log := range batch { |
||||
err := context.builder.ObserveLogLine(context.tenantID, context.objectPath, context.sectionIdx, log.StreamID, context.streamIDLookup[log.StreamID], log.Timestamp, int64(len(log.Line))) |
||||
if err != nil { |
||||
return fmt.Errorf("failed to observe log line: %w", err) |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (c *streamStatisticsCalculation) Flush(_ context.Context, _ *logsCalculationContext) error { |
||||
return nil |
||||
} |
||||
Loading…
Reference in new issue