loki/pkg/dataobj/internal/sections/logs/iter.go

package logs

import (
	"cmp"
	"context"
	"fmt"
	"slices"
	"time"

	"github.com/grafana/loki/pkg/push"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/encoding"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/filemd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
)
// Iter iterates over records in the provided decoder. All logs sections are
// iterated over in order.
func Iter(ctx context.Context, dec encoding.Decoder) result.Seq[Record] {
	return result.Iter(func(yield func(Record) bool) error {
		sections, err := dec.Sections(ctx)
		if err != nil {
			return err
		}

		logsDec := dec.LogsDecoder()

		for _, section := range sections {
			// Only logs sections can be decoded here; skip everything else.
			if section.Type != filemd.SECTION_TYPE_LOGS {
				continue
			}

			for res := range IterSection(ctx, logsDec, section) {
				if res.Err() != nil || !yield(res.MustValue()) {
					return res.Err()
				}
			}
		}

		return nil
	})
}
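
// Example usage (a minimal sketch, not part of this package): it assumes the
// caller already has a context and an encoding.Decoder for a data object,
// and drains records through the Value accessor used elsewhere in this file.
//
//	for res := range logs.Iter(ctx, dec) {
//		record, err := res.Value()
//		if err != nil {
//			return err
//		}
//		fmt.Println(record.Timestamp, record.StreamID, record.Line)
//	}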

// IterSection iterates over the records of a single logs section.
func IterSection(ctx context.Context, dec encoding.LogsDecoder, section *filemd.SectionInfo) result.Seq[Record] {
	return result.Iter(func(yield func(Record) bool) error {
		// We need to pull the columns twice: once from the dataset
		// implementation and once from the metadata to retrieve the column
		// type.
		//
		// TODO(rfratto): find a way to expose this information from
		// encoding.LogsDataset to avoid the double call.
		logsColumns, err := dec.Columns(ctx, section)
		if err != nil {
			return err
		}

		dset := encoding.LogsDataset(dec, section)

		columns, err := result.Collect(dset.ListColumns(ctx))
		if err != nil {
			return err
		}

		for res := range dataset.Iter(ctx, columns) {
			row, err := res.Value()
			if err != nil {
				return err
			}

			record, err := decodeRecord(logsColumns, row)
			if err != nil {
				return err
			} else if !yield(record) {
				return nil
			}
		}

		return nil
	})
}
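
// Example: iterating one section directly instead of the whole object (a
// minimal sketch; it assumes the section list was already fetched through
// dec.Sections, as in Iter above):
//
//	logsDec := dec.LogsDecoder()
//	for _, section := range sections {
//		if section.Type != filemd.SECTION_TYPE_LOGS {
//			continue
//		}
//		for res := range logs.IterSection(ctx, logsDec, section) {
//			// handle res as in the Iter example above
//		}
//	}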

// decodeRecord converts a dataset row into a Record, using the column
// descriptions to determine how each value is interpreted.
func decodeRecord(columns []*logsmd.ColumnDesc, row dataset.Row) (Record, error) {
	record := Record{
		// Preallocate metadata to the exact number of metadata columns to
		// avoid oversizing.
		Metadata: make(push.LabelsAdapter, 0, metadataColumns(columns)),
	}

	for columnIndex, columnValue := range row.Values {
		if columnValue.IsNil() || columnValue.IsZero() {
			continue
		}

		column := columns[columnIndex]
		switch column.Type {
		case logsmd.COLUMN_TYPE_STREAM_ID:
			if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
				return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
			}
			record.StreamID = columnValue.Int64()

		case logsmd.COLUMN_TYPE_TIMESTAMP:
			if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
				return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
			}
			record.Timestamp = time.Unix(0, columnValue.Int64()).UTC()

		case logsmd.COLUMN_TYPE_METADATA:
			if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_STRING {
				return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
			}
			record.Metadata = append(record.Metadata, push.LabelAdapter{
				Name:  column.Info.Name,
				Value: columnValue.String(),
			})

		case logsmd.COLUMN_TYPE_MESSAGE:
			if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_STRING {
				return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
			}
			record.Line = columnValue.String()
		}
	}

	// Metadata arrives in received order; sort it by key (then value) per
	// record so consumers see a deterministic ordering.
	slices.SortFunc(record.Metadata, func(a, b push.LabelAdapter) int {
		if res := cmp.Compare(a.Name, b.Name); res != 0 {
			return res
		}
		return cmp.Compare(a.Value, b.Value)
	})

	return record, nil
}
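
// Worked example (hypothetical values; the actual columns depend on what was
// written to the section): a row carrying STREAM_ID=7, TIMESTAMP=1000000000,
// METADATA pod="api-0", and MESSAGE "hello" decodes to:
//
//	Record{
//		StreamID:  7,
//		Timestamp: time.Unix(0, 1000000000).UTC(),
//		Metadata:  push.LabelsAdapter{{Name: "pod", Value: "api-0"}},
//		Line:      "hello",
//	}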

// metadataColumns returns the number of metadata columns in columns.
func metadataColumns(columns []*logsmd.ColumnDesc) int {
	var count int
	for _, column := range columns {
		if column.Type == logsmd.COLUMN_TYPE_METADATA {
			count++
		}
	}
	return count
}