Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/dataobj/logs_reader.go

268 lines
6.6 KiB

package dataobj
import (
"context"
"fmt"
"io"
"iter"
"sort"
"time"
"github.com/prometheus/prometheus/model/labels"
"github.com/grafana/loki/pkg/push"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/filemd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
"github.com/grafana/loki/v3/pkg/dataobj/internal/sections/logs"
)
// A Record is an individual log record in a data object. Records are the
// element type that [LogsReader.Read] stores into the caller-provided slice.
type Record struct {
StreamID int64 // StreamID associated with the log record.
Timestamp time.Time // Timestamp of the log record.
// Metadata is the set of metadata associated with the log record.
// [LogsReader.Read] populates it through convertMetadata, which sorts the
// labels before returning them.
Metadata labels.Labels
Line string // Line of the log record.
}
// LogsReader reads the set of logs from an [Object].
//
// A LogsReader is created with [NewLogsReader] and can be pointed at another
// object or section with [LogsReader.Reset]. Optional filters are configured
// with [LogsReader.MatchStreams] and [LogsReader.SetPredicate]; both refuse
// changes once [LogsReader.Read] has started iterating.
type LogsReader struct {
obj *Object // Object to read from; Read returns io.EOF while this is nil.
idx int // Index of the section to read, counted among logs-type sections only (see findSection).
matchIDs map[int64]struct{} // Stream IDs to keep; a nil map disables stream filtering in nextMatching.
predicate LogsPredicate // Optional record filter; a nil predicate matches every record.
// next and stop are the pull-iterator functions produced by result.Pull in
// initIter. next stays nil until the first Read, which is how MatchStreams
// and SetPredicate detect that reading has already started.
next func() (result.Result[logs.Record], bool)
stop func()
}
// NewLogsReader returns a LogsReader set up to read the logs section of obj
// selected by sectionIndex. The reader is initialized through
// [LogsReader.Reset], so a freshly created reader and a reset one start from
// the same state.
func NewLogsReader(obj *Object, sectionIndex int) *LogsReader {
	reader := new(LogsReader)
	reader.Reset(obj, sectionIndex)
	return reader
}
// MatchStreams adds every stream ID yielded by ids to the set of streams the
// reader matches. [LogsReader.Read] will only return logs whose stream ID is
// in that set.
//
// Calls accumulate: invoking MatchStreams several times matches the union of
// all provided IDs.
//
// MatchStreams returns an error if reading has already begun. It may be
// called again after [LogsReader.Reset].
func (r *LogsReader) MatchStreams(ids iter.Seq[int64]) error {
	// A non-nil pull function means Read has already created the iterator.
	if r.next != nil {
		return fmt.Errorf("cannot change matched streams after reading has started")
	}

	// Lazily create the set so that a reader without MatchStreams calls keeps
	// a nil map, which disables stream filtering altogether.
	set := r.matchIDs
	if set == nil {
		set = make(map[int64]struct{})
		r.matchIDs = set
	}

	ids(func(id int64) bool {
		set[id] = struct{}{}
		return true
	})
	return nil
}
// SetPredicate installs p as the filter applied to logs. [LogsReader.Read]
// only returns logs for which the predicate passes. The predicate is stored
// as given; it is not inspected here.
//
// SetPredicate returns an error if reading has already begun. It may be
// called again after [LogsReader.Reset].
func (r *LogsReader) SetPredicate(p LogsPredicate) error {
	// r.next is only populated once Read has created the iterator.
	if r.next == nil {
		r.predicate = p
		return nil
	}
	return fmt.Errorf("cannot change predicate after reading has started")
}
// Read fills s with up to len(s) matching records and returns how many were
// stored together with any error encountered.
//
// When the logs section is exhausted, Read returns the number of records
// stored so far along with io.EOF. A reader with no object (see
// [LogsReader.Reset]) returns 0, io.EOF immediately. The iterator over the
// section is created lazily on the first call, using that call's ctx.
func (r *LogsReader) Read(ctx context.Context, s []Record) (int, error) {
	// TODO(rfratto): The implementation below is the initial, naive approach. It
	// lacks a few features that will be needed at scale:
	//
	// * Read columns/pages in batches of len(s), rather than one row at a time,
	//
	// * Add page-level filtering based on min/max page values to quickly filter
	//   out batches of rows without needing to download or decode them.
	//
	// * Download pages in batches, rather than one at a time.
	//
	// * Only download/decode non-predicate columns following finding rows that
	//   match all predicate columns.
	//
	// * Reuse as much memory as possible from a combination of s and the state
	//   of LogsReader.
	//
	// These details can change internally without changing the API exposed by
	// LogsReader, which is designed to permit efficient use in the future.

	switch {
	case r.obj == nil:
		return 0, io.EOF
	case r.idx < 0:
		return 0, fmt.Errorf("invalid section index %d", r.idx)
	}

	// First call since construction or Reset: build the pull iterator.
	if r.next == nil {
		if err := r.initIter(ctx); err != nil {
			return 0, err
		}
	}

	var n int
	for n < len(s) {
		res, ok := r.nextMatching()
		if !ok {
			return n, io.EOF
		}

		rec, err := res.Value()
		if err != nil {
			return n, fmt.Errorf("reading record: %w", err)
		}

		s[n] = Record{
			StreamID:  rec.StreamID,
			Timestamp: rec.Timestamp,
			Metadata:  convertMetadata(rec.Metadata),
			Line:      rec.Line,
		}
		n++
	}

	return n, nil
}
// initIter locates the configured logs section and replaces r.next and r.stop
// with a pull iterator over its records. Any previously installed stop
// function is invoked first. If the section cannot be found, the reader's
// iterator state is left untouched and the error is returned.
func (r *LogsReader) initIter(ctx context.Context) error {
	section, err := r.findSection(ctx)
	if err != nil {
		return fmt.Errorf("finding section: %w", err)
	}

	if stopPrev := r.stop; stopPrev != nil {
		stopPrev()
	}

	r.next, r.stop = result.Pull(logs.IterSection(ctx, r.obj.dec.LogsDecoder(), section))
	return nil
}
// findSection returns the r.idx-th section of type SECTION_TYPE_LOGS in the
// object, counting from zero and skipping sections of any other type. It
// returns an error if the section list cannot be read or if the object holds
// too few logs sections.
func (r *LogsReader) findSection(ctx context.Context) (*filemd.SectionInfo, error) {
	sections, err := r.obj.dec.Sections(ctx)
	if err != nil {
		return nil, fmt.Errorf("reading sections: %w", err)
	}

	seen := 0 // Number of logs sections passed over so far.
	for _, sec := range sections {
		if sec.Type != filemd.SECTION_TYPE_LOGS {
			continue
		}
		if seen == r.idx {
			return sec, nil
		}
		seen++
	}

	return nil, fmt.Errorf("section index %d not found", r.idx)
}
// nextMatching advances the pull iterator until it yields a result the caller
// should see, and returns that result with true. A result is returned when it
// carries an error, or when its record passes both the stream ID filter (if
// any) and the predicate. It returns false once the iterator is exhausted, or
// immediately if no iterator has been created yet.
func (r *LogsReader) nextMatching() (result.Result[logs.Record], bool) {
	if r.next == nil {
		return result.Result[logs.Record]{}, false
	}

	for {
		res, ok := r.next()
		if !ok {
			return res, false
		}

		rec, err := res.Value()
		if err != nil {
			// Hand errors back unfiltered so Read can report them.
			return res, true
		}

		// A nil map means no stream filter was configured.
		if r.matchIDs != nil {
			if _, wanted := r.matchIDs[rec.StreamID]; !wanted {
				continue
			}
		}

		if matchLogsPredicate(r.predicate, rec) {
			return res, true
		}
	}
}
// matchLogsPredicate reports whether record satisfies p. A nil predicate
// matches everything. And/Or/Not predicates are evaluated recursively, with
// And and Or short-circuiting on their left operand. It panics on a predicate
// type that is not handled below.
func matchLogsPredicate(p Predicate, record logs.Record) bool {
	switch pred := p.(type) {
	case nil:
		return true

	case AndPredicate[LogsPredicate]:
		if !matchLogsPredicate(pred.Left, record) {
			return false
		}
		return matchLogsPredicate(pred.Right, record)

	case OrPredicate[LogsPredicate]:
		if matchLogsPredicate(pred.Left, record) {
			return true
		}
		return matchLogsPredicate(pred.Right, record)

	case NotPredicate[LogsPredicate]:
		return !matchLogsPredicate(pred.Inner, record)

	case TimeRangePredicate[LogsPredicate]:
		return matchTimestamp(pred, record.Timestamp)

	case MetadataMatcherPredicate:
		// A missing key reads as "", so it matches an empty expected value.
		actual := getMetadata(record.Metadata, pred.Key)
		return actual == pred.Value

	case MetadataFilterPredicate:
		actual := getMetadata(record.Metadata, pred.Key)
		return pred.Keep(pred.Key, actual)

	default:
		// Unsupported predicates are expected to be rejected before reading
		// starts.
		// NOTE(review): [LogsReader.SetPredicate] stores the predicate without
		// checking its type, so nothing in this file rules this case out.
		panic(fmt.Sprintf("unsupported predicate type %T", pred))
	}
}
// getMetadata returns the value of the first label in md whose name equals
// key, or the empty string when no label has that name.
func getMetadata(md push.LabelsAdapter, key string) string {
	for i := range md {
		if md[i].Name != key {
			continue
		}
		return md[i].Value
	}
	return ""
}
// convertMetadata copies md into a newly allocated labels.Labels, one label
// per entry with the same name and value, and sorts the result with sort.Sort
// before returning it. The input is not modified.
func convertMetadata(md push.LabelsAdapter) labels.Labels {
	converted := make(labels.Labels, len(md))
	for i, entry := range md {
		converted[i] = labels.Label{Name: entry.Name, Value: entry.Value}
	}
	sort.Sort(converted)
	return converted
}
// Reset resets the LogsReader with a new object and section index to read
// from. Reset allows reusing a LogsReader without allocating a new one.
//
// Any in-progress iteration is stopped and all filters are cleared: both the
// stream IDs added with [LogsReader.MatchStreams] and the predicate set with
// [LogsReader.SetPredicate]. After Reset the reader matches every record
// until filters are configured again.
//
// Reset may be called with a nil object and a negative section index to clear
// the LogsReader without needing a new object.
func (r *LogsReader) Reset(obj *Object, sectionIndex int) {
	// Release the previous pull iterator, if reading had started.
	if r.stop != nil {
		r.stop()
	}

	r.obj = obj
	r.idx = sectionIndex
	r.next = nil
	r.stop = nil

	// matchIDs must go back to nil rather than being emptied with clear():
	// nextMatching treats any non-nil map as an active stream filter, so an
	// empty-but-non-nil map would reject every record read after the Reset
	// unless MatchStreams were called again.
	r.matchIDs = nil
	r.predicate = nil
}