// loki/pkg/logql/accumulator.go

package logql

import (
	"container/heap"
	"context"
	"fmt"
	"sort"
	"time"

	"golang.org/x/exp/maps"

	"github.com/grafana/loki/v3/pkg/logproto"
	"github.com/grafana/loki/v3/pkg/logqlmodel"
	"github.com/grafana/loki/v3/pkg/logqlmodel/metadata"
	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
	"github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions"
	"github.com/grafana/loki/v3/pkg/util/math"
)

// NewBufferedAccumulator returns an accumulator which aggregates all query
// results in a slice. This is useful for metric queries, which are generally
// small payloads and the memory overhead for buffering is negligible.
func NewBufferedAccumulator(n int) *BufferedAccumulator {
	return &BufferedAccumulator{
		results: make([]logqlmodel.Result, n),
	}
}

type BufferedAccumulator struct {
	results []logqlmodel.Result
}

func (a *BufferedAccumulator) Accumulate(_ context.Context, acc logqlmodel.Result, i int) error {
	a.results[i] = acc
	return nil
}

func (a *BufferedAccumulator) Result() []logqlmodel.Result {
	return a.results
}

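// exampleBufferedAccumulation is an illustrative sketch, not part of the
// original file: each downstream metric-query result is stored at its own
// index, so results can be accumulated in any order and read back as one
// slice. The function name and the downstream parameter are hypothetical.
func exampleBufferedAccumulation(ctx context.Context, downstream []logqlmodel.Result) []logqlmodel.Result {
	acc := NewBufferedAccumulator(len(downstream))
	for i, res := range downstream {
		_ = acc.Accumulate(ctx, res, i) // never returns an error for this accumulator
	}
	return acc.Result()
}
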
type QuantileSketchAccumulator struct {
	matrix ProbabilisticQuantileMatrix

	stats    stats.Result        // for accumulating statistics from downstream requests
	headers  map[string][]string // for accumulating headers from downstream requests
	warnings map[string]struct{} // for accumulating warnings from downstream requests
}

// newQuantileSketchAccumulator returns an accumulator for sharded
// probabilistic quantile queries that merges results as they come in.
func newQuantileSketchAccumulator() *QuantileSketchAccumulator {
	return &QuantileSketchAccumulator{
		headers:  make(map[string][]string),
		warnings: make(map[string]struct{}),
	}
}

func (a *QuantileSketchAccumulator) Accumulate(_ context.Context, res logqlmodel.Result, _ int) error {
	if res.Data.Type() != QuantileSketchMatrixType {
		return fmt.Errorf("unexpected matrix data type: got (%s), want (%s)", res.Data.Type(), QuantileSketchMatrixType)
	}
	data, ok := res.Data.(ProbabilisticQuantileMatrix)
	if !ok {
		return fmt.Errorf("unexpected matrix type: got (%T), want (ProbabilisticQuantileMatrix)", res.Data)
	}

	// TODO(owen-d/ewelch): Shard counts should be set by the querier
	// so we don't have to do it in tricky ways in multiple places.
	// See pkg/logql/downstream.go:DownstreamEvaluator.Downstream
	// for another example.
	if res.Statistics.Summary.Shards == 0 {
		res.Statistics.Summary.Shards = 1
	}
	// merge stats exactly once per result; merging again after the matrix
	// merge below would double-count this result's statistics.
	a.stats.Merge(res.Statistics)
	metadata.ExtendHeaders(a.headers, res.Headers)

	for _, w := range res.Warnings {
		a.warnings[w] = struct{}{}
	}

	if a.matrix == nil {
		a.matrix = data
		return nil
	}

	var err error
	a.matrix, err = a.matrix.Merge(data)
	return err
}

func (a *QuantileSketchAccumulator) Result() []logqlmodel.Result {
	headers := make([]*definitions.PrometheusResponseHeader, 0, len(a.headers))
	for name, vals := range a.headers {
		headers = append(
			headers,
			&definitions.PrometheusResponseHeader{
				Name:   name,
				Values: vals,
			},
		)
	}

	warnings := maps.Keys(a.warnings)
	sort.Strings(warnings)

	return []logqlmodel.Result{
		{
			Data:       a.matrix,
			Headers:    headers,
			Warnings:   warnings,
			Statistics: a.stats,
		},
	}
}

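// exampleQuantileSketchAccumulation is an illustrative sketch, not part of
// the original file: sharded probabilistic-quantile results are merged
// pairwise as they arrive, and Result wraps the final matrix together with
// the accumulated stats, headers, and warnings. The function name and the
// shardResults parameter are hypothetical.
func exampleQuantileSketchAccumulation(ctx context.Context, shardResults []logqlmodel.Result) ([]logqlmodel.Result, error) {
	acc := newQuantileSketchAccumulator()
	for i, res := range shardResults {
		// each res.Data must be a ProbabilisticQuantileMatrix
		if err := acc.Accumulate(ctx, res, i); err != nil {
			return nil, err
		}
	}
	return acc.Result(), nil
}
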
// heap impl for keeping only the top n results across m streams
// importantly, AccumulatedStreams is _bounded_, so it will only
// store the top `limit` results across all streams.
// To implement this, we use a min-heap when looking
// for the max values (logproto.FORWARD)
// and vice versa for logproto.BACKWARD.
// This allows us to easily find the 'worst' value
// and replace it with a better one.
// Once we've fully processed all log lines,
// we return the heap in opposite order and then reverse it
// to get the correct order.
// Heap implements container/heap.Interface
// solely to use heap.Interface as a library.
// It is not intended for the heap pkg functions
// to otherwise call this type.
type AccumulatedStreams struct {
	count, limit int
	labelmap     map[string]int
	streams      []*logproto.Stream
	order        logproto.Direction

	stats    stats.Result        // for accumulating statistics from downstream requests
	headers  map[string][]string // for accumulating headers from downstream requests
	warnings map[string]struct{} // for accumulating warnings from downstream requests
}

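// The sketch below is illustrative only, not part of the original file: it
// reduces the bounded top-N trick described above to plain ints. To keep the
// N largest values, maintain a *min*-heap capped at N entries; the root is
// then always the worst retained value, so each candidate needs only a single
// comparison against it. The boundedTopN type and offer helper are
// hypothetical names.
type boundedTopN struct{ vals []int }

func (h *boundedTopN) Len() int           { return len(h.vals) }
func (h *boundedTopN) Less(i, j int) bool { return h.vals[i] < h.vals[j] }
func (h *boundedTopN) Swap(i, j int)      { h.vals[i], h.vals[j] = h.vals[j], h.vals[i] }
func (h *boundedTopN) Push(x any)         { h.vals = append(h.vals, x.(int)) }
func (h *boundedTopN) Pop() any {
	last := h.vals[len(h.vals)-1]
	h.vals = h.vals[:len(h.vals)-1]
	return last
}

// offer inserts v while there is room, otherwise replaces the root (the
// smallest, i.e. worst, retained value) when v beats it. After all candidates
// have been offered, the caller drains and reverses the heap for best-first
// order, just as AccumulatedStreams.Result re-sorts at the end.
func (h *boundedTopN) offer(v, limit int) {
	if h.Len() < limit {
		heap.Push(h, v)
		return
	}
	if v > h.vals[0] {
		h.vals[0] = v
		heap.Fix(h, 0)
	}
}
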
// NewStreamAccumulator returns an accumulator for limited log queries.
// Log queries, sharded thousands of times and each returning <limit>
// results, can be _considerably_ larger. In this case, we eagerly
// accumulate the results into a logsAccumulator, discarding values
// over the limit to keep memory pressure down while other subqueries
// are executing.
func NewStreamAccumulator(params Params) *AccumulatedStreams {
	// the stream accumulator stores a heap with reversed order
	// from the results we expect, so we need to reverse the direction
	order := logproto.FORWARD
	if params.Direction() == logproto.FORWARD {
		order = logproto.BACKWARD
	}

	return &AccumulatedStreams{
		labelmap: make(map[string]int),
		order:    order,
		limit:    int(params.Limit()),

		headers:  make(map[string][]string),
		warnings: make(map[string]struct{}),
	}
}

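// Illustrative note, not part of the original file: for a FORWARD query,
// which keeps the earliest `limit` entries, acc.order is set to BACKWARD, so
// the heap's root is the *latest* retained entry, exactly the one to evict
// first when an earlier (better) entry arrives. Result later re-sorts the
// entries back into query order.
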
// returns the top priority
func (acc *AccumulatedStreams) top() (time.Time, bool) {
	if len(acc.streams) > 0 && len(acc.streams[0].Entries) > 0 {
		return acc.streams[0].Entries[len(acc.streams[0].Entries)-1].Timestamp, true
	}
	return time.Time{}, false
}

func (acc *AccumulatedStreams) Find(labels string) (int, bool) {
	i, ok := acc.labelmap[labels]
	return i, ok
}

// number of streams
func (acc *AccumulatedStreams) Len() int { return len(acc.streams) }

func (acc *AccumulatedStreams) Swap(i, j int) {
	// for i=0, j=1
	// {'a': 0, 'b': 1}
	// [a, b]
	acc.streams[i], acc.streams[j] = acc.streams[j], acc.streams[i]
	// {'a': 0, 'b': 1}
	// [b, a]
	acc.labelmap[acc.streams[i].Labels] = i
	acc.labelmap[acc.streams[j].Labels] = j
	// {'a': 1, 'b': 0}
	// [b, a]
}

// first order by timestamp, then by labels
func (acc *AccumulatedStreams) Less(i, j int) bool {
	// order by the 'oldest' entry in the stream
	if a, b := acc.streams[i].Entries[len(acc.streams[i].Entries)-1].Timestamp, acc.streams[j].Entries[len(acc.streams[j].Entries)-1].Timestamp; !a.Equal(b) {
		return acc.less(a, b)
	}
	return acc.streams[i].Labels <= acc.streams[j].Labels
}

func (acc *AccumulatedStreams) less(a, b time.Time) bool {
	// use after for stable sort
	if acc.order == logproto.FORWARD {
		return !a.After(b)
	}
	return !b.After(a)
}

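// Illustrative note, not part of the original file: with order == FORWARD,
// less(10:00, 10:01) is true and less(10:01, 10:00) is false; for equal
// timestamps less returns true in either direction, which is what the
// "use after for stable sort" comment above refers to.
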
func (acc *AccumulatedStreams) Push(x any) {
	s := x.(*logproto.Stream)
	if len(s.Entries) == 0 {
		return
	}

	if room := acc.limit - acc.count; room >= len(s.Entries) {
		if i, ok := acc.Find(s.Labels); ok {
			// stream already exists, append entries

			// these are already guaranteed to be sorted
			// Reasoning: we shard subrequests so each stream exists on only one
			// shard. Therefore, the only time a stream should already exist
			// is in successive splits, which are already guaranteed to be ordered
			// and we can just append.
			acc.appendTo(acc.streams[i], s)

			return
		}

		// new stream
		acc.addStream(s)
		return
	}

	// there's not enough room for all the entries,
	// so we need to push them one at a time,
	// evicting the worst entries as we go.
	acc.push(s)
}

// there's not enough room for all the entries.
// since we store them in a reverse heap relative to what we _want_
// (i.e. the max value for FORWARD, the min value for BACKWARD),
// we test if the new entry is better than the worst entry,
// swapping them if so.
func (acc *AccumulatedStreams) push(s *logproto.Stream) {
	worst, ok := acc.top()
	room := math.Min(acc.limit-acc.count, len(s.Entries))

	if !ok {
		if room == 0 {
			// special case: limit must be zero since there's no room and no worst entry
			return
		}
		s.Entries = s.Entries[:room]
		// special case: there are no entries in the heap. Push entries up to the limit
		acc.addStream(s)
		return
	}

	// since entries are sorted by timestamp from best -> worst,
	// we can discard the entire stream if the incoming best entry
	// is worse than the worst entry in the heap.
	cutoff := sort.Search(len(s.Entries), func(i int) bool {
		// TODO(refactor label comparison -- should be in another fn)
		if worst.Equal(s.Entries[i].Timestamp) {
			return acc.streams[0].Labels < s.Labels
		}
		return acc.less(s.Entries[i].Timestamp, worst)
	})
	s.Entries = s.Entries[:cutoff]

	for i := 0; i < len(s.Entries) && acc.less(worst, s.Entries[i].Timestamp); i++ {
		// push one entry at a time
		room = acc.limit - acc.count
		// pop if there's no room to make the heap small enough for an append;
		// in the short path of Push() we know that there's room for at least one entry
		if room == 0 {
			acc.Pop()
		}

		cpy := *s
		cpy.Entries = []logproto.Entry{s.Entries[i]}
		acc.Push(&cpy)

		// update worst
		worst, _ = acc.top()
	}
}

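// Illustrative walkthrough, not part of the original file: suppose the query
// keeps the newest entries, the heap is full, and the worst (oldest) retained
// timestamp is 10:05. For an incoming stream with entries [10:09, 10:07,
// 10:02] (best to worst), sort.Search yields cutoff = 2: 10:02 can never
// displace 10:05 and is truncated, while 10:09 and 10:07 are pushed one at a
// time, each push first popping the current worst entry to make room.
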
func (acc *AccumulatedStreams) addStream(s *logproto.Stream) {
	// ensure entries conform to order we expect
	// TODO(owen-d): remove? should be unnecessary since we insert in appropriate order
	// but it's nice to have the safeguard
	sort.Slice(s.Entries, func(i, j int) bool {
		return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp)
	})

	acc.streams = append(acc.streams, s)
	i := len(acc.streams) - 1
	acc.labelmap[s.Labels] = i
	acc.count += len(s.Entries)
	heap.Fix(acc, i)
}

// dst must already exist in acc
func (acc *AccumulatedStreams) appendTo(dst, src *logproto.Stream) {
	// these are already guaranteed to be sorted
	// Reasoning: we shard subrequests so each stream exists on only one
	// shard. Therefore, the only time a stream should already exist
	// is in successive splits, which are already guaranteed to be ordered
	// and we can just append.

	var needsSort bool
	for _, e := range src.Entries {
		// sort if order has broken
		if len(dst.Entries) > 0 && acc.less(dst.Entries[len(dst.Entries)-1].Timestamp, e.Timestamp) {
			needsSort = true
		}
		dst.Entries = append(dst.Entries, e)
	}

	if needsSort {
		sort.Slice(dst.Entries, func(i, j int) bool {
			// store in reverse order so we can more reliably insert without sorting and pop from end
			return acc.less(dst.Entries[j].Timestamp, dst.Entries[i].Timestamp)
		})
	}

	acc.count += len(src.Entries)
	heap.Fix(acc, acc.labelmap[dst.Labels])
}

// Pop returns a stream with one entry. It pops the first entry of the first stream
func (acc *AccumulatedStreams) Pop() any {
	n := acc.Len()
	if n == 0 {
		return nil
	}

	stream := acc.streams[0]
	cpy := *stream
	cpy.Entries = []logproto.Entry{cpy.Entries[len(stream.Entries)-1]}
	stream.Entries = stream.Entries[:len(stream.Entries)-1]

	acc.count--

	if len(stream.Entries) == 0 {
		// remove stream
		acc.Swap(0, n-1)
		acc.streams[n-1] = nil // avoid leaking reference
		delete(acc.labelmap, stream.Labels)
		acc.streams = acc.streams[:n-1]
	}

	if acc.Len() > 0 {
		heap.Fix(acc, 0)
	}

	return &cpy
}

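// Illustrative note, not part of the original file: Pop is written to be
// invoked directly (as push does above), not through heap.Pop, which would
// first swap the root with the last element and expect that element to be
// removed. This is what the type comment means by implementing heap.Interface
// "as a library" only.
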
// Note: can only be called once as it will alter stream ordering.
func (acc *AccumulatedStreams) Result() []logqlmodel.Result {
	// sort streams by label
	sort.Slice(acc.streams, func(i, j int) bool {
		return acc.streams[i].Labels < acc.streams[j].Labels
	})

	streams := make(logqlmodel.Streams, 0, len(acc.streams))

	for _, s := range acc.streams {
		// sort entries by timestamp, inversely based on direction
		sort.Slice(s.Entries, func(i, j int) bool {
			return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp)
		})
		streams = append(streams, *s)
	}

	res := logqlmodel.Result{
		// stats & headers are already aggregated in the context
		Data:       streams,
		Statistics: acc.stats,
		Headers:    make([]*definitions.PrometheusResponseHeader, 0, len(acc.headers)),
	}

	for name, vals := range acc.headers {
		res.Headers = append(
			res.Headers,
			&definitions.PrometheusResponseHeader{
				Name:   name,
				Values: vals,
			},
		)
	}

	warnings := maps.Keys(acc.warnings)
	sort.Strings(warnings)
	res.Warnings = warnings

	return []logqlmodel.Result{res}
}

func (acc *AccumulatedStreams) Accumulate(_ context.Context, x logqlmodel.Result, _ int) error {
	// TODO(owen-d/ewelch): Shard counts should be set by the querier
	// so we don't have to do it in tricky ways in multiple places.
	// See pkg/logql/downstream.go:DownstreamEvaluator.Downstream
	// for another example.
	if x.Statistics.Summary.Shards == 0 {
		x.Statistics.Summary.Shards = 1
	}
	acc.stats.Merge(x.Statistics)
	metadata.ExtendHeaders(acc.headers, x.Headers)

	for _, w := range x.Warnings {
		acc.warnings[w] = struct{}{}
	}

	switch got := x.Data.(type) {
	case logqlmodel.Streams:
		for i := range got {
			acc.Push(&got[i])
		}
	default:
		return fmt.Errorf("unexpected response type during response result accumulation. Got (%T), wanted %s", got, logqlmodel.ValueTypeStreams)
	}

	return nil
}

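// exampleStreamAccumulation is an illustrative sketch, not part of the
// original file: it shows how a caller might feed sharded downstream results
// into an AccumulatedStreams and read back the merged, limit-bounded streams.
// The function name and the shardResults parameter are hypothetical.
func exampleStreamAccumulation(ctx context.Context, params Params, shardResults []logqlmodel.Result) ([]logqlmodel.Result, error) {
	acc := NewStreamAccumulator(params)
	for i, res := range shardResults {
		if err := acc.Accumulate(ctx, res, i); err != nil {
			return nil, err
		}
	}
	// Result may only be called once: it re-sorts the underlying streams.
	return acc.Result(), nil
}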