Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/engine/compat.go

570 lines
18 KiB

package engine
import (
"sort"
"time"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
"github.com/grafana/loki/pkg/push"
"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
"github.com/grafana/loki/v3/pkg/engine/internal/types"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/logqlmodel"
"github.com/grafana/loki/v3/pkg/logqlmodel/metadata"
"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
)
// ResultBuilder accumulates Arrow record batches and turns them into a
// [logqlmodel.Result].
type ResultBuilder interface {
// CollectRecord consumes the rows of a single record batch.
CollectRecord(arrow.RecordBatch)
// Build assembles the final result from everything collected so far,
// attaching the given statistics and the headers and warnings taken from
// the metadata context.
Build(stats.Result, *metadata.Context) logqlmodel.Result
// Len reports how many items have been collected so far. The
// implementations in this file count log entries, samples, or series
// points respectively.
Len() int
}
// Compile-time checks that every builder in this file satisfies ResultBuilder.
var (
	_ ResultBuilder = (*streamsResultBuilder)(nil)
	_ ResultBuilder = (*vectorResultBuilder)(nil)
	_ ResultBuilder = (*matrixResultBuilder)(nil)
)
// newStreamsResultBuilder returns an empty streams builder. dir controls the
// per-stream entry order applied in Build; categorizeLabels controls whether
// metadata and parsed values are kept out of the stream labels.
func newStreamsResultBuilder(dir logproto.Direction, categorizeLabels bool) *streamsResultBuilder {
	b := &streamsResultBuilder{
		direction:        dir,
		categorizeLabels: categorizeLabels,
	}
	b.data = make(logqlmodel.Streams, 0)
	b.streams = make(map[string]int)
	// rowBuilders stays nil; it is grown on demand by ensureRowBuilders.
	return b
}
// streamsResultBuilder accumulates log entries from Arrow record batches and
// groups them into streams.
type streamsResultBuilder struct {
// direction selects the per-stream entry sort order applied in Build.
direction logproto.Direction
// categorizeLabels, when false, makes CollectRecord copy metadata and parsed
// values into the stream labels in addition to their own label sets.
categorizeLabels bool
// streams maps a stream grouping key to the index of that stream in data.
streams map[string]int
// data holds the collected streams; new streams are appended on first
// appearance and the slice is sorted in Build.
data logqlmodel.Streams
// count is the number of entries appended by CollectRecord. It is not
// adjusted when Build removes duplicates.
count int
// buffer for rows
rowBuilders []rowBuilder
}
// rowBuilder is the per-row scratch state used by CollectRecord while it
// transposes a columnar record batch into individual log entries.
type rowBuilder struct {
// timestamp and line are the entry's timestamp and log line; a zero
// timestamp causes the row to be dropped.
timestamp time.Time
line string
// lbsBuilder collects the labels that form the stream's label string.
lbsBuilder *labels.Builder
// metadataBuilder collects the entry's structured metadata.
metadataBuilder *labels.Builder
// parsedBuilder collects the entry's parsed labels (non-empty values only).
parsedBuilder *labels.Builder
// parsedEmptyKeys lists parsed label names whose value was the empty string;
// they are added back after the builders have produced their label sets.
parsedEmptyKeys []string
// errorLabelKeys tracks error label names (__error__, __error_details__) that were added
// to lbsBuilder. These are excluded from the stream grouping key but included in stream
// labels for display, matching classic Loki engine behavior.
errorLabelKeys []string
}
// CollectRecord converts every row of rec into a log entry and appends it to
// the stream it belongs to, creating the stream the first time its grouping
// key is seen.
//
// Columns whose name cannot be parsed by semconv.ParseFQN are skipped. Rows
// that end up without a timestamp, or whose label builder is empty, are
// dropped. When categorizeLabels is false, metadata and parsed values are also
// copied into the stream labels.
func (b *streamsResultBuilder) CollectRecord(rec arrow.RecordBatch) {
numRows := int(rec.NumRows())
if numRows == 0 {
return
}
// let's say we have the following log entries in rec:
// - {labelenv="prod-1", metadatatrace="123-1", parsed="v1"} ts1 line 1
// - {labelenv="prod-2", metadatatrace="123-2", parsed="v2"} ts2 line 2
// - {labelenv="prod-3", metadatatrace="123-3", parsed="v3"} ts3 line 3
// we pre-initialize slices to store column values for all the rows, e.g.:
// rows | 1 | 2 | 3 | ...
// ==============+=========+=========+=========+====
// timestamps | r1 ts | r2 ts | r3 ts | ...
// lines | r1 line | r2 line | r3 line | ...
// ...
// We iterate over the columns and convert the values to our format column by column, e.g.,
// first all the timestamps, then all the log lines, etc.
// After all the values are collected and converted we transform the columnar representation to a row-based one.
b.ensureRowBuilders(numRows)
// Convert arrow values to our format column by column
for colIdx := range int(rec.NumCols()) {
col := rec.Column(colIdx)
field := rec.Schema().Field(colIdx)
ident, err := semconv.ParseFQN(field.Name)
if err != nil {
continue
}
shortName := ident.ShortName()
// NOTE(review): the type assertions in the cases below are unchecked and
// will panic if a column's array type differs from the asserted one —
// confirm the schema guarantees these types.
switch true {
// Log line
case ident.Equal(semconv.ColumnIdentMessage):
lineCol := col.(*array.String)
forEachNotNullRowColValue(numRows, lineCol, func(rowIdx int) {
b.rowBuilders[rowIdx].line = lineCol.Value(rowIdx)
})
// Timestamp
case ident.Equal(semconv.ColumnIdentTimestamp):
tsCol := col.(*array.Timestamp)
forEachNotNullRowColValue(numRows, tsCol, func(rowIdx int) {
// The raw column value is interpreted as nanoseconds since the Unix epoch.
b.rowBuilders[rowIdx].timestamp = time.Unix(0, int64(tsCol.Value(rowIdx)))
})
// One of the label columns
case ident.ColumnType() == types.ColumnTypeLabel:
labelCol := col.(*array.String)
forEachNotNullRowColValue(numRows, labelCol, func(rowIdx int) {
val := labelCol.Value(rowIdx)
if val == "" {
// We also drop empty labels from stream labels to match classic Loki engine behavior.
return
}
b.rowBuilders[rowIdx].lbsBuilder.Set(shortName, val)
})
// One of the metadata columns
case ident.ColumnType() == types.ColumnTypeMetadata:
metadataCol := col.(*array.String)
forEachNotNullRowColValue(numRows, metadataCol, func(rowIdx int) {
val := metadataCol.Value(rowIdx)
b.rowBuilders[rowIdx].metadataBuilder.Set(shortName, val)
if !b.categorizeLabels {
b.rowBuilders[rowIdx].lbsBuilder.Set(shortName, val)
}
})
// One of the parsed columns
case ident.ColumnType() == types.ColumnTypeParsed || (ident.ColumnType() == types.ColumnTypeGenerated &&
(shortName == types.ColumnNameError || shortName == types.ColumnNameErrorDetails)):
parsedCol := col.(*array.String)
isErrorColumn := ident.ColumnType() == types.ColumnTypeGenerated &&
(shortName == types.ColumnNameError || shortName == types.ColumnNameErrorDetails)
forEachNotNullRowColValue(numRows, parsedCol, func(rowIdx int) {
parsedVal := parsedCol.Value(rowIdx)
// Keep the first non-empty value recorded for this name; values from
// later columns with the same short name are ignored.
if b.rowBuilders[rowIdx].parsedBuilder.Get(shortName) != "" {
return
}
b.rowBuilders[rowIdx].parsedBuilder.Set(shortName, parsedVal)
if !b.categorizeLabels {
b.rowBuilders[rowIdx].lbsBuilder.Set(shortName, parsedVal)
// Track error labels separately - they're included in stream labels for display
// but excluded from stream grouping key to match classic Loki behavior.
if isErrorColumn {
b.rowBuilders[rowIdx].errorLabelKeys = append(b.rowBuilders[rowIdx].errorLabelKeys, shortName)
}
}
// Remove a same-named metadata value that was collected from an earlier
// column so the name is only reported as a parsed label.
if b.rowBuilders[rowIdx].metadataBuilder.Get(shortName) != "" {
b.rowBuilders[rowIdx].metadataBuilder.Del(shortName)
}
// If the parsed value is empty, the builder won't accept it as it's not a valid Prometheus-style label. We must add it later for LogQL compatibility.
if parsedVal == "" && !isErrorColumn {
b.rowBuilders[rowIdx].parsedEmptyKeys = append(b.rowBuilders[rowIdx].parsedEmptyKeys, shortName)
}
})
}
}
// Convert columnar representation to a row-based one
for rowIdx := range numRows {
lbs := b.rowBuilders[rowIdx].lbsBuilder.Labels()
ts := b.rowBuilders[rowIdx].timestamp
line := b.rowBuilders[rowIdx].line
// Ignore rows that don't have stream labels, or timestamp
if ts.IsZero() || lbs.IsEmpty() {
b.resetRowBuilder(rowIdx)
continue
}
// For compatibility with LogQL, empty parsed labels need to be added to the stream labels & parsed label sets.
// The Prometheus label builder does not allow empty strings for label values, so we must work around it by creating a new builder and adding the empty labels to it.
var lbsString string
parsedLbs := logproto.FromLabelsToLabelAdapters(b.rowBuilders[rowIdx].parsedBuilder.Labels())
if len(b.rowBuilders[rowIdx].parsedEmptyKeys) > 0 {
newLbsBuilder := labels.NewScratchBuilder(lbs.Len())
lbs.Range(func(label labels.Label) {
newLbsBuilder.Add(label.Name, label.Value)
})
for _, key := range b.rowBuilders[rowIdx].parsedEmptyKeys {
newLbsBuilder.Add(key, "")
parsedLbs = append(parsedLbs, logproto.LabelAdapter{Name: key, Value: ""})
}
newLbsBuilder.Sort()
lbsString = newLbsBuilder.Labels().String()
// Re-sort by name because the empty-valued labels were appended at the end.
sort.Slice(parsedLbs, func(i, j int) bool {
return parsedLbs[i].Name < parsedLbs[j].Name
})
} else {
lbsString = lbs.String()
}
// Compute stream grouping key by excluding error labels.
// Error labels are included in stream labels for display but excluded from grouping
// so that entries with different error details stay in the same stream.
streamKey := lbsString
if len(b.rowBuilders[rowIdx].errorLabelKeys) > 0 {
keyBuilder := labels.NewScratchBuilder(lbs.Len())
lbs.Range(func(label labels.Label) {
for _, errKey := range b.rowBuilders[rowIdx].errorLabelKeys {
if label.Name == errKey {
return // skip error labels in grouping key
}
}
keyBuilder.Add(label.Name, label.Value)
})
// Also add empty parsed keys (excluding error labels)
for _, key := range b.rowBuilders[rowIdx].parsedEmptyKeys {
keyBuilder.Add(key, "")
}
keyBuilder.Sort()
streamKey = keyBuilder.Labels().String()
}
entry := logproto.Entry{
Timestamp: ts,
Line: line,
StructuredMetadata: logproto.FromLabelsToLabelAdapters(b.rowBuilders[rowIdx].metadataBuilder.Labels()),
Parsed: parsedLbs,
}
// The entry has captured everything it needs, so the row's scratch state
// can be cleared for the next record batch.
b.resetRowBuilder(rowIdx)
// Add entry to appropriate stream
idx, ok := b.streams[streamKey]
if !ok {
idx = len(b.data)
b.streams[streamKey] = idx
// The stream keeps the label string of the first entry that created it,
// including any error labels that entry carried.
b.data = append(b.data, push.Stream{Labels: lbsString})
}
b.data[idx].Entries = append(b.data[idx].Entries, entry)
b.count++
}
}
// ensureRowBuilders resizes the row scratch buffer to exactly newLen elements.
// Shrinking zeroes the dropped tail so its builders can be garbage collected;
// growing appends freshly initialised row builders.
func (b *streamsResultBuilder) ensureRowBuilders(newLen int) {
	current := len(b.rowBuilders)

	switch {
	case newLen == current:
		return
	case newLen < current:
		// Release the unused tail before truncating.
		clear(b.rowBuilders[newLen:])
		b.rowBuilders = b.rowBuilders[:newLen]
		return
	}

	// newLen > current: extend the slice, then initialise only the new slots.
	b.rowBuilders = append(b.rowBuilders, make([]rowBuilder, newLen-current)...)
	added := b.rowBuilders[current:]
	for i := range added {
		added[i] = rowBuilder{
			lbsBuilder:      labels.NewBuilder(labels.EmptyLabels()),
			metadataBuilder: labels.NewBuilder(labels.EmptyLabels()),
			parsedBuilder:   labels.NewBuilder(labels.EmptyLabels()),
			parsedEmptyKeys: make([]string, 0),
			errorLabelKeys:  make([]string, 0),
		}
	}
}
// resetRowBuilder clears the scratch state of row i so the slot can be reused.
// The label builders and key slices are emptied in place rather than
// reallocated.
func (b *streamsResultBuilder) resetRowBuilder(i int) {
	rb := &b.rowBuilders[i]

	rb.timestamp, rb.line = time.Time{}, ""

	rb.lbsBuilder.Reset(labels.EmptyLabels())
	rb.metadataBuilder.Reset(labels.EmptyLabels())
	rb.parsedBuilder.Reset(labels.EmptyLabels())

	// Truncate while keeping the backing arrays.
	rb.parsedEmptyKeys = rb.parsedEmptyKeys[:0]
	rb.errorLabelKeys = rb.errorLabelKeys[:0]
}
// forEachNotNullRowColValue calls f with every row index in [0, numRows) for
// which col does not hold a NULL, in ascending order.
func forEachNotNullRowColValue(numRows int, col arrow.Array, f func(rowIdx int)) {
	for row := 0; row < numRows; row++ {
		if !col.IsNull(row) {
			f(row)
		}
	}
}
// Build sorts and deduplicates the collected entries and returns them as a
// [logqlmodel.Result].
//
// Within each stream, entries are ordered by timestamp (descending for
// logproto.BACKWARD, ascending otherwise). Entries that share a timestamp are
// further ordered by log line, so that repeated (timestamp, line) pairs end up
// next to each other; dedupeEntries only removes adjacent duplicates, and
// ordering by timestamp alone would let duplicates slip through whenever other
// lines with the same timestamp sit between them. The backward order is the
// exact reverse of the forward order.
//
// s is returned as the result statistics with Summary.TotalEntriesReturned set
// to the number of entries left after deduplication. Headers and warnings are
// taken from md. The builder's own count (reported by Len) is not changed.
func (b *streamsResultBuilder) Build(s stats.Result, md *metadata.Context) logqlmodel.Result {
	backward := b.direction == logproto.BACKWARD

	total := 0
	// Executor does not guarantee order of entries, so we sort them here.
	for i := range b.data {
		entries := b.data[i].Entries
		sort.Slice(entries, func(x, y int) bool {
			first, second := &entries[x], &entries[y]
			if backward {
				first, second = second, first
			}
			if !first.Timestamp.Equal(second.Timestamp) {
				return first.Timestamp.Before(second.Timestamp)
			}
			// Tie-break on the line so identical entries become adjacent.
			return first.Line < second.Line
		})

		// Deduplicate entries with the same (timestamp, line) within each
		// stream. Multiple data object sections can contain the same log entry,
		// and the merge pipeline concatenates them without deduplication.
		b.data[i].Entries = dedupeEntries(entries)

		// Count after dedup so the stats reflect the actual result size.
		total += len(b.data[i].Entries)
	}
	sort.Sort(b.data)

	s.Summary.TotalEntriesReturned = int64(total)

	return logqlmodel.Result{
		Data:       b.data,
		Statistics: s,
		Headers:    md.Headers(),
		Warnings:   md.Warnings(),
	}
}
// dedupeEntries drops entries that are equal to the entry kept immediately
// before them. Equality is decided by Entry.Equal, i.e. all fields (timestamp,
// line, structured metadata, and parsed labels) must match. Only adjacent
// duplicates are detected, so the caller must have sorted the slice in a way
// that places duplicates next to each other.
//
// The deduplication happens in place: the returned slice shares the backing
// array of entries.
func dedupeEntries(entries []logproto.Entry) []logproto.Entry {
	if len(entries) < 2 {
		return entries
	}

	// kept is the prefix of entries holding the unique entries found so far.
	// Appending to it overwrites slots that have already been examined, since
	// len(kept) never exceeds the read position.
	kept := entries[:1]
	for i := 1; i < len(entries); i++ {
		if entries[i].Equal(&kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, entries[i])
	}
	return kept
}
// Len returns the number of entries appended by CollectRecord so far.
func (b *streamsResultBuilder) Len() int {
return b.count
}
// vectorResultBuilder collects one sample per record row into a promql.Vector.
type vectorResultBuilder struct {
// data holds the samples collected so far.
data promql.Vector
// lblsBuilder is a scratch label builder reused for every row.
lblsBuilder *labels.Builder
}
// newVectorResultBuilder returns a vector builder with an empty, non-nil
// sample vector and a fresh scratch label builder.
func newVectorResultBuilder() *vectorResultBuilder {
	b := new(vectorResultBuilder)
	b.data = promql.Vector{}
	b.lblsBuilder = labels.NewBuilder(labels.EmptyLabels())
	return b
}
// CollectRecord appends one sample for every row of rec that collectRow
// accepts; rejected rows are skipped.
func (b *vectorResultBuilder) CollectRecord(rec arrow.RecordBatch) {
	rows := int(rec.NumRows())
	for row := 0; row < rows; row++ {
		if sample, ok := b.collectRow(rec, row); ok {
			b.data = append(b.data, sample)
		}
	}
}
// collectRow converts row i of rec into a sample by delegating to
// collectSamplesFromRow with the builder's scratch label builder. The boolean
// result is false when the row is rejected.
func (b *vectorResultBuilder) collectRow(rec arrow.RecordBatch, i int) (promql.Sample, bool) {
return collectSamplesFromRow(b.lblsBuilder, rec, i)
}
// Build sorts the collected samples by their label sets (using labels.Compare)
// and returns them as a [logqlmodel.Result] carrying the given statistics and
// the headers and warnings from md.
func (b *vectorResultBuilder) Build(s stats.Result, md *metadata.Context) logqlmodel.Result {
	vec := b.data
	byMetric := func(l, r int) bool {
		return labels.Compare(vec[l].Metric, vec[r].Metric) < 0
	}
	sort.Slice(vec, byMetric)

	res := logqlmodel.Result{
		Data:       vec,
		Statistics: s,
		Headers:    md.Headers(),
		Warnings:   md.Warnings(),
	}
	return res
}
// Len returns the number of samples collected so far.
func (b *vectorResultBuilder) Len() int {
return len(b.data)
}
// matrixResultBuilder groups samples into series and produces a promql.Matrix.
type matrixResultBuilder struct {
// seriesIndex maps labels.StableHash of a series' label set to the series
// accumulated so far.
// NOTE(review): distinct label sets whose hashes collide would be merged
// into one series — confirm this is acceptable.
seriesIndex map[uint64]promql.Series
// lblsBuilder is a scratch label builder reused for every row.
lblsBuilder *labels.Builder
}
// newMatrixResultBuilder returns a matrix builder with an empty series index
// and a fresh scratch label builder.
func newMatrixResultBuilder() *matrixResultBuilder {
	b := new(matrixResultBuilder)
	b.seriesIndex = make(map[uint64]promql.Series)
	b.lblsBuilder = labels.NewBuilder(labels.EmptyLabels())
	return b
}
// CollectRecord turns every accepted row of rec into a float point and appends
// it to the series identified by the stable hash of the row's label set,
// creating that series on first sight. Rows rejected by collectRow are
// skipped.
func (b *matrixResultBuilder) CollectRecord(rec arrow.RecordBatch) {
	rows := int(rec.NumRows())
	for row := 0; row < rows; row++ {
		sample, ok := b.collectRow(rec, row)
		if !ok {
			continue
		}

		// TODO(ashwanth): apply query series limits.

		// Series are keyed by the hash of their labels.
		key := labels.StableHash(sample.Metric)
		point := promql.FPoint{T: sample.T, F: sample.F}

		current, found := b.seriesIndex[key]
		if !found {
			current = promql.Series{
				Metric: sample.Metric,
				Floats: make([]promql.FPoint, 0, 1),
			}
		}
		current.Floats = append(current.Floats, point)

		// The map stores series by value, so the updated copy is written back.
		b.seriesIndex[key] = current
	}
}
// collectRow converts row i of rec into a sample by delegating to
// collectSamplesFromRow with the builder's scratch label builder. The boolean
// result is false when the row is rejected.
func (b *matrixResultBuilder) collectRow(rec arrow.RecordBatch, i int) (promql.Sample, bool) {
return collectSamplesFromRow(b.lblsBuilder, rec, i)
}
// Build gathers all indexed series into a promql.Matrix, sorts it with
// sort.Sort, and returns it as a [logqlmodel.Result] carrying the given
// statistics and the headers and warnings from md.
func (b *matrixResultBuilder) Build(s stats.Result, md *metadata.Context) logqlmodel.Result {
	matrix := make(promql.Matrix, 0, len(b.seriesIndex))
	for _, sr := range b.seriesIndex {
		matrix = append(matrix, sr)
	}

	// Map iteration order is random; sorting makes the output deterministic.
	sort.Sort(matrix)

	res := logqlmodel.Result{
		Data:       matrix,
		Statistics: s,
		Headers:    md.Headers(),
		Warnings:   md.Warnings(),
	}
	return res
}
// Len returns the total number of points (float and histogram) across all
// series collected so far.
func (b *matrixResultBuilder) Len() int {
	var points int
	for _, sr := range b.seriesIndex {
		points += len(sr.Floats)
		points += len(sr.Histograms)
	}
	return points
}
// collectSamplesFromRow converts row i of rec into a single [promql.Sample].
//
// builder is a scratch label builder; it is reset at the start of the call and
// its previous contents are discarded. Columns are handled by identity:
//   - the timestamp column provides sample.T (converted to milliseconds),
//   - the value column provides sample.F,
//   - every other column with the Loki string data type contributes a label
//     named after the column's short name; NULL values are skipped.
//
// Columns matching none of the above are ignored.
//
// The boolean result is false, and the returned sample is the zero value, when
// the row cannot be converted: a column name fails to parse, the timestamp or
// value is NULL/invalid, or a column's Arrow array does not have the expected
// concrete type. All type assertions are checked so that an unexpected array
// type rejects the row instead of panicking.
func collectSamplesFromRow(builder *labels.Builder, rec arrow.RecordBatch, i int) (promql.Sample, bool) {
	var sample promql.Sample
	builder.Reset(labels.EmptyLabels())

	// emptyParsedKeys collects label names whose value is the empty string (not NULL).
	// Pipeline stages such as `| json` can produce parsed labels with empty values, and
	// these must appear in the result to match classic Loki engine behaviour.
	// The Prometheus labels.Builder treats Set(name, "") as a deletion, so we handle
	// these labels separately using labels.NewScratchBuilder at the end.
	var emptyParsedKeys []string

	// TODO: we add a lot of overhead by reading row by row. Switch to vectorized conversion.
	schema := rec.Schema()
	for colIdx := range int(rec.NumCols()) {
		col := rec.Column(colIdx)
		ident, err := semconv.ParseFQN(schema.Field(colIdx).Name)
		if err != nil {
			return promql.Sample{}, false
		}

		switch {
		case ident.Equal(semconv.ColumnIdentTimestamp):
			// Ignore column values that are NULL or invalid
			if col.IsNull(i) || !col.IsValid(i) {
				return promql.Sample{}, false
			}
			tsCol, ok := col.(*array.Timestamp)
			if !ok {
				return promql.Sample{}, false
			}
			// [promql.Sample] expects milliseconds as timestamp unit
			sample.T = int64(tsCol.Value(i) / 1e6)

		case ident.Equal(semconv.ColumnIdentValue):
			// Ignore column values that are NULL or invalid
			if col.IsNull(i) || !col.IsValid(i) {
				return promql.Sample{}, false
			}
			valCol, ok := col.(*array.Float64)
			if !ok {
				return promql.Sample{}, false
			}
			sample.F = valCol.Value(i)

		// allow any string columns
		case ident.DataType() == types.Loki.String:
			// The aggregator schema contains every label seen across all series. For a
			// series that doesn't have a particular label, BuildRecord calls AppendNull,
			// so IsNull(i)==true here means this label is simply absent for this series.
			// Skip it rather than treating the empty string returned by Value(i) as a
			// genuine empty label value.
			if col.IsNull(i) {
				continue
			}
			strCol, ok := col.(*array.String)
			if !ok {
				return promql.Sample{}, false
			}

			shortName := ident.ShortName()
			val := strCol.Value(i)
			if val == "" {
				// Stream labels and structured metadata should ideally never carry empty values:
				// Loki removes them at ingestion time. Parsed labels (and ambiguous columns that
				// originate from parsed labels) can legitimately be empty; we track them
				// to add them back below after the Prometheus builder has finished.
				if ident.ColumnType() != types.ColumnTypeLabel &&
					ident.ColumnType() != types.ColumnTypeMetadata {
					emptyParsedKeys = append(emptyParsedKeys, shortName)
				}
				continue
			}
			builder.Set(shortName, val)
		}
	}

	if len(emptyParsedKeys) == 0 {
		sample.Metric = builder.Labels()
		return sample, true
	}

	// labels.Builder silently drops empty-valued labels, so we build the final
	// label set manually when there are empty parsed labels.
	lbs := builder.Labels()
	scratch := labels.NewScratchBuilder(lbs.Len() + len(emptyParsedKeys))
	lbs.Range(func(l labels.Label) {
		scratch.Add(l.Name, l.Value)
	})
	for _, key := range emptyParsedKeys {
		scratch.Add(key, "")
	}
	scratch.Sort()
	sample.Metric = scratch.Labels()

	return sample, true
}