Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/engine/internal/executor/util.go

118 lines
3.3 KiB

package executor
import (
"fmt"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/cespare/xxhash/v2"
"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
)
// separator is the single zero byte that the caches in this file write to
// their digest in front of every hashed component, so that consecutive
// components are delimited in the hashed byte stream.
var separator = []byte{0}
// columnForIdent looks up the column ([arrow.Array]) that belongs to ident in
// the given input batch ([arrow.RecordBatch]) and returns it together with its
// index in the batch schema.
//
// The lookup uses the fully qualified name of the identifier. An error is
// returned when no column with that name exists, or when more than one column
// in the schema carries that name. On error the returned column is nil and the
// returned index is -1.
func columnForIdent(ident *semconv.Identifier, batch arrow.RecordBatch) (arrow.Array, int, error) {
	fqn := ident.FQN()
	return columnForFQN(fqn, batch)
}
// columnForFQN resolves the fully qualified column name fqn against the schema
// of batch and returns the matching column together with its schema index.
//
// Exactly one field must match: zero matches and multiple matches both yield
// an error, in which case the column is nil and the index is -1.
func columnForFQN(fqn string, batch arrow.RecordBatch) (arrow.Array, int, error) {
	matches := batch.Schema().FieldIndices(fqn)

	switch len(matches) {
	case 0:
		return nil, -1, fmt.Errorf("column not found for %s", fqn)
	case 1:
		idx := matches[0]
		return batch.Column(idx), idx, nil
	default:
		return nil, -1, fmt.Errorf("multiple columns found for %s", fqn)
	}
}
// labelValuesCache returns the label values of a given row for the range and vector aggregators and
// caches them in order to reduce object allocations for repeated label sets. A lookup first scans the
// row for non-empty label values and computes their xxhash. Only in case of a cache miss does it scan
// the row a second time and allocate a slice for the label values.
//
// NOTE: entries are keyed by the 64-bit hash alone; a cache hit is not re-checked against the row's
// actual values.
type labelValuesCache struct {
// digest is the hasher that is reset and reused on every lookup.
digest *xxhash.Digest
// cache maps the hash of a row's non-empty label values to the slice that was built for that hash.
cache map[uint64][]string
}
// newLabelValuesCache creates a labelValuesCache with a fresh xxhash digest
// and an empty, ready-to-write cache map.
func newLabelValuesCache() *labelValuesCache {
	c := new(labelValuesCache)
	c.digest = xxhash.New()
	c.cache = map[uint64][]string{}
	return c
}
// getLabelValues returns the non-empty values found at position row across
// arrays, in the order of arrays.
//
// The values are hashed first; if a slice was already stored for that hash it
// is returned as-is, otherwise a new slice is built, stored and returned. The
// returned slice is the cached one, so repeated calls for rows that hash
// identically hand out the same slice.
func (c *labelValuesCache) getLabelValues(arrays []*array.String, row int) []string {
	// Pass 1: feed every non-empty value of the row into the reused digest.
	c.digest.Reset()
	for i := range arrays {
		v := arrays[i].Value(row)
		if v == "" {
			continue
		}
		// The results are discarded on purpose: xxhash documents these
		// writes as never returning an error.
		_, _ = c.digest.Write(separator)
		_, _ = c.digest.WriteString(v)
	}
	sum := c.digest.Sum64()

	if cached, hit := c.cache[sum]; hit {
		return cached
	}

	// Pass 2 (cache miss only): collect the same non-empty values into a
	// freshly allocated slice and remember it under the hash.
	values := make([]string, 0, len(arrays))
	for i := range arrays {
		if v := arrays[i].Value(row); v != "" {
			values = append(values, v)
		}
	}
	c.cache[sum] = values
	return values
}
// fieldsCache returns the labels (as [arrow.Field]) of a given row for the range and vector aggregators
// and caches them in order to reduce object allocations for repeated label sets. A lookup first scans
// the row for non-empty labels and computes the xxhash of their field names. Only in case of a cache
// miss does it scan the row a second time and allocate a slice for the label fields.
//
// NOTE: entries are keyed by the 64-bit hash alone; a cache hit is not re-checked against the row's
// actual set of fields.
type fieldsCache struct {
// digest is the hasher that is reset and reused on every lookup.
digest *xxhash.Digest
// cache maps the hash of the names of a row's non-empty labels to the field slice built for that hash.
cache map[uint64][]arrow.Field
}
// newFieldsCache creates a fieldsCache with a fresh xxhash digest and an
// empty, ready-to-write cache map.
func newFieldsCache() *fieldsCache {
	c := new(fieldsCache)
	c.digest = xxhash.New()
	c.cache = map[uint64][]arrow.Field{}
	return c
}
// getFields returns the fields whose corresponding array holds a non-empty
// value at position row, in the order of arrays.
//
// fields[i] is used as the field for arrays[i]; this assumes fields has at
// least as many entries as arrays (verify against callers).
//
// The names of the selected fields are hashed first; if a slice was already
// stored for that hash it is returned as-is, otherwise a new slice is built,
// stored and returned. The returned slice is the cached one, so repeated calls
// for rows that hash identically hand out the same slice.
func (c *fieldsCache) getFields(arrays []*array.String, fields []arrow.Field, row int) []arrow.Field {
	// Pass 1: feed the name of every field with a non-empty value into the
	// reused digest.
	c.digest.Reset()
	for i := range arrays {
		if arrays[i].Value(row) == "" {
			continue
		}
		// The results are discarded on purpose: xxhash documents these
		// writes as never returning an error.
		_, _ = c.digest.Write(separator)
		_, _ = c.digest.WriteString(fields[i].Name)
	}
	sum := c.digest.Sum64()

	if cached, hit := c.cache[sum]; hit {
		return cached
	}

	// Pass 2 (cache miss only): collect the same fields into a freshly
	// allocated slice and remember it under the hash.
	selected := make([]arrow.Field, 0, len(arrays))
	for i := range arrays {
		if arrays[i].Value(row) != "" {
			selected = append(selected, fields[i])
		}
	}
	c.cache[sum] = selected
	return selected
}