mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
8.9 KiB
342 lines
8.9 KiB
|
9 months ago
|
package executor
|
||
|
|
|
||
|
|
import (
|
||
|
|
"fmt"
|
||
|
6 months ago
|
"slices"
|
||
|
9 months ago
|
|
||
|
|
"github.com/apache/arrow-go/v18/arrow"
|
||
|
|
"github.com/apache/arrow-go/v18/arrow/array"
|
||
|
|
"github.com/apache/arrow-go/v18/arrow/memory"
|
||
|
|
|
||
|
8 months ago
|
"github.com/grafana/loki/v3/pkg/engine/internal/datatype"
|
||
|
9 months ago
|
"github.com/grafana/loki/v3/pkg/engine/internal/types"
|
||
|
3 months ago
|
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical"
|
||
|
9 months ago
|
)
|
||
|
|
|
||
|
|
// expressionEvaluator evaluates physical expressions against Arrow records.
// It has no fields, so its zero value is ready to use.
type expressionEvaluator struct{}
|
||
|
|
|
||
|
9 months ago
|
func (e expressionEvaluator) eval(expr physical.Expression, input arrow.Record) (ColumnVector, error) {
|
||
|
9 months ago
|
switch expr := expr.(type) {
|
||
|
|
|
||
|
|
case *physical.LiteralExpr:
|
||
|
|
return &Scalar{
|
||
|
8 months ago
|
value: expr.Literal,
|
||
|
9 months ago
|
rows: input.NumRows(),
|
||
|
8 months ago
|
ct: types.ColumnTypeAmbiguous,
|
||
|
9 months ago
|
}, nil
|
||
|
|
|
||
|
|
case *physical.ColumnExpr:
|
||
|
6 months ago
|
fieldIndices := input.Schema().FieldIndices(expr.Ref.Column)
|
||
|
|
if len(fieldIndices) > 0 {
|
||
|
|
// For non-ambiguous look-ups, look for an exact match
|
||
|
|
if expr.Ref.Type != types.ColumnTypeAmbiguous {
|
||
|
|
for _, idx := range fieldIndices {
|
||
|
|
field := input.Schema().Field(idx)
|
||
|
|
dt, ok := field.Metadata.GetValue(types.MetadataKeyColumnDataType)
|
||
|
|
if !ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
ct, ok := field.Metadata.GetValue(types.MetadataKeyColumnType)
|
||
|
|
if !ok || ct != expr.Ref.Type.String() {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
return &Array{
|
||
|
|
array: input.Column(idx),
|
||
|
|
dt: datatype.FromString(dt),
|
||
|
|
ct: types.ColumnTypeFromString(ct),
|
||
|
|
rows: input.NumRows(),
|
||
|
|
}, nil
|
||
|
8 months ago
|
}
|
||
|
6 months ago
|
} else {
|
||
|
|
// For ambiguous columns, collect all matching columns and order by precedence
|
||
|
|
var vecs []ColumnVector
|
||
|
|
for _, idx := range fieldIndices {
|
||
|
|
field := input.Schema().Field(idx)
|
||
|
|
dt, ok := field.Metadata.GetValue(types.MetadataKeyColumnDataType)
|
||
|
|
if !ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
ct, ok := field.Metadata.GetValue(types.MetadataKeyColumnType)
|
||
|
|
if !ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
// TODO(ashwanth): Support other data types in CoalesceVector.
|
||
|
|
// For now, ensure all vectors are strings to avoid type conflicts.
|
||
|
6 months ago
|
if datatype.Loki.String.String() != dt {
|
||
|
6 months ago
|
return nil, fmt.Errorf("column %s has datatype %s, but expression expects string", expr.Ref.Column, dt)
|
||
|
|
}
|
||
|
|
|
||
|
|
vecs = append(vecs, &Array{
|
||
|
|
array: input.Column(idx),
|
||
|
|
dt: datatype.FromString(dt),
|
||
|
|
ct: types.ColumnTypeFromString(ct),
|
||
|
|
rows: input.NumRows(),
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
if len(vecs) > 1 {
|
||
|
|
// Multiple matches - sort by precedence and create CoalesceVector
|
||
|
|
slices.SortFunc(vecs, func(a, b ColumnVector) int {
|
||
|
|
return types.ColumnTypePrecedence(a.ColumnType()) - types.ColumnTypePrecedence(b.ColumnType())
|
||
|
|
})
|
||
|
|
|
||
|
|
return &CoalesceVector{
|
||
|
|
vectors: vecs,
|
||
|
|
rows: input.NumRows(),
|
||
|
|
}, nil
|
||
|
|
} else if len(vecs) == 1 {
|
||
|
|
return vecs[0], nil
|
||
|
8 months ago
|
}
|
||
|
9 months ago
|
}
|
||
|
6 months ago
|
|
||
|
9 months ago
|
}
|
||
|
6 months ago
|
|
||
|
7 months ago
|
// A non-existent column is represented as a string scalar with zero-value.
|
||
|
|
// This reflects current behaviour, where a label filter `| foo=""` would match all if `foo` is not defined.
|
||
|
|
return &Scalar{
|
||
|
6 months ago
|
value: datatype.NewLiteral(""),
|
||
|
7 months ago
|
rows: input.NumRows(),
|
||
|
|
ct: types.ColumnTypeGenerated,
|
||
|
|
}, nil
|
||
|
9 months ago
|
|
||
|
|
case *physical.UnaryExpr:
|
||
|
9 months ago
|
lhr, err := e.eval(expr.Left, input)
|
||
|
9 months ago
|
if err != nil {
|
||
|
|
return nil, err
|
||
|
|
}
|
||
|
9 months ago
|
|
||
|
8 months ago
|
fn, err := unaryFunctions.GetForSignature(expr.Op, lhr.Type().ArrowType())
|
||
|
9 months ago
|
if err != nil {
|
||
|
|
return nil, fmt.Errorf("failed to lookup unary function: %w", err)
|
||
|
|
}
|
||
|
|
return fn.Evaluate(lhr)
|
||
|
9 months ago
|
|
||
|
|
case *physical.BinaryExpr:
|
||
|
9 months ago
|
lhs, err := e.eval(expr.Left, input)
|
||
|
9 months ago
|
if err != nil {
|
||
|
|
return nil, err
|
||
|
|
}
|
||
|
9 months ago
|
rhs, err := e.eval(expr.Right, input)
|
||
|
9 months ago
|
if err != nil {
|
||
|
|
return nil, err
|
||
|
|
}
|
||
|
|
|
||
|
9 months ago
|
// At the moment we only support functions that accept the same input types.
|
||
|
8 months ago
|
if lhs.Type().ArrowType().ID() != rhs.Type().ArrowType().ID() {
|
||
|
|
return nil, fmt.Errorf("failed to lookup binary function for signature %v(%v,%v): types do not match", expr.Op, lhs.Type().ArrowType(), rhs.Type().ArrowType())
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
8 months ago
|
fn, err := binaryFunctions.GetForSignature(expr.Op, lhs.Type().ArrowType())
|
||
|
9 months ago
|
if err != nil {
|
||
|
8 months ago
|
return nil, fmt.Errorf("failed to lookup binary function for signature %v(%v,%v): %w", expr.Op, lhs.Type().ArrowType(), rhs.Type().ArrowType(), err)
|
||
|
9 months ago
|
}
|
||
|
|
return fn.Evaluate(lhs, rhs)
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
return nil, fmt.Errorf("unknown expression: %v", expr)
|
||
|
|
}
|
||
|
|
|
||
|
9 months ago
|
// newFunc returns a new function that can evaluate an input against a binded expression.
|
||
|
|
func (e expressionEvaluator) newFunc(expr physical.Expression) evalFunc {
|
||
|
|
return func(input arrow.Record) (ColumnVector, error) {
|
||
|
|
return e.eval(expr, input)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// evalFunc evaluates an already-bound expression against input and returns
// the resulting column vector, or an error if evaluation fails.
type evalFunc func(input arrow.Record) (ColumnVector, error)
|
||
|
|
|
||
|
9 months ago
|
// ColumnVector represents columnar values from evaluated expressions.
|
||
|
|
type ColumnVector interface {
|
||
|
|
// ToArray returns the underlying Arrow array representation of the column vector.
|
||
|
|
ToArray() arrow.Array
|
||
|
|
// Value returns the value at the specified index position in the column vector.
|
||
|
9 months ago
|
Value(i int) any
|
||
|
8 months ago
|
// Type returns the Loki data type of the column vector.
|
||
|
|
Type() datatype.DataType
|
||
|
|
// ColumnType returns the type of column the vector originates from.
|
||
|
|
ColumnType() types.ColumnType
|
||
|
9 months ago
|
// Len returns the length of the vector
|
||
|
|
Len() int64
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
// Scalar represents a single value repeated any number of times.
|
||
|
|
type Scalar struct {
|
||
|
8 months ago
|
value datatype.Literal
|
||
|
9 months ago
|
rows int64
|
||
|
8 months ago
|
ct types.ColumnType
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
var _ ColumnVector = (*Scalar)(nil)
|
||
|
|
|
||
|
|
// ToArray implements ColumnVector.
|
||
|
|
func (v *Scalar) ToArray() arrow.Array {
|
||
|
|
mem := memory.NewGoAllocator()
|
||
|
8 months ago
|
builder := array.NewBuilder(mem, v.Type().ArrowType())
|
||
|
9 months ago
|
defer builder.Release()
|
||
|
|
|
||
|
8 months ago
|
switch builder := builder.(type) {
|
||
|
|
case *array.NullBuilder:
|
||
|
|
for range v.rows {
|
||
|
9 months ago
|
builder.AppendNull()
|
||
|
|
}
|
||
|
8 months ago
|
case *array.BooleanBuilder:
|
||
|
|
value := v.value.Any().(bool)
|
||
|
|
for range v.rows {
|
||
|
|
builder.Append(value)
|
||
|
|
}
|
||
|
|
case *array.StringBuilder:
|
||
|
|
value := v.value.Any().(string)
|
||
|
|
for range v.rows {
|
||
|
|
builder.Append(value)
|
||
|
|
}
|
||
|
|
case *array.Int64Builder:
|
||
|
|
value := v.value.Any().(int64)
|
||
|
|
for range v.rows {
|
||
|
|
builder.Append(value)
|
||
|
|
}
|
||
|
|
case *array.Float64Builder:
|
||
|
|
value := v.value.Any().(float64)
|
||
|
|
for range v.rows {
|
||
|
|
builder.Append(value)
|
||
|
|
}
|
||
|
9 months ago
|
}
|
||
|
|
return builder.NewArray()
|
||
|
|
}
|
||
|
|
|
||
|
|
// Value implements ColumnVector.
|
||
|
9 months ago
|
func (v *Scalar) Value(_ int) any {
|
||
|
8 months ago
|
return v.value.Any()
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
// Type implements ColumnVector.
|
||
|
8 months ago
|
func (v *Scalar) Type() datatype.DataType {
|
||
|
|
return v.value.Type()
|
||
|
|
}
|
||
|
|
|
||
|
|
// ColumnType implements ColumnVector.
|
||
|
|
func (v *Scalar) ColumnType() types.ColumnType {
|
||
|
|
return v.ct
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
9 months ago
|
// Len implements ColumnVector.
|
||
|
8 months ago
|
func (v *Scalar) Len() int64 {
|
||
|
9 months ago
|
return v.rows
|
||
|
|
}
|
||
|
|
|
||
|
9 months ago
|
// Array represents a column of data, stored as an [arrow.Array].
|
||
|
|
type Array struct {
|
||
|
|
array arrow.Array
|
||
|
8 months ago
|
dt datatype.DataType
|
||
|
|
ct types.ColumnType
|
||
|
9 months ago
|
rows int64
|
||
|
|
}
|
||
|
|
|
||
|
9 months ago
|
var _ ColumnVector = (*Array)(nil)
|
||
|
|
|
||
|
9 months ago
|
// ToArray implements ColumnVector.
|
||
|
|
func (a *Array) ToArray() arrow.Array {
|
||
|
|
return a.array
|
||
|
|
}
|
||
|
|
|
||
|
|
// Value implements ColumnVector.
|
||
|
9 months ago
|
func (a *Array) Value(i int) any {
|
||
|
|
if a.array.IsNull(i) || !a.array.IsValid(i) {
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
|
||
|
|
switch arr := a.array.(type) {
|
||
|
|
case *array.Boolean:
|
||
|
|
return arr.Value(i)
|
||
|
|
case *array.String:
|
||
|
|
return arr.Value(i)
|
||
|
|
case *array.Int64:
|
||
|
|
return arr.Value(i)
|
||
|
|
case *array.Uint64:
|
||
|
|
return arr.Value(i)
|
||
|
|
case *array.Float64:
|
||
|
|
return arr.Value(i)
|
||
|
|
default:
|
||
|
|
return nil
|
||
|
|
}
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
// Type implements ColumnVector.
|
||
|
8 months ago
|
func (a *Array) Type() datatype.DataType {
|
||
|
|
return a.dt
|
||
|
|
}
|
||
|
|
|
||
|
|
// ColumnType implements ColumnVector.
|
||
|
|
func (a *Array) ColumnType() types.ColumnType {
|
||
|
|
return a.ct
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
9 months ago
|
// Len implements ColumnVector.
|
||
|
|
func (a *Array) Len() int64 {
|
||
|
|
return int64(a.array.Len())
|
||
|
|
}
|
||
|
6 months ago
|
|
||
|
|
// CoalesceVector represents multiple columns with the same name but different [types.ColumnType]
|
||
|
|
// Vectors are ordered by precedence (highest precedence first).
|
||
|
|
type CoalesceVector struct {
|
||
|
|
vectors []ColumnVector // Ordered by precedence (Generated first, Label last)
|
||
|
|
rows int64
|
||
|
|
}
|
||
|
|
|
||
|
|
var _ ColumnVector = (*CoalesceVector)(nil)
|
||
|
|
|
||
|
|
// ToArray implements [ColumnVector].
|
||
|
|
func (m *CoalesceVector) ToArray() arrow.Array {
|
||
|
|
mem := memory.NewGoAllocator()
|
||
|
|
builder := array.NewBuilder(mem, m.Type().ArrowType())
|
||
|
|
defer builder.Release()
|
||
|
|
|
||
|
|
// use Value() method which already handles precedence logic
|
||
|
|
for i := 0; i < int(m.rows); i++ {
|
||
|
|
val := m.Value(i)
|
||
|
|
if val == nil {
|
||
|
|
builder.AppendNull()
|
||
|
|
} else {
|
||
|
|
// [CoalesceVector] only supports [datatype.String] for now
|
||
|
|
if strVal, ok := val.(string); ok {
|
||
|
|
builder.(*array.StringBuilder).Append(strVal)
|
||
|
|
} else {
|
||
|
|
// Fallback: convert to string representation
|
||
|
|
builder.(*array.StringBuilder).Append(fmt.Sprintf("%v", val))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return builder.NewArray()
|
||
|
|
}
|
||
|
|
|
||
|
|
// Value returns the value at the specified index position considering the precedence rules.
|
||
|
|
func (m *CoalesceVector) Value(i int) any {
|
||
|
|
// Try each vector in precedence order
|
||
|
|
for _, vec := range m.vectors {
|
||
|
|
if val := vec.Value(i); val != nil {
|
||
|
|
return val
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// Type implements ColumnVector.
|
||
|
|
func (m *CoalesceVector) Type() datatype.DataType {
|
||
|
|
// TODO: Support other data types in CoalesceVector.
|
||
|
6 months ago
|
return datatype.Loki.String
|
||
|
6 months ago
|
}
|
||
|
|
|
||
|
|
// ColumnType implements ColumnVector.
|
||
|
|
func (m *CoalesceVector) ColumnType() types.ColumnType {
|
||
|
|
return types.ColumnTypeAmbiguous
|
||
|
|
}
|
||
|
|
|
||
|
|
// Len implements ColumnVector.
|
||
|
|
func (m *CoalesceVector) Len() int64 {
|
||
|
|
return m.rows
|
||
|
|
}
|