mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
386 lines
11 KiB
386 lines
11 KiB
package executor
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/apache/arrow-go/v18/arrow"
|
|
"github.com/apache/arrow-go/v18/arrow/array"
|
|
"github.com/apache/arrow-go/v18/arrow/memory"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/grafana/loki/v3/pkg/engine/internal/datatype"
|
|
"github.com/grafana/loki/v3/pkg/engine/internal/types"
|
|
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical"
|
|
)
|
|
|
|
var (
|
|
fields = []arrow.Field{
|
|
{Name: "name", Type: datatype.Arrow.String, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.String)},
|
|
{Name: "timestamp", Type: datatype.Arrow.Timestamp, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Timestamp)},
|
|
{Name: "value", Type: datatype.Arrow.Float, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Float)},
|
|
{Name: "valid", Type: datatype.Arrow.Bool, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Bool)},
|
|
}
|
|
sampledata = `Alice,1745487598764058205,0.2586284611568047,false
|
|
Bob,1745487598764058305,0.7823145698741236,true
|
|
Charlie,1745487598764058405,0.3451289756123478,false
|
|
David,1745487598764058505,0.9217834561278945,true
|
|
Eve,1745487598764058605,0.1245789632145789,false
|
|
Frank,1745487598764058705,0.5678912345678912,true
|
|
Grace,1745487598764058805,0.8912345678912345,false
|
|
Hannah,1745487598764058905,0.2345678912345678,true
|
|
Ian,1745487598764059005,0.6789123456789123,false
|
|
Julia,1745487598764059105,0.4123456789123456,true`
|
|
)
|
|
|
|
func TestEvaluateLiteralExpression(t *testing.T) {
|
|
for _, tt := range []struct {
|
|
name string
|
|
value any
|
|
want any
|
|
arrowType arrow.Type
|
|
}{
|
|
{
|
|
name: "null",
|
|
value: nil,
|
|
arrowType: arrow.NULL,
|
|
},
|
|
{
|
|
name: "bool",
|
|
value: true,
|
|
arrowType: arrow.BOOL,
|
|
},
|
|
{
|
|
name: "str",
|
|
value: "loki",
|
|
arrowType: arrow.STRING,
|
|
},
|
|
{
|
|
name: "int",
|
|
value: int64(123456789),
|
|
arrowType: arrow.INT64,
|
|
},
|
|
{
|
|
name: "float",
|
|
value: 123.456789,
|
|
arrowType: arrow.FLOAT64,
|
|
},
|
|
{
|
|
name: "timestamp",
|
|
value: datatype.Timestamp(3600000000),
|
|
arrowType: arrow.INT64,
|
|
},
|
|
{
|
|
name: "duration",
|
|
value: datatype.Duration(3600000000),
|
|
arrowType: arrow.INT64,
|
|
},
|
|
{
|
|
name: "bytes",
|
|
value: datatype.Bytes(1024),
|
|
arrowType: arrow.INT64,
|
|
},
|
|
} {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
literal := physical.NewLiteral(tt.value)
|
|
e := expressionEvaluator{}
|
|
|
|
n := len(words)
|
|
rec := batch(n, time.Now())
|
|
colVec, err := e.eval(literal, rec)
|
|
require.NoError(t, err)
|
|
require.Equalf(t, tt.arrowType, colVec.Type().ArrowType().ID(), "expected: %v got: %v", tt.arrowType.String(), colVec.Type().ArrowType().ID().String())
|
|
|
|
for i := range n {
|
|
val := colVec.Value(i)
|
|
if tt.want != nil {
|
|
require.Equal(t, tt.want, val)
|
|
} else {
|
|
require.Equal(t, tt.value, val)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestEvaluateColumnExpression(t *testing.T) {
|
|
e := expressionEvaluator{}
|
|
|
|
t.Run("unknown column", func(t *testing.T) {
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "does_not_exist",
|
|
Type: types.ColumnTypeBuiltin,
|
|
},
|
|
}
|
|
|
|
n := len(words)
|
|
rec := batch(n, time.Now())
|
|
colVec, err := e.eval(colExpr, rec)
|
|
require.NoError(t, err)
|
|
|
|
_, ok := colVec.(*Scalar)
|
|
require.True(t, ok, "expected column vector to be a *Scalar, got %T", colVec)
|
|
require.Equal(t, arrow.STRING, colVec.Type().ArrowType().ID())
|
|
})
|
|
|
|
t.Run("string(message)", func(t *testing.T) {
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "message",
|
|
Type: types.ColumnTypeBuiltin,
|
|
},
|
|
}
|
|
|
|
n := len(words)
|
|
rec := batch(n, time.Now())
|
|
colVec, err := e.eval(colExpr, rec)
|
|
require.NoError(t, err)
|
|
require.Equal(t, arrow.STRING, colVec.Type().ArrowType().ID())
|
|
|
|
for i := range n {
|
|
val := colVec.Value(i)
|
|
require.Equal(t, words[i%len(words)], val)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestEvaluateBinaryExpression(t *testing.T) {
|
|
rec, err := CSVToArrow(fields, sampledata)
|
|
require.NoError(t, err)
|
|
defer rec.Release()
|
|
|
|
e := expressionEvaluator{}
|
|
|
|
t.Run("error if types do not match", func(t *testing.T) {
|
|
expr := &physical.BinaryExpr{
|
|
Left: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "name", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Right: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "timestamp", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Op: types.BinaryOpEq,
|
|
}
|
|
|
|
_, err := e.eval(expr, rec)
|
|
require.ErrorContains(t, err, "failed to lookup binary function for signature EQ(utf8,int64): types do not match")
|
|
})
|
|
|
|
t.Run("error if function for signature is not registered", func(t *testing.T) {
|
|
expr := &physical.BinaryExpr{
|
|
Left: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "name", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Right: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "name", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Op: types.BinaryOpXor,
|
|
}
|
|
|
|
_, err := e.eval(expr, rec)
|
|
require.ErrorContains(t, err, "failed to lookup binary function for signature XOR(utf8,utf8): not implemented")
|
|
})
|
|
|
|
t.Run("EQ(string,string)", func(t *testing.T) {
|
|
expr := &physical.BinaryExpr{
|
|
Left: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "name", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Right: physical.NewLiteral("Charlie"),
|
|
Op: types.BinaryOpEq,
|
|
}
|
|
|
|
res, err := e.eval(expr, rec)
|
|
require.NoError(t, err)
|
|
result := collectBooleanColumnVector(res)
|
|
require.Equal(t, []bool{false, false, true, false, false, false, false, false, false, false}, result)
|
|
})
|
|
|
|
t.Run("GT(float,float)", func(t *testing.T) {
|
|
expr := &physical.BinaryExpr{
|
|
Left: &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{Column: "value", Type: types.ColumnTypeBuiltin},
|
|
},
|
|
Right: physical.NewLiteral(0.5),
|
|
Op: types.BinaryOpGt,
|
|
}
|
|
|
|
res, err := e.eval(expr, rec)
|
|
require.NoError(t, err)
|
|
result := collectBooleanColumnVector(res)
|
|
require.Equal(t, []bool{false, true, false, true, false, true, true, false, true, false}, result)
|
|
})
|
|
}
|
|
|
|
func collectBooleanColumnVector(vec ColumnVector) []bool {
|
|
res := make([]bool, 0, vec.Len())
|
|
arr := vec.ToArray().(*array.Boolean)
|
|
for i := range int(vec.Len()) {
|
|
res = append(res, arr.Value(i))
|
|
}
|
|
return res
|
|
}
|
|
|
|
// words is the fixed vocabulary that batch cycles through to fill the
// "message" column; tests also use its length as their row count.
var words = []string{
	"one", "two", "three", "four", "five",
	"six", "seven", "eight", "nine", "ten",
}
|
|
|
|
func batch(n int, now time.Time) arrow.Record {
|
|
// 1. Create a memory allocator
|
|
mem := memory.NewGoAllocator()
|
|
|
|
// 2. Define the schema
|
|
schema := arrow.NewSchema(
|
|
[]arrow.Field{
|
|
{Name: "message", Type: datatype.Arrow.String, Metadata: datatype.ColumnMetadataBuiltinMessage},
|
|
{Name: "timestamp", Type: datatype.Arrow.Timestamp, Metadata: datatype.ColumnMetadataBuiltinTimestamp},
|
|
},
|
|
nil, // No metadata
|
|
)
|
|
|
|
// 3. Create builders for each column
|
|
logBuilder := array.NewStringBuilder(mem)
|
|
defer logBuilder.Release()
|
|
|
|
tsBuilder := array.NewTimestampBuilder(mem, &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"})
|
|
defer tsBuilder.Release()
|
|
|
|
// 4. Append data to the builders
|
|
logs := make([]string, n)
|
|
ts := make([]arrow.Timestamp, n)
|
|
|
|
for i := range n {
|
|
logs[i] = words[i%len(words)]
|
|
ts[i] = arrow.Timestamp(now.Add(time.Duration(i) * time.Second).UnixNano())
|
|
}
|
|
|
|
tsBuilder.AppendValues(ts, nil)
|
|
logBuilder.AppendValues(logs, nil)
|
|
|
|
// 5. Build the arrays
|
|
logArray := logBuilder.NewArray()
|
|
defer logArray.Release()
|
|
|
|
tsArray := tsBuilder.NewArray()
|
|
defer tsArray.Release()
|
|
|
|
// 6. Create the record
|
|
columns := []arrow.Array{logArray, tsArray}
|
|
record := array.NewRecord(schema, columns, int64(n))
|
|
|
|
return record
|
|
}
|
|
|
|
func TestEvaluateAmbiguousColumnExpression(t *testing.T) {
|
|
// Test precedence between generated, metadata, and label columns
|
|
fields := []arrow.Field{
|
|
{Name: "test", Type: arrow.BinaryTypes.String, Metadata: datatype.ColumnMetadata(types.ColumnTypeLabel, datatype.Loki.String)},
|
|
{Name: "test", Type: arrow.BinaryTypes.String, Metadata: datatype.ColumnMetadata(types.ColumnTypeMetadata, datatype.Loki.String)},
|
|
{Name: "test", Type: arrow.BinaryTypes.String, Metadata: datatype.ColumnMetadata(types.ColumnTypeGenerated, datatype.Loki.String)},
|
|
}
|
|
|
|
// CSV data where:
|
|
// Row 0: All columns have values - should pick generated (highest precedence)
|
|
// Row 1: Generated is null, others have values - should pick metadata
|
|
// Row 2: Generated and metadata are null - should pick label
|
|
// Row 3: All are null - should return null
|
|
data := `label_0,metadata_0,generated_0
|
|
label_1,metadata_1,null
|
|
label_2,null,null
|
|
null,null,null`
|
|
|
|
record, err := CSVToArrow(fields, data)
|
|
require.NoError(t, err)
|
|
defer record.Release()
|
|
|
|
e := expressionEvaluator{}
|
|
|
|
t.Run("ambiguous column should use per-row precedence order", func(t *testing.T) {
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "test",
|
|
Type: types.ColumnTypeAmbiguous,
|
|
},
|
|
}
|
|
|
|
colVec, err := e.eval(colExpr, record)
|
|
require.NoError(t, err)
|
|
require.IsType(t, &CoalesceVector{}, colVec)
|
|
require.Equal(t, arrow.STRING, colVec.Type().ArrowType().ID())
|
|
require.Equal(t, types.ColumnTypeAmbiguous, colVec.ColumnType())
|
|
|
|
// Test per-row precedence resolution
|
|
require.Equal(t, "generated_0", colVec.Value(0)) // Generated has highest precedence
|
|
require.Equal(t, "metadata_1", colVec.Value(1)) // Generated is null, metadata has next precedence
|
|
require.Equal(t, "label_2", colVec.Value(2)) // Generated and metadata are null, label has next precedence
|
|
require.Equal(t, nil, colVec.Value(3)) // All are null
|
|
})
|
|
|
|
t.Run("ToArray method should return correct Arrow array", func(t *testing.T) {
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "test",
|
|
Type: types.ColumnTypeAmbiguous,
|
|
},
|
|
}
|
|
|
|
colVec, err := e.eval(colExpr, record)
|
|
require.NoError(t, err)
|
|
require.IsType(t, &CoalesceVector{}, colVec)
|
|
|
|
arr := colVec.ToArray()
|
|
require.IsType(t, &array.String{}, arr)
|
|
stringArr := arr.(*array.String)
|
|
|
|
require.Equal(t, 4, stringArr.Len())
|
|
require.Equal(t, "generated_0", stringArr.Value(0))
|
|
require.Equal(t, "metadata_1", stringArr.Value(1))
|
|
require.Equal(t, "label_2", stringArr.Value(2))
|
|
require.True(t, stringArr.IsNull(3)) // Row 3 should be null
|
|
})
|
|
|
|
t.Run("look-up matching single column should return Array", func(t *testing.T) {
|
|
// Create a record with only one column type
|
|
fields := []arrow.Field{
|
|
{Name: "single", Type: arrow.BinaryTypes.String, Metadata: datatype.ColumnMetadata(types.ColumnTypeLabel, datatype.Loki.String)},
|
|
}
|
|
data := `label_0
|
|
label_1
|
|
label_2
|
|
`
|
|
|
|
singleRecord, err := CSVToArrow(fields, data)
|
|
require.NoError(t, err)
|
|
defer singleRecord.Release()
|
|
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "single",
|
|
Type: types.ColumnTypeAmbiguous,
|
|
},
|
|
}
|
|
|
|
colVec, err := e.eval(colExpr, singleRecord)
|
|
require.NoError(t, err)
|
|
require.IsType(t, &Array{}, colVec)
|
|
require.Equal(t, arrow.STRING, colVec.Type().ArrowType().ID())
|
|
require.Equal(t, types.ColumnTypeLabel, colVec.ColumnType())
|
|
|
|
// Test single column behavior
|
|
require.Equal(t, "label_0", colVec.Value(0))
|
|
require.Equal(t, "label_1", colVec.Value(1))
|
|
require.Equal(t, "label_2", colVec.Value(2))
|
|
})
|
|
|
|
t.Run("ambiguous column with no matching columns should return default scalar", func(t *testing.T) {
|
|
colExpr := &physical.ColumnExpr{
|
|
Ref: types.ColumnRef{
|
|
Column: "nonexistent",
|
|
Type: types.ColumnTypeAmbiguous,
|
|
},
|
|
}
|
|
|
|
colVec, err := e.eval(colExpr, record)
|
|
require.NoError(t, err)
|
|
require.IsType(t, &Scalar{}, colVec)
|
|
})
|
|
}
|
|
|