loki/pkg/engine/executor/util_test.go

package executor

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"

	"github.com/grafana/loki/v3/pkg/engine/internal/datatype"
	"github.com/grafana/loki/v3/pkg/engine/internal/types"
)
var (
	// incrementingIntPipeline generates batches with a single "id" column
	// whose values increment monotonically starting at 0.
	incrementingIntPipeline = newRecordGenerator(
		arrow.NewSchema([]arrow.Field{
			{Name: "id", Type: datatype.Arrow.Integer, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Integer)},
		}, nil),
		func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record {
			builder := array.NewInt64Builder(memory.DefaultAllocator)
			defer builder.Release()

			// Fill the batch until either the batch size or the total row limit is reached.
			rows := int64(0)
			for ; rows < batchSize && offset+rows < maxRows; rows++ {
				builder.Append(offset + rows)
			}

			data := builder.NewArray()
			defer data.Release()

			columns := []arrow.Array{data}
			return array.NewRecord(schema, columns, rows)
		},
	)
)
// ascendingTimestampPipeline generates rows whose timestamps increase from start.
func ascendingTimestampPipeline(start time.Time) *recordGenerator {
	return timestampPipeline(start, ascending)
}

// descendingTimestampPipeline generates rows whose timestamps decrease from start.
func descendingTimestampPipeline(start time.Time) *recordGenerator {
	return timestampPipeline(start, descending)
}

// ascending and descending act as sign multipliers for the time offsets added
// to the start timestamp in timestampPipeline.
const (
	ascending  = time.Duration(1)
	descending = time.Duration(-1)
)
// timestampPipeline generates batches with an "id" column and a "timestamp"
// column. Each row's timestamp is start plus the batch's starting row offset
// in seconds plus the row's index within the batch in milliseconds, with
// order (±1) setting the direction.
func timestampPipeline(start time.Time, order time.Duration) *recordGenerator {
	return newRecordGenerator(
		arrow.NewSchema([]arrow.Field{
			{Name: "id", Type: datatype.Arrow.Integer, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Integer)},
			{Name: "timestamp", Type: datatype.Arrow.Timestamp, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Timestamp)},
		}, nil),
		func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record {
			idColBuilder := array.NewInt64Builder(memory.DefaultAllocator)
			defer idColBuilder.Release()

			tsColBuilder := array.NewTimestampBuilder(memory.DefaultAllocator, arrow.FixedWidthTypes.Timestamp_ns.(*arrow.TimestampType))
			defer tsColBuilder.Release()

			rows := int64(0)
			for ; rows < batchSize && offset+rows < maxRows; rows++ {
				idColBuilder.Append(offset + rows)
				tsColBuilder.Append(arrow.Timestamp(start.Add(order * (time.Duration(offset)*time.Second + time.Duration(rows)*time.Millisecond)).UnixNano()))
			}

			idData := idColBuilder.NewArray()
			defer idData.Release()

			tsData := tsColBuilder.NewArray()
			defer tsData.Release()

			columns := []arrow.Array{idData, tsData}
			return array.NewRecord(schema, columns, rows)
		},
	)
}
// batchFunc produces a single record of at most batchSize rows, starting at
// the given row offset and never exceeding maxRows in total.
type batchFunc func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record

// recordGenerator builds test pipelines that emit records produced by batch
// against the given schema.
type recordGenerator struct {
	schema *arrow.Schema
	batch  batchFunc
}

func newRecordGenerator(schema *arrow.Schema, batch batchFunc) *recordGenerator {
	return &recordGenerator{
		schema: schema,
		batch:  batch,
	}
}
// Pipeline returns a Pipeline that emits up to rows rows in batches of at
// most batchSize rows, becoming Exhausted once all rows have been produced.
func (p *recordGenerator) Pipeline(batchSize int64, rows int64) Pipeline {
	var pos int64
	return newGenericPipeline(
		Local,
		func(_ context.Context, _ []Pipeline) state {
			if pos >= rows {
				return Exhausted
			}
			batch := p.batch(pos, rows, batchSize, p.schema)
			pos += batch.NumRows()
			return successState(batch)
		},
		nil,
	)
}
// collect reads all data from the pipeline until it is exhausted, failing the
// test on any error other than EOF. It returns the number of batches and rows read.
func collect(t *testing.T, pipeline Pipeline) (batches int64, rows int64) {
	ctx := t.Context()
	for {
		err := pipeline.Read(ctx)
		if errors.Is(err, EOF) {
			break
		}
		if err != nil {
			t.Fatalf("did not expect error, got %s", err.Error())
		}
		batch, _ := pipeline.Value()
		t.Log("batch", batch, "err", err)
		batches++
		rows += batch.NumRows()
	}
	return batches, rows
}
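
// The test below is an illustrative sketch (not part of the original file)
// showing how the helpers above are typically combined: a generator pipeline
// is drained with collect and the batch/row counts are checked. With 25 total
// rows and a batch size of 10, the generator is assumed to emit batches of
// 10, 10, and 5 rows before reporting EOF.
func TestRecordGeneratorSketch(t *testing.T) {
	pipeline := incrementingIntPipeline.Pipeline(10, 25)

	batches, rows := collect(t, pipeline)
	if batches != 3 {
		t.Fatalf("expected 3 batches, got %d", batches)
	}
	if rows != 25 {
		t.Fatalf("expected 25 rows, got %d", rows)
	}
}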