mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
192 lines
5.3 KiB
192 lines
5.3 KiB
|
9 months ago
|
package executor
|
||
|
|
|
||
|
|
import (
|
||
|
6 months ago
|
"context"
|
||
|
8 months ago
|
"errors"
|
||
|
5 months ago
|
"math"
|
||
|
9 months ago
|
"testing"
|
||
|
9 months ago
|
"time"
|
||
|
9 months ago
|
|
||
|
|
"github.com/apache/arrow-go/v18/arrow"
|
||
|
|
"github.com/apache/arrow-go/v18/arrow/array"
|
||
|
|
"github.com/apache/arrow-go/v18/arrow/memory"
|
||
|
8 months ago
|
|
||
|
|
"github.com/grafana/loki/v3/pkg/engine/internal/datatype"
|
||
|
|
"github.com/grafana/loki/v3/pkg/engine/internal/types"
|
||
|
5 months ago
|
"github.com/grafana/loki/v3/pkg/util/arrowtest"
|
||
|
9 months ago
|
)
|
||
|
|
|
||
|
|
var (
|
||
|
|
incrementingIntPipeline = newRecordGenerator(
|
||
|
|
arrow.NewSchema([]arrow.Field{
|
||
|
6 months ago
|
{Name: "id", Type: datatype.Arrow.Integer, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Integer)},
|
||
|
9 months ago
|
}, nil),
|
||
|
9 months ago
|
|
||
|
6 months ago
|
func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record {
|
||
|
9 months ago
|
builder := array.NewInt64Builder(memory.DefaultAllocator)
|
||
|
|
defer builder.Release()
|
||
|
|
|
||
|
6 months ago
|
rows := int64(0)
|
||
|
|
for ; rows < batchSize && offset+rows < maxRows; rows++ {
|
||
|
|
builder.Append(offset + rows)
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
data := builder.NewArray()
|
||
|
|
defer data.Release()
|
||
|
|
|
||
|
|
columns := []arrow.Array{data}
|
||
|
6 months ago
|
return array.NewRecord(schema, columns, rows)
|
||
|
9 months ago
|
},
|
||
|
|
)
|
||
|
|
)
|
||
|
|
|
||
|
9 months ago
|
func ascendingTimestampPipeline(start time.Time) *recordGenerator {
|
||
|
|
return timestampPipeline(start, ascending)
|
||
|
|
}
|
||
|
|
|
||
|
|
func descendingTimestampPipeline(start time.Time) *recordGenerator {
|
||
|
|
return timestampPipeline(start, descending)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Ordering factors multiplied into the per-row time offset by
// timestampPipeline: a positive factor moves timestamps forward from the start
// time, a negative factor moves them backwards.
const (
	ascending  time.Duration = 1
	descending time.Duration = -1
)
|
||
|
|
|
||
|
|
func timestampPipeline(start time.Time, order time.Duration) *recordGenerator {
|
||
|
|
return newRecordGenerator(
|
||
|
|
arrow.NewSchema([]arrow.Field{
|
||
|
6 months ago
|
{Name: "id", Type: datatype.Arrow.Integer, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Integer)},
|
||
|
|
{Name: "timestamp", Type: datatype.Arrow.Timestamp, Metadata: datatype.ColumnMetadata(types.ColumnTypeBuiltin, datatype.Loki.Timestamp)},
|
||
|
9 months ago
|
}, nil),
|
||
|
|
|
||
|
6 months ago
|
func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record {
|
||
|
9 months ago
|
idColBuilder := array.NewInt64Builder(memory.DefaultAllocator)
|
||
|
|
defer idColBuilder.Release()
|
||
|
|
|
||
|
8 months ago
|
tsColBuilder := array.NewTimestampBuilder(memory.DefaultAllocator, arrow.FixedWidthTypes.Timestamp_ns.(*arrow.TimestampType))
|
||
|
9 months ago
|
defer tsColBuilder.Release()
|
||
|
|
|
||
|
6 months ago
|
rows := int64(0)
|
||
|
|
for ; rows < batchSize && offset+rows < maxRows; rows++ {
|
||
|
|
idColBuilder.Append(offset + rows)
|
||
|
|
tsColBuilder.Append(arrow.Timestamp(start.Add(order * (time.Duration(offset)*time.Second + time.Duration(rows)*time.Millisecond)).UnixNano()))
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
|
idData := idColBuilder.NewArray()
|
||
|
|
defer idData.Release()
|
||
|
|
|
||
|
|
tsData := tsColBuilder.NewArray()
|
||
|
|
defer tsData.Release()
|
||
|
|
|
||
|
|
columns := []arrow.Array{idData, tsData}
|
||
|
6 months ago
|
return array.NewRecord(schema, columns, rows)
|
||
|
9 months ago
|
},
|
||
|
|
)
|
||
|
|
}
|
||
|
|
|
||
|
6 months ago
|
// batchFunc builds a single record for schema. recordGenerator.Pipeline calls
// it with the current row position as offset, the total row count as maxRows,
// and the requested batchSize.
type batchFunc func(offset, maxRows, batchSize int64, schema *arrow.Schema) arrow.Record
|
||
|
|
|
||
|
9 months ago
|
type recordGenerator struct {
|
||
|
|
schema *arrow.Schema
|
||
|
6 months ago
|
batch batchFunc
|
||
|
9 months ago
|
}
|
||
|
|
|
||
|
6 months ago
|
func newRecordGenerator(schema *arrow.Schema, batch batchFunc) *recordGenerator {
|
||
|
9 months ago
|
return &recordGenerator{
|
||
|
|
schema: schema,
|
||
|
|
batch: batch,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func (p *recordGenerator) Pipeline(batchSize int64, rows int64) Pipeline {
|
||
|
|
var pos int64
|
||
|
|
return newGenericPipeline(
|
||
|
|
Local,
|
||
|
6 months ago
|
func(_ context.Context, _ []Pipeline) state {
|
||
|
9 months ago
|
if pos >= rows {
|
||
|
|
return Exhausted
|
||
|
|
}
|
||
|
6 months ago
|
batch := p.batch(pos, rows, batchSize, p.schema)
|
||
|
9 months ago
|
pos += batch.NumRows()
|
||
|
|
return successState(batch)
|
||
|
|
},
|
||
|
|
nil,
|
||
|
|
)
|
||
|
|
}
|
||
|
|
|
||
|
|
// collect reads all data from the pipeline until it is exhausted or returns an error.
|
||
|
|
func collect(t *testing.T, pipeline Pipeline) (batches int64, rows int64) {
|
||
|
6 months ago
|
ctx := t.Context()
|
||
|
9 months ago
|
for {
|
||
|
4 months ago
|
batch, err := pipeline.Read(ctx)
|
||
|
8 months ago
|
if errors.Is(err, EOF) {
|
||
|
9 months ago
|
break
|
||
|
|
}
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("did not expect error, got %s", err.Error())
|
||
|
|
}
|
||
|
|
t.Log("batch", batch, "err", err)
|
||
|
|
batches++
|
||
|
|
rows += batch.NumRows()
|
||
|
|
}
|
||
|
|
return batches, rows
|
||
|
|
}
|
||
|
5 months ago
|
|
||
|
|
// ArrowtestPipeline creates a [Pipeline] that emits test data from a sequence
|
||
|
|
// of [arrowtest.Rows].
|
||
|
|
type ArrowtestPipeline struct {
|
||
|
|
alloc memory.Allocator
|
||
|
|
schema *arrow.Schema
|
||
|
|
rows []arrowtest.Rows
|
||
|
|
|
||
|
4 months ago
|
cur int
|
||
|
5 months ago
|
}
|
||
|
|
|
||
|
|
// Compile-time assertion that *ArrowtestPipeline implements Pipeline.
var _ Pipeline = (*ArrowtestPipeline)(nil)
|
||
|
|
|
||
|
|
// NewArrowtestPipeline creates a new ArrowtestPipeline which will emit each
|
||
|
|
// [arrowtest.Rows] as a record.
|
||
|
|
//
|
||
|
|
// If schema is defined, all rows will be emitted using that schema. If schema
|
||
|
|
// is nil, the schema is derived from each element in rows as it is emitted.
|
||
|
|
func NewArrowtestPipeline(alloc memory.Allocator, schema *arrow.Schema, rows ...arrowtest.Rows) *ArrowtestPipeline {
|
||
|
|
if alloc == nil {
|
||
|
|
alloc = memory.DefaultAllocator
|
||
|
|
}
|
||
|
|
|
||
|
|
return &ArrowtestPipeline{
|
||
|
|
alloc: alloc,
|
||
|
|
schema: schema,
|
||
|
|
rows: rows,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Read implements [Pipeline], converting the next [arrowtest.Rows] into a
|
||
|
|
// [arrow.Record] and storing it in the pipeline's state. The state can then be
|
||
|
|
// accessed via [ArrowtestPipeline.Value].
|
||
|
4 months ago
|
func (p *ArrowtestPipeline) Read(_ context.Context) (arrow.Record, error) {
|
||
|
5 months ago
|
if p.cur >= len(p.rows) {
|
||
|
4 months ago
|
return nil, EOF
|
||
|
5 months ago
|
}
|
||
|
|
|
||
|
|
rows := p.rows[p.cur]
|
||
|
|
schema := p.schema
|
||
|
|
|
||
|
|
if schema == nil {
|
||
|
|
schema = rows.Schema()
|
||
|
|
}
|
||
|
|
|
||
|
|
p.cur++
|
||
|
4 months ago
|
return rows.Record(p.alloc, schema), nil
|
||
|
5 months ago
|
}
|
||
|
|
|
||
|
|
// Close implements [Pipeline], immediately exhausting the pipeline.
|
||
|
|
func (p *ArrowtestPipeline) Close() { p.cur = math.MaxInt64 }
|
||
|
|
|
||
|
|
// Inputs implements [Pipeline], returning nil as this pipeline has no inputs.
|
||
|
|
func (p *ArrowtestPipeline) Inputs() []Pipeline { return nil }
|
||
|
|
|
||
|
|
// Transport implements [Pipeline], returning [Local].
|
||
|
|
func (p *ArrowtestPipeline) Transport() Transport { return Local }
|