mirror of https://github.com/grafana/loki
chore(engine): Implement execution pipeline for the limit operator (#17264)
Signed-off-by: Christian Haudum <christian.haudum@gmail.com>pull/17311/head^2
parent
cff0df63b3
commit
e828c48702
@ -0,0 +1 @@ |
||||
package executor |
||||
@ -0,0 +1,65 @@ |
||||
package executor |
||||
|
||||
import ( |
||||
"context" |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/engine/planner/physical" |
||||
) |
||||
|
||||
func TestLimit(t *testing.T) { |
||||
for _, tt := range []struct { |
||||
name string |
||||
offset uint32 |
||||
limit uint32 |
||||
batchSize int64 |
||||
expectedBatches int64 |
||||
expectedRows int64 |
||||
}{ |
||||
{ |
||||
name: "without offset", |
||||
offset: 0, |
||||
limit: 5, |
||||
batchSize: 3, |
||||
expectedBatches: 2, |
||||
expectedRows: 5, |
||||
}, |
||||
{ |
||||
name: "with offset", |
||||
offset: 3, |
||||
limit: 5, |
||||
batchSize: 4, |
||||
expectedBatches: 2, |
||||
expectedRows: 5, |
||||
}, |
||||
{ |
||||
name: "with offset greater than batch size", |
||||
offset: 5, |
||||
limit: 6, |
||||
batchSize: 2, |
||||
expectedBatches: 4, |
||||
expectedRows: 6, |
||||
}, |
||||
} { |
||||
t.Run(tt.name, func(t *testing.T) { |
||||
c := &Context{ |
||||
batchSize: tt.batchSize, |
||||
} |
||||
limit := &physical.Limit{ |
||||
Skip: tt.offset, |
||||
Fetch: tt.limit, |
||||
} |
||||
inputs := []Pipeline{ |
||||
incrementingIntPipeline.Pipeline(tt.batchSize, 1000), |
||||
} |
||||
|
||||
pipeline := c.executeLimit(context.Background(), limit, inputs) |
||||
batches, rows := collect(t, pipeline) |
||||
|
||||
require.Equal(t, tt.expectedBatches, batches) |
||||
require.Equal(t, tt.expectedRows, rows) |
||||
}) |
||||
} |
||||
} |
||||
@ -0,0 +1,77 @@ |
||||
package executor |
||||
|
||||
import (
	"errors"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"
)
||||
|
||||
var ( |
||||
incrementingIntPipeline = newRecordGenerator( |
||||
arrow.NewSchema([]arrow.Field{ |
||||
{Name: "id", Type: arrow.PrimitiveTypes.Int64}, |
||||
}, nil), |
||||
func(offset, sz int64, schema *arrow.Schema) arrow.Record { |
||||
builder := array.NewInt64Builder(memory.DefaultAllocator) |
||||
defer builder.Release() |
||||
|
||||
for i := int64(0); i < sz; i++ { |
||||
builder.Append(offset + i) |
||||
} |
||||
|
||||
data := builder.NewArray() |
||||
defer data.Release() |
||||
|
||||
columns := []arrow.Array{data} |
||||
return array.NewRecord(schema, columns, sz) |
||||
}, |
||||
) |
||||
) |
||||
|
||||
type recordGenerator struct { |
||||
schema *arrow.Schema |
||||
batch func(offset, sz int64, schema *arrow.Schema) arrow.Record |
||||
} |
||||
|
||||
func newRecordGenerator(schema *arrow.Schema, batch func(offset, sz int64, schema *arrow.Schema) arrow.Record) *recordGenerator { |
||||
return &recordGenerator{ |
||||
schema: schema, |
||||
batch: batch, |
||||
} |
||||
} |
||||
|
||||
func (p *recordGenerator) Pipeline(batchSize int64, rows int64) Pipeline { |
||||
var pos int64 |
||||
return newGenericPipeline( |
||||
Local, |
||||
func(_ []Pipeline) state { |
||||
if pos >= rows { |
||||
return Exhausted |
||||
} |
||||
batch := p.batch(pos, batchSize, p.schema) |
||||
pos += batch.NumRows() |
||||
return successState(batch) |
||||
}, |
||||
nil, |
||||
) |
||||
} |
||||
|
||||
// collect reads all data from the pipeline until it is exhausted or returns an error.
|
||||
func collect(t *testing.T, pipeline Pipeline) (batches int64, rows int64) { |
||||
for { |
||||
err := pipeline.Read() |
||||
if err == EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
t.Fatalf("did not expect error, got %s", err.Error()) |
||||
} |
||||
batch, _ := pipeline.Value() |
||||
t.Log("batch", batch, "err", err) |
||||
batches++ |
||||
rows += batch.NumRows() |
||||
} |
||||
return batches, rows |
||||
} |
||||
Loading…
Reference in new issue