Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
loki/pkg/engine/executor/executor.go

121 lines
3.0 KiB

package executor
import (
"context"
"errors"
"fmt"
"github.com/grafana/loki/v3/pkg/engine/planner/physical"
)
type Config struct {
BatchSize int64 `yaml:"batch_size"`
}
func Run(ctx context.Context, cfg Config, plan *physical.Plan) Pipeline {
c := &Context{
plan: plan,
batchSize: cfg.BatchSize,
}
if plan == nil {
return errorPipeline(errors.New("plan is nil"))
}
node, err := plan.Root()
if err != nil {
return errorPipeline(err)
}
return c.execute(ctx, node)
}
// Context is the execution context
type Context struct {
batchSize int64
plan *physical.Plan
evaluator expressionEvaluator
}
func (c *Context) execute(ctx context.Context, node physical.Node) Pipeline {
children := c.plan.Children(node)
inputs := make([]Pipeline, 0, len(children))
for _, child := range children {
inputs = append(inputs, c.execute(ctx, child))
}
switch n := node.(type) {
case *physical.DataObjScan:
return c.executeDataObjScan(ctx, n)
case *physical.SortMerge:
return c.executeSortMerge(ctx, n, inputs)
case *physical.Limit:
return c.executeLimit(ctx, n, inputs)
case *physical.Filter:
return c.executeFilter(ctx, n, inputs)
case *physical.Projection:
return c.executeProjection(ctx, n, inputs)
default:
return errorPipeline(fmt.Errorf("invalid node type: %T", node))
}
}
func (c *Context) executeDataObjScan(_ context.Context, _ *physical.DataObjScan) Pipeline {
return errorPipeline(errNotImplemented)
}
chore(engine): Implement execution pipeline for SortMerge operator (#17406) This PR contains an implementation of the k-way merge operation without using a heap, like @rfratto described [here](https://github.com/grafana/loki/pull/17280). The SortMerge is implemented only using slices: * Maintain the following invariant: * For each input pipeline, we store the next record to process. (this already exists as `HeapSortMerge.batches`) * Additionally for each record, track the starting slice offset (which resets to zero whenever a new record is loaded in). * Iteration stops when all input pipelines have been exhausted (no change from how this is now). * To get the next record: * Iterate through each record, looking at the value from their starting slice offset. * Track the top _two_ winners (e.g., the record whose next value is the smallest and the record whose next value is the next smallest). * Find the largest offset in the starting record whose value is still less than the value of the runner-up record from the previous step. * Return the slice of that record using the two offsets, and update the stored offset of the returned record for the next call to `Read`. This approach, like the one with heap, still requires to concatenate (coalesce) the single row records - which is not implemented in this PR yet. On that note, single row records are the worst case scenario with this implementation, not necessarily the regular case. **Update:** After an offline discussion, @owen-d and I agreed on ignoring the worst-case scenario of single-row records for now. Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
9 months ago
func (c *Context) executeSortMerge(_ context.Context, sortmerge *physical.SortMerge, inputs []Pipeline) Pipeline {
if len(inputs) == 0 {
return emptyPipeline()
}
chore(engine): Implement execution pipeline for SortMerge operator (#17406) This PR contains an implementation of the k-way merge operation without using a heap, like @rfratto described [here](https://github.com/grafana/loki/pull/17280). The SortMerge is implemented only using slices: * Maintain the following invariant: * For each input pipeline, we store the next record to process. (this already exists as `HeapSortMerge.batches`) * Additionally for each record, track the starting slice offset (which resets to zero whenever a new record is loaded in). * Iteration stops when all input pipelines have been exhausted (no change from how this is now). * To get the next record: * Iterate through each record, looking at the value from their starting slice offset. * Track the top _two_ winners (e.g., the record whose next value is the smallest and the record whose next value is the next smallest). * Find the largest offset in the starting record whose value is still less than the value of the runner-up record from the previous step. * Return the slice of that record using the two offsets, and update the stored offset of the returned record for the next call to `Read`. This approach, like the one with heap, still requires to concatenate (coalesce) the single row records - which is not implemented in this PR yet. On that note, single row records are the worst case scenario with this implementation, not necessarily the regular case. **Update:** After an offline discussion, @owen-d and I agreed on ignoring the worst-case scenario of single-row records for now. Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
9 months ago
pipeline, err := NewSortMergePipeline(inputs, sortmerge.Order, sortmerge.Column, c.evaluator)
if err != nil {
return errorPipeline(err)
}
return pipeline
}
func (c *Context) executeLimit(_ context.Context, limit *physical.Limit, inputs []Pipeline) Pipeline {
if len(inputs) == 0 {
return emptyPipeline()
}
if len(inputs) > 1 {
return errorPipeline(fmt.Errorf("limit expects exactly one input, got %d", len(inputs)))
}
return NewLimitPipeline(inputs[0], limit.Skip, limit.Fetch)
}
func (c *Context) executeFilter(_ context.Context, filter *physical.Filter, inputs []Pipeline) Pipeline {
if len(inputs) == 0 {
return emptyPipeline()
}
// TODO: support multiple inputs
if len(inputs) > 1 {
return errorPipeline(fmt.Errorf("filter expects exactly one input, got %d", len(inputs)))
}
return NewFilterPipeline(filter, inputs[0], c.evaluator)
}
func (c *Context) executeProjection(_ context.Context, proj *physical.Projection, inputs []Pipeline) Pipeline {
if len(inputs) == 0 {
return emptyPipeline()
}
if len(inputs) > 1 {
// unsupported for now
return errorPipeline(fmt.Errorf("projection expects exactly one input, got %d", len(inputs)))
}
if len(proj.Columns) == 0 {
return errorPipeline(fmt.Errorf("projection expects at least one column, got 0"))
}
p, err := NewProjectPipeline(inputs[0], proj.Columns, &c.evaluator)
if err != nil {
return errorPipeline(err)
}
return p
}