loki/pkg/engine/executor/executor.go

package executor

import (
	"context"
	"errors"
	"fmt"

	"github.com/grafana/loki/v3/pkg/engine/planner/physical"
)

// Config holds the settings accepted by Run.
type Config struct {
	// BatchSize is copied by Run into the batchSize field of the execution
	// Context.
	// NOTE(review): presumably the number of rows per batch handed between
	// pipelines — confirm against the pipeline implementations.
	BatchSize int64 `yaml:"batch_size"`
}

// Run turns the given physical plan into a Pipeline.
//
// Failures are not returned as a separate error value. A nil plan, or an
// error from plan.Root, is passed to errorPipeline and the result of that
// call is returned. Otherwise the root node is executed with a fresh Context
// that carries the plan and cfg.BatchSize.
func Run(ctx context.Context, cfg Config, plan *physical.Plan) Pipeline {
	// Reject a missing plan before touching it.
	if plan == nil {
		return errorPipeline(errors.New("plan is nil"))
	}

	root, rootErr := plan.Root()
	if rootErr != nil {
		return errorPipeline(rootErr)
	}

	execCtx := &Context{
		batchSize: cfg.BatchSize,
		plan:      plan,
	}
	return execCtx.execute(ctx, root)
}

// Context is the execution context. It holds the state shared by the
// execute* methods while a physical plan is turned into a Pipeline:
//
//   - batchSize is populated by Run from Config.BatchSize.
//   - plan is the physical plan being executed; execute queries it for the
//     children of each node.
//   - evaluator is passed to the sort-merge, filter and projection pipeline
//     constructors. Run does not set it, so it is the zero value there.
type Context struct {
	batchSize int64
	plan      *physical.Plan
	evaluator expressionEvaluator
}

// execute builds the Pipeline for node.
//
// The pipelines of all children reported by the plan are built first, in the
// order the plan returns them, by calling execute recursively. They are then
// handed to the node-specific execute* method chosen by the node's concrete
// type. DataObjScan nodes do not receive the child pipelines. A node of any
// other type than the ones listed below yields errorPipeline with an
// "invalid node type" error.
func (c *Context) execute(ctx context.Context, node physical.Node) Pipeline {
	childNodes := c.plan.Children(node)
	upstream := make([]Pipeline, 0, len(childNodes))
	for _, childNode := range childNodes {
		childPipeline := c.execute(ctx, childNode)
		upstream = append(upstream, childPipeline)
	}

	// Dispatch on the concrete node type.
	switch typed := node.(type) {
	case *physical.DataObjScan:
		return c.executeDataObjScan(ctx, typed)
	case *physical.SortMerge:
		return c.executeSortMerge(ctx, typed, upstream)
	case *physical.Limit:
		return c.executeLimit(ctx, typed, upstream)
	case *physical.Filter:
		return c.executeFilter(ctx, typed, upstream)
	case *physical.Projection:
		return c.executeProjection(ctx, typed, upstream)
	default:
		unknownErr := fmt.Errorf("invalid node type: %T", node)
		return errorPipeline(unknownErr)
	}
}

// executeDataObjScan is a placeholder for executing a DataObjScan node. Both
// arguments are ignored; the result of errorPipeline(errNotImplemented) is
// returned unconditionally.
func (c *Context) executeDataObjScan(_ context.Context, _ *physical.DataObjScan) Pipeline {
	notImplemented := errorPipeline(errNotImplemented)
	return notImplemented
}

// executeSortMerge builds the pipeline for a SortMerge node on top of the
// given input pipelines.
//
// With no inputs, emptyPipeline() is returned. Otherwise the inputs are passed
// to NewSortMergePipeline together with the node's Order and Column and the
// context's evaluator; a constructor error is returned via errorPipeline.
func (c *Context) executeSortMerge(_ context.Context, sortmerge *physical.SortMerge, inputs []Pipeline) Pipeline {
	if len(inputs) < 1 {
		return emptyPipeline()
	}

	merged, mergeErr := NewSortMergePipeline(inputs, sortmerge.Order, sortmerge.Column, c.evaluator)
	if mergeErr == nil {
		return merged
	}
	return errorPipeline(mergeErr)
}

// executeLimit builds the pipeline for a Limit node.
//
// With no inputs, emptyPipeline() is returned. More than one input is
// rejected through errorPipeline. With exactly one input, that input is
// wrapped by NewLimitPipeline using the node's Skip and Fetch values.
func (c *Context) executeLimit(_ context.Context, limit *physical.Limit, inputs []Pipeline) Pipeline {
	switch count := len(inputs); {
	case count == 0:
		return emptyPipeline()
	case count > 1:
		return errorPipeline(fmt.Errorf("limit expects exactly one input, got %d", count))
	}

	// Exactly one input remains at this point.
	return NewLimitPipeline(inputs[0], limit.Skip, limit.Fetch)
}

// executeFilter builds the pipeline for a Filter node.
//
// With no inputs, emptyPipeline() is returned. More than one input is
// rejected through errorPipeline. With exactly one input, the filter node,
// that input and the context's evaluator are passed to NewFilterPipeline.
func (c *Context) executeFilter(_ context.Context, filter *physical.Filter, inputs []Pipeline) Pipeline {
	count := len(inputs)
	if count == 0 {
		return emptyPipeline()
	}

	// TODO: support multiple inputs
	if count != 1 {
		return errorPipeline(fmt.Errorf("filter expects exactly one input, got %d", count))
	}

	source := inputs[0]
	return NewFilterPipeline(filter, source, c.evaluator)
}

// executeProjection builds the pipeline for a Projection node.
//
// With no inputs, emptyPipeline() is returned. More than one input, or a
// projection without any columns, is rejected through errorPipeline. With
// exactly one input, that input and the node's Columns are passed to
// NewProjectPipeline along with a pointer to the context's evaluator; a
// constructor error is returned via errorPipeline.
func (c *Context) executeProjection(_ context.Context, proj *physical.Projection, inputs []Pipeline) Pipeline {
	if len(inputs) == 0 {
		return emptyPipeline()
	}

	if len(inputs) > 1 {
		// unsupported for now
		return errorPipeline(fmt.Errorf("projection expects exactly one input, got %d", len(inputs)))
	}

	if len(proj.Columns) == 0 {
		// The message is constant, so errors.New is used instead of a
		// verb-less fmt.Errorf call.
		return errorPipeline(errors.New("projection expects at least one column, got 0"))
	}

	p, err := NewProjectPipeline(inputs[0], proj.Columns, &c.evaluator)
	if err != nil {
		return errorPipeline(err)
	}
	return p
}
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`package executor`

			`import (`
			`"context"`
			`"errors"`
			`"fmt"`

			`"github.com/grafana/loki/v3/pkg/engine/planner/physical"`
			`)`

			`type Config struct {`
			BatchSize int64 `yaml:"batch_size"`
			`}`

			`func Run(ctx context.Context, cfg Config, plan *physical.Plan) Pipeline {`
			`c := &Context{`
			`plan: plan,`
			`batchSize: cfg.BatchSize,`
			`}`
			`if plan == nil {`
			`return errorPipeline(errors.New("plan is nil"))`
			`}`
			`node, err := plan.Root()`
			`if err != nil {`
			`return errorPipeline(err)`
			`}`
			`return c.execute(ctx, node)`
			`}`

			`// Context is the execution context`
			`type Context struct {`
			`batchSize int64`
			`plan *physical.Plan`
feat(dataobj executor): project node (#17312) 9 months ago			`evaluator expressionEvaluator`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`}`

			`func (c *Context) execute(ctx context.Context, node physical.Node) Pipeline {`
			`children := c.plan.Children(node)`
			`inputs := make([]Pipeline, 0, len(children))`
			`for _, child := range children {`
			`inputs = append(inputs, c.execute(ctx, child))`
			`}`

			`switch n := node.(type) {`
			`case *physical.DataObjScan:`
			`return c.executeDataObjScan(ctx, n)`
			`case *physical.SortMerge:`
			`return c.executeSortMerge(ctx, n, inputs)`
			`case *physical.Limit:`
			`return c.executeLimit(ctx, n, inputs)`
			`case *physical.Filter:`
			`return c.executeFilter(ctx, n, inputs)`
			`case *physical.Projection:`
			`return c.executeProjection(ctx, n, inputs)`
			`default:`
			`return errorPipeline(fmt.Errorf("invalid node type: %T", node))`
			`}`
			`}`

			`func (c Context) executeDataObjScan(_ context.Context, _ physical.DataObjScan) Pipeline {`
			`return errorPipeline(errNotImplemented)`
			`}`

chore(engine): Implement execution pipeline for SortMerge operator (#17406) This PR contains an implementation of the k-way merge operation without using a heap, like @rfratto described [here](https://github.com/grafana/loki/pull/17280). The SortMerge is implemented only using slices: * Maintain the following invariant: * For each input pipeline, we store the next record to process. (this already exists as `HeapSortMerge.batches`) * Additionally for each record, track the starting slice offset (which resets to zero whenever a new record is loaded in). * Iteration stops when all input pipelines have been exhausted (no change from how this is now). * To get the next record: * Iterate through each record, looking at the value from their starting slice offset. * Track the top _two_ winners (e.g., the record whose next value is the smallest and the record whose next value is the next smallest). * Find the largest offset in the starting record whose value is still less than the value of the runner-up record from the previous step. * Return the slice of that record using the two offsets, and update the stored offset of the returned record for the next call to `Read`. This approach, like the one with heap, still requires to concatenate (coalesce) the single row records - which is not implemented in this PR yet. On that note, single row records are the worst case scenario with this implementation, not necessarily the regular case. Update: After an offline discussion, @owen-d and I agreed on ignoring the worst-case scenario of single-row records for now. Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`func (c Context) executeSortMerge(_ context.Context, sortmerge physical.SortMerge, inputs []Pipeline) Pipeline {`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`if len(inputs) == 0 {`
			`return emptyPipeline()`
			`}`

chore(engine): Implement execution pipeline for SortMerge operator (#17406) This PR contains an implementation of the k-way merge operation without using a heap, like @rfratto described [here](https://github.com/grafana/loki/pull/17280). The SortMerge is implemented only using slices: * Maintain the following invariant: * For each input pipeline, we store the next record to process. (this already exists as `HeapSortMerge.batches`) * Additionally for each record, track the starting slice offset (which resets to zero whenever a new record is loaded in). * Iteration stops when all input pipelines have been exhausted (no change from how this is now). * To get the next record: * Iterate through each record, looking at the value from their starting slice offset. * Track the top _two_ winners (e.g., the record whose next value is the smallest and the record whose next value is the next smallest). * Find the largest offset in the starting record whose value is still less than the value of the runner-up record from the previous step. * Return the slice of that record using the two offsets, and update the stored offset of the returned record for the next call to `Read`. This approach, like the one with heap, still requires to concatenate (coalesce) the single row records - which is not implemented in this PR yet. On that note, single row records are the worst case scenario with this implementation, not necessarily the regular case. Update: After an offline discussion, @owen-d and I agreed on ignoring the worst-case scenario of single-row records for now. Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`pipeline, err := NewSortMergePipeline(inputs, sortmerge.Order, sortmerge.Column, c.evaluator)`
			`if err != nil {`
			`return errorPipeline(err)`
			`}`
			`return pipeline`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`}`

chore(engine): Implement execution pipeline for the limit operator (#17264) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`func (c Context) executeLimit(_ context.Context, limit physical.Limit, inputs []Pipeline) Pipeline {`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`if len(inputs) == 0 {`
			`return emptyPipeline()`
			`}`

			`if len(inputs) > 1 {`
			`return errorPipeline(fmt.Errorf("limit expects exactly one input, got %d", len(inputs)))`
			`}`

chore(dataobj,testing,executor): pipeline equality testing (#17311) 9 months ago			`return NewLimitPipeline(inputs[0], limit.Skip, limit.Fetch)`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`}`

feat(dataobj, executor): filter node execution (#17327) # Add Filter Pipeline Implementation This PR implements a filter pipeline for Loki's engine executor component. The filter operator enables row-level filtering of Arrow record batches based on boolean expressions. This is a fundamental component for query execution. ## Changes - Implement `NewFilterPipeline` for evaluating filter predicates on record batches - Add `filterBatch` utility to efficiently filter Arrow records - Update `executeFilter` in the executor to use the new implementation - Add comprehensive test suite for the filter pipeline: - Basic literal predicates (true/false) - Column reference predicates - Empty batch handling - Multiple batch processing ## Implementation Notes The filter implementation follows the same pattern as the project pipeline, evaluating predicates against each record and creating a new filtered record based on the results. The implementation supports compound predicates with AND logic. This implementation provides a foundation for more advanced filtering operations as the expression evaluator is enhanced to support more complex expressions. 9 months ago			`func (c Context) executeFilter(_ context.Context, filter physical.Filter, inputs []Pipeline) Pipeline {`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`if len(inputs) == 0 {`
			`return emptyPipeline()`
			`}`

feat(dataobj, executor): filter node execution (#17327) # Add Filter Pipeline Implementation This PR implements a filter pipeline for Loki's engine executor component. The filter operator enables row-level filtering of Arrow record batches based on boolean expressions. This is a fundamental component for query execution. ## Changes - Implement `NewFilterPipeline` for evaluating filter predicates on record batches - Add `filterBatch` utility to efficiently filter Arrow records - Update `executeFilter` in the executor to use the new implementation - Add comprehensive test suite for the filter pipeline: - Basic literal predicates (true/false) - Column reference predicates - Empty batch handling - Multiple batch processing ## Implementation Notes The filter implementation follows the same pattern as the project pipeline, evaluating predicates against each record and creating a new filtered record based on the results. The implementation supports compound predicates with AND logic. This implementation provides a foundation for more advanced filtering operations as the expression evaluator is enhanced to support more complex expressions. 9 months ago			`// TODO: support multiple inputs`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`if len(inputs) > 1 {`
			`return errorPipeline(fmt.Errorf("filter expects exactly one input, got %d", len(inputs)))`
			`}`

feat(dataobj, executor): filter node execution (#17327) # Add Filter Pipeline Implementation This PR implements a filter pipeline for Loki's engine executor component. The filter operator enables row-level filtering of Arrow record batches based on boolean expressions. This is a fundamental component for query execution. ## Changes - Implement `NewFilterPipeline` for evaluating filter predicates on record batches - Add `filterBatch` utility to efficiently filter Arrow records - Update `executeFilter` in the executor to use the new implementation - Add comprehensive test suite for the filter pipeline: - Basic literal predicates (true/false) - Column reference predicates - Empty batch handling - Multiple batch processing ## Implementation Notes The filter implementation follows the same pattern as the project pipeline, evaluating predicates against each record and creating a new filtered record based on the results. The implementation supports compound predicates with AND logic. This implementation provides a foundation for more advanced filtering operations as the expression evaluator is enhanced to support more complex expressions. 9 months ago			`return NewFilterPipeline(filter, inputs[0], c.evaluator)`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`}`

			`func (c Context) executeProjection(_ context.Context, proj physical.Projection, inputs []Pipeline) Pipeline {`
			`if len(inputs) == 0 {`
			`return emptyPipeline()`
			`}`

			`if len(inputs) > 1 {`
feat(dataobj executor): project node (#17312) 9 months ago			`// unsupported for now`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`return errorPipeline(fmt.Errorf("projection expects exactly one input, got %d", len(inputs)))`
			`}`

			`if len(proj.Columns) == 0 {`
			`return errorPipeline(fmt.Errorf("projection expects at least one column, got 0"))`
			`}`

feat(dataobj executor): project node (#17312) 9 months ago			`p, err := NewProjectPipeline(inputs[0], proj.Columns, &c.evaluator)`
			`if err != nil {`
			`return errorPipeline(err)`
			`}`
			`return p`
chore(engine): Add framework for query executor (#17260) Signed-off-by: Christian Haudum <christian.haudum@gmail.com> 9 months ago			`}`