mirror of https://github.com/grafana/loki
chore(engine): introduce ScanSet node (#19524)
Signed-off-by: Robert Fratto <robertfratto@gmail.com>
Branch: pull/19530/head
parent
b95bfabd42
commit
bd3f3dabe1
@ -1,248 +0,0 @@ |
||||
package executor |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
"fmt" |
||||
"slices" |
||||
"sort" |
||||
|
||||
"github.com/apache/arrow-go/v18/arrow" |
||||
"github.com/apache/arrow-go/v18/arrow/array" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical" |
||||
) |
||||
|
||||
type compareFunc[T comparable] func(a, b T) bool |
||||
|
||||
// NewSortMergePipeline returns a new pipeline that merges already sorted inputs into a single output.
|
||||
func NewSortMergePipeline(inputs []Pipeline, order physical.SortOrder, column physical.ColumnExpression, evaluator expressionEvaluator) (*KWayMerge, error) { |
||||
var lessFunc func(a, b int64) bool |
||||
switch order { |
||||
case physical.ASC: |
||||
lessFunc = func(a, b int64) bool { return a <= b } |
||||
case physical.DESC: |
||||
lessFunc = func(a, b int64) bool { return a >= b } |
||||
default: |
||||
return nil, fmt.Errorf("invalid sort order %v", order) |
||||
} |
||||
|
||||
for i := range inputs { |
||||
inputs[i] = newPrefetchingPipeline(inputs[i]) |
||||
} |
||||
|
||||
return &KWayMerge{ |
||||
inputs: inputs, |
||||
columnEval: evaluator.newFunc(column), |
||||
compare: lessFunc, |
||||
}, nil |
||||
} |
||||
|
||||
// KWayMerge is a k-way merge of multiple sorted inputs.
// It requires the input batches to be sorted in the same order (ASC/DESC) as the SortMerge operator itself.
// The sort order is defined by the direction of the query, which is either FORWARD or BACKWARDS,
// which is applied to the SortMerge as well as to the DataObjScan during query planning.
type KWayMerge struct {
	// inputs are the sorted pipelines being merged.
	inputs []Pipeline
	// initialized guards the one-time setup performed by init.
	initialized bool
	// batches holds the current batch per input; nil when none is loaded.
	batches []arrow.Record
	// exhausted marks inputs that have returned EOF.
	exhausted []bool
	// offsets tracks the next unconsumed row within each input's batch.
	offsets []int64
	// columnEval evaluates the sort column (the timestamp) against a batch.
	columnEval evalFunc
	// compare orders two timestamp values; defaults to ascending in init.
	compare compareFunc[int64]
}

var _ Pipeline = (*KWayMerge)(nil)
||||
|
||||
// Close implements Pipeline.
|
||||
func (p *KWayMerge) Close() { |
||||
for _, batch := range p.batches { |
||||
if batch != nil { |
||||
batch.Release() |
||||
} |
||||
} |
||||
for _, input := range p.inputs { |
||||
input.Close() |
||||
} |
||||
} |
||||
|
||||
// Read implements Pipeline.
|
||||
func (p *KWayMerge) Read(ctx context.Context) (arrow.Record, error) { |
||||
p.init(ctx) |
||||
return p.read(ctx) |
||||
} |
||||
|
||||
func (p *KWayMerge) init(ctx context.Context) { |
||||
if p.initialized { |
||||
return |
||||
} |
||||
|
||||
p.initialized = true |
||||
|
||||
n := len(p.inputs) |
||||
p.batches = make([]arrow.Record, n) |
||||
p.exhausted = make([]bool, n) |
||||
p.offsets = make([]int64, n) |
||||
|
||||
// Initialize pre-fetching on inputs
|
||||
for i := range p.inputs { |
||||
inp, ok := p.inputs[i].(*prefetchWrapper) |
||||
if ok { |
||||
inp.init(ctx) |
||||
} |
||||
} |
||||
|
||||
if p.compare == nil { |
||||
p.compare = func(a, b int64) bool { return a <= b } |
||||
} |
||||
} |
||||
|
||||
// Iterate through each record, looking at the value from their starting slice offset.
|
||||
// Track the top two winners (e.g., the record whose next value is the smallest and the record whose next value is the next smallest).
|
||||
// Find the largest offset in the starting record whose value is still less than the value of the runner-up record from the previous step.
|
||||
// Return the slice of that record using the two offsets, and update the stored offset of the returned record for the next call to Read.
|
||||
func (p *KWayMerge) read(ctx context.Context) (arrow.Record, error) { |
||||
start: |
||||
timestamps := make([]int64, 0, len(p.inputs)) |
||||
inputIndexes := make([]int, 0, len(p.inputs)) |
||||
|
||||
loop: |
||||
for i := range len(p.inputs) { |
||||
// Skip exhausted inputs
|
||||
if p.exhausted[i] { |
||||
continue loop |
||||
} |
||||
|
||||
// Load next batch if it hasn't been loaded yet, or if current one is already fully consumed
|
||||
// Read another batch as long as the input yields zero-length batches.
|
||||
for p.batches[i] == nil || p.offsets[i] == p.batches[i].NumRows() { |
||||
// Reset offset for input at index i
|
||||
p.offsets[i] = 0 |
||||
|
||||
// Release previously fully consumed batch
|
||||
if p.batches[i] != nil { |
||||
p.batches[i].Release() |
||||
p.batches[i] = nil // remove reference to arrow.Record from slice
|
||||
} |
||||
|
||||
// Read next batch from input at index i
|
||||
// If it reaches EOF, mark the input as exhausted and continue with the next input.
|
||||
rec, err := p.inputs[i].Read(ctx) |
||||
if err != nil { |
||||
if errors.Is(err, EOF) { |
||||
p.exhausted[i] = true |
||||
continue loop |
||||
} |
||||
return nil, err |
||||
} |
||||
|
||||
p.batches[i] = rec |
||||
} |
||||
|
||||
// Fetch timestamp value at current offset
|
||||
col, err := p.columnEval(p.batches[i]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer col.Release() |
||||
|
||||
tsCol, ok := col.ToArray().(*array.Timestamp) |
||||
if !ok { |
||||
return nil, errors.New("column is not a timestamp column") |
||||
} |
||||
ts := tsCol.Value(int(p.offsets[i])) |
||||
tsCol.Release() |
||||
|
||||
// Populate slices for sorting
|
||||
inputIndexes = append(inputIndexes, i) |
||||
timestamps = append(timestamps, int64(ts)) |
||||
} |
||||
|
||||
// Pipeline is exhausted if no more input batches are available
|
||||
if !slices.Contains(p.exhausted, false) { |
||||
return nil, EOF |
||||
} |
||||
|
||||
if len(inputIndexes) == 0 { |
||||
goto start |
||||
} |
||||
|
||||
// If there is only a single remaining batch, return the remaining record
|
||||
if len(inputIndexes) == 1 { |
||||
j := inputIndexes[0] |
||||
start := p.offsets[j] |
||||
end := p.batches[j].NumRows() |
||||
|
||||
// check against empty last batch
|
||||
if start >= end || end == 0 { |
||||
return nil, EOF |
||||
} |
||||
|
||||
p.offsets[j] = end |
||||
return p.batches[j].NewSlice(start, end), nil |
||||
} |
||||
|
||||
sortIndexesByTimestamps(inputIndexes, timestamps, p.compare) |
||||
|
||||
// Return the slice of the current record
|
||||
j := inputIndexes[0] |
||||
|
||||
// Fetch timestamp value at current offset
|
||||
col, err := p.columnEval(p.batches[j]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer col.Release() |
||||
// We assume the column is a Uint64 array
|
||||
tsCol, ok := col.ToArray().(*array.Timestamp) |
||||
if !ok { |
||||
return nil, errors.New("column is not a timestamp column") |
||||
} |
||||
defer tsCol.Release() |
||||
|
||||
// Calculate start/end of the sub-slice of the record
|
||||
start := p.offsets[j] |
||||
end := start + 1 |
||||
for ; end < p.batches[j].NumRows(); end++ { |
||||
ts := tsCol.Value(int(end)) |
||||
if !p.compare(int64(ts), timestamps[1]) { |
||||
break |
||||
} |
||||
} |
||||
|
||||
// check against empty batch
|
||||
if start > end || end == 0 { |
||||
p.offsets[j] = end |
||||
return p.batches[j], nil |
||||
} |
||||
|
||||
p.offsets[j] = end |
||||
return p.batches[j].NewSlice(start, end), nil |
||||
} |
||||
|
||||
func sortIndexesByTimestamps(indexes []int, timestamps []int64, lessFn compareFunc[int64]) { |
||||
if len(indexes) != len(timestamps) { |
||||
panic("lengths of indexes and timestamps must match") |
||||
} |
||||
|
||||
pairs := make([]inputTimestampPair, len(indexes)) |
||||
for i := range indexes { |
||||
pairs[i] = inputTimestampPair{indexes[i], timestamps[i]} |
||||
} |
||||
|
||||
// Sort pairs by timestamp
|
||||
sort.SliceStable(pairs, func(i, j int) bool { |
||||
return lessFn(pairs[i].timestamp, pairs[j].timestamp) |
||||
}) |
||||
|
||||
// Unpack the sorted pairs back into the original slices
|
||||
for i := range pairs { |
||||
indexes[i] = pairs[i].index |
||||
timestamps[i] = pairs[i].timestamp |
||||
} |
||||
} |
||||
|
||||
type inputTimestampPair struct { |
||||
index int |
||||
timestamp int64 |
||||
} |
||||
@ -1,146 +0,0 @@ |
||||
package executor |
||||
|
||||
import ( |
||||
"slices" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/apache/arrow-go/v18/arrow" |
||||
"github.com/apache/arrow-go/v18/arrow/array" |
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical" |
||||
"github.com/grafana/loki/v3/pkg/engine/internal/types" |
||||
) |
||||
|
||||
func TestSortMerge(t *testing.T) { |
||||
now := time.Unix(1000000, 0) |
||||
var batchSize = int64(3) |
||||
|
||||
c := &Context{ |
||||
batchSize: batchSize, |
||||
} |
||||
|
||||
t.Run("invalid column name", func(t *testing.T) { |
||||
merge := &physical.SortMerge{ |
||||
Column: &physical.ColumnExpr{ |
||||
Ref: types.ColumnRef{ |
||||
Column: "not_a_timestamp_column", |
||||
Type: types.ColumnTypeBuiltin, |
||||
}, |
||||
}, |
||||
Order: physical.ASC, |
||||
} |
||||
|
||||
inputs := []Pipeline{ |
||||
ascendingTimestampPipeline(now.Add(1*time.Nanosecond)).Pipeline(batchSize, 10), |
||||
ascendingTimestampPipeline(now.Add(2*time.Nanosecond)).Pipeline(batchSize, 10), |
||||
ascendingTimestampPipeline(now.Add(3*time.Nanosecond)).Pipeline(batchSize, 10), |
||||
} |
||||
|
||||
pipeline, err := NewSortMergePipeline(inputs, merge.Order, merge.Column, expressionEvaluator{}) |
||||
require.NoError(t, err) |
||||
|
||||
ctx := t.Context() |
||||
_, err = pipeline.Read(ctx) |
||||
require.ErrorContains(t, err, "column is not a timestamp column") |
||||
}) |
||||
|
||||
t.Run("ascending timestamp", func(t *testing.T) { |
||||
merge := &physical.SortMerge{ |
||||
Column: &physical.ColumnExpr{ |
||||
Ref: types.ColumnRef{ |
||||
Column: types.ColumnNameBuiltinTimestamp, |
||||
Type: types.ColumnTypeBuiltin, |
||||
}, |
||||
}, |
||||
Order: physical.ASC, |
||||
} |
||||
|
||||
inputs := []Pipeline{ |
||||
ascendingTimestampPipeline(now.Add(1*time.Nanosecond)).Pipeline(batchSize, 10), |
||||
ascendingTimestampPipeline(now.Add(2*time.Millisecond)).Pipeline(batchSize, 10), |
||||
ascendingTimestampPipeline(now.Add(3*time.Second)).Pipeline(batchSize, 10), |
||||
} |
||||
|
||||
pipeline, err := NewSortMergePipeline(inputs, merge.Order, merge.Column, expressionEvaluator{}) |
||||
require.NoError(t, err) |
||||
|
||||
ctx := t.Context() |
||||
timestamps := make([]arrow.Timestamp, 0, 30) |
||||
var batches, rows int64 |
||||
for { |
||||
batch, err := pipeline.Read(ctx) |
||||
if err == EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
t.Fatalf("did not expect error, got %s", err.Error()) |
||||
} |
||||
|
||||
tsCol, err := c.evaluator.eval(merge.Column, batch) |
||||
require.NoError(t, err) |
||||
defer tsCol.Release() |
||||
arr := tsCol.ToArray().(*array.Timestamp) |
||||
defer arr.Release() |
||||
|
||||
timestamps = append(timestamps, arr.Values()...) |
||||
batches++ |
||||
rows += batch.NumRows() |
||||
} |
||||
|
||||
// Check if ts column is sorted
|
||||
require.Truef(t, |
||||
slices.IsSortedFunc(timestamps, func(a, b arrow.Timestamp) int { return int(a - b) }), |
||||
"timestamps are not sorted in ASC order: %v", timestamps) |
||||
}) |
||||
|
||||
t.Run("descending timestamp", func(t *testing.T) { |
||||
merge := &physical.SortMerge{ |
||||
Column: &physical.ColumnExpr{ |
||||
Ref: types.ColumnRef{ |
||||
Column: types.ColumnNameBuiltinTimestamp, |
||||
Type: types.ColumnTypeBuiltin, |
||||
}, |
||||
}, |
||||
Order: physical.DESC, |
||||
} |
||||
|
||||
inputs := []Pipeline{ |
||||
descendingTimestampPipeline(now.Add(1*time.Nanosecond)).Pipeline(batchSize, 10), |
||||
descendingTimestampPipeline(now.Add(2*time.Millisecond)).Pipeline(batchSize, 10), |
||||
descendingTimestampPipeline(now.Add(3*time.Second)).Pipeline(batchSize, 10), |
||||
} |
||||
|
||||
pipeline, err := NewSortMergePipeline(inputs, merge.Order, merge.Column, expressionEvaluator{}) |
||||
require.NoError(t, err) |
||||
|
||||
ctx := t.Context() |
||||
timestamps := make([]arrow.Timestamp, 0, 30) |
||||
var batches, rows int64 |
||||
for { |
||||
batch, err := pipeline.Read(ctx) |
||||
if err == EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
t.Fatalf("did not expect error, got %s", err.Error()) |
||||
} |
||||
|
||||
tsCol, err := c.evaluator.eval(merge.Column, batch) |
||||
defer tsCol.Release() |
||||
require.NoError(t, err) |
||||
arr := tsCol.ToArray().(*array.Timestamp) |
||||
defer arr.Release() |
||||
|
||||
timestamps = append(timestamps, arr.Values()...) |
||||
batches++ |
||||
rows += batch.NumRows() |
||||
} |
||||
|
||||
// Check if ts column is sorted
|
||||
require.Truef(t, |
||||
slices.IsSortedFunc(timestamps, func(a, b arrow.Timestamp) int { return int(b - a) }), |
||||
"timestamps are not sorted in DESC order: %v", timestamps) |
||||
}) |
||||
} |
||||
@ -1,31 +0,0 @@ |
||||
package physical |
||||
|
||||
import "fmt" |
||||
|
||||
// Merge represents a merge operation in the physical plan that merges
|
||||
// N inputs to 1 output.
|
||||
type Merge struct { |
||||
id string |
||||
} |
||||
|
||||
// ID implements the [Node] interface.
|
||||
// Returns a string that uniquely identifies the node in the plan.
|
||||
func (m *Merge) ID() string { |
||||
if m.id == "" { |
||||
return fmt.Sprintf("%p", m) |
||||
} |
||||
|
||||
return m.id |
||||
} |
||||
|
||||
// Type implements the [Node] interface.
|
||||
// Returns the type of the node.
|
||||
func (m *Merge) Type() NodeType { |
||||
return NodeTypeMerge |
||||
} |
||||
|
||||
// Accept implements the [Node] interface.
|
||||
// Dispatches itself to the provided [Visitor] v
|
||||
func (m *Merge) Accept(v Visitor) error { |
||||
return v.VisitMerge(m) |
||||
} |
||||
@ -0,0 +1,70 @@ |
||||
package physical |
||||
|
||||
import ( |
||||
"fmt" |
||||
) |
||||
|
||||
// ScanTarget represents a target of a [ScanSet].
type ScanTarget struct {
	// Type indicates which target field below is populated.
	Type ScanType

	// DataObject is non-nil if Type is [ScanTypeDataObject]. Despite DataObjScan
	// implementing [Node], the value is not inserted into the graph as a node.
	DataObject *DataObjScan
}
||||
|
||||
// ScanType represents the data being scanned in a target of a [ScanSet].
type ScanType int

const (
	// ScanTypeInvalid is the zero value and denotes an unset scan type.
	ScanTypeInvalid ScanType = iota
	// ScanTypeDataObject denotes a data object scan target.
	ScanTypeDataObject
)

// String returns a string representation of the scan type. Unknown values
// render as "ScanType(N)".
func (ty ScanType) String() string {
	switch ty {
	case ScanTypeDataObject:
		return "ScanTypeDataObject"
	case ScanTypeInvalid:
		return "ScanTypeInvalid"
	default:
		return fmt.Sprintf("ScanType(%d)", ty)
	}
}
||||
|
||||
// ScanSet represents a physical plan operation for reading data from targets.
|
||||
type ScanSet struct { |
||||
id string |
||||
|
||||
// Targets to scan.
|
||||
Targets []*ScanTarget |
||||
|
||||
// Projections are used to limit the columns that are read to the ones
|
||||
// provided in the column expressions to reduce the amount of data that
|
||||
// needs to be processed.
|
||||
Projections []ColumnExpression |
||||
|
||||
// Predicates are used to filter rows to reduce the amount of rows that are
|
||||
// returned. Predicates would almost always contain a time range filter to
|
||||
// only read the logs for the requested time range.
|
||||
Predicates []Expression |
||||
} |
||||
|
||||
// ID returns a string that uniquely identifies the node in the plan.
|
||||
func (s *ScanSet) ID() string { |
||||
if s.id == "" { |
||||
return fmt.Sprintf("%p", s) |
||||
} |
||||
return s.id |
||||
} |
||||
|
||||
// Type returns [NodeTypeScanSet].
|
||||
func (s *ScanSet) Type() NodeType { |
||||
return NodeTypeScanSet |
||||
} |
||||
|
||||
// Accept dispatches s to the provided [Visitor] v.
|
||||
func (s *ScanSet) Accept(v Visitor) error { |
||||
return v.VisitScanSet(s) |
||||
} |
||||
@ -0,0 +1,23 @@ |
||||
package physical |
||||
|
||||
// SortOrder denotes the direction rows are ordered in: unsorted, ascending,
// or descending.
type SortOrder uint8

const (
	UNSORTED SortOrder = iota
	ASC
	DESC
)

// String returns the string representation of the [SortOrder]. Values outside
// the defined set render as "UNDEFINED".
func (o SortOrder) String() string {
	switch o {
	case ASC:
		return "ASC"
	case DESC:
		return "DESC"
	case UNSORTED:
		return "UNSORTED"
	}
	return "UNDEFINED"
}
||||
@ -1,63 +0,0 @@ |
||||
package physical |
||||
|
||||
import "fmt" |
||||
|
||||
// SortOrder denotes the direction rows are ordered in: unsorted, ascending,
// or descending.
type SortOrder uint8

const (
	UNSORTED SortOrder = iota
	ASC
	DESC
)

// String returns the string representation of the [SortOrder]. Values outside
// the defined set render as "UNDEFINED".
func (o SortOrder) String() string {
	switch o {
	case ASC:
		return "ASC"
	case DESC:
		return "DESC"
	case UNSORTED:
		return "UNSORTED"
	}
	return "UNDEFINED"
}
||||
|
||||
// SortMerge represents a sort+merge operation in the physical plan. It
|
||||
// performs sorting of data based on the specified Column and Order direction.
|
||||
type SortMerge struct { |
||||
id string |
||||
|
||||
// Column defines the column expression by which the rows should be sorted.
|
||||
// This is almost always the timestamp column, because it is the column
|
||||
// by which the results of the DataObjScan node are sorted. This allows
|
||||
// for sorting and merging multiple already sorted inputs from the DataObjScan
|
||||
// without being a pipeline breaker.
|
||||
Column ColumnExpression |
||||
// Order defines whether the column should be sorted in ascending or
|
||||
// descending order. Must match the read direction of the DataObjScan that
|
||||
// feeds into the SortMerge.
|
||||
Order SortOrder |
||||
} |
||||
|
||||
// ID implements the [Node] interface.
|
||||
// Returns a string that uniquely identifies the node in the plan.
|
||||
func (m *SortMerge) ID() string { |
||||
if m.id == "" { |
||||
return fmt.Sprintf("%p", m) |
||||
} |
||||
return m.id |
||||
} |
||||
|
||||
// Type implements the [Node] interface.
|
||||
// Returns the type of the node.
|
||||
func (*SortMerge) Type() NodeType { |
||||
return NodeTypeSortMerge |
||||
} |
||||
|
||||
// Accept implements the [Node] interface.
|
||||
// Dispatches itself to the provided [Visitor] v
|
||||
func (m *SortMerge) Accept(v Visitor) error { |
||||
return v.VisitSortMerge(m) |
||||
} |
||||
Loading…
Reference in new issue