From 6efbb873c720b3f1af6a8a401fc738773663bb4e Mon Sep 17 00:00:00 2001 From: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:48:29 +0100 Subject: [PATCH 1/2] promql: Fix collision error with delayed name removal for non-overlapping series Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --- promql/engine.go | 99 ++++++++++++++++++- .../testdata/name_label_dropping.test | 10 ++ 2 files changed, 104 insertions(+), 5 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 6ba6008b19..37c4e12cd9 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -1175,7 +1175,7 @@ func (ev *evaluator) Eval(ctx context.Context, expr parser.Expr) (v parser.Value v, ws = ev.eval(ctx, expr) if ev.enableDelayedNameRemoval { - ev.cleanupMetricLabels(v) + v = ev.cleanupMetricLabels(v) } return v, ws, nil } @@ -3832,7 +3832,7 @@ func (*evaluator) aggregationCountValues(e *parser.AggregateExpr, grouping []str return enh.Out, nil } -func (ev *evaluator) cleanupMetricLabels(v parser.Value) { +func (ev *evaluator) cleanupMetricLabels(v parser.Value) parser.Value { if v.Type() == parser.ValueTypeMatrix { mat := v.(Matrix) for i := range mat { @@ -3840,9 +3840,7 @@ func (ev *evaluator) cleanupMetricLabels(v parser.Value) { mat[i].Metric = mat[i].Metric.DropReserved(schema.IsMetadataLabel) } } - if mat.ContainsSameLabelset() { - ev.errorf("vector cannot contain metrics with the same labelset") - } + return ev.mergeSeriesWithSameLabelset(mat) } else if v.Type() == parser.ValueTypeVector { vec := v.(Vector) for i := range vec { @@ -3853,7 +3851,98 @@ func (ev *evaluator) cleanupMetricLabels(v parser.Value) { if vec.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") } + return vec + } + return v +} + +// mergeSeriesWithSameLabelset merges series in a matrix that have the same labelset +// after __name__ label removal. This happens when delayed name removal is enabled and +// operations like OR combine series that originally had different names but end up +// with the same labelset after dropping the name. If series with the same labelset +// have overlapping timestamps, an error is returned. +func (ev *evaluator) mergeSeriesWithSameLabelset(mat Matrix) Matrix { + if len(mat) <= 1 { + return mat + } + + // Group series by their labelset hash. + seriesByHash := make(map[uint64][]int) + for i := range mat { + hash := mat[i].Metric.Hash() + seriesByHash[hash] = append(seriesByHash[hash], i) + } + + // Check if any merging is needed. + needsMerge := false + for _, indices := range seriesByHash { + if len(indices) > 1 { + needsMerge = true + break + } + } + + if !needsMerge { + return mat + } + + // Merge series with the same labelset. + merged := make(Matrix, 0, len(seriesByHash)) + for _, indices := range seriesByHash { + base := mat[indices[0]] + + if len(indices) == 1 { + // No collision, add as-is. + merged = append(merged, base) + continue + } + + // Multiple series with the same labelset - check for overlaps and merge. + // Build a set of timestamps to detect overlaps. + timestamps := make(map[int64]struct{}, len(base.Floats)+len(base.Histograms)) + for _, p := range base.Floats { + timestamps[p.T] = struct{}{} + } + for _, p := range base.Histograms { + timestamps[p.T] = struct{}{} + } + + // Merge remaining series, checking for timestamp overlaps. + for _, idx := range indices[1:] { + series := mat[idx] + + // Check floats for overlaps. + for _, p := range series.Floats { + if _, exists := timestamps[p.T]; exists { + ev.errorf("vector cannot contain metrics with the same labelset") + } + timestamps[p.T] = struct{}{} + } + // Check histograms for overlaps. + for _, p := range series.Histograms { + if _, exists := timestamps[p.T]; exists { + ev.errorf("vector cannot contain metrics with the same labelset") + } + timestamps[p.T] = struct{}{} + } + + // No overlaps, merge the samples. + base.Floats = append(base.Floats, series.Floats...) + base.Histograms = append(base.Histograms, series.Histograms...) + } + + // Sort merged samples by timestamp. + sort.Slice(base.Floats, func(i, j int) bool { + return base.Floats[i].T < base.Floats[j].T + }) + sort.Slice(base.Histograms, func(i, j int) bool { + return base.Histograms[i].T < base.Histograms[j].T + }) + + merged = append(merged, base) } + + return merged } func addToSeries(ss *Series, ts int64, f float64, h *histogram.FloatHistogram, numSteps int) { diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 3a6f4098df..e0180c7ffe 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -126,3 +126,13 @@ eval instant at 10m sum by (__name__) (metric_total{env="3"} or rate(metric_tota # Same as above, but with reversed order. eval instant at 10m sum by (__name__) (rate(metric_total{env="3"}[5m]) or metric_total{env="1"}) metric_total 10 + +clear + +# Test delayed name removal with range queries and OR operator. +load 10m + metric_a 1 _ + metric_b 3 4 + +eval range from 0 to 20m step 10m -metric_a or -metric_b + {} -1 -4 _ From 29878f7b91cf485448ccd571c7ac3d35ec43dabc Mon Sep 17 00:00:00 2001 From: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> Date: Mon, 15 Dec 2025 11:56:34 +0100 Subject: [PATCH 2/2] promql: Optimize mergeSeriesWithSameLabelset for common case Add fast path that returns early when no duplicate labelsets exist, avoiding allocations in the common case. For the merge case, simplify collision detection by checking for duplicate timestamps after sorting instead of building a timestamp map, reducing memory overhead. Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --- promql/engine.go | 71 ++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 37c4e12cd9..07fb03d66c 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3866,6 +3866,13 @@ func (ev *evaluator) mergeSeriesWithSameLabelset(mat Matrix) Matrix { return mat } + // Fast path: check if there are any duplicate labelsets without allocating. + // This is the common case and we want to avoid allocations. + if !mat.ContainsSameLabelset() { + return mat + } + + // Slow path: there are duplicates, so we need to merge series with non-overlapping timestamps. // Group series by their labelset hash. seriesByHash := make(map[uint64][]int) for i := range mat { @@ -3873,62 +3880,20 @@ func (ev *evaluator) mergeSeriesWithSameLabelset(mat Matrix) Matrix { seriesByHash[hash] = append(seriesByHash[hash], i) } - // Check if any merging is needed. - needsMerge := false - for _, indices := range seriesByHash { - if len(indices) > 1 { - needsMerge = true - break - } - } - - if !needsMerge { - return mat - } - // Merge series with the same labelset. merged := make(Matrix, 0, len(seriesByHash)) for _, indices := range seriesByHash { - base := mat[indices[0]] - if len(indices) == 1 { // No collision, add as-is. - merged = append(merged, base) + merged = append(merged, mat[indices[0]]) continue } - // Multiple series with the same labelset - check for overlaps and merge. - // Build a set of timestamps to detect overlaps. - timestamps := make(map[int64]struct{}, len(base.Floats)+len(base.Histograms)) - for _, p := range base.Floats { - timestamps[p.T] = struct{}{} - } - for _, p := range base.Histograms { - timestamps[p.T] = struct{}{} - } - - // Merge remaining series, checking for timestamp overlaps. + // Multiple series with the same labelset - merge all samples. + base := mat[indices[0]] for _, idx := range indices[1:] { - series := mat[idx] - - // Check floats for overlaps. - for _, p := range series.Floats { - if _, exists := timestamps[p.T]; exists { - ev.errorf("vector cannot contain metrics with the same labelset") - } - timestamps[p.T] = struct{}{} - } - // Check histograms for overlaps. - for _, p := range series.Histograms { - if _, exists := timestamps[p.T]; exists { - ev.errorf("vector cannot contain metrics with the same labelset") - } - timestamps[p.T] = struct{}{} - } - - // No overlaps, merge the samples. - base.Floats = append(base.Floats, series.Floats...) - base.Histograms = append(base.Histograms, series.Histograms...) + base.Floats = append(base.Floats, mat[idx].Floats...) + base.Histograms = append(base.Histograms, mat[idx].Histograms...) } // Sort merged samples by timestamp. @@ -3939,6 +3904,18 @@ func (ev *evaluator) mergeSeriesWithSameLabelset(mat Matrix) Matrix { return base.Histograms[i].T < base.Histograms[j].T }) + // Check for duplicate timestamps in sorted samples. + for i := 1; i < len(base.Floats); i++ { + if base.Floats[i].T == base.Floats[i-1].T { + ev.errorf("vector cannot contain metrics with the same labelset") + } + } + for i := 1; i < len(base.Histograms); i++ { + if base.Histograms[i].T == base.Histograms[i-1].T { + ev.errorf("vector cannot contain metrics with the same labelset") + } + } + merged = append(merged, base) }