Sharding optimizations (#10101)

A few bugfixes and more sharding optimizations:
* fix a bug in `<aggr> by|without ()` groupings where the grouping was
dropped from the queries sent downstream
* shardable implementations for `max` and `min`, plus operation-specific
merge strategies that enable many more types of sharded requests, even
when label reduction is performed at the edge.
Owen Diehl 2 years ago committed by GitHub
parent 42b8a6cbca
commit 9097f1ff42
Changed files:

1. pkg/logql/shardmapper.go (270)
2. pkg/logql/shardmapper_test.go (194)
3. pkg/logql/syntax/ast.go (136)
4. pkg/logql/syntax/ast_test.go (4)
5. pkg/querier/queryrange/querysharding_test.go (2)
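
Why the operation-specific merges matter, in miniature: once a stage like `label_format` collapses distinct streams into the same labelset, that labelset can surface on several shards at once, and plain concatenation of downstream results would emit duplicate series. A minimal, self-contained sketch of the merge idea (illustrative only, not Loki code):

package main

import "fmt"

func main() {
	// count_over_time partials for the *same* labelset, one per shard,
	// e.g. after `label_format` collapsed two streams into {foo="bar"}:
	partials := map[string][]float64{`{foo="bar"}`: {7, 5}}

	// naive concatenation would emit two samples for {foo="bar"};
	// the operation-specific merge (sum, for count_over_time) emits one:
	for labels, vs := range partials {
		total := 0.0
		for _, v := range vs {
			total += v
		}
		fmt.Println(labels, total) // {foo="bar"} 12
	}
}

In the mapper itself this recombination is expressed by wrapping the concatenated downstream expressions in a vector aggregation chosen per operation, as the diff below shows.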

pkg/logql/shardmapper.go

@@ -180,96 +180,125 @@ func (m ShardMapper) mapSampleExpr(expr syntax.SampleExpr, r *downstreamRecorder

 	return head, bytesPerShard, nil
 }

+// turn a vector aggr into a wrapped+sharded variant,
+// used as a subroutine in mapping
+func (m ShardMapper) wrappedShardedVectorAggr(expr *syntax.VectorAggregationExpr, r *downstreamRecorder) (*syntax.VectorAggregationExpr, uint64, error) {
+	sharded, bytesPerShard, err := m.mapSampleExpr(expr, r)
+	if err != nil {
+		return nil, 0, err
+	}
+	return &syntax.VectorAggregationExpr{
+		Left:      sharded,
+		Grouping:  expr.Grouping,
+		Params:    expr.Params,
+		Operation: expr.Operation,
+	}, bytesPerShard, nil
+}
+
 // technically, std{dev,var} are also parallelizable if there is no cross-shard merging
 // in descendent nodes in the AST. This optimization is currently avoided for simplicity.
 func (m ShardMapper) mapVectorAggregationExpr(expr *syntax.VectorAggregationExpr, r *downstreamRecorder) (syntax.SampleExpr, uint64, error) {
-	// if this AST contains unshardable operations, don't shard this at this level,
-	// but attempt to shard a child node.
-	if !expr.Shardable() {
-		subMapped, bytesPerShard, err := m.Map(expr.Left, r)
-		if err != nil {
-			return nil, 0, err
-		}
-		sampleExpr, ok := subMapped.(syntax.SampleExpr)
-		if !ok {
-			return nil, 0, badASTMapping(subMapped)
-		}
-		return &syntax.VectorAggregationExpr{
-			Left:      sampleExpr,
-			Grouping:  expr.Grouping,
-			Params:    expr.Params,
-			Operation: expr.Operation,
-		}, bytesPerShard, nil
-	}
-
-	switch expr.Operation {
-	case syntax.OpTypeSum:
-		// sum(x) -> sum(sum(x, shard=1) ++ sum(x, shard=2)...)
-		sharded, bytesPerShard, err := m.mapSampleExpr(expr, r)
-		if err != nil {
-			return nil, 0, err
-		}
-		return &syntax.VectorAggregationExpr{
-			Left:      sharded,
-			Grouping:  expr.Grouping,
-			Params:    expr.Params,
-			Operation: expr.Operation,
-		}, bytesPerShard, nil
-
-	case syntax.OpTypeAvg:
-		// avg(x) -> sum(x)/count(x)
-		lhs, lhsBytesPerShard, err := m.mapVectorAggregationExpr(&syntax.VectorAggregationExpr{
-			Left:      expr.Left,
-			Grouping:  expr.Grouping,
-			Operation: syntax.OpTypeSum,
-		}, r)
-		if err != nil {
-			return nil, 0, err
-		}
-		rhs, rhsBytesPerShard, err := m.mapVectorAggregationExpr(&syntax.VectorAggregationExpr{
-			Left:      expr.Left,
-			Grouping:  expr.Grouping,
-			Operation: syntax.OpTypeCount,
-		}, r)
-		if err != nil {
-			return nil, 0, err
-		}
-		// We take the maximum bytes per shard of both sides of the operation
-		bytesPerShard := uint64(math.Max(int(lhsBytesPerShard), int(rhsBytesPerShard)))
-		return &syntax.BinOpExpr{
-			SampleExpr: lhs,
-			RHS:        rhs,
-			Op:         syntax.OpTypeDiv,
-		}, bytesPerShard, nil
-
-	case syntax.OpTypeCount:
-		// count(x) -> sum(count(x, shard=1) ++ count(x, shard=2)...)
-		sharded, bytesPerShard, err := m.mapSampleExpr(expr, r)
-		if err != nil {
-			return nil, 0, err
-		}
-		return &syntax.VectorAggregationExpr{
-			Left:      sharded,
-			Grouping:  expr.Grouping,
-			Operation: syntax.OpTypeSum,
-		}, bytesPerShard, nil
-
-	default:
-		// this should not be reachable. If an operation is shardable it should
-		// have an optimization listed.
-		level.Warn(util_log.Logger).Log(
-			"msg", "unexpected operation which appears shardable, ignoring",
-			"operation", expr.Operation,
-		)
-		exprStats, err := m.shards.GetStats(expr)
-		if err != nil {
-			return nil, 0, err
-		}
-		return expr, exprStats.Bytes, nil
-	}
+	if expr.Shardable() {
+		switch expr.Operation {
+
+		case syntax.OpTypeSum:
+			// sum(x) -> sum(sum(x, shard=1) ++ sum(x, shard=2)...)
+			return m.wrappedShardedVectorAggr(expr, r)
+
+		case syntax.OpTypeMin, syntax.OpTypeMax:
+			if syntax.ReducesLabels(expr) {
+				// skip sharding optimizations at this level. If labels are reduced,
+				// the same series may exist on multiple shards and must be aggregated
+				// together before a max|min is applied
+				break
+			}
+			// max(x) -> max(max(x, shard=1) ++ max(x, shard=2)...)
+			// min(x) -> min(min(x, shard=1) ++ min(x, shard=2)...)
+			return m.wrappedShardedVectorAggr(expr, r)
+
+		case syntax.OpTypeAvg:
+			// avg(x) -> sum(x)/count(x), which is parallelizable
+			lhs, lhsBytesPerShard, err := m.mapVectorAggregationExpr(&syntax.VectorAggregationExpr{
+				Left:      expr.Left,
+				Grouping:  expr.Grouping,
+				Operation: syntax.OpTypeSum,
+			}, r)
+			if err != nil {
+				return nil, 0, err
+			}
+
+			rhs, rhsBytesPerShard, err := m.mapVectorAggregationExpr(&syntax.VectorAggregationExpr{
+				Left:      expr.Left,
+				Grouping:  expr.Grouping,
+				Operation: syntax.OpTypeCount,
+			}, r)
+			if err != nil {
+				return nil, 0, err
+			}
+
+			// We take the maximum bytes per shard of both sides of the operation
+			bytesPerShard := uint64(math.Max(int(lhsBytesPerShard), int(rhsBytesPerShard)))
+
+			return &syntax.BinOpExpr{
+				SampleExpr: lhs,
+				RHS:        rhs,
+				Op:         syntax.OpTypeDiv,
+			}, bytesPerShard, nil
+
+		case syntax.OpTypeCount:
+			if syntax.ReducesLabels(expr) {
+				// skip sharding optimizations at this level. If labels are reduced,
+				// the same series may exist on multiple shards and must be aggregated
+				// together before a count is applied
+				break
+			}
+
+			// count(x) -> sum(count(x, shard=1) ++ count(x, shard=2)...)
+			sharded, bytesPerShard, err := m.mapSampleExpr(expr, r)
+			if err != nil {
+				return nil, 0, err
+			}
+			return &syntax.VectorAggregationExpr{
+				Left:      sharded,
+				Grouping:  expr.Grouping,
+				Operation: syntax.OpTypeSum,
+			}, bytesPerShard, nil
+
+		default:
+			// this should not be reachable. If an operation is shardable it should
+			// have an optimization listed. Nonetheless, we log this as a warning
+			// and return the original expression unsharded.
+			level.Warn(util_log.Logger).Log(
+				"msg", "unexpected operation which appears shardable, ignoring",
+				"operation", expr.Operation,
+			)
+			exprStats, err := m.shards.GetStats(expr)
+			if err != nil {
+				return nil, 0, err
+			}
+			return expr, exprStats.Bytes, nil
+		}
+	}
+
+	// if this AST contains unshardable operations, don't shard this at this level,
+	// but attempt to shard a child node.
+	subMapped, bytesPerShard, err := m.Map(expr.Left, r)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	sampleExpr, ok := subMapped.(syntax.SampleExpr)
+	if !ok {
+		return nil, 0, badASTMapping(subMapped)
+	}
+
+	return &syntax.VectorAggregationExpr{
+		Left:      sampleExpr,
+		Grouping:  expr.Grouping,
+		Params:    expr.Params,
+		Operation: expr.Operation,
+	}, bytesPerShard, nil
 }
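
Why these particular mappings are safe is easiest to see with numbers: max and min commute with sharding, while avg does not and has to be decomposed into sum/count. A self-contained sketch (illustrative only, not Loki code):

package main

import "fmt"

func sumOf(xs ...float64) (s float64) {
	for _, x := range xs {
		s += x
	}
	return
}

func maxOf(xs ...float64) float64 {
	m := xs[0]
	for _, x := range xs[1:] {
		if x > m {
			m = x
		}
	}
	return m
}

func main() {
	shard1, shard2 := []float64{1, 2, 3}, []float64{10}

	// max pushdown is safe: the max of per-shard maxes is the global max
	fmt.Println(maxOf(maxOf(shard1...), maxOf(shard2...))) // 10

	// avg is not: avg(avg(shard1), avg(shard2)) = (2+10)/2 = 6,
	// while the true average is (1+2+3+10)/4 = 4, hence sum(x)/count(x)
	sum := sumOf(shard1...) + sumOf(shard2...)
	count := float64(len(shard1) + len(shard2))
	fmt.Println(sum / count) // 4
}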
@@ -283,52 +312,77 @@ func (m ShardMapper) mapLabelReplaceExpr(expr *syntax.LabelReplaceExpr, r *downs
 }

 func (m ShardMapper) mapRangeAggregationExpr(expr *syntax.RangeAggregationExpr, r *downstreamRecorder) (syntax.SampleExpr, uint64, error) {
-	if hasLabelModifier(expr) {
-		// if an expr can modify labels this means multiple shards can return the same labelset.
-		// When this happens the merge strategy needs to be different from a simple concatenation.
-		// For instance for rates we need to sum data from different shards but same series.
-		// Since we currently support only concatenation as merge strategy, we skip those queries.
+	if !expr.Shardable() {
 		exprStats, err := m.shards.GetStats(expr)
 		if err != nil {
 			return nil, 0, err
 		}
 		return expr, exprStats.Bytes, nil
 	}

 	switch expr.Operation {
-	case syntax.OpRangeTypeCount, syntax.OpRangeTypeRate, syntax.OpRangeTypeBytesRate, syntax.OpRangeTypeBytes:
-		// count_over_time(x) -> count_over_time(x, shard=1) ++ count_over_time(x, shard=2)...
-		// rate(x) -> rate(x, shard=1) ++ rate(x, shard=2)...
-		// same goes for bytes_rate and bytes_over_time
-		return m.mapSampleExpr(expr, r)
+	case syntax.OpRangeTypeCount, syntax.OpRangeTypeRate, syntax.OpRangeTypeBytes, syntax.OpRangeTypeBytesRate, syntax.OpRangeTypeSum, syntax.OpRangeTypeMax, syntax.OpRangeTypeMin:
+		// if the expr can reduce labels, it can cause the same labelset to
+		// exist on separate shards and we'll need to merge the results
+		// accordingly. If it does not reduce labels and has no special grouping
+		// aggregation, we can shard it as normal via concatenation.
+		potentialConflict := syntax.ReducesLabels(expr)
+		if !potentialConflict && (expr.Grouping == nil || expr.Grouping.Noop()) {
+			return m.mapSampleExpr(expr, r)
+		}
+
+		// These functions require a different merge strategy than the default
+		// concatenation. This is because the same label sets may exist on
+		// multiple shards when label-reducing parsing is applied or when
+		// grouping by some subset of the labels. In this case, the resulting
+		// vector may have multiple values for the same series and we need to
+		// combine them appropriately given a particular operation.
+		mergeMap := map[string]string{
+			// all these may be summed
+			syntax.OpRangeTypeCount:     syntax.OpTypeSum,
+			syntax.OpRangeTypeRate:      syntax.OpTypeSum,
+			syntax.OpRangeTypeBytes:     syntax.OpTypeSum,
+			syntax.OpRangeTypeBytesRate: syntax.OpTypeSum,
+			syntax.OpRangeTypeSum:       syntax.OpTypeSum,
+
+			// min & max require taking the min|max of the shards
+			syntax.OpRangeTypeMin: syntax.OpTypeMin,
+			syntax.OpRangeTypeMax: syntax.OpTypeMax,
+		}
+
+		// range aggregation groupings default to `without ()` behavior
+		// so we explicitly set the wrapping vector aggregation to this
+		// for parity when it's not explicitly set
+		grouping := expr.Grouping
+		if grouping == nil {
+			grouping = &syntax.Grouping{Without: true}
+		}
+
+		mapped, bytes, err := m.mapSampleExpr(expr, r)
+
+		// max_over_time(_) -> max without() (max_over_time(_) ++ max_over_time(_)...)
+		// max_over_time(_) by (foo) -> max by (foo) (max_over_time(_) by (foo) ++ max_over_time(_) by (foo)...)
+		merger, ok := mergeMap[expr.Operation]
+		if !ok {
+			return nil, 0, fmt.Errorf(
+				"error while finding merge operation for %s", expr.Operation,
+			)
+		}
+		return &syntax.VectorAggregationExpr{
+			Left:      mapped,
+			Grouping:  grouping,
+			Operation: merger,
+		}, bytes, err

 	default:
-		// This part of the query is not shardable, so the bytesPerShard is the bytes for all the log matchers in expr
+		// don't shard if there's not an appropriate optimization
 		exprStats, err := m.shards.GetStats(expr)
 		if err != nil {
 			return nil, 0, err
 		}
 		return expr, exprStats.Bytes, nil
 	}
 }

-// hasLabelModifier tells if an expression contains pipelines that can modify stream labels
-// parsers introduce new labels but does not alter original one for instance.
-func hasLabelModifier(expr *syntax.RangeAggregationExpr) bool {
-	switch ex := expr.Left.Left.(type) {
-	case *syntax.MatchersExpr:
-		return false
-	case *syntax.PipelineExpr:
-		for _, p := range ex.MultiStages {
-			if _, ok := p.(*syntax.LabelFmtExpr); ok {
-				return true
-			}
-		}
-	}
-	return false
-}
-
 func badASTMapping(got syntax.Expr) error {
 	return fmt.Errorf("bad AST mapping: expected SampleExpr, but got (%T)", got)
 }
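
The `without ()` default above is load-bearing: an empty `without` grouping preserves every label, while an empty `by` grouping (or a bare vector aggregation) collapses them all, which is why the earlier bug of dropping `<aggr> by|without ()` groupings changed query results. A self-contained sketch of the two behaviors (illustrative only, not Loki code):

package main

import "fmt"

func main() {
	series := map[string]float64{`{app="a"}`: 1, `{app="b"}`: 2}

	// `sum without ()`: group by the full remaining labelset,
	// i.e. keep both series (a no-op grouping)
	fmt.Println(series) // map[{app="a"}:1 {app="b"}:2]

	// `sum by ()` or a bare `sum`: collapse to one label-less series
	total := 0.0
	for _, v := range series {
		total += v
	}
	fmt.Println(total) // 3
}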

pkg/logql/shardmapper_test.go

@@ -154,30 +154,91 @@ func TestMappingStrings(t *testing.T) {
 		{
 			in: `sum(max(rate({foo="bar"}[5m])))`,
 			out: `sum(max(
-				downstream<rate({foo="bar"}[5m]), shard=0_of_2>
-				++ downstream<rate({foo="bar"}[5m]), shard=1_of_2>
+				downstream<max(rate({foo="bar"}[5m])), shard=0_of_2>
+				++ downstream<max(rate({foo="bar"}[5m])), shard=1_of_2>
 			))`,
 		},
 		{
-			in:  `sum(max(rate({foo="bar"} | json | label_format foo=bar [5m])))`,
-			out: `sum(max(rate({foo="bar"} | json | label_format foo=bar [5m])))`,
+			in: `max without (env) (rate({foo="bar"}[5m]))`,
+			out: `max without (env) (
+				downstream<max without (env)(rate({foo="bar"}[5m])), shard=0_of_2> ++ downstream<max without (env)(rate({foo="bar"}[5m])), shard=1_of_2>
+			)`,
+		},
+		{
+			in: `sum(max(rate({foo="bar"} | json | label_format foo=bar [5m])))`,
+			out: `sum(
+				max(
+					sum without() (
+						downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=0_of_2>
+						++
+						downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=1_of_2>
+					)
+				)
+			)`,
+		},
+		{
+			in: `max(sum by (abc) (rate({foo="bar"} | json | label_format bazz=buzz [5m])))`,
+			out: `max(
+				sum by (abc) (
+					downstream<sumby(abc)(rate({foo="bar"}|json|label_formatbazz=buzz[5m])),shard=0_of_2>
+					++
+					downstream<sumby(abc)(rate({foo="bar"}|json|label_formatbazz=buzz[5m])),shard=1_of_2>
+				)
+			)`,
 		},
 		{
-			in:  `rate({foo="bar"} | json | label_format foo=bar [5m])`,
-			out: `rate({foo="bar"} | json | label_format foo=bar [5m])`,
+			in: `rate({foo="bar"} | json | label_format foo=bar [5m])`,
+			out: `sum without()(
+				downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=0_of_2>
+				++
+				downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=1_of_2>
+			)`,
 		},
 		{
 			in: `count(rate({foo="bar"} | json [5m]))`,
-			out: `count(
-				downstream<rate({foo="bar"} | json [5m]), shard=0_of_2>
-				++ downstream<rate({foo="bar"} | json [5m]), shard=1_of_2>
+			out: `sum(
+				downstream<count(rate({foo="bar"}|json[5m])),shard=0_of_2>
+				++
+				downstream<count(rate({foo="bar"}|json[5m])),shard=1_of_2>
 			)`,
 		},
 		{
 			in: `avg(rate({foo="bar"} | json [5m]))`,
-			out: `avg(
-				downstream<rate({foo="bar"} | json [5m]), shard=0_of_2>
-				++ downstream<rate({foo="bar"} | json [5m]), shard=1_of_2>
+			out: `(
+				sum(
+					downstream<sum(rate({foo="bar"}|json[5m])),shard=0_of_2>++downstream<sum(rate({foo="bar"}|json[5m])),shard=1_of_2>
+				)
+				/
+				sum(
+					downstream<count(rate({foo="bar"}|json[5m])),shard=0_of_2>++downstream<count(rate({foo="bar"}|json[5m])),shard=1_of_2>
+				)
 			)`,
 		},
+		{
+			in: `count(rate({foo="bar"} | json | keep foo [5m]))`,
+			out: `count(
+				sum without()(
+					downstream<rate({foo="bar"}|json|keepfoo[5m]),shard=0_of_2>
+					++
+					downstream<rate({foo="bar"}|json|keepfoo[5m]),shard=1_of_2>
+				)
+			)`,
+		},
+		{
+			// renaming reduces the labelset and must be reaggregated before counting
+			in: `count(rate({foo="bar"} | json | label_format foo=bar [5m]))`,
+			out: `count(
+				sum without() (
+					downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=0_of_2>
+					++
+					downstream<rate({foo="bar"}|json|label_formatfoo=bar[5m]),shard=1_of_2>
+				)
+			)`,
+		},
+		{
+			in: `sum without () (rate({job="foo"}[5m]))`,
+			out: `sumwithout()(
+				downstream<sumwithout()(rate({job="foo"}[5m])),shard=0_of_2>++downstream<sumwithout()(rate({job="foo"}[5m])),shard=1_of_2>
+			)`,
+		},
 		{
@@ -223,9 +284,12 @@ func TestMappingStrings(t *testing.T) {
 			)`,
 		},
 		{
-			// Ensure we don't try to shard expressions that include label reformatting.
-			in:  `sum(count_over_time({foo="bar"} | logfmt | label_format bar=baz | bar="buz" [5m]))`,
-			out: `sum(count_over_time({foo="bar"} | logfmt | label_format bar=baz | bar="buz" [5m]))`,
+			in: `sum(count_over_time({foo="bar"} | logfmt | label_format bar=baz | bar="buz" [5m])) by (bar)`,
+			out: `sum by (bar) (
+				downstream<sum by (bar) (count_over_time({foo="bar"}|logfmt|label_formatbar=baz|bar="buz"[5m])),shard=0_of_2>
+				++
+				downstream<sum by (bar) (count_over_time({foo="bar"}|logfmt|label_formatbar=baz|bar="buz"[5m])),shard=1_of_2>
+			)`,
 		},
 		{
 			in: `sum by (cluster) (rate({foo="bar"} [5m])) + ignoring(machine) sum by (cluster,machine) (rate({foo="bar"} [5m]))`,
@@ -255,6 +319,14 @@ func TestMappingStrings(t *testing.T) {
 				)
 			)`,
 		},
+		{
+			in: `max_over_time({foo="ugh"} | unwrap baz [1m]) by ()`,
+			out: `max(
+				downstream<max_over_time({foo="ugh"}|unwrapbaz[1m])by(),shard=0_of_2>
+				++
+				downstream<max_over_time({foo="ugh"}|unwrapbaz[1m])by(),shard=1_of_2>
+			)`,
+		},
 		{
 			in:  `avg(avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m]))`,
 			out: `avg(avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m]))`,
@@ -554,51 +626,6 @@ func TestMapping(t *testing.T) {
 				},
 			},
 		},
-		{
-			in: `max without (env) (rate({foo="bar"}[5m]))`,
-			expr: &syntax.VectorAggregationExpr{
-				Grouping: &syntax.Grouping{
-					Without: true,
-					Groups:  []string{"env"},
-				},
-				Operation: syntax.OpTypeMax,
-				Left: &ConcatSampleExpr{
-					DownstreamSampleExpr: DownstreamSampleExpr{
-						shard: &astmapper.ShardAnnotation{
-							Shard: 0,
-							Of:    2,
-						},
-						SampleExpr: &syntax.RangeAggregationExpr{
-							Operation: syntax.OpRangeTypeRate,
-							Left: &syntax.LogRange{
-								Left: &syntax.MatchersExpr{
-									Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")},
-								},
-								Interval: 5 * time.Minute,
-							},
-						},
-					},
-					next: &ConcatSampleExpr{
-						DownstreamSampleExpr: DownstreamSampleExpr{
-							shard: &astmapper.ShardAnnotation{
-								Shard: 1,
-								Of:    2,
-							},
-							SampleExpr: &syntax.RangeAggregationExpr{
-								Operation: syntax.OpRangeTypeRate,
-								Left: &syntax.LogRange{
-									Left: &syntax.MatchersExpr{
-										Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")},
-									},
-									Interval: 5 * time.Minute,
-								},
-							},
-						},
-						next: nil,
-					},
-				},
-			},
-		},
 		{
 			in: `count(rate({foo="bar"}[5m]))`,
 			expr: &syntax.VectorAggregationExpr{
@@ -871,53 +898,6 @@ func TestMapping(t *testing.T) {
 				},
 			},
 		},
-		// sum(max) should not shard the maxes
-		{
-			in: `sum(max(rate({foo="bar"}[5m])))`,
-			expr: &syntax.VectorAggregationExpr{
-				Grouping:  &syntax.Grouping{},
-				Operation: syntax.OpTypeSum,
-				Left: &syntax.VectorAggregationExpr{
-					Grouping:  &syntax.Grouping{},
-					Operation: syntax.OpTypeMax,
-					Left: &ConcatSampleExpr{
-						DownstreamSampleExpr: DownstreamSampleExpr{
-							shard: &astmapper.ShardAnnotation{
-								Shard: 0,
-								Of:    2,
-							},
-							SampleExpr: &syntax.RangeAggregationExpr{
-								Operation: syntax.OpRangeTypeRate,
-								Left: &syntax.LogRange{
-									Left: &syntax.MatchersExpr{
-										Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")},
-									},
-									Interval: 5 * time.Minute,
-								},
-							},
-						},
-						next: &ConcatSampleExpr{
-							DownstreamSampleExpr: DownstreamSampleExpr{
-								shard: &astmapper.ShardAnnotation{
-									Shard: 1,
-									Of:    2,
-								},
-								SampleExpr: &syntax.RangeAggregationExpr{
-									Operation: syntax.OpRangeTypeRate,
-									Left: &syntax.LogRange{
-										Left: &syntax.MatchersExpr{
-											Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")},
-										},
-										Interval: 5 * time.Minute,
-									},
-								},
-							},
-							next: nil,
-						},
-					},
-				},
-			},
-		},
 		// max(count) should shard the count, but not the max
 		{
 			in: `max(count(rate({foo="bar"}[5m])))`,

pkg/logql/syntax/ast.go

@@ -669,7 +669,11 @@ func newLabelFmtExpr(fmts []log.LabelFmt) *LabelFmtExpr {
 	}
 }

-func (e *LabelFmtExpr) Shardable() bool { return false }
+func (e *LabelFmtExpr) Shardable() bool {
+	// While LabelFmt is shardable in certain cases, it is not always,
+	// but this is left to the shardmapper to determine
+	return true
+}

 func (e *LabelFmtExpr) Walk(f WalkFn) { f(e) }
@@ -1219,28 +1223,30 @@ type Grouping struct {
 func (g Grouping) String() string {
 	var sb strings.Builder
-	if g.Groups == nil {
-		return ""
-	}

 	if g.Without {
 		sb.WriteString(" without ")
 	} else {
 		sb.WriteString(" by ")
 	}

-	if len(g.Groups) > 0 {
-		sb.WriteString("(")
-		sb.WriteString(strings.Join(g.Groups, ","))
-		sb.WriteString(")")
-	}
-
-	if len(g.Groups) == 0 {
-		sb.WriteString("()")
-	}
+	sb.WriteString("(")
+	sb.WriteString(strings.Join(g.Groups, ","))
+	sb.WriteString(")")

 	return sb.String()
 }

+// whether grouping doesn't change the result
+func (g Grouping) Noop() bool {
+	return len(g.Groups) == 0 && g.Without
+}
+
+// whether grouping reduces the result to a single value
+// with no labels
+func (g Grouping) Singleton() bool {
+	return len(g.Groups) == 0 && !g.Without
+}
+
 // VectorAggregationExpr all vector aggregation expressions support grouping by/without label(s),
 // therefore the Grouping struct can never be nil.
 type VectorAggregationExpr struct {
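
The net effect of the Grouping changes, as a hedged in-package sketch (sketchGroupingString is a hypothetical helper, and `fmt` is assumed imported; the outputs mirror the updated TestGroupingString expectations near the end of this diff):

func sketchGroupingString() {
	fmt.Println(Grouping{}.String())                           // " by ()"
	fmt.Println(Grouping{Without: true}.String())              // " without ()"
	fmt.Println(Grouping{Groups: []string{"a", "b"}}.String()) // " by (a,b)"

	// Noop: `without ()` leaves results unchanged;
	// Singleton: `by ()` collapses to a single label-less value.
	fmt.Println(Grouping{Without: true}.Noop(), Grouping{}.Singleton()) // true true
}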
@@ -1340,33 +1346,60 @@ func (e *VectorAggregationExpr) String() string {
 			params = []string{e.Left.String()}
 		}
 	}
-	return formatOperation(e.Operation, e.Grouping, params...)
+	return formatVectorOperation(e.Operation, e.Grouping, params...)
 }

 // impl SampleExpr
 func (e *VectorAggregationExpr) Shardable() bool {
-	if e.Operation == OpTypeCount || e.Operation == OpTypeAvg {
-		if !e.Left.Shardable() {
-			return false
-		}
+	if !shardableOps[e.Operation] || !e.Left.Shardable() {
+		return false
+	}
+
+	switch e.Operation {
+	case OpTypeCount, OpTypeAvg:
 		// count is shardable if labels are not mutated
-		// otherwise distinct values can be counted twice per shard
-		shardable := true
-		e.Left.Walk(func(e interface{}) {
-			switch e.(type) {
-			// LabelParserExpr is normally shardable, but not in this case.
-			// TODO(owen-d): I think LabelParserExpr is shardable
-			// for avg, but not for count. Let's refactor to make this
-			// cleaner. For now I'm disallowing sharding on both.
-			case *LabelParserExpr:
-				shardable = false
-			case *LogfmtParserExpr:
-				shardable = false
-			}
-		})
+		// otherwise distinct values can be present in multiple shards and
+		// counted twice.
+		// avg is similar since it's remapped to sum/count.
+		// TODO(owen-d): this is hard to figure out; we should refactor to
+		// make these relationships clearer, safer, and more extensible.
+		shardable := !ReducesLabels(e.Left)
 		return shardable
-	}

-	return shardableOps[e.Operation] && e.Left.Shardable()
+	case OpTypeMax, OpTypeMin:
+		// max(<range_aggr>) can be sharded by pushing down the max|min aggregation,
+		// but max(<vector_aggr>) cannot. It needs to perform the
+		// aggregation on the total result set, and then pick the max|min.
+		// For instance, `max(max_over_time)` or `max(rate)` can turn into
+		// `max( max(rate(shard1)) ++ max(rate(shard2)) ... etc)`,
+		// but you can't do
+		// `max( max(sum(rate(shard1))) ++ max(sum(rate(shard2))) ... etc)`
+		// because it's only taking the maximum from each shard,
+		// but we actually need to sum all the shards then put the max on top
+		if _, ok := e.Left.(*RangeAggregationExpr); ok {
+			return true
+		}
+		return false
+
+	case OpTypeSum:
+		// sum can shard & merge vector & range aggregations, but only if
+		// the resulting computation is commutative and associative.
+		// This does not apply to min & max, because while `min(min(min))`
+		// satisfies the above, sum( sum(min(shard1)) ++ sum(min(shard2)) )
+		// does not
+		if child, ok := e.Left.(*VectorAggregationExpr); ok {
+			switch child.Operation {
+			case OpTypeMin, OpTypeMax:
+				return false
+			}
+		}
+		return true
+	}
+
+	return true
 }

 func (e *VectorAggregationExpr) Walk(f WalkFn) {
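
A hedged usage sketch of the max/min rule above (sketchShardable is a hypothetical helper; ParseSampleExpr is this package's parser and `fmt` is assumed imported):

func sketchShardable() {
	a, _ := ParseSampleExpr(`max(rate({app="x"}[5m]))`)      // max over a range aggregation
	b, _ := ParseSampleExpr(`max(sum(rate({app="x"}[5m])))`) // max over a vector aggregation
	fmt.Println(a.Shardable(), b.Shardable())                // true false
}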
@@ -1823,7 +1856,7 @@ func (e *LiteralExpr) Value() (float64, error) {

 // helper used to impl Stringer for vector and range aggregations
 // nolint:interfacer
-func formatOperation(op string, grouping *Grouping, params ...string) string {
+func formatVectorOperation(op string, grouping *Grouping, params ...string) string {
 	nonEmptyParams := make([]string, 0, len(params))
 	for _, p := range params {
 		if p != "" {
@@ -1833,7 +1866,7 @@ func formatVectorOperation(op string, grouping *Grouping, params ...string) string {
 	var sb strings.Builder
 	sb.WriteString(op)
-	if grouping != nil {
+	if grouping != nil && !grouping.Singleton() {
 		sb.WriteString(grouping.String())
 	}
 	sb.WriteString("(")
@@ -1921,7 +1954,9 @@ func (e *LabelReplaceExpr) String() string {
 	return sb.String()
 }

-// shardableOps lists the operations which may be sharded.
+// shardableOps lists the operations which may be sharded, but are not
+// guaranteed to be. See the `Shardable()` implementations
+// on the respective expr types for more details.
 // topk, botk, max, & min all must be concatenated and then evaluated in order to avoid
 // potential data loss due to series distribution across shards.
 // For example, grouping by `cluster` for a `max` operation may yield
@@ -1944,6 +1979,8 @@ var shardableOps = map[string]bool{
 	// avg is only marked as shardable because we remap it into sum/count.
 	OpTypeAvg:   true,
 	OpTypeCount: true,
+	OpTypeMax:   true,
+	OpTypeMin:   true,

 	// range vector ops
 	OpRangeTypeCount: true,
@@ -2027,3 +2064,30 @@ func (e *VectorExpr) Pipeline() (log.Pipeline, error) { return log.NewNo
 func (e *VectorExpr) Matchers() []*labels.Matcher             { return nil }
 func (e *VectorExpr) MatcherGroups() ([]MatcherRange, error)  { return nil, e.err }
 func (e *VectorExpr) Extractor() (log.SampleExtractor, error) { return nil, nil }
+
+func ReducesLabels(e Expr) (conflict bool) {
+	e.Walk(func(e interface{}) {
+		switch expr := e.(type) {
+		// Technically, any parser that mutates labels could cause the query
+		// to be non-shardable _if_ the total (inherent+extracted) labels
+		// exist on two different shards, but this is incredibly unlikely
+		// for parsers which add new labels so I (owen-d) am preferring
+		// to continue sharding in those cases and only prevent sharding
+		// when using `drop` or `keep` which reduce labels to a smaller subset
+		// more likely to collide across shards.
+		case *KeepLabelsExpr, *DropLabelsExpr:
+			conflict = true
+		case *LabelFmtExpr:
+			// TODO(owen-d): renaming is shardable in many cases, but will
+			// likely require a `sum without ()` wrapper to combine the
+			// same extracted labelsets executed on different shards
+			for _, f := range expr.Formats {
+				if f.Rename {
+					conflict = true
+					break
+				}
+			}
+		}
+	})
+	return
+}
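
A hedged usage sketch (sketchReducesLabels is a hypothetical in-package helper; assumes `fmt` imported): `keep` and `drop` flag a potential cross-shard labelset collision, while purely additive parsers do not:

func sketchReducesLabels() {
	a, _ := ParseSampleExpr(`count(rate({app="x"} | json | keep foo [5m]))`)
	b, _ := ParseSampleExpr(`count(rate({app="x"} | json [5m]))`)
	fmt.Println(ReducesLabels(a), ReducesLabels(b)) // true false
}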

pkg/logql/syntax/ast_test.go

@@ -711,7 +711,7 @@ func TestGroupingString(t *testing.T) {
 		Groups:  nil,
 		Without: false,
 	}
-	require.Equal(t, "", g.String())
+	require.Equal(t, " by ()", g.String())

 	g = Grouping{
 		Groups: []string{"a", "b"},
@@ -729,5 +729,5 @@ func TestGroupingString(t *testing.T) {
 		Groups:  nil,
 		Without: true,
 	}
-	require.Equal(t, "", g.String())
+	require.Equal(t, " without ()", g.String())
 }

pkg/querier/queryrange/querysharding_test.go

@@ -208,7 +208,7 @@ func Test_astMapper_QuerySizeLimits(t *testing.T) {
 		},
 		{
 			desc:                "Non shardable query too big",
-			query:               `sum_over_time({app="foo"} |= "foo" | unwrap foo [1h])`,
+			query:               `avg_over_time({job="foo"} | json busy="utilization" | unwrap busy [5m])`,
 			maxQuerierBytesSize: 10,

 			err:                      fmt.Sprintf(limErrQuerierTooManyBytesUnshardableTmpl, "100 B", "10 B"),
 			expectedStatsHandlerHits: 1,
