From b152bc2cf3e0c3b558ff400af3056f2f64baa8dd Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Tue, 30 Mar 2021 22:34:54 +0530 Subject: [PATCH] check for stream selectors to have atleast one equality matcher (#3216) * check for stream selectors to have atleast one equality matcher * move equality matchers check to logql --- pkg/chunkenc/memchunk_test.go | 4 +- pkg/ingester/tailer.go | 2 +- pkg/logentry/stages/match.go | 2 +- pkg/logql/ast.go | 6 +- pkg/logql/ast_test.go | 10 ++-- pkg/logql/functions_test.go | 2 +- pkg/logql/log/parser_hints_test.go | 2 +- pkg/logql/optimize.go | 2 +- pkg/logql/optimize_test.go | 2 +- pkg/logql/parser.go | 49 +++++++++++++++- pkg/logql/parser_test.go | 94 +++++++++++++++++++++++++++++- pkg/logql/sharding_test.go | 24 ++++---- pkg/querier/http.go | 2 +- 13 files changed, 169 insertions(+), 32 deletions(-) diff --git a/pkg/chunkenc/memchunk_test.go b/pkg/chunkenc/memchunk_test.go index 4ec1a6e1e0..ea85d36ee4 100644 --- a/pkg/chunkenc/memchunk_test.go +++ b/pkg/chunkenc/memchunk_test.go @@ -960,7 +960,7 @@ func BenchmarkBufferedIteratorLabels(b *testing.B) { } { b.Run(test, func(b *testing.B) { b.ReportAllocs() - expr, err := logql.ParseLogSelector(test) + expr, err := logql.ParseLogSelector(test, true) if err != nil { b.Fatal(err) } @@ -999,7 +999,7 @@ func BenchmarkBufferedIteratorLabels(b *testing.B) { } { b.Run(test, func(b *testing.B) { b.ReportAllocs() - expr, err := logql.ParseSampleExpr(test) + expr, err := logql.ParseSampleExpr(test, true) if err != nil { b.Fatal(err) } diff --git a/pkg/ingester/tailer.go b/pkg/ingester/tailer.go index 2822b0d562..234643c810 100644 --- a/pkg/ingester/tailer.go +++ b/pkg/ingester/tailer.go @@ -47,7 +47,7 @@ type tailer struct { } func newTailer(orgID, query string, conn TailServer) (*tailer, error) { - expr, err := logql.ParseLogSelector(query) + expr, err := logql.ParseLogSelector(query, true) if err != nil { return nil, err } diff --git a/pkg/logentry/stages/match.go b/pkg/logentry/stages/match.go index e4746e20e3..c4ec786eb4 100644 --- a/pkg/logentry/stages/match.go +++ b/pkg/logentry/stages/match.go @@ -62,7 +62,7 @@ func validateMatcherConfig(cfg *MatcherConfig) (logql.LogSelectorExpr, error) { return nil, errors.New(ErrStagesWithDropLine) } - selector, err := logql.ParseLogSelector(cfg.Selector) + selector, err := logql.ParseLogSelector(cfg.Selector, false) if err != nil { return nil, errors.Wrap(err, ErrSelectorSyntax) } diff --git a/pkg/logql/ast.go b/pkg/logql/ast.go index 02e6aabf73..b2e7138ca1 100644 --- a/pkg/logql/ast.go +++ b/pkg/logql/ast.go @@ -44,7 +44,7 @@ type SelectLogParams struct { // LogSelector returns the LogSelectorExpr from the SelectParams. // The `LogSelectorExpr` can then returns all matchers and filters to use for that request. func (s SelectLogParams) LogSelector() (LogSelectorExpr, error) { - return ParseLogSelector(s.Selector) + return ParseLogSelector(s.Selector, true) } type SelectSampleParams struct { @@ -54,13 +54,13 @@ type SelectSampleParams struct { // Expr returns the SampleExpr from the SelectSampleParams. // The `LogSelectorExpr` can then returns all matchers and filters to use for that request. func (s SelectSampleParams) Expr() (SampleExpr, error) { - return ParseSampleExpr(s.Selector) + return ParseSampleExpr(s.Selector, true) } // LogSelector returns the LogSelectorExpr from the SelectParams. // The `LogSelectorExpr` can then returns all matchers and filters to use for that request. func (s SelectSampleParams) LogSelector() (LogSelectorExpr, error) { - expr, err := ParseSampleExpr(s.Selector) + expr, err := ParseSampleExpr(s.Selector, true) if err != nil { return nil, err } diff --git a/pkg/logql/ast_test.go b/pkg/logql/ast_test.go index 1700c64671..2123a31d74 100644 --- a/pkg/logql/ast_test.go +++ b/pkg/logql/ast_test.go @@ -16,7 +16,7 @@ func Test_logSelectorExpr_String(t *testing.T) { selector string expectFilter bool }{ - {`{foo!~"bar"}`, false}, + {`{foo="bar"}`, false}, {`{foo="bar", bar!="baz"}`, false}, {`{foo="bar", bar!="baz"} != "bip" !~ ".+bop"`, true}, {`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap"`, true}, @@ -37,7 +37,7 @@ func Test_logSelectorExpr_String(t *testing.T) { tt := tt t.Run(tt.selector, func(t *testing.T) { t.Parallel() - expr, err := ParseLogSelector(tt.selector) + expr, err := ParseLogSelector(tt.selector, true) if err != nil { t.Fatalf("failed to parse log selector: %s", err) } @@ -176,7 +176,7 @@ func Test_NilFilterDoesntPanic(t *testing.T) { `{namespace="dev", container_name="cart"} |= "bleep" |= "bloop" |= ""`, } { t.Run(tc, func(t *testing.T) { - expr, err := ParseLogSelector(tc) + expr, err := ParseLogSelector(tc, true) require.Nil(t, err) p, err := expr.Pipeline() @@ -265,7 +265,7 @@ func Test_FilterMatcher(t *testing.T) { tt := tt t.Run(tt.q, func(t *testing.T) { t.Parallel() - expr, err := ParseLogSelector(tt.q) + expr, err := ParseLogSelector(tt.q, true) assert.Nil(t, err) assert.Equal(t, tt.expectedMatchers, expr.Matchers()) p, err := expr.Pipeline() @@ -323,7 +323,7 @@ func TestStringer(t *testing.T) { } func BenchmarkContainsFilter(b *testing.B) { - expr, err := ParseLogSelector(`{app="foo"} |= "foo"`) + expr, err := ParseLogSelector(`{app="foo"} |= "foo"`, true) if err != nil { b.Fatal(err) } diff --git a/pkg/logql/functions_test.go b/pkg/logql/functions_test.go index 31af9b9ec8..03eb33933f 100644 --- a/pkg/logql/functions_test.go +++ b/pkg/logql/functions_test.go @@ -96,7 +96,7 @@ func Test_Extractor(t *testing.T) { `, } { t.Run(tc, func(t *testing.T) { - expr, err := ParseSampleExpr(tc) + expr, err := ParseSampleExpr(tc, true) require.Nil(t, err) _, err = expr.Extractor() require.Nil(t, err) diff --git a/pkg/logql/log/parser_hints_test.go b/pkg/logql/log/parser_hints_test.go index fc639461ca..fc64884feb 100644 --- a/pkg/logql/log/parser_hints_test.go +++ b/pkg/logql/log/parser_hints_test.go @@ -209,7 +209,7 @@ func Test_ParserHints(t *testing.T) { tt := tt t.Run(tt.expr, func(t *testing.T) { t.Parallel() - expr, err := logql.ParseSampleExpr(tt.expr) + expr, err := logql.ParseSampleExpr(tt.expr, true) require.NoError(t, err) ex, err := expr.Extractor() diff --git a/pkg/logql/optimize.go b/pkg/logql/optimize.go index 25745fcaee..6c15712904 100644 --- a/pkg/logql/optimize.go +++ b/pkg/logql/optimize.go @@ -15,7 +15,7 @@ func optimizeSampleExpr(expr SampleExpr) (SampleExpr, error) { } // clone the expr. q := expr.String() - expr, err := ParseSampleExpr(q) + expr, err := ParseSampleExpr(q, true) if err != nil { return nil, err } diff --git a/pkg/logql/optimize_test.go b/pkg/logql/optimize_test.go index 11806c4348..5f419f8a93 100644 --- a/pkg/logql/optimize_test.go +++ b/pkg/logql/optimize_test.go @@ -28,7 +28,7 @@ func Test_optimizeSampleExpr(t *testing.T) { } for _, tt := range tests { t.Run(tt.in, func(t *testing.T) { - e, err := ParseSampleExpr(tt.in) + e, err := ParseSampleExpr(tt.in, true) require.NoError(t, err) got, err := optimizeSampleExpr(e) require.NoError(t, err) diff --git a/pkg/logql/parser.go b/pkg/logql/parser.go index 8485ec6b0b..55b43affb2 100644 --- a/pkg/logql/parser.go +++ b/pkg/logql/parser.go @@ -8,10 +8,13 @@ import ( "sync" "text/scanner" + "github.com/cortexproject/cortex/pkg/util" "github.com/prometheus/prometheus/pkg/labels" promql_parser "github.com/prometheus/prometheus/promql/parser" ) +const errAtleastOneEqualityMatcherRequired = "queries require at least one regexp or equality matcher that does not have an empty-compatible value. For instance, app=~\".*\" does not meet this requirement, but app=~\".+\" will" + var parserPool = sync.Pool{ New: func() interface{} { p := &parser{ @@ -80,6 +83,19 @@ func ParseExpr(input string) (expr Expr, err error) { return p.Parse() } +// checkEqualityMatchers checks whether a query would touch all the streams in the query range or uses at least one matcher to select specific streams. +func checkEqualityMatchers(matchers []*labels.Matcher) error { + if len(matchers) == 0 { + return nil + } + _, matchers = util.SplitFiltersAndMatchers(matchers) + if len(matchers) == 0 { + return errors.New(errAtleastOneEqualityMatcherRequired) + } + + return nil +} + // ParseMatchers parses a string and returns labels matchers, if the expression contains // anything else it will return an error. func ParseMatchers(input string) ([]*labels.Matcher, error) { @@ -95,7 +111,7 @@ func ParseMatchers(input string) ([]*labels.Matcher, error) { } // ParseSampleExpr parses a string and returns the sampleExpr -func ParseSampleExpr(input string) (SampleExpr, error) { +func ParseSampleExpr(input string, equalityMatcherRequired bool) (SampleExpr, error) { expr, err := ParseExpr(input) if err != nil { return nil, err @@ -104,11 +120,33 @@ func ParseSampleExpr(input string) (SampleExpr, error) { if !ok { return nil, errors.New("only sample expression supported") } + + if equalityMatcherRequired { + err = sampleExprCheckEqualityMatchers(sampleExpr) + if err != nil { + return nil, err + } + } return sampleExpr, nil } +func sampleExprCheckEqualityMatchers(expr SampleExpr) error { + switch e := expr.(type) { + case *binOpExpr: + if err := sampleExprCheckEqualityMatchers(e.SampleExpr); err != nil { + return err + } + + return sampleExprCheckEqualityMatchers(e.RHS) + case *literalExpr: + return nil + default: + return checkEqualityMatchers(expr.Selector().Matchers()) + } +} + // ParseLogSelector parses a log selector expression `{app="foo"} |= "filter"` -func ParseLogSelector(input string) (LogSelectorExpr, error) { +func ParseLogSelector(input string, equalityMatcherRequired bool) (LogSelectorExpr, error) { expr, err := ParseExpr(input) if err != nil { return nil, err @@ -117,6 +155,13 @@ func ParseLogSelector(input string) (LogSelectorExpr, error) { if !ok { return nil, errors.New("only log selector is supported") } + + if equalityMatcherRequired { + err = checkEqualityMatchers(logSelector.Matchers()) + if err != nil { + return nil, err + } + } return logSelector, nil } diff --git a/pkg/logql/parser_test.go b/pkg/logql/parser_test.go index 835a2f5f9b..b3533b47f4 100644 --- a/pkg/logql/parser_test.go +++ b/pkg/logql/parser_test.go @@ -2460,7 +2460,7 @@ func TestIsParseError(t *testing.T) { func Test_PipelineCombined(t *testing.T) { query := `{job="cortex-ops/query-frontend"} |= "logging.go" | logfmt | line_format "{{.msg}}" | regexp "(?P\\w+) (?P[\\w|/]+) \\((?P\\d+?)\\) (?P.*)" | (duration > 1s or status==200) and method="POST" | line_format "{{.duration}}|{{.method}}|{{.status}}"` - expr, err := ParseLogSelector(query) + expr, err := ParseLogSelector(query, true) require.Nil(t, err) p, err := expr.Pipeline() @@ -2505,3 +2505,95 @@ func Benchmark_CompareParseLabels(b *testing.B) { } }) } + +func TestParseSampleExpr_equalityMatcher(t *testing.T) { + for _, tc := range []struct { + in string + err error + }{ + { + in: `count_over_time({foo="bar"}[5m])`, + }, + { + in: `count_over_time({foo!="bar"}[5m])`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `count_over_time({app="baz", foo!="bar"}[5m])`, + }, + { + in: `count_over_time({app=~".+"}[5m])`, + }, + { + in: `count_over_time({app=~".*"}[5m])`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `count_over_time({app=~"bar|baz"}[5m])`, + }, + { + in: `count_over_time({app!~"bar|baz"}[5m])`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `1 + count_over_time({app=~".*"}[5m])`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `1 + count_over_time({app=~".+"}[5m]) + count_over_time({app=~".*"}[5m])`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `1 + count_over_time({app=~".+"}[5m]) + count_over_time({app=~".+"}[5m])`, + }, + { + in: `1 + count_over_time({app=~".+"}[5m]) + count_over_time({app=~".*"}[5m]) + 1`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `1 + count_over_time({app=~".+"}[5m]) + count_over_time({app=~".+"}[5m]) + 1`, + }, + } { + t.Run(tc.in, func(t *testing.T) { + _, err := ParseSampleExpr(tc.in, true) + require.Equal(t, tc.err, err) + }) + } +} + +func TestParseLogSelectorExpr_equalityMatcher(t *testing.T) { + for _, tc := range []struct { + in string + err error + }{ + { + in: `{foo="bar"}`, + }, + { + in: `{foo!="bar"}`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `{app="baz", foo!="bar"}`, + }, + { + in: `{app=~".+"}`, + }, + { + in: `{app=~".*"}`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + { + in: `{foo=~"bar|baz"}`, + }, + { + in: `{foo!~"bar|baz"}`, + err: errors.New(errAtleastOneEqualityMatcherRequired), + }, + } { + t.Run(tc.in, func(t *testing.T) { + _, err := ParseLogSelector(tc.in, true) + require.Equal(t, tc.err, err) + }) + } +} diff --git a/pkg/logql/sharding_test.go b/pkg/logql/sharding_test.go index 38c6f27037..c4ca9a5a35 100644 --- a/pkg/logql/sharding_test.go +++ b/pkg/logql/sharding_test.go @@ -36,22 +36,22 @@ func TestMappingEquivalence(t *testing.T) { {`1 + 1`, false}, {`{a="1"}`, false}, {`{a="1"} |= "number: 10"`, false}, - {`rate({a=~".*"}[1s])`, false}, - {`sum by (a) (rate({a=~".*"}[1s]))`, false}, - {`sum(rate({a=~".*"}[1s]))`, false}, - {`max without (a) (rate({a=~".*"}[1s]))`, false}, - {`count(rate({a=~".*"}[1s]))`, false}, - {`avg(rate({a=~".*"}[1s]))`, true}, - {`avg(rate({a=~".*"}[1s])) by (a)`, true}, - {`1 + sum by (cluster) (rate({a=~".*"}[1s]))`, false}, - {`sum(max(rate({a=~".*"}[1s])))`, false}, - {`max(count(rate({a=~".*"}[1s])))`, false}, - {`max(sum by (cluster) (rate({a=~".*"}[1s]))) / count(rate({a=~".*"}[1s]))`, false}, + {`rate({a=~".+"}[1s])`, false}, + {`sum by (a) (rate({a=~".+"}[1s]))`, false}, + {`sum(rate({a=~".+"}[1s]))`, false}, + {`max without (a) (rate({a=~".+"}[1s]))`, false}, + {`count(rate({a=~".+"}[1s]))`, false}, + {`avg(rate({a=~".+"}[1s]))`, true}, + {`avg(rate({a=~".+"}[1s])) by (a)`, true}, + {`1 + sum by (cluster) (rate({a=~".+"}[1s]))`, false}, + {`sum(max(rate({a=~".+"}[1s])))`, false}, + {`max(count(rate({a=~".+"}[1s])))`, false}, + {`max(sum by (cluster) (rate({a=~".+"}[1s]))) / count(rate({a=~".+"}[1s]))`, false}, // topk prefers already-seen values in tiebreakers. Since the test data generates // the same log lines for each series & the resulting promql.Vectors aren't deterministically // sorted by labels, we don't expect this to pass. // We could sort them as stated, but it doesn't seem worth the performance hit. - // {`topk(3, rate({a=~".*"}[1s]))`, false}, + // {`topk(3, rate({a=~".+"}[1s]))`, false}, } { q := NewMockQuerier( shards, diff --git a/pkg/querier/http.go b/pkg/querier/http.go index e265099a67..e880986d1a 100644 --- a/pkg/querier/http.go +++ b/pkg/querier/http.go @@ -332,7 +332,7 @@ func parseRegexQuery(httpRequest *http.Request) (string, error) { query := httpRequest.Form.Get("query") regexp := httpRequest.Form.Get("regexp") if regexp != "" { - expr, err := logql.ParseLogSelector(query) + expr, err := logql.ParseLogSelector(query, true) if err != nil { return "", err }