Regex line/label filters return original regex when `String` is called (#9584)

To optimize regex line and label filters, Loki rewrites them to be
non-greedy. This can cause extra characters to appear in the regex when
`String()` is called. In addition to confusing logging, this can also
cause internal panics when a previously-parsable query becomes too long.

This PR stores the original regex in the label/line filter and uses that
in calls to `String`.

This PR also removes regex optimizations from label selectors. Making
the optimization at parse time caused the original regex to be lost.
pull/9592/head
Travis Patterson 3 years ago committed by GitHub
parent 20752d20d0
commit fd0efe0799
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 12
      pkg/logql/log/filter.go
  2. 54
      pkg/logql/log/filter_test.go
  3. 2
      pkg/logql/matchers_test.go
  4. 14
      pkg/logql/shardmapper_test.go
  5. 20
      pkg/logql/syntax/ast.go
  6. 2
      pkg/logql/syntax/parser_test.go
  7. 2
      pkg/loki/runtime_config_test.go
  8. 4
      pkg/storage/store_test.go

@ -232,16 +232,18 @@ func (a orFilter) ToStage() Stage {
type regexpFilter struct {
*regexp.Regexp
orig string
}
// newRegexpFilter creates a new line filter for a given regexp.
// If match is false the filter is the negation of the regexp.
func newRegexpFilter(re string, match bool) (Filterer, error) {
func newRegexpFilter(re string, orig string, match bool) (Filterer, error) {
reg, err := regexp.Compile(re)
if err != nil {
return nil, err
}
f := regexpFilter{reg}
f := regexpFilter{Regexp: reg, orig: orig}
if match {
return f, nil
}
@ -260,6 +262,10 @@ func (r regexpFilter) ToStage() Stage {
}
}
func (r regexpFilter) String() string {
return r.orig
}
type equalFilter struct {
match []byte
caseInsensitive bool
@ -451,7 +457,7 @@ func parseRegexpFilter(re string, match bool, isLabel bool) (Filterer, error) {
// the beginning and ending of lines
regex = "^(?:" + regex + ")$"
}
return newRegexpFilter(regex, match)
return newRegexpFilter(regex, re, match)
}
if match {
return f, nil

@ -13,10 +13,15 @@ func Test_SimplifiedRegex(t *testing.T) {
"foo, 世界", allunicode(), "fooÏbar",
}
for _, test := range []struct {
re string
re string
// Simplified is true when the regex is converted to non-regex filters
// or when the regex is rewritten to be non-greedy
simplified bool
expected Filterer
match bool
// Expected != nil when the regex is converted to non-regex filters
expected Filterer
match bool
}{
// regex we intend to support.
{"foo", true, newContainsFilter([]byte("foo"), false), true},
@ -63,25 +68,26 @@ func Test_SimplifiedRegex(t *testing.T) {
{"(?i).*foo.*", true, newContainsFilter([]byte("FOO"), true), true},
{".+", true, ExistsFilter, true},
// regex we are not supporting.
{"[a-z]+foo", true, nil, false},
{".+foo", true, nil, false},
{".*fo.*o", true, nil, false},
{`\d`, true, nil, false},
{`\sfoo`, true, nil, false},
{`foo?`, false, nil, false},
{`foo{1,2}bar{2,3}`, true, nil, false},
{`foo|\d*bar`, true, nil, false},
{`foo|fo{1,2}`, true, nil, false},
{`foo|fo\d*`, true, nil, false},
{`foo|fo\d+`, true, nil, false},
{`(\w\d+)`, true, nil, false},
{`.*f.*oo|fo{1,2}`, true, nil, false},
{"f|f(?i)oo", true, nil, false},
{".foo+", true, nil, false},
// These regexes are rewritten to be non-greedy but no new
// filter is generated.
{"[a-z]+foo", true, nil, true},
{".+foo", true, nil, true},
{".*fo.*o", true, nil, true},
{`\d`, true, nil, true},
{`\sfoo`, true, nil, true},
{`foo?`, false, nil, true},
{`foo{1,2}bar{2,3}`, true, nil, true},
{`foo|\d*bar`, true, nil, true},
{`foo|fo{1,2}`, true, nil, true},
{`foo|fo\d*`, true, nil, true},
{`foo|fo\d+`, true, nil, true},
{`(\w\d+)`, true, nil, true},
{`.*f.*oo|fo{1,2}`, true, nil, true},
{"f|f(?i)oo", true, nil, true},
{".foo+", true, nil, true},
} {
t.Run(test.re, func(t *testing.T) {
d, err := newRegexpFilter(test.re, test.match)
d, err := newRegexpFilter(test.re, test.re, test.match)
require.NoError(t, err, "invalid regex")
f, err := parseRegexpFilter(test.re, test.match, false)
@ -92,11 +98,17 @@ func Test_SimplifiedRegex(t *testing.T) {
require.Equal(t, d, f)
return
}
// otherwise ensure we have different filter
require.NotEqual(t, f, d)
if test.expected != nil {
require.Equal(t, test.expected, f)
} else {
reFilter, ok := f.(regexpFilter)
require.True(t, ok)
require.Equal(t, test.re, reFilter.String())
}
// tests all lines with both filter, they should have the same result.
for _, line := range fixtures {
l := []byte(line)
@ -182,7 +194,7 @@ var res bool
func benchmarkRegex(b *testing.B, re, line string, match bool) {
var m bool
l := []byte(line)
d, err := newRegexpFilter(re, match)
d, err := newRegexpFilter(re, re, match)
if err != nil {
b.Fatal(err)
}

@ -32,7 +32,7 @@ func Test_match(t *testing.T) {
{mustMatcher(labels.MatchEqual, "a", "1")},
{
mustMatcher(labels.MatchEqual, "b", "2"),
mustMatcher(labels.MatchEqual, "c", "3"),
mustMatcher(labels.MatchRegexp, "c", "3"),
mustMatcher(labels.MatchNotEqual, "d", "4"),
},
},

@ -257,11 +257,11 @@ func TestMappingStrings(t *testing.T) {
},
{
in: `avg(avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m]))`,
out: `avg(avg_over_time({job=~"myapps(?-s:.)*?"} |= "stats" | json busy="utilization" | unwrap busy [5m]))`,
out: `avg(avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m]))`,
},
{
in: `avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m])`,
out: `avg_over_time({job=~"myapps(?-s:.)*?"} |= "stats" | json busy="utilization" | unwrap busy [5m])`,
out: `avg_over_time({job=~"myapps.*"} |= "stats" | json busy="utilization" | unwrap busy [5m])`,
},
// should be noop if VectorExpr
{
@ -271,28 +271,28 @@ func TestMappingStrings(t *testing.T) {
{
// or exprs aren't shardable
in: `count_over_time({a=~".+"}[1s]) or count_over_time({a=~".+"}[1s])`,
out: `(downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>ordownstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>)`,
out: `(downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>ordownstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>)`,
},
{
// vector() exprs aren't shardable
in: `sum(count_over_time({a=~".+"}[1s]) + vector(1))`,
out: `sum((downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>+vector(1.000000)))`,
out: `sum((downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>+vector(1.000000)))`,
},
{
// on() is never shardable as it can mutate labels
in: `sum(count_over_time({a=~".+"}[1s]) * on () count_over_time({a=~".+"}[1s]))`,
out: `sum((downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>*on()downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>))`,
out: `sum((downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>*on()downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>))`,
},
{
// ignoring(<non-empty-labels>) is never shardable as it can mutate labels
in: `sum(count_over_time({a=~".+"}[1s]) * ignoring (foo) count_over_time({a=~".+"}[1s]))`,
out: `sum((downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>*ignoring(foo)downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~"(?-s:.)+?"}[1s]),shard=1_of_2>))`,
out: `sum((downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>*ignoring(foo)downstream<count_over_time({a=~".+"}[1s]),shard=0_of_2>++downstream<count_over_time({a=~".+"}[1s]),shard=1_of_2>))`,
},
{
// ignoring () doesn't mutate labels and therefore can be shardable
// as long as the operation is shardable
in: `sum(count_over_time({a=~".+"}[1s]) * ignoring () count_over_time({a=~".+"}[1s]))`,
out: `sum(downstream<sum((count_over_time({a=~"(?-s:.)+?"}[1s])*count_over_time({a=~"(?-s:.)+?"}[1s]))),shard=0_of_2>++downstream<sum((count_over_time({a=~"(?-s:.)+?"}[1s])*count_over_time({a=~"(?-s:.)+?"}[1s]))),shard=1_of_2>)`,
out: `sum(downstream<sum((count_over_time({a=~".+"}[1s])*count_over_time({a=~".+"}[1s]))),shard=0_of_2>++downstream<sum((count_over_time({a=~".+"}[1s])*count_over_time({a=~".+"}[1s]))),shard=1_of_2>)`,
},
} {
t.Run(tc.in, func(t *testing.T) {

@ -705,10 +705,6 @@ func (l *LogfmtExpressionParser) String() string {
}
func mustNewMatcher(t labels.MatchType, n, v string) *labels.Matcher {
if t == labels.MatchRegexp || t == labels.MatchNotRegexp {
return simplifyRegexMatcher(t, n, v)
}
m, err := labels.NewMatcher(t, n, v)
if err != nil {
panic(logqlmodel.NewParseError(err.Error(), 0, 0))
@ -716,22 +712,6 @@ func mustNewMatcher(t labels.MatchType, n, v string) *labels.Matcher {
return m
}
func simplifyRegexMatcher(typ labels.MatchType, name, value string) *labels.Matcher {
reg, err := syntax.Parse(value, syntax.Perl)
if err != nil {
panic(logqlmodel.NewParseError(err.Error(), 0, 0))
}
reg = reg.Simplify()
m, ok := simplify(typ, name, reg)
if !ok {
util.AllNonGreedy(reg)
return labels.MustNewMatcher(typ, name, reg.String())
}
return m
}
// simplify will return an equals matcher if there is a regex matching a literal
func simplify(typ labels.MatchType, name string, reg *syntax.Regexp) (*labels.Matcher, bool) {
switch reg.Op {

@ -3429,7 +3429,7 @@ func TestNoOpLabelToString(t *testing.T) {
logExpr := `{container_name="app"} | foo=~".*"`
l, err := ParseLogSelector(logExpr, false)
require.NoError(t, err)
require.Equal(t, `{container_name="app"} | foo=~"(?-s:.)*?"`, l.String())
require.Equal(t, `{container_name="app"} | foo=~".*"`, l.String())
stages, err := l.(*PipelineExpr).MultiStages.stages()
require.NoError(t, err)

@ -52,7 +52,7 @@ overrides:
}},
{Period: model.Duration(24 * time.Hour), Priority: 5, Selector: `{namespace="bar", cluster=~"fo.*|b.+|[1-2]"}`, Matchers: []*labels.Matcher{
labels.MustNewMatcher(labels.MatchEqual, "namespace", "bar"),
labels.MustNewMatcher(labels.MatchRegexp, "cluster", "fo(?-s:.)*?|b(?-s:.)+?|[1-2]"),
labels.MustNewMatcher(labels.MatchRegexp, "cluster", "fo.*|b.+|[1-2]"),
}},
}, overrides.StreamRetention("29"))
}

@ -954,7 +954,7 @@ func Test_store_decodeReq_Matchers(t *testing.T) {
"unsharded",
newQuery("{foo=~\"ba.*\"}", from, from.Add(6*time.Millisecond), nil, nil),
[]*labels.Matcher{
labels.MustNewMatcher(labels.MatchRegexp, "foo", "ba(?-s:.)*?"),
labels.MustNewMatcher(labels.MatchRegexp, "foo", "ba.*"),
labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "logs"),
},
},
@ -968,7 +968,7 @@ func Test_store_decodeReq_Matchers(t *testing.T) {
nil,
),
[]*labels.Matcher{
labels.MustNewMatcher(labels.MatchRegexp, "foo", "ba(?-s:.)*?"),
labels.MustNewMatcher(labels.MatchRegexp, "foo", "ba.*"),
labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "logs"),
labels.MustNewMatcher(
labels.MatchEqual,

Loading…
Cancel
Save