From c42a0ba8687f3444873b23b4cc21cd112d659f1d Mon Sep 17 00:00:00 2001 From: Travis Patterson Date: Wed, 1 Mar 2023 18:11:40 +0100 Subject: [PATCH] Optimize .+ regex line filter (#8646) Interpolated variables from Grafana dashboards may result in the regex matcher `.+`. Filters with `+` are already being rewritten to be non-greedy, but there is a further optimization to make: replace the regex altogether with a filter that just matches `len(line) > 0` --- pkg/logql/log/filter.go | 18 ++++++++++++++++++ pkg/logql/log/filter_test.go | 2 ++ 2 files changed, 20 insertions(+) diff --git a/pkg/logql/log/filter.go b/pkg/logql/log/filter.go index 89fa90f724..04f52e3a84 100644 --- a/pkg/logql/log/filter.go +++ b/pkg/logql/log/filter.go @@ -33,6 +33,20 @@ func (trueFilter) ToStage() Stage { return NoopStage } // TrueFilter is a filter that returns and matches all log lines whatever their content. var TrueFilter = trueFilter{} +type existsFilter struct{} + +func (existsFilter) Filter(_ []byte) bool { return true } +func (existsFilter) ToStage() Stage { + return StageFunc{ + process: func(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) { + return line, len(line) > 0 + }, + } +} + +// ExistsFilter is a filter that returns and matches when a line has any characters. 
+var ExistsFilter = existsFilter{} + type notFilter struct { Filterer } @@ -425,6 +439,10 @@ func simplify(reg *syntax.Regexp) (Filterer, bool) { if reg.Sub[0].Op == syntax.OpAnyCharNotNL { return TrueFilter, true } + case syntax.OpPlus: + if len(reg.Sub) == 1 && reg.Sub[0].Op == syntax.OpAnyCharNotNL { // simplify ".+" + return ExistsFilter, true + } case syntax.OpEmptyMatch: return TrueFilter, true } diff --git a/pkg/logql/log/filter_test.go b/pkg/logql/log/filter_test.go index 13b7c0428e..e445e9fff4 100644 --- a/pkg/logql/log/filter_test.go +++ b/pkg/logql/log/filter_test.go @@ -61,6 +61,7 @@ func Test_SimplifiedRegex(t *testing.T) { {"(?i)f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("F"), true), newOrFilter(newContainsFilter([]byte("FOO"), true), newContainsFilter([]byte("FOOBAR"), true))), true}, {"(?i)f|fatal|e.*", true, newOrFilter(newOrFilter(newContainsFilter([]byte("F"), true), newContainsFilter([]byte("FATAL"), true)), newContainsFilter([]byte("E"), true)), true}, {"(?i).*foo.*", true, newContainsFilter([]byte("FOO"), true), true}, + {".+", true, ExistsFilter, true}, // regex we are not supporting. {"[a-z]+foo", true, nil, false}, @@ -77,6 +78,7 @@ func Test_SimplifiedRegex(t *testing.T) { {`(\w\d+)`, true, nil, false}, {`.*f.*oo|fo{1,2}`, true, nil, false}, {"f|f(?i)oo", true, nil, false}, + {".foo+", true, nil, false}, } { t.Run(test.re, func(t *testing.T) { d, err := newRegexpFilter(test.re, test.match)