feat: add pattern match line filter (#12398)

pull/12427/head
Anton Kolesnikov 2 years ago committed by GitHub
parent a331746c2f
commit 36c703dae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      CHANGELOG.md
  2. 45
      pkg/logql/log/filter.go
  3. 2
      pkg/logql/log/parser.go
  4. 33
      pkg/logql/log/pattern/ast.go
  5. 6
      pkg/logql/log/pattern/parser.go
  6. 93
      pkg/logql/log/pattern/pattern.go
  7. 91
      pkg/logql/log/pattern/pattern_test.go
  8. 2
      pkg/logql/syntax/ast.go
  9. 58
      pkg/logql/syntax/ast_test.go
  10. 4
      pkg/logql/syntax/expr.y
  11. 915
      pkg/logql/syntax/expr.y.go
  12. 2
      pkg/logql/syntax/lex.go
  13. 5
      pkg/logql/syntax/linefilter_test.go
  14. 22
      pkg/storage/bloom/v1/bloom_tester.go
  15. 36
      pkg/storage/bloom/v1/bloom_tester_test.go

@ -63,6 +63,7 @@
* [11970](https://github.com/grafana/loki/pull/11897) **masslessparticle** Ksonnet: Introduces memory limits to the compactor configuration to avoid unbounded memory usage.
* [12318](https://github.com/grafana/loki/pull/12318) **DylanGuedes** Memcached: Add mTLS support.
* [12392](https://github.com/grafana/loki/pull/12392) **sandeepsukhani** Detect name of service emitting logs and add it as a label.
* [12398](https://github.com/grafana/loki/pull/12398) **kolesnikovae** LogQL: Introduces pattern match filter operators.
##### Fixes
* [11074](https://github.com/grafana/loki/pull/11074) **hainenber** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var.

@ -11,6 +11,7 @@ import (
"github.com/prometheus/prometheus/model/labels"
"github.com/grafana/loki/pkg/logql/log/pattern"
"github.com/grafana/loki/pkg/util"
)
@ -23,6 +24,8 @@ const (
LineMatchNotEqual
LineMatchRegexp
LineMatchNotRegexp
LineMatchPattern
LineMatchNotPattern
)
func (t LineMatchType) String() string {
@ -35,6 +38,10 @@ func (t LineMatchType) String() string {
return "|~"
case LineMatchNotRegexp:
return "!~"
case LineMatchPattern:
return "|>"
case LineMatchNotPattern:
return "!>"
default:
return ""
}
@ -553,6 +560,10 @@ func NewFilter(match string, mt LineMatchType) (Filterer, error) {
return newContainsFilter([]byte(match), false), nil
case LineMatchNotEqual:
return NewNotFilter(newContainsFilter([]byte(match), false)), nil
case LineMatchPattern:
return newPatternFilterer([]byte(match), true)
case LineMatchNotPattern:
return newPatternFilterer([]byte(match), false)
default:
return nil, fmt.Errorf("unknown matcher: %v", match)
}
@ -783,3 +794,37 @@ func (s *RegexSimplifier) simplifyConcatAlternate(reg *syntax.Regexp, literal []
}
return nil, false
}
// patternFilter is a line Filterer that keeps lines matching a pattern
// expression (the `|>` line filter operator).
type patternFilter struct {
	matcher *pattern.Matcher // compiled pattern, built by pattern.ParseLineFilter
	pattern []byte           // raw pattern source, handed to the Checker in Matches
}
// newPatternFilterer compiles a pattern expression into a line filterer.
// When match is false the filter is negated (the `!>` operator): lines
// matching the pattern are dropped instead of kept.
func newPatternFilterer(p []byte, match bool) (MatcherFilterer, error) {
	matcher, err := pattern.ParseLineFilter(p)
	if err != nil {
		return nil, err
	}
	f := &patternFilter{matcher: matcher, pattern: p}
	if match {
		return f, nil
	}
	return NewNotFilter(f), nil
}
// Filter reports whether the line matches the pattern expression.
func (f *patternFilter) Filter(line []byte) bool { return f.matcher.Test(line) }
// Matches runs the Checker against the raw pattern bytes.
// NOTE(review): the meaning of the two false flags is not visible here —
// presumably case-sensitivity/regex toggles on Checker.Test; confirm
// against the Checker interface definition.
func (f *patternFilter) Matches(test Checker) bool {
	return test.Test(f.pattern, false, false)
}
// ToStage adapts the filter into a pipeline Stage: the line passes through
// unchanged, and the boolean result of Filter decides whether it is kept.
func (f *patternFilter) ToStage() Stage {
	return StageFunc{
		process: func(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) {
			return line, f.Filter(line)
		},
	}
}

@ -373,7 +373,7 @@ func (l *LogfmtParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte
func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} }
type PatternParser struct {
matcher pattern.Matcher
matcher *pattern.Matcher
names []string
}

@ -20,6 +20,21 @@ func (e expr) validate() error {
return ErrNoCapture
}
// Consecutive captures are not allowed.
if err := e.validateNoConsecutiveCaptures(); err != nil {
return err
}
caps := e.captures()
uniq := map[string]struct{}{}
for _, c := range caps {
if _, ok := uniq[c]; ok {
return fmt.Errorf("duplicate capture name (%s): %w", c, ErrInvalidExpr)
}
uniq[c] = struct{}{}
}
return nil
}
func (e expr) validateNoConsecutiveCaptures() error {
for i, n := range e {
if i+1 >= len(e) {
break
@ -30,21 +45,21 @@ func (e expr) validate() error {
}
}
}
return nil
}
caps := e.captures()
uniq := map[string]struct{}{}
for _, c := range caps {
if _, ok := uniq[c]; ok {
return fmt.Errorf("duplicate capture name (%s): %w", c, ErrInvalidExpr)
// validateNoNamedCaptures returns ErrCaptureNotAllowed if the expression
// contains any named capture; only the unnamed placeholder "<_>" is
// permitted (used by line filter patterns, which extract nothing).
//
// Fix: removed the stray `uniq[c] = struct{}{}` line left over from the
// duplicate-name check — `uniq` is undefined in this function.
func (e expr) validateNoNamedCaptures() error {
	for _, n := range e {
		if c, ok := n.(capture); ok && !c.isUnnamed() {
			return fmt.Errorf("%w: found '%s'", ErrCaptureNotAllowed, n.String())
		}
	}
	return nil
}
func (e expr) captures() (captures []string) {
for _, n := range e {
if c, ok := n.(capture); ok && !c.isUnamed() {
if c, ok := n.(capture); ok && !c.isUnnamed() {
captures = append(captures, c.Name())
}
}
@ -65,8 +80,8 @@ func (c capture) Name() string {
return string(c)
}
func (c capture) isUnamed() bool {
return string(c) == underscore
func (c capture) isUnnamed() bool {
return len(c) == 1 && c[0] == underscore[0]
}
type literals []byte

@ -19,8 +19,12 @@ func init() {
}
// parseExpr parses a pattern expression from a string; it is a thin
// convenience wrapper around parseExprBytes.
func parseExpr(input string) (expr, error) {
	return parseExprBytes([]byte(input))
}
func parseExprBytes(input []byte) (expr, error) {
l := newLexer()
l.setData([]byte(input))
l.setData(input)
e := exprNewParser().Parse(l)
if e != 0 || len(l.errs) > 0 {
return nil, l.errs[0]

@ -6,23 +6,19 @@ import (
)
// Validation errors returned when parsing pattern expressions.
//
// Fix: ErrNoCapture and ErrInvalidExpr were each declared twice (diff
// interleave residue), which is a redeclaration compile error.
var (
	// ErrNoCapture is returned by validate when the expression has no capture.
	ErrNoCapture = errors.New("at least one capture is required")
	// ErrCaptureNotAllowed is returned for named captures in line filters.
	ErrCaptureNotAllowed = errors.New("named captures are not allowed")
	// ErrInvalidExpr wraps malformed expressions (e.g. duplicate capture names).
	ErrInvalidExpr = errors.New("invalid expression")
)
type Matcher interface {
Matches(in []byte) [][]byte
Names() []string
}
type matcher struct {
type Matcher struct {
e expr
captures [][]byte
names []string
}
func New(in string) (Matcher, error) {
func New(in string) (*Matcher, error) {
e, err := parseExpr(in)
if err != nil {
return nil, err
@ -30,16 +26,47 @@ func New(in string) (Matcher, error) {
if err := e.validate(); err != nil {
return nil, err
}
return &matcher{
return &Matcher{
e: e,
captures: make([][]byte, 0, e.captureCount()),
names: e.captures(),
}, nil
}
// ParseLineFilter parses a pattern expression intended for use as a line
// filter (the |> and !> operators). Unlike New, it does not require any
// capture and rejects named captures; consecutive captures remain invalid.
// An empty input yields a Matcher with an empty expression.
func ParseLineFilter(in []byte) (*Matcher, error) {
	if len(in) == 0 {
		return new(Matcher), nil
	}
	parsed, err := parseExprBytes(in)
	if err != nil {
		return nil, err
	}
	if err := parsed.validateNoConsecutiveCaptures(); err != nil {
		return nil, err
	}
	if err := parsed.validateNoNamedCaptures(); err != nil {
		return nil, err
	}
	return &Matcher{e: parsed}, nil
}
// ParseLiterals parses the pattern and returns only its literal segments,
// in order of appearance, discarding captures. Used to derive substrings
// a matching line must contain (e.g. for bloom-filter tests).
func ParseLiterals(in string) ([][]byte, error) {
	parsed, err := parseExpr(in)
	if err != nil {
		return nil, err
	}
	out := make([][]byte, 0, len(parsed))
	for _, node := range parsed {
		l, ok := node.(literals)
		if !ok {
			continue
		}
		out = append(out, l)
	}
	return out, nil
}
// Matches matches the given line with the provided pattern.
// Matches invalidates the previous returned captures array.
func (m *matcher) Matches(in []byte) [][]byte {
func (m *Matcher) Matches(in []byte) [][]byte {
if len(in) == 0 {
return nil
}
@ -62,7 +89,7 @@ func (m *matcher) Matches(in []byte) [][]byte {
// from now we have capture - literals - capture ... (literals)?
for len(expr) != 0 {
if len(expr) == 1 { // we're ending on a capture.
if !(expr[0].(capture)).isUnamed() {
if !(expr[0].(capture)).isUnnamed() {
captures = append(captures, in)
}
return captures
@ -73,13 +100,13 @@ func (m *matcher) Matches(in []byte) [][]byte {
i := bytes.Index(in, ls)
if i == -1 {
// if a capture is missed we return up to the end as the capture.
if !capt.isUnamed() {
if !capt.isUnnamed() {
captures = append(captures, in)
}
return captures
}
if capt.isUnamed() {
if capt.isUnnamed() {
in = in[len(ls)+i:]
continue
}
@ -90,6 +117,42 @@ func (m *matcher) Matches(in []byte) [][]byte {
return captures
}
func (m *matcher) Names() []string {
func (m *Matcher) Names() []string {
return m.names
}
// Test reports whether the line matches the pattern in its entirety.
//
// If we end up on a literal, we only consider the test successful if
// the remaining input is empty. Otherwise, if we end up on a capture,
// the remainder (the captured text) must not be empty.
//
// For example, "foo bar baz" does not match "<_> bar", but it matches
// "<_> baz" and "foo <_>".
//
// Empty captures are not allowed as well: " bar " does not match
// "<_> bar <_>", but matches "<_>bar<_>".
func (m *Matcher) Test(in []byte) bool {
	if len(in) == 0 || len(m.e) == 0 {
		// An empty line can only match an empty pattern.
		return len(in) == 0 && len(m.e) == 0
	}
	var off int
	for i := 0; i < len(m.e); i++ {
		lit, ok := m.e[i].(literals)
		if !ok {
			continue
		}
		j := bytes.Index(in[off:], lit)
		if j == -1 {
			return false
		}
		if i == 0 && j != 0 {
			// Fix: a pattern that begins with a literal is anchored — the
			// line must start with it. Without this check, "foo <_>" would
			// incorrectly match "xfoo bar", breaking the symmetry with the
			// end-of-line anchoring enforced below.
			return false
		}
		if i != 0 && j == 0 {
			// This means we either have repetitive literals, or an empty
			// capture. Either way, the line does not match the pattern.
			return false
		}
		off += j + len(lit)
	}
	// A trailing capture must capture something (non-empty remainder);
	// a trailing literal must have consumed the input exactly.
	_, endsWithCapture := m.e[len(m.e)-1].(capture)
	hasRemainder := off != len(in)
	return endsWithCapture == hasRemainder
}

@ -4,6 +4,7 @@ import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -11,97 +12,144 @@ var fixtures = []struct {
expr string
in string
expected []string
matches bool
}{
{
"foo <foo> bar",
"foo buzz bar",
[]string{"buzz"},
true,
},
{
"foo <foo> bar<fuzz>",
"foo buzz bar",
[]string{"buzz", ""},
false,
},
{
"<foo>foo <bar> bar",
"foo buzz bar",
[]string{"", "buzz"},
false,
},
{
"<foo> bar<fuzz>",
" bar",
[]string{"", ""},
false,
},
{
"<foo>bar<baz>",
" bar ",
[]string{" ", " "},
true,
},
{
"<foo> bar<baz>",
" bar ",
[]string{"", " "},
false,
},
{
"<foo>bar <baz>",
" bar ",
[]string{" ", ""},
false,
},
{
"<foo>",
" bar ",
[]string{" bar "},
true,
},
{
"<path>?<_>",
`/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1`,
[]string{"/api/plugins/versioncheck"},
true,
},
{
"<path>?<_>",
`/api/plugins/status`,
[]string{"/api/plugins/status"},
false,
},
{
// Common Log Format
`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`,
[]string{"127.0.0.1", "user-identifier", "frank", "GET", "/apache_pb.gif", "200", "2326"},
true,
},
{
// Combined Log Format
`<ip> - - [<_>] "<method> <path> <_>" <status> <size> `,
`35.191.8.106 - - [19/May/2021:07:21:49 +0000] "GET /api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1 HTTP/1.1" 200 107 "-" "Go-http-client/2.0" "80.153.74.144, 34.120.177.193" "TLSv1.3" "DE" "DEBW"`,
[]string{"35.191.8.106", "GET", "/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1", "200", "107"},
false,
},
{
// MySQL
`<_> <id> [<level>] [<no>] [<component>] `,
`2020-08-06T14:25:02.835618Z 0 [Note] [MY-012487] [InnoDB] DDL log recovery : begin`,
[]string{"0", "Note", "MY-012487", "InnoDB"},
false,
},
{
// MySQL
`<_> <id> [<level>] `,
`2021-05-19T07:40:12.215792Z 42761518 [Note] Aborted connection 42761518 to db: 'hosted_grafana' user: 'hosted_grafana' host: '10.36.4.122' (Got an error reading communication packets)`,
[]string{"42761518", "Note"},
false,
},
{
// Kubernetes api-server
`<id> <_> <_> <line>] `,
`W0519 07:46:47.647050 1 clientconn.go:1223] grpc: addrConn.createTransport failed to connect to {https://kubernetes-etcd-1.kubernetes-etcd:2379 <nil> 0 <nil>}. Err :connection error: desc = "transport: Error while dialing dial tcp 10.32.85.85:2379: connect: connection refused". Reconnecting...`,
[]string{"W0519", "clientconn.go:1223"},
false,
},
{
// Cassandra
`<level> [<component>]<_> in <duration>.<_>`,
`INFO [Service Thread] 2021-05-19 07:40:12,130 GCInspector.java:284 - ParNew GC in 248ms. CMS Old Gen: 5043436640 -> 5091062064; Par Eden Space: 671088640 -> 0; Par Survivor Space: 70188280 -> 60139760`,
[]string{"INFO", "Service Thread", "248ms"},
true,
},
{
// Cortex & Loki distributor
`<_> msg="<method> <path> (<status>) <duration>"`,
`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`,
[]string{"POST", "/loki/api/v1/push", "204", "1.238734ms"},
true,
},
{
// Etcd
`<_> <_> <level> | <component>: <_> peer <peer_id> <_> tcp <ip>:<_>`,
`2021-05-19 08:16:50.181436 W | rafthttp: health check for peer fd8275e521cfb532 could not connect: dial tcp 10.32.85.85:2380: connect: connection refused`,
[]string{"W", "rafthttp", "fd8275e521cfb532", "10.32.85.85"},
true,
},
{
// Kafka
`<_>] <level> [Log partition=<part>, dir=<dir>] `,
`[2021-05-19 08:35:28,681] INFO [Log partition=p-636-L-fs-117, dir=/data/kafka-logs] Deleting segment 455976081 (kafka.log.Log)`,
[]string{"INFO", "p-636-L-fs-117", "/data/kafka-logs"},
false,
},
{
// Elastic
`<_>][<level>][<component>] [<id>] [<index>]`,
`[2021-05-19T06:54:06,994][INFO ][o.e.c.m.MetaDataMappingService] [1f605d47-8454-4bfb-a67f-49f318bf837a] [usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA] update_mapping [report]`,
[]string{"INFO ", "o.e.c.m.MetaDataMappingService", "1f605d47-8454-4bfb-a67f-49f318bf837a", "usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA"},
false,
},
{
// Envoy
`<_> "<method> <path> <_>" <status> <_> <received_bytes> <sent_bytes> <duration> <upstream_time> "<forward_for>" "<agent>" <_> <_> "<upstream>"`,
`[2016-04-15T20:17:00.310Z] "POST /api/v1/locations HTTP/2" 204 - 154 0 226 100 "10.0.35.28" "nsq2http" "cc21d9b0-cf5c-432b-8c7e-98aeb7988cd2" "locations" "tcp://10.0.2.1:80"`,
[]string{"POST", "/api/v1/locations", "204", "154", "0", "226", "100", "10.0.35.28", "nsq2http", "tcp://10.0.2.1:80"},
true,
},
}
@ -112,12 +160,14 @@ func Test_matcher_Matches(t *testing.T) {
t.Parallel()
m, err := New(tt.expr)
require.NoError(t, err)
actual := m.Matches([]byte(tt.in))
line := []byte(tt.in)
assert.Equal(t, tt.matches, m.Test(line))
actual := m.Matches(line)
var actualStrings []string
for _, a := range actual {
actualStrings = append(actualStrings, string(a))
}
require.Equal(t, tt.expected, actualStrings)
assert.Equal(t, tt.expected, actualStrings)
})
}
}
@ -162,3 +212,40 @@ func Test_Error(t *testing.T) {
})
}
}
// Test_ParseLineFilter checks that line filter patterns accept unnamed
// captures (including the empty pattern) and reject named captures.
func Test_ParseLineFilter(t *testing.T) {
	cases := []struct {
		pattern string
		err     error
	}{
		{pattern: "<_>", err: nil}, // Meaningless, but valid: matches everything.
		{pattern: "", err: nil},    // Empty pattern matches empty lines.
		{pattern: "foo <_> bar <_>", err: nil},
		{pattern: "<foo <foo> bar <_>", err: fmt.Errorf("%w: found '<foo>'", ErrCaptureNotAllowed)},
		{pattern: "<foo>", err: fmt.Errorf("%w: found '<foo>'", ErrCaptureNotAllowed)},
	}
	for _, c := range cases {
		t.Run(c.pattern, func(t *testing.T) {
			_, err := ParseLineFilter([]byte(c.pattern))
			require.Equal(t, c.err, err)
		})
	}
}
// Test_ParseLiterals verifies that ParseLiterals returns only the literal
// segments of a pattern and surfaces parse errors — note an empty pattern
// is a syntax error here, unlike in ParseLineFilter.
func Test_ParseLiterals(t *testing.T) {
	for _, tt := range []struct {
		pattern string
		lit     [][]byte
		err     error
	}{
		{"<_>", [][]byte{}, nil},
		{"", nil, newParseError("syntax error: unexpected $end, expecting IDENTIFIER or LITERAL", 1, 1)},
		{"foo <_> bar <_>", [][]byte{[]byte("foo "), []byte(" bar ")}, nil},
		{"<foo>", [][]byte{}, nil},
	} {
		t.Run(tt.pattern, func(t *testing.T) {
			lit, err := ParseLiterals(tt.pattern)
			require.Equal(t, tt.err, err)
			require.Equal(t, tt.lit, lit)
		})
	}
}

@ -355,7 +355,7 @@ func newLineFilterExpr(ty log.LineMatchType, op, match string) *LineFilterExpr {
func newOrLineFilter(left, right *LineFilterExpr) *LineFilterExpr {
right.Ty = left.Ty
if left.Ty == log.LineMatchEqual || left.Ty == log.LineMatchRegexp {
if left.Ty == log.LineMatchEqual || left.Ty == log.LineMatchRegexp || left.Ty == log.LineMatchPattern {
left.Or = right
right.IsOrChild = true
return left

@ -24,7 +24,7 @@ func Test_logSelectorExpr_String(t *testing.T) {
{`{foo="bar"}`, false},
{`{foo="bar", bar!="baz"}`, false},
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop"`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap"`, true},
{`{foo="bar"} |= "baz" |~ "blip" |> "qux" !> "waldo" != "flip" !~ "flap"`, true},
{`{foo="bar", bar!="baz"} |= ""`, false},
{`{foo="bar", bar!="baz"} |= "" |= ip("::1")`, true},
{`{foo="bar", bar!="baz"} |= "" != ip("127.0.0.1")`, true},
@ -32,7 +32,10 @@ func Test_logSelectorExpr_String(t *testing.T) {
{`{foo="bar", bar!="baz"} |~ ".*"`, false},
{`{foo="bar", bar!="baz"} |= "" |= ""`, false},
{`{foo="bar", bar!="baz"} |~ "" |= "" |~ ".*"`, false},
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop" | json`, true},
{`{foo="bar", bar!="baz"} |> ""`, true},
{`{foo="bar", bar!="baz"} |> "<_>"`, true},
{`{foo="bar", bar!="baz"} |> "<_>" !> "<_> <_>"`, true},
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop" |> "<_> bop <_>" | json`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt --strict`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt --strict --keep-empty`, true},
@ -275,6 +278,7 @@ func Test_NilFilterDoesntPanic(t *testing.T) {
`{namespace="dev", container_name="cart"} |= "bleep" |= "" |= "bloop"`,
`{namespace="dev", container_name="cart"} |= "bleep" |= "" |= "bloop"`,
`{namespace="dev", container_name="cart"} |= "bleep" |= "bloop" |= ""`,
`{namespace="dev", container_name="cart"} !> ""`,
} {
t.Run(tc, func(t *testing.T) {
expr, err := ParseLogSelector(tc, true)
@ -355,6 +359,20 @@ func Test_FilterMatcher(t *testing.T) {
},
[]linecheck{{"foo", true}, {"bar", false}, {"foobar", true}},
},
{
`{app="foo"} |> "foo <_>"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"foo bar", true}, {"foo", false}},
},
{
`{app="foo"} !> "foo <_>"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"foo bar", false}, {"foo", true}},
},
{
`{app="foo"} |~ "foo\\.bar\\.baz"`,
[]*labels.Matcher{
@ -425,6 +443,20 @@ func Test_FilterMatcher(t *testing.T) {
},
[]linecheck{{"foo", false}, {"bar", true}, {"127.0.0.2", true}, {"127.0.0.1", false}},
},
{
`{app="foo"} |> "foo" or "bar"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"foo", true}, {"bar", true}, {"none", false}},
},
{
`{app="foo"} !> "foo" or "bar"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"foo", false}, {"bar", false}, {"none", true}},
},
} {
tt := tt
t.Run(tt.q, func(t *testing.T) {
@ -455,6 +487,8 @@ func TestOrLineFilterTypes(t *testing.T) {
{log.LineMatchNotEqual},
{log.LineMatchRegexp},
{log.LineMatchNotRegexp},
{log.LineMatchPattern},
{log.LineMatchNotPattern},
} {
t.Run("right inherits left's type", func(t *testing.T) {
left := &LineFilterExpr{LineFilter: LineFilter{Ty: tt.ty, Match: "something"}}
@ -523,6 +557,14 @@ func TestStringer(t *testing.T) {
in: `{app="foo"} |~ ip("127.0.0.1") or "foo"`,
out: `{app="foo"} |~ ip("127.0.0.1") or "foo"`,
},
{
in: `{app="foo"} |> "foo <_> baz" or "foo <_>"`,
out: `{app="foo"} |> "foo <_> baz" or "foo <_>"`,
},
{
in: `{app="foo"} |> "foo <_> baz" or "foo <_>" |> "foo <_> baz"`,
out: `{app="foo"} |> "foo <_> baz" or "foo <_>" |> "foo <_> baz"`,
},
{ // !(A || B) == !A && !B
in: `{app="foo"} != "foo" or "bar"`,
out: `{app="foo"} != "foo" != "bar"`,
@ -539,6 +581,10 @@ func TestStringer(t *testing.T) {
in: `{app="foo"} !~ ip("127.0.0.1") or "foo"`,
out: `{app="foo"} !~ ip("127.0.0.1") !~ "foo"`,
},
{
in: `{app="foo"} !> "<_> foo <_>" or "foo <_>" !> "foo <_> baz"`,
out: `{app="foo"} !> "<_> foo <_>" !> "foo <_>" !> "foo <_> baz"`,
},
} {
t.Run(tc.in, func(t *testing.T) {
expr, err := ParseExpr(tc.in)
@ -563,19 +609,19 @@ func BenchmarkContainsFilter(b *testing.B) {
}{
{
"AllMatches",
`{app="foo"} |= "foo" |= "hello" |= "world" |= "bar"`,
`{app="foo"} |= "foo" |= "hello" |= "world" |= "bar" |> "<_> world <_>"`,
},
{
"OneMatches",
`{app="foo"} |= "foo" |= "not" |= "in" |= "there"`,
`{app="foo"} |= "foo" |= "not" |= "in" |= "there" |> "yet"`,
},
{
"MixedFiltersTrue",
`{app="foo"} |= "foo" != "not" |~ "hello.*bar" != "there" |= "world"`,
`{app="foo"} |= "foo" != "not" |~ "hello.*bar" != "there" |= "world" |> "<_> more than one <_>"`,
},
{
"MixedFiltersFalse",
`{app="foo"} |= "baz" != "not" |~ "hello.*bar" != "there" |= "world"`,
`{app="foo"} |= "baz" != "not" |~ "hello.*bar" != "there" |= "world" !> "<_> more than one"`,
},
{
"GreedyRegex",

@ -134,7 +134,7 @@ import (
%token <bytes> BYTES
%token <str> IDENTIFIER STRING NUMBER PARSER_FLAG
%token <duration> DURATION RANGE
%token <val> MATCHERS LABELS EQ RE NRE OPEN_BRACE CLOSE_BRACE OPEN_BRACKET CLOSE_BRACKET COMMA DOT PIPE_MATCH PIPE_EXACT
%token <val> MATCHERS LABELS EQ RE NRE NPA OPEN_BRACE CLOSE_BRACE OPEN_BRACKET CLOSE_BRACKET COMMA DOT PIPE_MATCH PIPE_EXACT PIPE_PATTERN
OPEN_PARENTHESIS CLOSE_PARENTHESIS BY WITHOUT COUNT_OVER_TIME RATE RATE_COUNTER SUM SORT SORT_DESC AVG MAX MIN COUNT STDDEV STDVAR BOTTOMK TOPK
BYTES_OVER_TIME BYTES_RATE BOOL JSON REGEXP LOGFMT PIPE LINE_FMT LABEL_FMT UNWRAP AVG_OVER_TIME SUM_OVER_TIME MIN_OVER_TIME
MAX_OVER_TIME STDVAR_OVER_TIME STDDEV_OVER_TIME QUANTILE_OVER_TIME BYTES_CONV DURATION_CONV DURATION_SECONDS_CONV
@ -241,8 +241,10 @@ labelReplaceExpr:
filter:
PIPE_MATCH { $$ = log.LineMatchRegexp }
| PIPE_EXACT { $$ = log.LineMatchEqual }
| PIPE_PATTERN { $$ = log.LineMatchPattern }
| NRE { $$ = log.LineMatchNotRegexp }
| NEQ { $$ = log.LineMatchNotEqual }
| NPA { $$ = log.LineMatchNotPattern }
;
selector:

File diff suppressed because it is too large Load Diff

@ -23,8 +23,10 @@ var tokens = map[string]int{
OpTypeNEQ: NEQ,
"=~": RE,
"!~": NRE,
"!>": NPA,
"|=": PIPE_EXACT,
"|~": PIPE_MATCH,
"|>": PIPE_PATTERN,
OpPipe: PIPE,
OpUnwrap: UNWRAP,
"(": OPEN_PARENTHESIS,

@ -16,7 +16,10 @@ func TestLineFilterSerialization(t *testing.T) {
{Ty: log.LineMatchEqual, Match: "match", Op: "OR"},
{Ty: log.LineMatchNotEqual, Match: "not match"},
{Ty: log.LineMatchNotEqual, Match: "not match", Op: "OR"},
{Ty: log.LineMatchRegexp, Op: "OR"},
{Ty: log.LineMatchPattern, Match: "match"},
{Ty: log.LineMatchPattern, Match: "match", Op: "OR"},
{Ty: log.LineMatchNotPattern, Match: "not match"},
{Ty: log.LineMatchNotPattern, Match: "not match", Op: "OR"},
} {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
b := make([]byte, orig.Size())

@ -5,6 +5,7 @@ import (
regexpsyntax "github.com/grafana/regexp/syntax"
"github.com/grafana/loki/pkg/logql/log"
"github.com/grafana/loki/pkg/logql/log/pattern"
"github.com/grafana/loki/pkg/logql/syntax"
"github.com/grafana/loki/pkg/storage/bloom/v1/filter"
)
@ -89,7 +90,7 @@ func FiltersToBloomTest(b NGramBuilder, filters ...syntax.LineFilterExpr) BloomT
func simpleFilterToBloomTest(b NGramBuilder, filter syntax.LineFilter) BloomTest {
switch filter.Ty {
case log.LineMatchNotEqual, log.LineMatchNotRegexp:
case log.LineMatchNotEqual, log.LineMatchNotRegexp, log.LineMatchNotPattern:
// We cannot test _negated_ filters with a bloom filter since blooms are probabilistic
// filters that can only tell us if a string _might_ exist.
// For example, for `!= "foo"`, the bloom filter might tell us that the string "foo" might exist
@ -114,6 +115,8 @@ func simpleFilterToBloomTest(b NGramBuilder, filter syntax.LineFilter) BloomTest
}
return matcherFilterWrapper{filter: matcher}
case log.LineMatchPattern:
return newPatternTest(b, filter.Match)
default:
return MatchAll
}
@ -275,3 +278,20 @@ func (o orTest) Matches(bloom filter.Checker) bool {
func (o orTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool {
return o.left.MatchesWithPrefixBuf(bloom, buf, prefixLen) || o.right.MatchesWithPrefixBuf(bloom, buf, prefixLen)
}
// newPatternTest builds a bloom test from the literal parts of a pattern
// line filter: every n-gram of every literal must be present in the bloom.
// If the pattern cannot be parsed, it degrades to MatchAll, since blooms
// can only rule lines out, never confirm them.
func newPatternTest(b NGramBuilder, match string) BloomTest {
	lits, err := pattern.ParseLiterals(match)
	if err != nil {
		return MatchAll
	}
	var test stringTest
	for _, lit := range lits {
		for it := b.Tokens(string(lit)); it.Next(); {
			tok := it.At()
			// Copy: the iterator may reuse its backing buffer between calls.
			owned := make([]byte, len(tok))
			copy(owned, tok)
			test.ngrams = append(test.ngrams, owned)
		}
	}
	return test
}

@ -166,6 +166,42 @@ func TestFiltersToBloomTests(t *testing.T) {
bloom: fakeBloom{"foo"},
expectMatch: true,
},
{
name: "pattern match exists",
query: `{app="fake"} |> "<_>foo<bar>"`,
bloom: fakeBloom{"foo", "bar"},
expectMatch: true,
},
{
name: "pattern match does not exist",
query: `{app="fake"} |> "<_>foo<bar>"`,
bloom: fakeBloom{"bar", "baz"},
expectMatch: false,
},
{
name: "pattern not match exists",
query: `{app="fake"} !> "<_>foo<bar>"`,
bloom: fakeBloom{"foo", "bar"},
expectMatch: true,
},
{
name: "pattern not match does not exist",
query: `{app="fake"} !> "<_>foo<bar>"`,
bloom: fakeBloom{"bar", "baz"},
expectMatch: true,
},
{
name: "pattern all",
query: `{app="fake"} |> "<_>"`,
bloom: fakeBloom{"bar", "baz"},
expectMatch: true,
},
{
name: "pattern empty",
query: `{app="fake"} |> ""`,
bloom: fakeBloom{"bar", "baz"},
expectMatch: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
expr, err := syntax.ParseExpr(tc.query)

Loading…
Cancel
Save