LogQL: Pattern Parser (#3837)

* The beginning of a fun story.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Working on adding ragel.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Adding AST parsing with Yacc and Ragel.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Got a pattern parser working.

Reworking the AST to work with bytes instead of runes.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Set up tests and the matching algorithm.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* More test cases.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Add some validation for the pattern expression.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Hooking into LogQL + performance boost.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Adds documentation

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Improve bounds check.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Excludes generated files from linting.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Review feedback

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Docs suggestions

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

Co-authored-by: Danny Kopping <dannykopping@gmail.com>
Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

25 changed files (lines changed in parentheses):

.drone/drone.yml (8)
.golangci.yml (3)
Makefile (43)
docs/sources/logql/_index.md (59)
loki-build-image/Dockerfile (2)
pkg/logql/ast.go (11)
pkg/logql/ast_test.go (12)
pkg/logql/expr.y (3)
pkg/logql/expr.y.go (691)
pkg/logql/functions_test.go (1)
pkg/logql/lex.go (9)
pkg/logql/log/parser.go (52)
pkg/logql/log/parser_test.go (87)
pkg/logql/log/pattern/ast.go (83)
pkg/logql/log/pattern/expr.y (45)
pkg/logql/log/pattern/expr.y.go (466)
pkg/logql/log/pattern/lexer.go (62)
pkg/logql/log/pattern/lexer.rl (43)
pkg/logql/log/pattern/lexer.rl.go (241)
pkg/logql/log/pattern/lexer_test.go (47)
pkg/logql/log/pattern/parser.go (50)
pkg/logql/log/pattern/parser_test.go (59)
pkg/logql/log/pattern/pattern.go (95)
pkg/logql/log/pattern/pattern_test.go (162)
pkg/logql/parser_test.go (21)

@ -12,28 +12,28 @@ workspace:
steps:
- name: test
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false test
depends_on:
- clone
- name: lint
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false lint
depends_on:
- clone
- name: check-generated-files
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-generated-files
depends_on:
- clone
- name: check-mod
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-mod
depends_on:

@ -30,6 +30,9 @@ run:
# no need to include all autogenerated files, we confidently recognize
# autogenerated files. If it's not please let us know.
skip-files:
- .*.pb.go
- .*.y.go
- .*.rl.go
# output configuration options
output:
# colored-line-number|line-number|json|tab|checkstyle, default is "colored-line-number"

@ -6,7 +6,7 @@
.PHONY: push-images push-latest save-images load-images promtail-image loki-image build-image
.PHONY: bigtable-backup, push-bigtable-backup
.PHONY: benchmark-store, drone, check-mod
.PHONY: migrate migrate-image lint-markdown
.PHONY: migrate migrate-image lint-markdown ragel
SHELL = /usr/bin/env bash
@ -38,7 +38,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES))
# make BUILD_IN_CONTAINER=false target
# or you can override this with an environment variable
BUILD_IN_CONTAINER ?= true
BUILD_IMAGE_VERSION := 0.14.0
BUILD_IMAGE_VERSION := 0.15.0
# Docker image info
IMAGE_PREFIX ?= grafana
@ -87,6 +87,10 @@ PROTO_GOS := $(patsubst %.proto,%.pb.go,$(PROTO_DEFS))
YACC_DEFS := $(shell find . $(DONT_FIND) -type f -name *.y -print)
YACC_GOS := $(patsubst %.y,%.y.go,$(YACC_DEFS))
# Ragel Files
RAGEL_DEFS := $(shell find . $(DONT_FIND) -type f -name *.rl -print)
RAGEL_GOS := $(patsubst %.rl,%.rl.go,$(RAGEL_DEFS))
# Promtail UI files
PROMTAIL_GENERATED_FILE := clients/pkg/promtail/server/ui/assets_vfsdata.go
PROMTAIL_UI_FILES := $(shell find ./clients/pkg/promtail/server/ui -type f -name assets_vfsdata.go -prune -o -print)
@ -126,8 +130,8 @@ binfmt:
all: promtail logcli loki loki-canary check-generated-files
# This is really a check for the CI to make sure generated files are built and checked in manually
check-generated-files: touch-protobuf-sources yacc protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
check-generated-files: touch-protobuf-sources yacc ragel protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(RAGEL_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
echo "\nChanges found in generated files"; \
echo "Run 'make check-generated-files' and commit the changes to fix this error."; \
echo "If you are actively developing these files you can ignore this error"; \
@ -147,7 +151,7 @@ touch-protobuf-sources:
# Logcli #
##########
logcli: yacc cmd/logcli/logcli
logcli: yacc ragel cmd/logcli/logcli
logcli-image:
$(SUDO) docker build -t $(IMAGE_PREFIX)/logcli:$(IMAGE_TAG) -f cmd/logcli/Dockerfile .
@ -160,8 +164,8 @@ cmd/logcli/logcli: $(APP_GO_FILES) cmd/logcli/main.go
# Loki #
########
loki: protos yacc cmd/loki/loki
loki-debug: protos yacc cmd/loki/loki-debug
loki: protos yacc ragel cmd/loki/loki
loki-debug: protos yacc ragel cmd/loki/loki-debug
cmd/loki/loki: $(APP_GO_FILES) cmd/loki/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
@ -175,7 +179,7 @@ cmd/loki/loki-debug: $(APP_GO_FILES) cmd/loki/main.go
# Loki-Canary #
###############
loki-canary: protos yacc cmd/loki-canary/loki-canary
loki-canary: protos yacc ragel cmd/loki-canary/loki-canary
cmd/loki-canary/loki-canary: $(APP_GO_FILES) cmd/loki-canary/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
@ -206,8 +210,8 @@ PROMTAIL_DEBUG_GO_FLAGS = $(DYN_DEBUG_GO_FLAGS)
endif
endif
promtail: yacc clients/cmd/promtail/promtail
promtail-debug: yacc clients/cmd/promtail/promtail-debug
promtail: yacc ragel clients/cmd/promtail/promtail
promtail-debug: yacc ragel clients/cmd/promtail/promtail-debug
promtail-clean-assets:
rm -rf clients/pkg/promtail/server/ui/assets_vfsdata.go
@ -308,6 +312,25 @@ else
rm ${@}.back
endif
#########
# Ragels #
#########
ragel: $(RAGEL_GOS)
%.rl.go: %.rl
ifeq ($(BUILD_IN_CONTAINER),true)
@mkdir -p $(shell pwd)/.pkg
@mkdir -p $(shell pwd)/.cache
$(SUDO) docker run $(RM) $(TTY) -i \
-v $(shell pwd)/.cache:/go/cache$(MOUNT_FLAGS) \
-v $(shell pwd)/.pkg:/go/pkg$(MOUNT_FLAGS) \
-v $(shell pwd):/src/loki$(MOUNT_FLAGS) \
$(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@;
else
ragel -Z $< -o $@
endif
#############
# Protobufs #
#############

@ -154,9 +154,10 @@ In case of errors, for instance if the line is not in the expected format, the l
If an extracted label key name already exists in the original log stream, the extracted label key will be suffixed with the `_extracted` keyword to make the distinction between the two labels. You can forcefully override the original label using a [label formatter expression](#labels-format-expression). However if an extracted key appears twice, only the latest label value will be kept.
We support currently support [json](#json), [logfmt](#logfmt), [regexp](#regexp) and [unpack](#unpack) parsers.
Loki supports [JSON](#json), [logfmt](#logfmt), [pattern](#pattern), [regexp](#regexp) and [unpack](#unpack) parsers.
It's easier to use the predefined parsers like `json` and `logfmt` when you can, falling back to `regexp` when the log lines have unusual structure. Multiple parsers can be used during the same log pipeline which is useful when you want to parse complex logs. ([see examples](#multiple-parsers))
It's easier to use the predefined parsers `json` and `logfmt` when you can. If you can't, the `pattern` and `regexp` parsers can be used for log lines with an unusual structure. The `pattern` parser is easier and faster to write; it also outperforms the `regexp` parser.
Multiple parsers can be used by a single log pipeline. This is useful for parsing complex logs. There are examples in [Multiple parsers](#multiple-parsers).
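For example, one of the parser tests added in this change chains the new `pattern` parser with `json` in a metric query:
```logql
sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" | json [5m]))
```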
##### Json
@ -277,6 +278,60 @@ will get those labels extracted:
"status" => "200"
```
##### Pattern
The pattern parser allows the explicit extraction of fields from log lines by defining a pattern expression. The expression matches the structure of a log line.
Consider this NGINX log line.
```log
0.191.12.2 - - [10/Jun/2021:09:14:29 +0000] "GET /api/plugins/versioncheck HTTP/1.1" 200 2 "-" "Go-http-client/2.0" "13.76.247.102, 34.120.177.193" "TLSv1.2" "US" ""
```
This log line can be parsed with the expression
`<ip> - - <_> "<method> <uri> <_>" <status> <size> <_> "<agent>" <_>`
to extract these fields:
```kv
"ip" => "0.191.12.2"
"method" => "GET"
"uri" => "/api/plugins/versioncheck"
"status" => "200"
"size" => "2"
"agent" => "Go-http-client/2.0"
```
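Put together, a query using this expression could look like the following sketch; the `{container="nginx"}` stream selector is an assumption for illustration:
```logql
{container="nginx"} | pattern `<ip> - - <_> "<method> <uri> <_>" <status> <size> <_> "<agent>" <_>` | status >= 400
```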
A pattern expression is composed of captures and literals.
A capture is a field name delimited by the `<` and `>` characters. `<example>` defines the field name `example`.
An unnamed capture appears as `<_>`. The unnamed capture skips matched content.
Captures are matched from the line beginning or the previous set of literals, to the line end or the next set of literals.
If a capture is not matched, the pattern parser will stop.
Literals can be any sequence of UTF-8 characters, including whitespace characters.
By default, a pattern expression is anchored at the start of the log line. If the expression starts with literals, then the log line must also start with the same set of literals. Use `<_>` at the beginning of the expression if you do not want to anchor the expression at the start.
Consider the log line
```log
level=debug ts=2021-06-10T09:24:13.472094048Z caller=logging.go:66 traceID=0568b66ad2d9294c msg="POST /loki/api/v1/push (204) 16.652862ms"
```
To match `msg="`, use the expression:
```pattern
<_> msg="<method> <path> (<status>) <latency>"
```
A pattern expression is invalid if
- It does not contain any named capture.
- It contains two consecutive captures not separated by whitespace characters.
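For example, mirroring the validation tests added in this change, the first two expressions below are invalid (no named capture; consecutive captures) while the last one is valid:
```pattern
foo <_> bar <_>
<f><f>
<f> <a>
```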
##### regexp
Unlike the logfmt and json parsers, which implicitly extract all values and take no parameters, the **regexp** parser takes a single parameter `| regexp "<re>"`, which is a regular expression using the [Golang](https://golang.org/) [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
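For example, reusing the NGINX line above, a hypothetical query extracting the status code with a named RE2 group (the `{container="nginx"}` selector is an assumption):
```logql
{container="nginx"} | regexp `HTTP/1.1" (?P<statuscode>\d{3}) ` | statuscode >= 500
```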

@ -40,7 +40,7 @@ RUN GO111MODULE=on go get github.com/fatih/faillint@v1.5.0
FROM golang:1.16.2-buster
RUN apt-get update && \
apt-get install -qy \
musl gnupg \
musl gnupg ragel \
file zip unzip jq gettext\
protobuf-compiler libprotobuf-dev \
libsystemd-dev && \

@ -330,6 +330,8 @@ func (e *labelParserExpr) Stage() (log.Stage, error) {
return log.NewRegexpParser(e.param)
case OpParserTypeUnpack:
return log.NewUnpackParser(), nil
case OpParserTypePattern:
return log.NewPatternParser(e.param)
default:
return nil, fmt.Errorf("unknown parser operator: %s", e.op)
}
@ -601,10 +603,11 @@ const (
OpTypeLTE = "<="
// parsers
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypePattern = "pattern"
OpFmtLine = "line_format"
OpFmtLabel = "label_format"

@ -28,6 +28,7 @@ func Test_logSelectorExpr_String(t *testing.T) {
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop" | json`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | unpack | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | pattern "<foo> bar <buzz>" | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt | b>=10GB`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)"`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)" | ( ( foo<5.01 , bar>20ms ) or foo="bar" ) | line_format "blip{{.boop}}bap" | label_format foo=bar,bar="blip{{.blop}}"`, true},
@ -69,6 +70,7 @@ func Test_SampleExpr_String(t *testing.T) {
`sum(count_over_time({job="mysql"} | json [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | logfmt [5m]))`,
`sum(count_over_time({job="mysql"} | logfmt [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" | json [5m]))`,
`sum(count_over_time({job="mysql"} | unpack | json [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m] offset 10m))`,
@ -358,6 +360,8 @@ func Test_parserExpr_Parser(t *testing.T) {
{"json", OpParserTypeJSON, "", log.NewJSONParser(), false},
{"unpack", OpParserTypeUnpack, "", log.NewUnpackParser(), false},
{"logfmt", OpParserTypeLogfmt, "", log.NewLogfmtParser(), false},
{"pattern", OpParserTypePattern, "<foo> bar <buzz>", mustNewPatternParser("<foo> bar <buzz>"), false},
{"pattern err", OpParserTypePattern, "bar", nil, true},
{"regexp", OpParserTypeRegexp, "(?P<foo>foo)", mustNewRegexParser("(?P<foo>foo)"), false},
{"regexp err ", OpParserTypeRegexp, "foo", nil, true},
}
@ -389,6 +393,14 @@ func mustNewRegexParser(re string) log.Stage {
return r
}
func mustNewPatternParser(p string) log.Stage {
r, err := log.NewPatternParser(p)
if err != nil {
panic(err)
}
return r
}
func Test_canInjectVectorGrouping(t *testing.T) {
tests := []struct {
vecOp string

@ -100,7 +100,7 @@ import (
OPEN_PARENTHESIS CLOSE_PARENTHESIS BY WITHOUT COUNT_OVER_TIME RATE SUM AVG MAX MIN COUNT STDDEV STDVAR BOTTOMK TOPK
BYTES_OVER_TIME BYTES_RATE BOOL JSON REGEXP LOGFMT PIPE LINE_FMT LABEL_FMT UNWRAP AVG_OVER_TIME SUM_OVER_TIME MIN_OVER_TIME
MAX_OVER_TIME STDVAR_OVER_TIME STDDEV_OVER_TIME QUANTILE_OVER_TIME BYTES_CONV DURATION_CONV DURATION_SECONDS_CONV
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET PATTERN
// Operators are listed with increasing precedence.
%left <binOp> OR
@ -246,6 +246,7 @@ labelParser:
| LOGFMT { $$ = newLabelParserExpr(OpParserTypeLogfmt, "") }
| REGEXP STRING { $$ = newLabelParserExpr(OpParserTypeRegexp, $2) }
| UNPACK { $$ = newLabelParserExpr(OpParserTypeUnpack, "") }
| PATTERN STRING { $$ = newLabelParserExpr(OpParserTypePattern, $2) }
;
jsonExpressionParser:

File diff suppressed because it is too large

@ -17,6 +17,7 @@ func Test_Extractor(t *testing.T) {
`sum(count_over_time({job="mysql"}[5m]))`,
`sum(count_over_time({job="mysql"} | json [5m]))`,
`sum(count_over_time({job="mysql"} | logfmt [5m]))`,
`sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m] offset 1h))`,
`topk(10,sum(rate({region="us-east1"}[5m])) by (name))`,

@ -55,10 +55,11 @@ var tokens = map[string]int{
OpTypeLTE: LTE,
// parsers
OpParserTypeJSON: JSON,
OpParserTypeRegexp: REGEXP,
OpParserTypeLogfmt: LOGFMT,
OpParserTypeUnpack: UNPACK,
OpParserTypeJSON: JSON,
OpParserTypeRegexp: REGEXP,
OpParserTypeLogfmt: LOGFMT,
OpParserTypeUnpack: UNPACK,
OpParserTypePattern: PATTERN,
// fmt
OpFmtLabel: LABEL_FMT,

@ -8,6 +8,7 @@ import (
"github.com/grafana/loki/pkg/logql/log/jsonexpr"
"github.com/grafana/loki/pkg/logql/log/logfmt"
"github.com/grafana/loki/pkg/logql/log/pattern"
"github.com/grafana/loki/pkg/logqlmodel"
jsoniter "github.com/json-iterator/go"
@ -212,14 +213,6 @@ func NewRegexpParser(re string) (*RegexpParser, error) {
}, nil
}
func mustNewRegexParser(re string) *RegexpParser {
r, err := NewRegexpParser(re)
if err != nil {
panic(err)
}
return r
}
func (r *RegexpParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
for i, value := range r.regex.FindSubmatch(line) {
if name, ok := r.nameIndex[i]; ok {
@ -265,6 +258,49 @@ func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} }
type PatternParser struct {
matcher pattern.Matcher
names []string
}
func NewPatternParser(pn string) (*PatternParser, error) {
m, err := pattern.New(pn)
if err != nil {
return nil, err
}
for _, name := range m.Names() {
if !model.LabelName(name).IsValid() {
return nil, fmt.Errorf("invalid capture label name '%s'", name)
}
}
return &PatternParser{
matcher: m,
names: m.Names(),
}, nil
}
func (l *PatternParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
if lbs.ParserLabelHints().NoLabels() {
return line, true
}
matches := l.matcher.Matches(line)
names := l.names[:len(matches)]
for i, m := range matches {
name := names[i]
if !lbs.parserKeyHints.ShouldExtract(name) {
continue
}
if lbs.BaseHas(name) {
name = name + duplicateSuffix
}
lbs.Set(name, string(m))
}
return line, true
}
func (l *PatternParser) RequiredLabelNames() []string { return []string{} }
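The following is a minimal, runnable sketch (not part of this diff) of driving the stage directly from outside the package; the `job="nginx"` label is an illustrative assumption, and in a real pipeline the stage would be built from `| pattern "<ip> <_> <user>"`:
```go
package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/logql/log"
	"github.com/prometheus/prometheus/pkg/labels"
)

func main() {
	p, err := log.NewPatternParser(`<ip> <_> <user>`)
	if err != nil {
		panic(err)
	}
	lbs := labels.Labels{{Name: "job", Value: "nginx"}}
	b := log.NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
	// Process returns the line unmodified and adds the captures as labels;
	// the unnamed capture <_> skips the "-" column.
	line, ok := p.Process([]byte(`127.0.0.1 - frank`), b)
	fmt.Println(string(line), ok, b.Labels())
	// prints something like: 127.0.0.1 - frank true {ip="127.0.0.1", job="nginx", user="frank"}
}
```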
type JSONExpressionParser struct {
expressions map[string][]interface{}
}

@ -408,8 +408,9 @@ func Benchmark_Parser(b *testing.B) {
{"json", jsonLine, NewJSONParser(), []string{"response_latency_seconds"}},
{"unpack", packedLike, NewUnpackParser(), []string{"pod"}},
{"logfmt", logfmtLine, NewLogfmtParser(), []string{"info", "throughput", "org_id"}},
{"regex greedy", nginxline, mustNewRegexParser(`GET (?P<path>.*?)/\?`), []string{"path"}},
{"regex status digits", nginxline, mustNewRegexParser(`HTTP/1.1" (?P<statuscode>\d{3}) `), []string{"statuscode"}},
{"regex greedy", nginxline, mustStage(NewRegexpParser(`GET (?P<path>.*?)/\?`)), []string{"path"}},
{"regex status digits", nginxline, mustStage(NewRegexpParser(`HTTP/1.1" (?P<statuscode>\d{3}) `)), []string{"statuscode"}},
{"pattern", nginxline, mustStage(NewPatternParser(`<_> "<method> <path> <_>"<_>`)), []string{"path"}},
} {
b.Run(tt.name, func(b *testing.B) {
line := []byte(tt.line)
@ -433,6 +434,13 @@ func Benchmark_Parser(b *testing.B) {
}
}
func mustStage(s Stage, err error) Stage {
if err != nil {
panic(err)
}
return s
}
func TestNewRegexpParser(t *testing.T) {
tests := []struct {
name string
@ -460,14 +468,14 @@ func TestNewRegexpParser(t *testing.T) {
func Test_regexpParser_Parse(t *testing.T) {
tests := []struct {
name string
parser *RegexpParser
parser Stage
line []byte
lbs labels.Labels
want labels.Labels
}{
{
"no matches",
mustNewRegexParser("(?P<foo>foo|bar)buzz"),
mustStage(NewRegexpParser("(?P<foo>foo|bar)buzz")),
[]byte("blah"),
labels.Labels{
{Name: "app", Value: "foo"},
@ -478,7 +486,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"double matches",
mustNewRegexParser("(?P<foo>.*)buzz"),
mustStage(NewRegexpParser("(?P<foo>.*)buzz")),
[]byte("matchebuzz barbuzz"),
labels.Labels{
{Name: "app", Value: "bar"},
@ -490,7 +498,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"duplicate labels",
mustNewRegexParser("(?P<bar>bar)buzz"),
mustStage(NewRegexpParser("(?P<bar>bar)buzz")),
[]byte("barbuzz"),
labels.Labels{
{Name: "bar", Value: "foo"},
@ -502,7 +510,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"multiple labels extracted",
mustNewRegexParser("status=(?P<status>\\w+),latency=(?P<latency>\\w+)(ms|ns)"),
mustStage(NewRegexpParser("status=(?P<status>\\w+),latency=(?P<latency>\\w+)(ms|ns)")),
[]byte("status=200,latency=500ms"),
labels.Labels{
{Name: "app", Value: "foo"},
@ -733,3 +741,68 @@ func Test_unpackParser_Parse(t *testing.T) {
})
}
}
func Test_PatternParser(t *testing.T) {
tests := []struct {
pattern string
line []byte
lbs labels.Labels
want labels.Labels
}{
{
`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
[]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`),
labels.Labels{
{Name: "foo", Value: "bar"},
},
labels.Labels{
{Name: "foo", Value: "bar"},
{Name: "ip", Value: "127.0.0.1"},
{Name: "userid", Value: "user-identifier"},
{Name: "user", Value: "frank"},
{Name: "method", Value: "GET"},
{Name: "path", Value: "/apache_pb.gif"},
{Name: "status", Value: "200"},
{Name: "size", Value: "2326"},
},
},
{
`<_> msg="<method> <path> (<status>) <duration>"`,
[]byte(`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`),
labels.Labels{
{Name: "method", Value: "bar"},
},
labels.Labels{
{Name: "method", Value: "bar"},
{Name: "method_extracted", Value: "POST"},
{Name: "path", Value: "/loki/api/v1/push"},
{Name: "status", Value: "204"},
{Name: "duration", Value: "1.238734ms"},
},
},
{
`foo <f>"`,
[]byte(`bar`),
labels.Labels{
{Name: "method", Value: "bar"},
},
labels.Labels{
{Name: "method", Value: "bar"},
},
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.pattern, func(t *testing.T) {
t.Parallel()
b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
b.Reset()
pp, err := NewPatternParser(tt.pattern)
require.NoError(t, err)
_, _ = pp.Process(tt.line, b)
sort.Sort(tt.want)
require.Equal(t, tt.want, b.Labels())
})
}
}

@ -0,0 +1,83 @@
package pattern
import (
"fmt"
"unicode/utf8"
)
type node interface {
fmt.Stringer
}
type expr []node
func (e expr) hasCapture() bool {
return e.captureCount() != 0
}
func (e expr) validate() error {
if !e.hasCapture() {
return ErrNoCapture
}
// if there are at least 2 nodes, verify that no two captures are consecutive.
if len(e) >= 2 {
for i := 0; i < len(e); i++ {
if i+1 >= len(e) {
break
}
if _, ok := e[i].(capture); ok {
if _, ok := e[i+1].(capture); ok {
return fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)
}
}
}
}
caps := e.captures()
uniq := map[string]struct{}{}
for _, c := range caps {
if _, ok := uniq[c]; ok {
return fmt.Errorf("duplicate capture name (%s): %w", c, ErrInvalidExpr)
}
uniq[c] = struct{}{}
}
return nil
}
func (e expr) captures() (captures []string) {
for _, n := range e {
if c, ok := n.(capture); ok && !c.isUnamed() {
captures = append(captures, c.String())
}
}
return
}
func (e expr) captureCount() (count int) {
return len(e.captures())
}
type capture string
func (c capture) String() string {
return string(c)
}
func (c capture) isUnamed() bool {
return string(c) == underscore
}
type literals []byte
func (l literals) String() string {
return string(l)
}
func runesToLiterals(rs []rune) literals {
res := make([]byte, len(rs)*utf8.UTFMax)
count := 0
for _, r := range rs {
count += utf8.EncodeRune(res[count:], r)
}
res = res[:count]
return res
}

@ -0,0 +1,45 @@
%{
package pattern
%}
%union{
Expr []node
Node node
literal rune
Literals []rune
str string
token int
}
%start root
%type <Expr> expr
%type <Node> node
%type <Literals> literals
%token <str> IDENTIFIER
%token <literal> LITERAL
%token <token> LESS_THAN MORE_THAN UNDERSCORE
%%
root:
expr { exprlex.(*lexer).expr = $1 };
expr:
node { $$ = []node{$1} }
| expr node { $$ = append($1, $2) }
;
node:
IDENTIFIER { $$ = capture($1) }
| literals { $$ = runesToLiterals($1) }
;
literals:
LITERAL { $$ = []rune{$1} }
| literals LITERAL { $$ = append($1, $2) }
%%

@ -0,0 +1,466 @@
// Code generated by goyacc -p expr -o pkg/logql/log/pattern/expr.y.go pkg/logql/log/pattern/expr.y. DO NOT EDIT.
package pattern
import __yyfmt__ "fmt"
type exprSymType struct {
yys int
Expr []node
Node node
literal rune
Literals []rune
str string
token int
}
const IDENTIFIER = 57346
const LITERAL = 57347
const LESS_THAN = 57348
const MORE_THAN = 57349
const UNDERSCORE = 57350
var exprToknames = [...]string{
"$end",
"error",
"$unk",
"IDENTIFIER",
"LITERAL",
"LESS_THAN",
"MORE_THAN",
"UNDERSCORE",
}
var exprStatenames = [...]string{}
const exprEofCode = 1
const exprErrCode = 2
const exprInitialStackSize = 16
var exprExca = [...]int{
-1, 1,
1, -1,
-2, 0,
}
const exprPrivate = 57344
const exprLast = 8
var exprAct = [...]int{
4, 6, 8, 3, 5, 2, 7, 1,
}
var exprPact = [...]int{
-4, -1000, -4, -1000, -1000, -3, -1000, -1000, -1000,
}
var exprPgo = [...]int{
0, 7, 5, 3, 4,
}
var exprR1 = [...]int{
0, 1, 2, 2, 3, 3, 4, 4,
}
var exprR2 = [...]int{
0, 1, 1, 2, 1, 1, 1, 2,
}
var exprChk = [...]int{
-1000, -1, -2, -3, 4, -4, 5, -3, 5,
}
var exprDef = [...]int{
0, -2, 1, 2, 4, 5, 6, 3, 7,
}
var exprTok1 = [...]int{
1,
}
var exprTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8,
}
var exprTok3 = [...]int{
0,
}
var exprErrorMessages = [...]struct {
state int
token int
msg string
}{}
/* parser for yacc output */
var (
exprDebug = 0
exprErrorVerbose = false
)
type exprLexer interface {
Lex(lval *exprSymType) int
Error(s string)
}
type exprParser interface {
Parse(exprLexer) int
Lookahead() int
}
type exprParserImpl struct {
lval exprSymType
stack [exprInitialStackSize]exprSymType
char int
}
func (p *exprParserImpl) Lookahead() int {
return p.char
}
func exprNewParser() exprParser {
return &exprParserImpl{}
}
const exprFlag = -1000
func exprTokname(c int) string {
if c >= 1 && c-1 < len(exprToknames) {
if exprToknames[c-1] != "" {
return exprToknames[c-1]
}
}
return __yyfmt__.Sprintf("tok-%v", c)
}
func exprStatname(s int) string {
if s >= 0 && s < len(exprStatenames) {
if exprStatenames[s] != "" {
return exprStatenames[s]
}
}
return __yyfmt__.Sprintf("state-%v", s)
}
func exprErrorMessage(state, lookAhead int) string {
const TOKSTART = 4
if !exprErrorVerbose {
return "syntax error"
}
for _, e := range exprErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + exprTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := exprPact[state]
for tok := TOKSTART; tok-1 < len(exprToknames); tok++ {
if n := base + tok; n >= 0 && n < exprLast && exprChk[exprAct[n]] == tok {
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if exprDef[state] == -2 {
i := 0
for exprExca[i] != -1 || exprExca[i+1] != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; exprExca[i] >= 0; i += 2 {
tok := exprExca[i]
if tok < TOKSTART || exprExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if exprExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += exprTokname(tok)
}
return res
}
func exprlex1(lex exprLexer, lval *exprSymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = exprTok1[0]
goto out
}
if char < len(exprTok1) {
token = exprTok1[char]
goto out
}
if char >= exprPrivate {
if char < exprPrivate+len(exprTok2) {
token = exprTok2[char-exprPrivate]
goto out
}
}
for i := 0; i < len(exprTok3); i += 2 {
token = exprTok3[i+0]
if token == char {
token = exprTok3[i+1]
goto out
}
}
out:
if token == 0 {
token = exprTok2[1] /* unknown char */
}
if exprDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", exprTokname(token), uint(char))
}
return char, token
}
func exprParse(exprlex exprLexer) int {
return exprNewParser().Parse(exprlex)
}
func (exprrcvr *exprParserImpl) Parse(exprlex exprLexer) int {
var exprn int
var exprVAL exprSymType
var exprDollar []exprSymType
_ = exprDollar // silence set and not used
exprS := exprrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
exprstate := 0
exprrcvr.char = -1
exprtoken := -1 // exprrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
exprstate = -1
exprrcvr.char = -1
exprtoken = -1
}()
exprp := -1
goto exprstack
ret0:
return 0
ret1:
return 1
exprstack:
/* put a state and value onto the stack */
if exprDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", exprTokname(exprtoken), exprStatname(exprstate))
}
exprp++
if exprp >= len(exprS) {
nyys := make([]exprSymType, len(exprS)*2)
copy(nyys, exprS)
exprS = nyys
}
exprS[exprp] = exprVAL
exprS[exprp].yys = exprstate
exprnewstate:
exprn = exprPact[exprstate]
if exprn <= exprFlag {
goto exprdefault /* simple state */
}
if exprrcvr.char < 0 {
exprrcvr.char, exprtoken = exprlex1(exprlex, &exprrcvr.lval)
}
exprn += exprtoken
if exprn < 0 || exprn >= exprLast {
goto exprdefault
}
exprn = exprAct[exprn]
if exprChk[exprn] == exprtoken { /* valid shift */
exprrcvr.char = -1
exprtoken = -1
exprVAL = exprrcvr.lval
exprstate = exprn
if Errflag > 0 {
Errflag--
}
goto exprstack
}
exprdefault:
/* default state action */
exprn = exprDef[exprstate]
if exprn == -2 {
if exprrcvr.char < 0 {
exprrcvr.char, exprtoken = exprlex1(exprlex, &exprrcvr.lval)
}
/* look through exception table */
xi := 0
for {
if exprExca[xi+0] == -1 && exprExca[xi+1] == exprstate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
exprn = exprExca[xi+0]
if exprn < 0 || exprn == exprtoken {
break
}
}
exprn = exprExca[xi+1]
if exprn < 0 {
goto ret0
}
}
if exprn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
exprlex.Error(exprErrorMessage(exprstate, exprtoken))
Nerrs++
if exprDebug >= 1 {
__yyfmt__.Printf("%s", exprStatname(exprstate))
__yyfmt__.Printf(" saw %s\n", exprTokname(exprtoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for exprp >= 0 {
exprn = exprPact[exprS[exprp].yys] + exprErrCode
if exprn >= 0 && exprn < exprLast {
exprstate = exprAct[exprn] /* simulate a shift of "error" */
if exprChk[exprstate] == exprErrCode {
goto exprstack
}
}
/* the current p has no shift on "error", pop stack */
if exprDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", exprS[exprp].yys)
}
exprp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if exprDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", exprTokname(exprtoken))
}
if exprtoken == exprEofCode {
goto ret1
}
exprrcvr.char = -1
exprtoken = -1
goto exprnewstate /* try again in the same state */
}
}
/* reduction by production exprn */
if exprDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", exprn, exprStatname(exprstate))
}
exprnt := exprn
exprpt := exprp
_ = exprpt // guard against "declared and not used"
exprp -= exprR2[exprn]
// exprp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if exprp+1 >= len(exprS) {
nyys := make([]exprSymType, len(exprS)*2)
copy(nyys, exprS)
exprS = nyys
}
exprVAL = exprS[exprp+1]
/* consult goto table to find next state */
exprn = exprR1[exprn]
exprg := exprPgo[exprn]
exprj := exprg + exprS[exprp].yys + 1
if exprj >= exprLast {
exprstate = exprAct[exprg]
} else {
exprstate = exprAct[exprj]
if exprChk[exprstate] != -exprn {
exprstate = exprAct[exprg]
}
}
// dummy call; replaced with literal code
switch exprnt {
case 1:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprlex.(*lexer).expr = exprDollar[1].Expr
}
case 2:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Expr = []node{exprDollar[1].Node}
}
case 3:
exprDollar = exprS[exprpt-2 : exprpt+1]
{
exprVAL.Expr = append(exprDollar[1].Expr, exprDollar[2].Node)
}
case 4:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Node = capture(exprDollar[1].str)
}
case 5:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Node = runesToLiterals(exprDollar[1].Literals)
}
case 6:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Literals = []rune{exprDollar[1].literal}
}
case 7:
exprDollar = exprS[exprpt-2 : exprpt+1]
{
exprVAL.Literals = append(exprDollar[1].Literals, exprDollar[2].literal)
}
}
goto exprstack /* stack new state and value */
}

@ -0,0 +1,62 @@
package pattern
type lexer struct {
data []byte
p, pe, cs int
ts, te, act int
lastnewline int
curline int
errs []parseError
expr []node
}
func newLexer() *lexer {
lex := &lexer{}
lex.init()
return lex
}
func (lex *lexer) setData(data []byte) {
lex.data = data
lex.pe = len(data)
lex.lastnewline = -1
lex.curline = 1
}
// Error implements exprLexer interface generated by yacc (yyLexer)
func (lex *lexer) Error(e string) {
lex.errs = append(lex.errs, newParseError(e, lex.curline, lex.curcol()))
}
// curcol calculates the current token's start column based on the last newline position
// returns a 1-indexed value
func (lex *lexer) curcol() int {
return (lex.ts + 1 /* 1-indexed columns */) - (lex.lastnewline + 1 /* next after newline */)
}
func (lex *lexer) handle(token int, err error) int {
if err != nil {
lex.Error(err.Error())
return LEXER_ERROR
}
return token
}
func (lex *lexer) token() string {
return string(lex.data[lex.ts:lex.te])
}
// nolint
func (lex *lexer) identifier(out *exprSymType) (int, error) {
t := lex.token()
out.str = t[1 : len(t)-1]
return IDENTIFIER, nil
}
// nolint
func (lex *lexer) literal(out *exprSymType) (int, error) {
out.literal = rune(lex.data[lex.ts])
return LITERAL, nil
}

@ -0,0 +1,43 @@
package pattern
%%{
machine pattern;
write data;
access lex.;
variable p lex.p;
variable pe lex.pe;
prepush {
if len(lex.stack) <= lex.top {
lex.stack = append(lex.stack, 0)
}
}
}%%
const LEXER_ERROR = 0
%%{
identifier = '<' (alpha| '_') (alnum | '_' )* '>';
literal = any;
}%%
func (lex *lexer) Lex(out *exprSymType) int {
eof := lex.pe
tok := 0
%%{
main := |*
identifier => { tok = lex.handle(lex.identifier(out)); fbreak; };
literal => { tok = lex.handle(lex.literal(out)); fbreak; };
*|;
write exec;
}%%
return tok;
}
func (lex *lexer) init() {
%% write init;
}

@ -0,0 +1,241 @@
//line pkg/logql/log/pattern/lexer.rl:1
package pattern
//line pkg/logql/log/pattern/lexer.rl.go:7
var _pattern_actions []byte = []byte{
0, 1, 0, 1, 1, 1, 2, 1, 3,
1, 4, 1, 5, 1, 6,
}
var _pattern_key_offsets []byte = []byte{
0, 8, 9,
}
var _pattern_trans_keys []byte = []byte{
62, 95, 48, 57, 65, 90, 97, 122,
60, 95, 65, 90, 97, 122,
}
var _pattern_single_lengths []byte = []byte{
2, 1, 1,
}
var _pattern_range_lengths []byte = []byte{
3, 0, 2,
}
var _pattern_index_offsets []byte = []byte{
0, 6, 8,
}
var _pattern_trans_targs []byte = []byte{
1, 0, 0, 0, 0, 1, 2, 1,
0, 0, 0, 1, 1, 1,
}
var _pattern_trans_actions []byte = []byte{
7, 0, 0, 0, 0, 13, 5, 9,
0, 0, 0, 11, 13, 11,
}
var _pattern_to_state_actions []byte = []byte{
0, 1, 0,
}
var _pattern_from_state_actions []byte = []byte{
0, 3, 0,
}
var _pattern_eof_trans []byte = []byte{
13, 0, 14,
}
const pattern_start int = 1
const pattern_first_final int = 1
const pattern_error int = -1
const pattern_en_main int = 1
//line pkg/logql/log/pattern/lexer.rl:14
const LEXER_ERROR = 0
//line pkg/logql/log/pattern/lexer.rl:21
func (lex *lexer) Lex(out *exprSymType) int {
eof := lex.pe
tok := 0
//line pkg/logql/log/pattern/lexer.rl.go:77
{
var _klen int
var _trans int
var _acts int
var _nacts uint
var _keys int
if ( lex.p) == ( lex.pe) {
goto _test_eof
}
_resume:
_acts = int(_pattern_from_state_actions[ lex.cs])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts - 1] {
case 1:
//line NONE:1
lex.ts = ( lex.p)
//line pkg/logql/log/pattern/lexer.rl.go:97
}
}
_keys = int(_pattern_key_offsets[ lex.cs])
_trans = int(_pattern_index_offsets[ lex.cs])
_klen = int(_pattern_single_lengths[ lex.cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + _klen - 1)
for {
if _upper < _lower {
break
}
_mid = _lower + ((_upper - _lower) >> 1)
switch {
case lex.data[( lex.p)] < _pattern_trans_keys[_mid]:
_upper = _mid - 1
case lex.data[( lex.p)] > _pattern_trans_keys[_mid]:
_lower = _mid + 1
default:
_trans += int(_mid - int(_keys))
goto _match
}
}
_keys += _klen
_trans += _klen
}
_klen = int(_pattern_range_lengths[ lex.cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + (_klen << 1) - 2)
for {
if _upper < _lower {
break
}
_mid = _lower + (((_upper - _lower) >> 1) & ^1)
switch {
case lex.data[( lex.p)] < _pattern_trans_keys[_mid]:
_upper = _mid - 2
case lex.data[( lex.p)] > _pattern_trans_keys[_mid + 1]:
_lower = _mid + 2
default:
_trans += int((_mid - int(_keys)) >> 1)
goto _match
}
}
_trans += _klen
}
_match:
_eof_trans:
lex.cs = int(_pattern_trans_targs[_trans])
if _pattern_trans_actions[_trans] == 0 {
goto _again
}
_acts = int(_pattern_trans_actions[_trans])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts-1] {
case 2:
//line NONE:1
lex.te = ( lex.p)+1
case 3:
//line pkg/logql/log/pattern/lexer.rl:30
lex.te = ( lex.p)+1
{ tok = lex.handle(lex.identifier(out)); ( lex.p)++; goto _out
}
case 4:
//line pkg/logql/log/pattern/lexer.rl:31
lex.te = ( lex.p)+1
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
case 5:
//line pkg/logql/log/pattern/lexer.rl:31
lex.te = ( lex.p)
( lex.p)--
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
case 6:
//line pkg/logql/log/pattern/lexer.rl:31
( lex.p) = ( lex.te) - 1
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
//line pkg/logql/log/pattern/lexer.rl.go:191
}
}
_again:
_acts = int(_pattern_to_state_actions[ lex.cs])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts-1] {
case 0:
//line NONE:1
lex.ts = 0
//line pkg/logql/log/pattern/lexer.rl.go:205
}
}
( lex.p)++
if ( lex.p) != ( lex.pe) {
goto _resume
}
_test_eof: {}
if ( lex.p) == eof {
if _pattern_eof_trans[ lex.cs] > 0 {
_trans = int(_pattern_eof_trans[ lex.cs] - 1)
goto _eof_trans
}
}
_out: {}
}
//line pkg/logql/log/pattern/lexer.rl:35
return tok;
}
func (lex *lexer) init() {
//line pkg/logql/log/pattern/lexer.rl.go:233
{
lex.cs = pattern_start
lex.ts = 0
lex.te = 0
lex.act = 0
}
//line pkg/logql/log/pattern/lexer.rl:43
}

@ -0,0 +1,47 @@
package pattern
import (
"testing"
"github.com/stretchr/testify/assert"
)
func Test_Lex(t *testing.T) {
for _, tc := range []struct {
input string
expected []int
}{
{`_foo`, []int{LITERAL, LITERAL, LITERAL, LITERAL}},
{`<foo`, []int{LITERAL, LITERAL, LITERAL, LITERAL}},
{`<`, []int{LITERAL}},
{`>`, []int{LITERAL}},
{`<_1foo>`, []int{IDENTIFIER}},
{`<_1foo> bar <buzz>`, []int{IDENTIFIER, LITERAL, LITERAL, LITERAL, LITERAL, LITERAL, IDENTIFIER}},
{`<1foo>`, []int{LITERAL, LITERAL, LITERAL, LITERAL, LITERAL, LITERAL}},
} {
tc := tc
t.Run(tc.input, func(t *testing.T) {
actual := []int{}
l := newLexer()
l.setData([]byte(tc.input))
for {
var lval exprSymType
tok := l.Lex(&lval)
if tok == 0 {
break
}
actual = append(actual, tok)
}
assert.Equal(t, toksToStrings(tc.expected), toksToStrings(actual))
assert.Equal(t, tc.expected, actual)
})
}
}
func toksToStrings(toks []int) []string {
strings := make([]string, len(toks))
for i, tok := range toks {
strings[i] = exprToknames[tok-exprPrivate+1]
}
return strings
}

@ -0,0 +1,50 @@
package pattern
import "fmt"
const underscore = "_"
var tokens = map[int]string{
LESS_THAN: "<",
MORE_THAN: ">",
UNDERSCORE: underscore,
}
func init() {
// Improve the error messages coming out of yacc.
exprErrorVerbose = true
for tok, str := range tokens {
exprToknames[tok-exprPrivate+1] = str
}
}
func parseExpr(input string) (expr, error) {
l := newLexer()
l.setData([]byte(input))
e := exprNewParser().Parse(l)
if e != 0 || len(l.errs) > 0 {
return nil, l.errs[0]
}
return l.expr, nil
}
// parseError is what is returned when we failed to parse.
type parseError struct {
msg string
line, col int
}
func (p parseError) Error() string {
if p.col == 0 && p.line == 0 {
return p.msg
}
return fmt.Sprintf("parse error at line %d, col %d: %s", p.line, p.col, p.msg)
}
func newParseError(msg string, line, col int) parseError {
return parseError{
msg: msg,
line: line,
col: col,
}
}
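As an in-package sketch (parseExpr and parseError are unexported, so this would live in a test file of package pattern), the error surface looks like this; the expected output mirrors the empty-input case in Test_Error below:
```go
package pattern

import "fmt"

// Example_parseError demonstrates the formatted parse error for an empty input.
func Example_parseError() {
	_, err := parseExpr("")
	fmt.Println(err)
	// Output: parse error at line 1, col 1: syntax error: unexpected $end, expecting IDENTIFIER or LITERAL
}
```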

@ -0,0 +1,59 @@
package pattern
import (
"testing"
"github.com/stretchr/testify/require"
)
func Test_Parse(t *testing.T) {
for _, tc := range []struct {
input string
expected expr
err error
}{
{
"<foo> bar f <f>",
expr{capture("foo"), literals(" bar f "), capture("f")},
nil,
},
{
"<foo",
expr{literals("<foo")},
nil,
},
{
"<foo ><bar>",
expr{literals("<foo >"), capture("bar")},
nil,
},
{
"<>",
expr{literals("<>")},
nil,
},
{
"<_>",
expr{capture("_")},
nil,
},
{
"<1_>",
expr{literals("<1_>")},
nil,
},
{
`<ip> - <user> [<_>] "<method> <path> <_>" <status> <size> <url> <user_agent>`,
expr{capture("ip"), literals(" - "), capture("user"), literals(" ["), capture("_"), literals(`] "`), capture("method"), literals(" "), capture("path"), literals(" "), capture("_"), literals(`" `), capture("status"), literals(" "), capture("size"), literals(" "), capture("url"), literals(" "), capture("user_agent")},
nil,
},
} {
tc := tc
actual, err := parseExpr(tc.input)
if tc.err != nil || err != nil {
require.Equal(t, tc.err, err)
return
}
require.Equal(t, tc.expected, actual)
}
}

@ -0,0 +1,95 @@
package pattern
import (
"bytes"
"errors"
)
var (
ErrNoCapture = errors.New("at least one capture is required")
ErrInvalidExpr = errors.New("invalid expression")
)
type Matcher interface {
Matches(in []byte) [][]byte
Names() []string
}
type matcher struct {
e expr
captures [][]byte
names []string
}
func New(in string) (Matcher, error) {
e, err := parseExpr(in)
if err != nil {
return nil, err
}
if err := e.validate(); err != nil {
return nil, err
}
return &matcher{
e: e,
captures: make([][]byte, 0, e.captureCount()),
names: e.captures(),
}, nil
}
// Matches matches the given line with the provided pattern.
// Matches invalidates the previous returned captures array.
func (m *matcher) Matches(in []byte) [][]byte {
if len(in) == 0 {
return nil
}
if len(m.e) == 0 {
return nil
}
captures := m.captures[:0]
expr := m.e
if ls, ok := expr[0].(literals); ok {
i := bytes.Index(in, ls)
if i != 0 {
return nil
}
in = in[len(ls):]
expr = expr[1:]
}
if len(expr) == 0 {
return nil
}
// from now on the expression alternates capture - literals - capture ... (literals)?
for len(expr) != 0 {
if len(expr) == 1 { // we're ending on a capture.
if !(expr[0].(capture)).isUnamed() {
captures = append(captures, in)
}
return captures
}
cap := expr[0].(capture)
ls := expr[1].(literals)
expr = expr[2:]
i := bytes.Index(in, ls)
if i == -1 {
// if the literals following the capture can't be found, return the rest of the line as the capture.
if !cap.isUnamed() {
captures = append(captures, in)
}
return captures
}
if cap.isUnamed() {
in = in[len(ls)+i:]
continue
}
captures = append(captures, in[:i])
in = in[len(ls)+i:]
}
return captures
}
func (m *matcher) Names() []string {
return m.names
}
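A self-contained usage sketch (not part of this diff), reusing the Common Log Format fixture from the tests below:
```go
package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/logql/log/pattern"
)

func main() {
	m, err := pattern.New(`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`)
	if err != nil {
		panic(err)
	}
	line := []byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
	names := m.Names()
	// Matches returns one []byte per named capture, in order of appearance.
	for i, c := range m.Matches(line) {
		fmt.Printf("%s => %q\n", names[i], c)
	}
}
```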

@ -0,0 +1,162 @@
package pattern
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
var fixtures = []struct {
expr string
in string
expected []string
}{
{
"foo <foo> bar",
"foo buzz bar",
[]string{"buzz"},
},
{
"foo <foo> bar<fuzz>",
"foo buzz bar",
[]string{"buzz", ""},
},
{
"<foo> bar<fuzz>",
" bar",
[]string{"", ""},
},
{
"<path>?<_>",
`/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1`,
[]string{"/api/plugins/versioncheck"},
},
{
"<path>?<_>",
`/api/plugins/status`,
[]string{"/api/plugins/status"},
},
{
// Common Log Format
`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`,
[]string{"127.0.0.1", "user-identifier", "frank", "GET", "/apache_pb.gif", "200", "2326"},
},
{
// Combined Log Format
`<ip> - - [<_>] "<method> <path> <_>" <status> <size> `,
`35.191.8.106 - - [19/May/2021:07:21:49 +0000] "GET /api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1 HTTP/1.1" 200 107 "-" "Go-http-client/2.0" "80.153.74.144, 34.120.177.193" "TLSv1.3" "DE" "DEBW"`,
[]string{"35.191.8.106", "GET", "/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1", "200", "107"},
},
{
// MySQL
`<_> <id> [<level>] [<no>] [<component>] `,
`2020-08-06T14:25:02.835618Z 0 [Note] [MY-012487] [InnoDB] DDL log recovery : begin`,
[]string{"0", "Note", "MY-012487", "InnoDB"},
},
{
// MySQL
`<_> <id> [<level>] `,
`2021-05-19T07:40:12.215792Z 42761518 [Note] Aborted connection 42761518 to db: 'hosted_grafana' user: 'hosted_grafana' host: '10.36.4.122' (Got an error reading communication packets)`,
[]string{"42761518", "Note"},
},
{
// Kubernetes api-server
`<id> <_> <_> <line>] `,
`W0519 07:46:47.647050 1 clientconn.go:1223] grpc: addrConn.createTransport failed to connect to {https://kubernetes-etcd-1.kubernetes-etcd:2379 <nil> 0 <nil>}. Err :connection error: desc = "transport: Error while dialing dial tcp 10.32.85.85:2379: connect: connection refused". Reconnecting...`,
[]string{"W0519", "clientconn.go:1223"},
},
{
// Cassandra
`<level> [<component>]<_> in <duration>.<_>`,
`INFO [Service Thread] 2021-05-19 07:40:12,130 GCInspector.java:284 - ParNew GC in 248ms. CMS Old Gen: 5043436640 -> 5091062064; Par Eden Space: 671088640 -> 0; Par Survivor Space: 70188280 -> 60139760`,
[]string{"INFO", "Service Thread", "248ms"},
},
{
// Cortex & Loki distributor
`<_> msg="<method> <path> (<status>) <duration>"`,
`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`,
[]string{"POST", "/loki/api/v1/push", "204", "1.238734ms"},
},
{
// Etcd
`<_> <_> <level> | <component>: <_> peer <peer_id> <_> tcp <ip>:<_>`,
`2021-05-19 08:16:50.181436 W | rafthttp: health check for peer fd8275e521cfb532 could not connect: dial tcp 10.32.85.85:2380: connect: connection refused`,
[]string{"W", "rafthttp", "fd8275e521cfb532", "10.32.85.85"},
},
{
// Kafka
`<_>] <level> [Log partition=<part>, dir=<dir>] `,
`[2021-05-19 08:35:28,681] INFO [Log partition=p-636-L-fs-117, dir=/data/kafka-logs] Deleting segment 455976081 (kafka.log.Log)`,
[]string{"INFO", "p-636-L-fs-117", "/data/kafka-logs"},
},
{
// Elastic
`<_>][<level>][<component>] [<id>] [<index>]`,
`[2021-05-19T06:54:06,994][INFO ][o.e.c.m.MetaDataMappingService] [1f605d47-8454-4bfb-a67f-49f318bf837a] [usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA] update_mapping [report]`,
[]string{"INFO ", "o.e.c.m.MetaDataMappingService", "1f605d47-8454-4bfb-a67f-49f318bf837a", "usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA"},
},
{
// Envoy
`<_> "<method> <path> <_>" <status> <_> <received_bytes> <sent_bytes> <duration> <upstream_time> "<forward_for>" "<agent>" <_> <_> "<upstream>"`,
`[2016-04-15T20:17:00.310Z] "POST /api/v1/locations HTTP/2" 204 - 154 0 226 100 "10.0.35.28" "nsq2http" "cc21d9b0-cf5c-432b-8c7e-98aeb7988cd2" "locations" "tcp://10.0.2.1:80"`,
[]string{"POST", "/api/v1/locations", "204", "154", "0", "226", "100", "10.0.35.28", "nsq2http", "tcp://10.0.2.1:80"},
},
}
func Test_matcher_Matches(t *testing.T) {
for _, tt := range fixtures {
tt := tt
t.Run(tt.expr, func(t *testing.T) {
t.Parallel()
m, err := New(tt.expr)
require.NoError(t, err)
actual := m.Matches([]byte(tt.in))
var actualStrings []string
for _, a := range actual {
actualStrings = append(actualStrings, string(a))
}
require.Equal(t, tt.expected, actualStrings)
})
}
}
var res [][]byte
func Benchmark_matcher_Matches(b *testing.B) {
for _, tt := range fixtures {
b.Run(tt.expr, func(b *testing.B) {
b.ReportAllocs()
m, err := New(tt.expr)
require.NoError(b, err)
b.ResetTimer()
l := []byte(tt.in)
for n := 0; n < b.N; n++ {
res = m.Matches(l)
}
})
}
}
func Test_Error(t *testing.T) {
for _, tt := range []struct {
name string
err error
}{
{"<f>", nil},
{"<f> <a>", nil},
{"", newParseError("syntax error: unexpected $end, expecting IDENTIFIER or LITERAL", 1, 1)},
{"<_>", ErrNoCapture},
{"foo <_> bar <_>", ErrNoCapture},
{"foo bar buzz", ErrNoCapture},
{"<f><f>", fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)},
{"<f> f<d><b>", fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)},
{"<f> f<f>", fmt.Errorf("duplicate capture name (f): %w", ErrInvalidExpr)},
} {
t.Run(tt.name, func(t *testing.T) {
_, err := New(tt.name)
require.Equal(t, tt.err, err)
})
}
}

@ -1061,6 +1061,25 @@ func TestParse(t *testing.T) {
},
},
},
{
in: `{app="foo"} |= "bar" | pattern "<foo> bar <buzz>" | (duration > 1s or status!= 200) and method!="POST"`,
exp: &pipelineExpr{
left: newMatcherExpr([]*labels.Matcher{{Type: labels.MatchEqual, Name: "app", Value: "foo"}}),
pipeline: MultiStageExpr{
newLineFilterExpr(nil, labels.MatchEqual, "bar"),
newLabelParserExpr(OpParserTypePattern, "<foo> bar <buzz>"),
&labelFilterExpr{
LabelFilterer: log.NewAndLabelFilter(
log.NewOrLabelFilter(
log.NewDurationLabelFilter(log.LabelFilterGreaterThan, "duration", 1*time.Second),
log.NewNumericLabelFilter(log.LabelFilterNotEqual, "status", 200.0),
),
log.NewStringLabelFilter(mustNewMatcher(labels.MatchNotEqual, "method", "POST")),
),
},
},
},
},
{
in: `{app="foo"} |= "bar" | json | ( status_code < 500 and status_code > 200) or latency >= 250ms `,
exp: &pipelineExpr{
@ -2508,7 +2527,6 @@ func TestParseLogSelectorExpr_equalityMatcher(t *testing.T) {
}
func Test_match(t *testing.T) {
tests := []struct {
name string
input []string
@ -2554,7 +2572,6 @@ func Test_match(t *testing.T) {
} else {
require.Equal(t, tt.want, got)
}
})
}
}
