LogQL: Pattern Parser (#3837)

* The beginning of a fun story.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Working on adding ragel.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Adding AST parsing with Yacc and Ragel.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Got a pattern parser working.

Reworking the AST to work with bytes instead of runes.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Set up tests and the matching algorithm.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* More test cases.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Add some validation for the pattern expression.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Hooking into LogQL + performance boost.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Adds documentation

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Improve bounds check.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Excludes generated files from linting.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Review feedback

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Danny Kopping <dannykopping@gmail.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Update docs/sources/logql/_index.md

Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

* Docs suggestions

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

Co-authored-by: Danny Kopping <dannykopping@gmail.com>
Co-authored-by: Karen Miller <84039272+KMiller-Grafana@users.noreply.github.com>

25 changed files (lines changed in parentheses):

.drone/drone.yml (8)
.golangci.yml (3)
Makefile (43)
docs/sources/logql/_index.md (59)
loki-build-image/Dockerfile (2)
pkg/logql/ast.go (11)
pkg/logql/ast_test.go (12)
pkg/logql/expr.y (3)
pkg/logql/expr.y.go (691)
pkg/logql/functions_test.go (1)
pkg/logql/lex.go (9)
pkg/logql/log/parser.go (52)
pkg/logql/log/parser_test.go (87)
pkg/logql/log/pattern/ast.go (83)
pkg/logql/log/pattern/expr.y (45)
pkg/logql/log/pattern/expr.y.go (466)
pkg/logql/log/pattern/lexer.go (62)
pkg/logql/log/pattern/lexer.rl (43)
pkg/logql/log/pattern/lexer.rl.go (241)
pkg/logql/log/pattern/lexer_test.go (47)
pkg/logql/log/pattern/parser.go (50)
pkg/logql/log/pattern/parser_test.go (59)
pkg/logql/log/pattern/pattern.go (95)
pkg/logql/log/pattern/pattern_test.go (162)
pkg/logql/parser_test.go (21)

@ -12,28 +12,28 @@ workspace:
steps:
- name: test
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false test
depends_on:
- clone
- name: lint
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false lint
depends_on:
- clone
- name: check-generated-files
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-generated-files
depends_on:
- clone
- name: check-mod
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-mod
depends_on:

@ -30,6 +30,9 @@ run:
# no need to include all autogenerated files, we confidently recognize
# autogenerated files. If it's not please let us know.
skip-files:
- .*.pb.go
- .*.y.go
- .*.rl.go
# output configuration options
output:
# colored-line-number|line-number|json|tab|checkstyle, default is "colored-line-number"

@ -6,7 +6,7 @@
.PHONY: push-images push-latest save-images load-images promtail-image loki-image build-image
.PHONY: bigtable-backup, push-bigtable-backup
.PHONY: benchmark-store, drone, check-mod
.PHONY: migrate migrate-image lint-markdown
.PHONY: migrate migrate-image lint-markdown ragel
SHELL = /usr/bin/env bash
@ -38,7 +38,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES))
# make BUILD_IN_CONTAINER=false target
# or you can override this with an environment variable
BUILD_IN_CONTAINER ?= true
BUILD_IMAGE_VERSION := 0.14.0
BUILD_IMAGE_VERSION := 0.15.0
# Docker image info
IMAGE_PREFIX ?= grafana
@ -87,6 +87,10 @@ PROTO_GOS := $(patsubst %.proto,%.pb.go,$(PROTO_DEFS))
YACC_DEFS := $(shell find . $(DONT_FIND) -type f -name *.y -print)
YACC_GOS := $(patsubst %.y,%.y.go,$(YACC_DEFS))
# Ragel Files
RAGEL_DEFS := $(shell find . $(DONT_FIND) -type f -name *.rl -print)
RAGEL_GOS := $(patsubst %.rl,%.rl.go,$(RAGEL_DEFS))
# Promtail UI files
PROMTAIL_GENERATED_FILE := clients/pkg/promtail/server/ui/assets_vfsdata.go
PROMTAIL_UI_FILES := $(shell find ./clients/pkg/promtail/server/ui -type f -name assets_vfsdata.go -prune -o -print)
@ -126,8 +130,8 @@ binfmt:
all: promtail logcli loki loki-canary check-generated-files
# This is really a check for the CI to make sure generated files are built and checked in manually
check-generated-files: touch-protobuf-sources yacc protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
check-generated-files: touch-protobuf-sources yacc ragel protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(RAGEL_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
echo "\nChanges found in generated files"; \
echo "Run 'make check-generated-files' and commit the changes to fix this error."; \
echo "If you are actively developing these files you can ignore this error"; \
@ -147,7 +151,7 @@ touch-protobuf-sources:
# Logcli #
##########
logcli: yacc cmd/logcli/logcli
logcli: yacc ragel cmd/logcli/logcli
logcli-image:
$(SUDO) docker build -t $(IMAGE_PREFIX)/logcli:$(IMAGE_TAG) -f cmd/logcli/Dockerfile .
@ -160,8 +164,8 @@ cmd/logcli/logcli: $(APP_GO_FILES) cmd/logcli/main.go
# Loki #
########
loki: protos yacc cmd/loki/loki
loki-debug: protos yacc cmd/loki/loki-debug
loki: protos yacc ragel cmd/loki/loki
loki-debug: protos yacc ragel cmd/loki/loki-debug
cmd/loki/loki: $(APP_GO_FILES) cmd/loki/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
@ -175,7 +179,7 @@ cmd/loki/loki-debug: $(APP_GO_FILES) cmd/loki/main.go
# Loki-Canary #
###############
loki-canary: protos yacc cmd/loki-canary/loki-canary
loki-canary: protos yacc ragel cmd/loki-canary/loki-canary
cmd/loki-canary/loki-canary: $(APP_GO_FILES) cmd/loki-canary/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
@ -206,8 +210,8 @@ PROMTAIL_DEBUG_GO_FLAGS = $(DYN_DEBUG_GO_FLAGS)
endif
endif
promtail: yacc clients/cmd/promtail/promtail
promtail-debug: yacc clients/cmd/promtail/promtail-debug
promtail: yacc ragel clients/cmd/promtail/promtail
promtail-debug: yacc ragel clients/cmd/promtail/promtail-debug
promtail-clean-assets:
rm -rf clients/pkg/promtail/server/ui/assets_vfsdata.go
@ -308,6 +312,25 @@ else
rm ${@}.back
endif
#########
# Ragels #
#########
ragel: $(RAGEL_GOS)
%.rl.go: %.rl
ifeq ($(BUILD_IN_CONTAINER),true)
@mkdir -p $(shell pwd)/.pkg
@mkdir -p $(shell pwd)/.cache
$(SUDO) docker run $(RM) $(TTY) -i \
-v $(shell pwd)/.cache:/go/cache$(MOUNT_FLAGS) \
-v $(shell pwd)/.pkg:/go/pkg$(MOUNT_FLAGS) \
-v $(shell pwd):/src/loki$(MOUNT_FLAGS) \
$(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@;
else
ragel -Z $< -o $@
endif
#############
# Protobufs #
#############

@ -154,9 +154,10 @@ In case of errors, for instance if the line is not in the expected format, the l
If an extracted label key name already exists in the original log stream, the extracted label key will be suffixed with the `_extracted` keyword to make the distinction between the two labels. You can forcefully override the original label using a [label formatter expression](#labels-format-expression). However if an extracted key appears twice, only the latest label value will be kept.
We support currently support [json](#json), [logfmt](#logfmt), [regexp](#regexp) and [unpack](#unpack) parsers.
Loki supports [JSON](#json), [logfmt](#logfmt), [pattern](#pattern), [regexp](#regexp) and [unpack](#unpack) parsers.
It's easier to use the predefined parsers like `json` and `logfmt` when you can, falling back to `regexp` when the log lines have unusual structure. Multiple parsers can be used during the same log pipeline which is useful when you want to parse complex logs. ([see examples](#multiple-parsers))
It's easier to use the predefined parsers `json` and `logfmt` when you can. If you can't, the `pattern` and `regexp` parsers can be used for log lines with an unusual structure. The `pattern` parser is easier and faster to write; it also outperforms the `regexp` parser.
Multiple parsers can be used by a single log pipeline. This is useful for parsing complex logs. There are examples in [Multiple parsers](#multiple-parsers).
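For example, one of the parser tests added in this change chains the new `pattern` parser with `json` in a metric query:
```logql
sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" | json [5m]))
```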
##### Json
@ -277,6 +278,60 @@ will get those labels extracted:
"status" => "200"
```
##### Pattern
The pattern parser allows the explicit extraction of fields from log lines by defining a pattern expression. The expression matches the structure of a log line.
Consider this NGINX log line.
```log
0.191.12.2 - - [10/Jun/2021:09:14:29 +0000] "GET /api/plugins/versioncheck HTTP/1.1" 200 2 "-" "Go-http-client/2.0" "13.76.247.102, 34.120.177.193" "TLSv1.2" "US" ""
```
This log line can be parsed with the expression
`<ip> - - <_> "<method> <uri> <_>" <status> <size> <_> "<agent>" <_>`
to extract these fields:
```kv
"ip" => "0.191.12.2"
"method" => "GET"
"uri" => "/api/plugins/versioncheck"
"status" => "200"
"size" => "2"
"agent" => "Go-http-client/2.0"
```
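Put together, a query using this expression could look like the following sketch; the `{container="nginx"}` stream selector is an assumption for illustration:
```logql
{container="nginx"} | pattern `<ip> - - <_> "<method> <uri> <_>" <status> <size> <_> "<agent>" <_>` | status >= 400
```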
A pattern expression is composed of captures and literals.
A capture is a field name delimited by the `<` and `>` characters. `<example>` defines the field name `example`.
An unnamed capture appears as `<_>`. The unnamed capture skips matched content.
Captures are matched from the line beginning or the previous set of literals, to the line end or the next set of literals.
If a capture is not matched, the pattern parser will stop.
Literals can be any sequence of UTF-8 characters, including whitespace characters.
By default, a pattern expression is anchored at the start of the log line. If the expression starts with literals, then the log line must also start with the same set of literals. Use `<_>` at the beginning of the expression if you do not want to anchor the expression at the start.
Consider the log line
```log
level=debug ts=2021-06-10T09:24:13.472094048Z caller=logging.go:66 traceID=0568b66ad2d9294c msg="POST /loki/api/v1/push (204) 16.652862ms"
```
To match `msg="`, use the expression:
```pattern
<_> msg="<method> <path> (<status>) <latency>"
```
A pattern expression is invalid if
- It does not contain any named capture.
- It contains two consecutive captures not separated by whitespace characters.
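For example, mirroring the validation tests added in this change, the first two expressions below are invalid (no named capture; consecutive captures) while the last one is valid:
```pattern
foo <_> bar <_>
<f><f>
<f> <a>
```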
##### regexp
Unlike the logfmt and json parsers, which implicitly extract all values and take no parameters, the **regexp** parser takes a single parameter `| regexp "<re>"`, which is a regular expression using the [Golang](https://golang.org/) [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
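For example, reusing the NGINX line above, a hypothetical query extracting the status code with a named RE2 group (the `{container="nginx"}` selector is an assumption):
```logql
{container="nginx"} | regexp `HTTP/1.1" (?P<statuscode>\d{3}) ` | statuscode >= 500
```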

@ -40,7 +40,7 @@ RUN GO111MODULE=on go get github.com/fatih/faillint@v1.5.0
FROM golang:1.16.2-buster
RUN apt-get update && \
apt-get install -qy \
musl gnupg \
musl gnupg ragel \
file zip unzip jq gettext\
protobuf-compiler libprotobuf-dev \
libsystemd-dev && \

@ -330,6 +330,8 @@ func (e *labelParserExpr) Stage() (log.Stage, error) {
return log.NewRegexpParser(e.param)
case OpParserTypeUnpack:
return log.NewUnpackParser(), nil
case OpParserTypePattern:
return log.NewPatternParser(e.param)
default:
return nil, fmt.Errorf("unknown parser operator: %s", e.op)
}
@ -601,10 +603,11 @@ const (
OpTypeLTE = "<="
// parsers
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypePattern = "pattern"
OpFmtLine = "line_format"
OpFmtLabel = "label_format"

@ -28,6 +28,7 @@ func Test_logSelectorExpr_String(t *testing.T) {
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop" | json`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | unpack | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | pattern "<foo> bar <buzz>" | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt | b>=10GB`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)"`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)" | ( ( foo<5.01 , bar>20ms ) or foo="bar" ) | line_format "blip{{.boop}}bap" | label_format foo=bar,bar="blip{{.blop}}"`, true},
@ -69,6 +70,7 @@ func Test_SampleExpr_String(t *testing.T) {
`sum(count_over_time({job="mysql"} | json [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | logfmt [5m]))`,
`sum(count_over_time({job="mysql"} | logfmt [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" | json [5m]))`,
`sum(count_over_time({job="mysql"} | unpack | json [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m] offset 10m))`,
@ -358,6 +360,8 @@ func Test_parserExpr_Parser(t *testing.T) {
{"json", OpParserTypeJSON, "", log.NewJSONParser(), false},
{"unpack", OpParserTypeUnpack, "", log.NewUnpackParser(), false},
{"logfmt", OpParserTypeLogfmt, "", log.NewLogfmtParser(), false},
{"pattern", OpParserTypePattern, "<foo> bar <buzz>", mustNewPatternParser("<foo> bar <buzz>"), false},
{"pattern err", OpParserTypePattern, "bar", nil, true},
{"regexp", OpParserTypeRegexp, "(?P<foo>foo)", mustNewRegexParser("(?P<foo>foo)"), false},
{"regexp err ", OpParserTypeRegexp, "foo", nil, true},
}
@ -389,6 +393,14 @@ func mustNewRegexParser(re string) log.Stage {
return r
}
func mustNewPatternParser(p string) log.Stage {
r, err := log.NewPatternParser(p)
if err != nil {
panic(err)
}
return r
}
func Test_canInjectVectorGrouping(t *testing.T) {
tests := []struct {
vecOp string

@ -100,7 +100,7 @@ import (
OPEN_PARENTHESIS CLOSE_PARENTHESIS BY WITHOUT COUNT_OVER_TIME RATE SUM AVG MAX MIN COUNT STDDEV STDVAR BOTTOMK TOPK
BYTES_OVER_TIME BYTES_RATE BOOL JSON REGEXP LOGFMT PIPE LINE_FMT LABEL_FMT UNWRAP AVG_OVER_TIME SUM_OVER_TIME MIN_OVER_TIME
MAX_OVER_TIME STDVAR_OVER_TIME STDDEV_OVER_TIME QUANTILE_OVER_TIME BYTES_CONV DURATION_CONV DURATION_SECONDS_CONV
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET PATTERN
// Operators are listed with increasing precedence.
%left <binOp> OR
@ -246,6 +246,7 @@ labelParser:
| LOGFMT { $$ = newLabelParserExpr(OpParserTypeLogfmt, "") }
| REGEXP STRING { $$ = newLabelParserExpr(OpParserTypeRegexp, $2) }
| UNPACK { $$ = newLabelParserExpr(OpParserTypeUnpack, "") }
| PATTERN STRING { $$ = newLabelParserExpr(OpParserTypePattern, $2) }
;
jsonExpressionParser:

File diff suppressed because it is too large

@ -17,6 +17,7 @@ func Test_Extractor(t *testing.T) {
`sum(count_over_time({job="mysql"}[5m]))`,
`sum(count_over_time({job="mysql"} | json [5m]))`,
`sum(count_over_time({job="mysql"} | logfmt [5m]))`,
`sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m] offset 1h))`,
`topk(10,sum(rate({region="us-east1"}[5m])) by (name))`,

@ -55,10 +55,11 @@ var tokens = map[string]int{
OpTypeLTE: LTE,
// parsers
OpParserTypeJSON: JSON,
OpParserTypeRegexp: REGEXP,
OpParserTypeLogfmt: LOGFMT,
OpParserTypeUnpack: UNPACK,
OpParserTypeJSON: JSON,
OpParserTypeRegexp: REGEXP,
OpParserTypeLogfmt: LOGFMT,
OpParserTypeUnpack: UNPACK,
OpParserTypePattern: PATTERN,
// fmt
OpFmtLabel: LABEL_FMT,

@ -8,6 +8,7 @@ import (
"github.com/grafana/loki/pkg/logql/log/jsonexpr"
"github.com/grafana/loki/pkg/logql/log/logfmt"
"github.com/grafana/loki/pkg/logql/log/pattern"
"github.com/grafana/loki/pkg/logqlmodel"
jsoniter "github.com/json-iterator/go"
@ -212,14 +213,6 @@ func NewRegexpParser(re string) (*RegexpParser, error) {
}, nil
}
func mustNewRegexParser(re string) *RegexpParser {
r, err := NewRegexpParser(re)
if err != nil {
panic(err)
}
return r
}
func (r *RegexpParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
for i, value := range r.regex.FindSubmatch(line) {
if name, ok := r.nameIndex[i]; ok {
@ -265,6 +258,49 @@ func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} }
type PatternParser struct {
matcher pattern.Matcher
names []string
}
func NewPatternParser(pn string) (*PatternParser, error) {
m, err := pattern.New(pn)
if err != nil {
return nil, err
}
for _, name := range m.Names() {
if !model.LabelName(name).IsValid() {
return nil, fmt.Errorf("invalid capture label name '%s'", name)
}
}
return &PatternParser{
matcher: m,
names: m.Names(),
}, nil
}
func (l *PatternParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
if lbs.ParserLabelHints().NoLabels() {
return line, true
}
matches := l.matcher.Matches(line)
names := l.names[:len(matches)]
for i, m := range matches {
name := names[i]
if !lbs.parserKeyHints.ShouldExtract(name) {
continue
}
if lbs.BaseHas(name) {
name = name + duplicateSuffix
}
lbs.Set(name, string(m))
}
return line, true
}
func (l *PatternParser) RequiredLabelNames() []string { return []string{} }
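The following is a minimal, runnable sketch (not part of this diff) of driving the stage directly from outside the package; the `job="nginx"` label is an illustrative assumption, and in a real pipeline the stage would be built from `| pattern "<ip> <_> <user>"`:
```go
package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/logql/log"
	"github.com/prometheus/prometheus/pkg/labels"
)

func main() {
	p, err := log.NewPatternParser(`<ip> <_> <user>`)
	if err != nil {
		panic(err)
	}
	lbs := labels.Labels{{Name: "job", Value: "nginx"}}
	b := log.NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
	// Process returns the line unmodified and adds the captures as labels;
	// the unnamed capture <_> skips the "-" column.
	line, ok := p.Process([]byte(`127.0.0.1 - frank`), b)
	fmt.Println(string(line), ok, b.Labels())
	// prints something like: 127.0.0.1 - frank true {ip="127.0.0.1", job="nginx", user="frank"}
}
```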
type JSONExpressionParser struct {
expressions map[string][]interface{}
}

@ -408,8 +408,9 @@ func Benchmark_Parser(b *testing.B) {
{"json", jsonLine, NewJSONParser(), []string{"response_latency_seconds"}},
{"unpack", packedLike, NewUnpackParser(), []string{"pod"}},
{"logfmt", logfmtLine, NewLogfmtParser(), []string{"info", "throughput", "org_id"}},
{"regex greedy", nginxline, mustNewRegexParser(`GET (?P<path>.*?)/\?`), []string{"path"}},
{"regex status digits", nginxline, mustNewRegexParser(`HTTP/1.1" (?P<statuscode>\d{3}) `), []string{"statuscode"}},
{"regex greedy", nginxline, mustStage(NewRegexpParser(`GET (?P<path>.*?)/\?`)), []string{"path"}},
{"regex status digits", nginxline, mustStage(NewRegexpParser(`HTTP/1.1" (?P<statuscode>\d{3}) `)), []string{"statuscode"}},
{"pattern", nginxline, mustStage(NewPatternParser(`<_> "<method> <path> <_>"<_>`)), []string{"path"}},
} {
b.Run(tt.name, func(b *testing.B) {
line := []byte(tt.line)
@ -433,6 +434,13 @@ func Benchmark_Parser(b *testing.B) {
}
}
func mustStage(s Stage, err error) Stage {
if err != nil {
panic(err)
}
return s
}
func TestNewRegexpParser(t *testing.T) {
tests := []struct {
name string
@ -460,14 +468,14 @@ func TestNewRegexpParser(t *testing.T) {
func Test_regexpParser_Parse(t *testing.T) {
tests := []struct {
name string
parser *RegexpParser
parser Stage
line []byte
lbs labels.Labels
want labels.Labels
}{
{
"no matches",
mustNewRegexParser("(?P<foo>foo|bar)buzz"),
mustStage(NewRegexpParser("(?P<foo>foo|bar)buzz")),
[]byte("blah"),
labels.Labels{
{Name: "app", Value: "foo"},
@ -478,7 +486,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"double matches",
mustNewRegexParser("(?P<foo>.*)buzz"),
mustStage(NewRegexpParser("(?P<foo>.*)buzz")),
[]byte("matchebuzz barbuzz"),
labels.Labels{
{Name: "app", Value: "bar"},
@ -490,7 +498,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"duplicate labels",
mustNewRegexParser("(?P<bar>bar)buzz"),
mustStage(NewRegexpParser("(?P<bar>bar)buzz")),
[]byte("barbuzz"),
labels.Labels{
{Name: "bar", Value: "foo"},
@ -502,7 +510,7 @@ func Test_regexpParser_Parse(t *testing.T) {
},
{
"multiple labels extracted",
mustNewRegexParser("status=(?P<status>\\w+),latency=(?P<latency>\\w+)(ms|ns)"),
mustStage(NewRegexpParser("status=(?P<status>\\w+),latency=(?P<latency>\\w+)(ms|ns)")),
[]byte("status=200,latency=500ms"),
labels.Labels{
{Name: "app", Value: "foo"},
@ -733,3 +741,68 @@ func Test_unpackParser_Parse(t *testing.T) {
})
}
}
func Test_PatternParser(t *testing.T) {
tests := []struct {
pattern string
line []byte
lbs labels.Labels
want labels.Labels
}{
{
`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
[]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`),
labels.Labels{
{Name: "foo", Value: "bar"},
},
labels.Labels{
{Name: "foo", Value: "bar"},
{Name: "ip", Value: "127.0.0.1"},
{Name: "userid", Value: "user-identifier"},
{Name: "user", Value: "frank"},
{Name: "method", Value: "GET"},
{Name: "path", Value: "/apache_pb.gif"},
{Name: "status", Value: "200"},
{Name: "size", Value: "2326"},
},
},
{
`<_> msg="<method> <path> (<status>) <duration>"`,
[]byte(`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`),
labels.Labels{
{Name: "method", Value: "bar"},
},
labels.Labels{
{Name: "method", Value: "bar"},
{Name: "method_extracted", Value: "POST"},
{Name: "path", Value: "/loki/api/v1/push"},
{Name: "status", Value: "204"},
{Name: "duration", Value: "1.238734ms"},
},
},
{
`foo <f>"`,
[]byte(`bar`),
labels.Labels{
{Name: "method", Value: "bar"},
},
labels.Labels{
{Name: "method", Value: "bar"},
},
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.pattern, func(t *testing.T) {
t.Parallel()
b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
b.Reset()
pp, err := NewPatternParser(tt.pattern)
require.NoError(t, err)
_, _ = pp.Process(tt.line, b)
sort.Sort(tt.want)
require.Equal(t, tt.want, b.Labels())
})
}
}

@ -0,0 +1,83 @@
package pattern
import (
"fmt"
"unicode/utf8"
)
type node interface {
fmt.Stringer
}
type expr []node
func (e expr) hasCapture() bool {
return e.captureCount() != 0
}
func (e expr) validate() error {
if !e.hasCapture() {
return ErrNoCapture
}
// if there are at least 2 nodes, verify that no two captures are consecutive.
if len(e) >= 2 {
for i := 0; i < len(e); i++ {
if i+1 >= len(e) {
break
}
if _, ok := e[i].(capture); ok {
if _, ok := e[i+1].(capture); ok {
return fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)
}
}
}
}
caps := e.captures()
uniq := map[string]struct{}{}
for _, c := range caps {
if _, ok := uniq[c]; ok {
return fmt.Errorf("duplicate capture name (%s): %w", c, ErrInvalidExpr)
}
uniq[c] = struct{}{}
}
return nil
}
func (e expr) captures() (captures []string) {
for _, n := range e {
if c, ok := n.(capture); ok && !c.isUnamed() {
captures = append(captures, c.String())
}
}
return
}
func (e expr) captureCount() (count int) {
return len(e.captures())
}
type capture string
func (c capture) String() string {
return string(c)
}
func (c capture) isUnamed() bool {
return string(c) == underscore
}
type literals []byte
func (l literals) String() string {
return string(l)
}
func runesToLiterals(rs []rune) literals {
res := make([]byte, len(rs)*utf8.UTFMax)
count := 0
for _, r := range rs {
count += utf8.EncodeRune(res[count:], r)
}
res = res[:count]
return res
}

@ -0,0 +1,45 @@
%{
package pattern
%}
%union{
Expr []node
Node node
literal rune
Literals []rune
str string
token int
}
%start root
%type <Expr> expr
%type <Node> node
%type <Literals> literals
%token <str> IDENTIFIER
%token <literal> LITERAL
%token <token> LESS_THAN MORE_THAN UNDERSCORE
%%
root:
expr { exprlex.(*lexer).expr = $1 };
expr:
node { $$ = []node{$1} }
| expr node { $$ = append($1, $2) }
;
node:
IDENTIFIER { $$ = capture($1) }
| literals { $$ = runesToLiterals($1) }
;
literals:
LITERAL { $$ = []rune{$1} }
| literals LITERAL { $$ = append($1, $2) }
%%

@ -0,0 +1,466 @@
// Code generated by goyacc -p expr -o pkg/logql/log/pattern/expr.y.go pkg/logql/log/pattern/expr.y. DO NOT EDIT.
package pattern
import __yyfmt__ "fmt"
type exprSymType struct {
yys int
Expr []node
Node node
literal rune
Literals []rune
str string
token int
}
const IDENTIFIER = 57346
const LITERAL = 57347
const LESS_THAN = 57348
const MORE_THAN = 57349
const UNDERSCORE = 57350
var exprToknames = [...]string{
"$end",
"error",
"$unk",
"IDENTIFIER",
"LITERAL",
"LESS_THAN",
"MORE_THAN",
"UNDERSCORE",
}
var exprStatenames = [...]string{}
const exprEofCode = 1
const exprErrCode = 2
const exprInitialStackSize = 16
var exprExca = [...]int{
-1, 1,
1, -1,
-2, 0,
}
const exprPrivate = 57344
const exprLast = 8
var exprAct = [...]int{
4, 6, 8, 3, 5, 2, 7, 1,
}
var exprPact = [...]int{
-4, -1000, -4, -1000, -1000, -3, -1000, -1000, -1000,
}
var exprPgo = [...]int{
0, 7, 5, 3, 4,
}
var exprR1 = [...]int{
0, 1, 2, 2, 3, 3, 4, 4,
}
var exprR2 = [...]int{
0, 1, 1, 2, 1, 1, 1, 2,
}
var exprChk = [...]int{
-1000, -1, -2, -3, 4, -4, 5, -3, 5,
}
var exprDef = [...]int{
0, -2, 1, 2, 4, 5, 6, 3, 7,
}
var exprTok1 = [...]int{
1,
}
var exprTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8,
}
var exprTok3 = [...]int{
0,
}
var exprErrorMessages = [...]struct {
state int
token int
msg string
}{}
/* parser for yacc output */
var (
exprDebug = 0
exprErrorVerbose = false
)
type exprLexer interface {
Lex(lval *exprSymType) int
Error(s string)
}
type exprParser interface {
Parse(exprLexer) int
Lookahead() int
}
type exprParserImpl struct {
lval exprSymType
stack [exprInitialStackSize]exprSymType
char int
}
func (p *exprParserImpl) Lookahead() int {
return p.char
}
func exprNewParser() exprParser {
return &exprParserImpl{}
}
const exprFlag = -1000
func exprTokname(c int) string {
if c >= 1 && c-1 < len(exprToknames) {
if exprToknames[c-1] != "" {
return exprToknames[c-1]
}
}
return __yyfmt__.Sprintf("tok-%v", c)
}
func exprStatname(s int) string {
if s >= 0 && s < len(exprStatenames) {
if exprStatenames[s] != "" {
return exprStatenames[s]
}
}
return __yyfmt__.Sprintf("state-%v", s)
}
func exprErrorMessage(state, lookAhead int) string {
const TOKSTART = 4
if !exprErrorVerbose {
return "syntax error"
}
for _, e := range exprErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + exprTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := exprPact[state]
for tok := TOKSTART; tok-1 < len(exprToknames); tok++ {
if n := base + tok; n >= 0 && n < exprLast && exprChk[exprAct[n]] == tok {
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if exprDef[state] == -2 {
i := 0
for exprExca[i] != -1 || exprExca[i+1] != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; exprExca[i] >= 0; i += 2 {
tok := exprExca[i]
if tok < TOKSTART || exprExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if exprExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += exprTokname(tok)
}
return res
}
func exprlex1(lex exprLexer, lval *exprSymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = exprTok1[0]
goto out
}
if char < len(exprTok1) {
token = exprTok1[char]
goto out
}
if char >= exprPrivate {
if char < exprPrivate+len(exprTok2) {
token = exprTok2[char-exprPrivate]
goto out
}
}
for i := 0; i < len(exprTok3); i += 2 {
token = exprTok3[i+0]
if token == char {
token = exprTok3[i+1]
goto out
}
}
out:
if token == 0 {
token = exprTok2[1] /* unknown char */
}
if exprDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", exprTokname(token), uint(char))
}
return char, token
}
func exprParse(exprlex exprLexer) int {
return exprNewParser().Parse(exprlex)
}
func (exprrcvr *exprParserImpl) Parse(exprlex exprLexer) int {
var exprn int
var exprVAL exprSymType
var exprDollar []exprSymType
_ = exprDollar // silence set and not used
exprS := exprrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
exprstate := 0
exprrcvr.char = -1
exprtoken := -1 // exprrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
exprstate = -1
exprrcvr.char = -1
exprtoken = -1
}()
exprp := -1
goto exprstack
ret0:
return 0
ret1:
return 1
exprstack:
/* put a state and value onto the stack */
if exprDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", exprTokname(exprtoken), exprStatname(exprstate))
}
exprp++
if exprp >= len(exprS) {
nyys := make([]exprSymType, len(exprS)*2)
copy(nyys, exprS)
exprS = nyys
}
exprS[exprp] = exprVAL
exprS[exprp].yys = exprstate
exprnewstate:
exprn = exprPact[exprstate]
if exprn <= exprFlag {
goto exprdefault /* simple state */
}
if exprrcvr.char < 0 {
exprrcvr.char, exprtoken = exprlex1(exprlex, &exprrcvr.lval)
}
exprn += exprtoken
if exprn < 0 || exprn >= exprLast {
goto exprdefault
}
exprn = exprAct[exprn]
if exprChk[exprn] == exprtoken { /* valid shift */
exprrcvr.char = -1
exprtoken = -1
exprVAL = exprrcvr.lval
exprstate = exprn
if Errflag > 0 {
Errflag--
}
goto exprstack
}
exprdefault:
/* default state action */
exprn = exprDef[exprstate]
if exprn == -2 {
if exprrcvr.char < 0 {
exprrcvr.char, exprtoken = exprlex1(exprlex, &exprrcvr.lval)
}
/* look through exception table */
xi := 0
for {
if exprExca[xi+0] == -1 && exprExca[xi+1] == exprstate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
exprn = exprExca[xi+0]
if exprn < 0 || exprn == exprtoken {
break
}
}
exprn = exprExca[xi+1]
if exprn < 0 {
goto ret0
}
}
if exprn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
exprlex.Error(exprErrorMessage(exprstate, exprtoken))
Nerrs++
if exprDebug >= 1 {
__yyfmt__.Printf("%s", exprStatname(exprstate))
__yyfmt__.Printf(" saw %s\n", exprTokname(exprtoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for exprp >= 0 {
exprn = exprPact[exprS[exprp].yys] + exprErrCode
if exprn >= 0 && exprn < exprLast {
exprstate = exprAct[exprn] /* simulate a shift of "error" */
if exprChk[exprstate] == exprErrCode {
goto exprstack
}
}
/* the current p has no shift on "error", pop stack */
if exprDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", exprS[exprp].yys)
}
exprp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if exprDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", exprTokname(exprtoken))
}
if exprtoken == exprEofCode {
goto ret1
}
exprrcvr.char = -1
exprtoken = -1
goto exprnewstate /* try again in the same state */
}
}
/* reduction by production exprn */
if exprDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", exprn, exprStatname(exprstate))
}
exprnt := exprn
exprpt := exprp
_ = exprpt // guard against "declared and not used"
exprp -= exprR2[exprn]
// exprp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if exprp+1 >= len(exprS) {
nyys := make([]exprSymType, len(exprS)*2)
copy(nyys, exprS)
exprS = nyys
}
exprVAL = exprS[exprp+1]
/* consult goto table to find next state */
exprn = exprR1[exprn]
exprg := exprPgo[exprn]
exprj := exprg + exprS[exprp].yys + 1
if exprj >= exprLast {
exprstate = exprAct[exprg]
} else {
exprstate = exprAct[exprj]
if exprChk[exprstate] != -exprn {
exprstate = exprAct[exprg]
}
}
// dummy call; replaced with literal code
switch exprnt {
case 1:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprlex.(*lexer).expr = exprDollar[1].Expr
}
case 2:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Expr = []node{exprDollar[1].Node}
}
case 3:
exprDollar = exprS[exprpt-2 : exprpt+1]
{
exprVAL.Expr = append(exprDollar[1].Expr, exprDollar[2].Node)
}
case 4:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Node = capture(exprDollar[1].str)
}
case 5:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Node = runesToLiterals(exprDollar[1].Literals)
}
case 6:
exprDollar = exprS[exprpt-1 : exprpt+1]
{
exprVAL.Literals = []rune{exprDollar[1].literal}
}
case 7:
exprDollar = exprS[exprpt-2 : exprpt+1]
{
exprVAL.Literals = append(exprDollar[1].Literals, exprDollar[2].literal)
}
}
goto exprstack /* stack new state and value */
}

@ -0,0 +1,62 @@
package pattern
type lexer struct {
data []byte
p, pe, cs int
ts, te, act int
lastnewline int
curline int
errs []parseError
expr []node
}
func newLexer() *lexer {
lex := &lexer{}
lex.init()
return lex
}
func (lex *lexer) setData(data []byte) {
lex.data = data
lex.pe = len(data)
lex.lastnewline = -1
lex.curline = 1
}
// Error implements exprLexer interface generated by yacc (yyLexer)
func (lex *lexer) Error(e string) {
lex.errs = append(lex.errs, newParseError(e, lex.curline, lex.curcol()))
}
// curcol calculates the current token's start column based on the last newline position
// returns a 1-indexed value
func (lex *lexer) curcol() int {
return (lex.ts + 1 /* 1-indexed columns */) - (lex.lastnewline + 1 /* next after newline */)
}
func (lex *lexer) handle(token int, err error) int {
if err != nil {
lex.Error(err.Error())
return LEXER_ERROR
}
return token
}
func (lex *lexer) token() string {
return string(lex.data[lex.ts:lex.te])
}
// nolint
func (lex *lexer) identifier(out *exprSymType) (int, error) {
t := lex.token()
out.str = t[1 : len(t)-1]
return IDENTIFIER, nil
}
// nolint
func (lex *lexer) literal(out *exprSymType) (int, error) {
out.literal = rune(lex.data[lex.ts])
return LITERAL, nil
}

@ -0,0 +1,43 @@
package pattern
%%{
machine pattern;
write data;
access lex.;
variable p lex.p;
variable pe lex.pe;
prepush {
if len(lex.stack) <= lex.top {
lex.stack = append(lex.stack, 0)
}
}
}%%
const LEXER_ERROR = 0
%%{
identifier = '<' (alpha| '_') (alnum | '_' )* '>';
literal = any;
}%%
func (lex *lexer) Lex(out *exprSymType) int {
eof := lex.pe
tok := 0
%%{
main := |*
identifier => { tok = lex.handle(lex.identifier(out)); fbreak; };
literal => { tok = lex.handle(lex.literal(out)); fbreak; };
*|;
write exec;
}%%
return tok;
}
func (lex *lexer) init() {
%% write init;
}

@ -0,0 +1,241 @@
//line pkg/logql/log/pattern/lexer.rl:1
package pattern
//line pkg/logql/log/pattern/lexer.rl.go:7
var _pattern_actions []byte = []byte{
0, 1, 0, 1, 1, 1, 2, 1, 3,
1, 4, 1, 5, 1, 6,
}
var _pattern_key_offsets []byte = []byte{
0, 8, 9,
}
var _pattern_trans_keys []byte = []byte{
62, 95, 48, 57, 65, 90, 97, 122,
60, 95, 65, 90, 97, 122,
}
var _pattern_single_lengths []byte = []byte{
2, 1, 1,
}
var _pattern_range_lengths []byte = []byte{
3, 0, 2,
}
var _pattern_index_offsets []byte = []byte{
0, 6, 8,
}
var _pattern_trans_targs []byte = []byte{
1, 0, 0, 0, 0, 1, 2, 1,
0, 0, 0, 1, 1, 1,
}
var _pattern_trans_actions []byte = []byte{
7, 0, 0, 0, 0, 13, 5, 9,
0, 0, 0, 11, 13, 11,
}
var _pattern_to_state_actions []byte = []byte{
0, 1, 0,
}
var _pattern_from_state_actions []byte = []byte{
0, 3, 0,
}
var _pattern_eof_trans []byte = []byte{
13, 0, 14,
}
const pattern_start int = 1
const pattern_first_final int = 1
const pattern_error int = -1
const pattern_en_main int = 1
//line pkg/logql/log/pattern/lexer.rl:14
const LEXER_ERROR = 0
//line pkg/logql/log/pattern/lexer.rl:21
func (lex *lexer) Lex(out *exprSymType) int {
eof := lex.pe
tok := 0
//line pkg/logql/log/pattern/lexer.rl.go:77
{
var _klen int
var _trans int
var _acts int
var _nacts uint
var _keys int
if ( lex.p) == ( lex.pe) {
goto _test_eof
}
_resume:
_acts = int(_pattern_from_state_actions[ lex.cs])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts - 1] {
case 1:
//line NONE:1
lex.ts = ( lex.p)
//line pkg/logql/log/pattern/lexer.rl.go:97
}
}
_keys = int(_pattern_key_offsets[ lex.cs])
_trans = int(_pattern_index_offsets[ lex.cs])
_klen = int(_pattern_single_lengths[ lex.cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + _klen - 1)
for {
if _upper < _lower {
break
}
_mid = _lower + ((_upper - _lower) >> 1)
switch {
case lex.data[( lex.p)] < _pattern_trans_keys[_mid]:
_upper = _mid - 1
case lex.data[( lex.p)] > _pattern_trans_keys[_mid]:
_lower = _mid + 1
default:
_trans += int(_mid - int(_keys))
goto _match
}
}
_keys += _klen
_trans += _klen
}
_klen = int(_pattern_range_lengths[ lex.cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + (_klen << 1) - 2)
for {
if _upper < _lower {
break
}
_mid = _lower + (((_upper - _lower) >> 1) & ^1)
switch {
case lex.data[( lex.p)] < _pattern_trans_keys[_mid]:
_upper = _mid - 2
case lex.data[( lex.p)] > _pattern_trans_keys[_mid + 1]:
_lower = _mid + 2
default:
_trans += int((_mid - int(_keys)) >> 1)
goto _match
}
}
_trans += _klen
}
_match:
_eof_trans:
lex.cs = int(_pattern_trans_targs[_trans])
if _pattern_trans_actions[_trans] == 0 {
goto _again
}
_acts = int(_pattern_trans_actions[_trans])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts-1] {
case 2:
//line NONE:1
lex.te = ( lex.p)+1
case 3:
//line pkg/logql/log/pattern/lexer.rl:30
lex.te = ( lex.p)+1
{ tok = lex.handle(lex.identifier(out)); ( lex.p)++; goto _out
}
case 4:
//line pkg/logql/log/pattern/lexer.rl:31
lex.te = ( lex.p)+1
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
case 5:
//line pkg/logql/log/pattern/lexer.rl:31
lex.te = ( lex.p)
( lex.p)--
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
case 6:
//line pkg/logql/log/pattern/lexer.rl:31
( lex.p) = ( lex.te) - 1
{ tok = lex.handle(lex.literal(out)); ( lex.p)++; goto _out
}
//line pkg/logql/log/pattern/lexer.rl.go:191
}
}
_again:
_acts = int(_pattern_to_state_actions[ lex.cs])
_nacts = uint(_pattern_actions[_acts]); _acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _pattern_actions[_acts-1] {
case 0:
//line NONE:1
lex.ts = 0
//line pkg/logql/log/pattern/lexer.rl.go:205
}
}
( lex.p)++
if ( lex.p) != ( lex.pe) {
goto _resume
}
_test_eof: {}
if ( lex.p) == eof {
if _pattern_eof_trans[ lex.cs] > 0 {
_trans = int(_pattern_eof_trans[ lex.cs] - 1)
goto _eof_trans
}
}
_out: {}
}
//line pkg/logql/log/pattern/lexer.rl:35
return tok;
}
func (lex *lexer) init() {
//line pkg/logql/log/pattern/lexer.rl.go:233
{
lex.cs = pattern_start
lex.ts = 0
lex.te = 0
lex.act = 0
}
//line pkg/logql/log/pattern/lexer.rl:43
}

@ -0,0 +1,47 @@
package pattern
import (
"testing"
"github.com/stretchr/testify/assert"
)
func Test_Lex(t *testing.T) {
for _, tc := range []struct {
input string
expected []int
}{
{`_foo`, []int{LITERAL, LITERAL, LITERAL, LITERAL}},
{`<foo`, []int{LITERAL, LITERAL, LITERAL, LITERAL}},
{`<`, []int{LITERAL}},
{`>`, []int{LITERAL}},
{`<_1foo>`, []int{IDENTIFIER}},
{`<_1foo> bar <buzz>`, []int{IDENTIFIER, LITERAL, LITERAL, LITERAL, LITERAL, LITERAL, IDENTIFIER}},
{`<1foo>`, []int{LITERAL, LITERAL, LITERAL, LITERAL, LITERAL, LITERAL}},
} {
tc := tc
t.Run(tc.input, func(t *testing.T) {
actual := []int{}
l := newLexer()
l.setData([]byte(tc.input))
for {
var lval exprSymType
tok := l.Lex(&lval)
if tok == 0 {
break
}
actual = append(actual, tok)
}
assert.Equal(t, toksToStrings(tc.expected), toksToStrings(actual))
assert.Equal(t, tc.expected, actual)
})
}
}
func toksToStrings(toks []int) []string {
strings := make([]string, len(toks))
for i, tok := range toks {
strings[i] = exprToknames[tok-exprPrivate+1]
}
return strings
}

@ -0,0 +1,50 @@
package pattern
import "fmt"
const underscore = "_"
var tokens = map[int]string{
LESS_THAN: "<",
MORE_THAN: ">",
UNDERSCORE: underscore,
}
func init() {
// Improve the error messages coming out of yacc.
exprErrorVerbose = true
for tok, str := range tokens {
exprToknames[tok-exprPrivate+1] = str
}
}
func parseExpr(input string) (expr, error) {
l := newLexer()
l.setData([]byte(input))
e := exprNewParser().Parse(l)
if e != 0 || len(l.errs) > 0 {
return nil, l.errs[0]
}
return l.expr, nil
}
// parseError is what is returned when we failed to parse.
type parseError struct {
msg string
line, col int
}
func (p parseError) Error() string {
if p.col == 0 && p.line == 0 {
return p.msg
}
return fmt.Sprintf("parse error at line %d, col %d: %s", p.line, p.col, p.msg)
}
func newParseError(msg string, line, col int) parseError {
return parseError{
msg: msg,
line: line,
col: col,
}
}
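As an in-package sketch (parseExpr and parseError are unexported, so this would live in a test file of package pattern), the error surface looks like this; the expected output mirrors the empty-input case in Test_Error below:
```go
package pattern

import "fmt"

// Example_parseError demonstrates the formatted parse error for an empty input.
func Example_parseError() {
	_, err := parseExpr("")
	fmt.Println(err)
	// Output: parse error at line 1, col 1: syntax error: unexpected $end, expecting IDENTIFIER or LITERAL
}
```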

@ -0,0 +1,59 @@
package pattern
import (
"testing"
"github.com/stretchr/testify/require"
)
func Test_Parse(t *testing.T) {
for _, tc := range []struct {
input string
expected expr
err error
}{
{
"<foo> bar f <f>",
expr{capture("foo"), literals(" bar f "), capture("f")},
nil,
},
{
"<foo",
expr{literals("<foo")},
nil,
},
{
"<foo ><bar>",
expr{literals("<foo >"), capture("bar")},
nil,
},
{
"<>",
expr{literals("<>")},
nil,
},
{
"<_>",
expr{capture("_")},
nil,
},
{
"<1_>",
expr{literals("<1_>")},
nil,
},
{
`<ip> - <user> [<_>] "<method> <path> <_>" <status> <size> <url> <user_agent>`,
expr{capture("ip"), literals(" - "), capture("user"), literals(" ["), capture("_"), literals(`] "`), capture("method"), literals(" "), capture("path"), literals(" "), capture("_"), literals(`" `), capture("status"), literals(" "), capture("size"), literals(" "), capture("url"), literals(" "), capture("user_agent")},
nil,
},
} {
tc := tc
actual, err := parseExpr(tc.input)
if tc.err != nil || err != nil {
require.Equal(t, tc.err, err)
return
}
require.Equal(t, tc.expected, actual)
}
}

@ -0,0 +1,95 @@
package pattern
import (
"bytes"
"errors"
)
var (
ErrNoCapture = errors.New("at least one capture is required")
ErrInvalidExpr = errors.New("invalid expression")
)
type Matcher interface {
Matches(in []byte) [][]byte
Names() []string
}
type matcher struct {
e expr
captures [][]byte
names []string
}
func New(in string) (Matcher, error) {
e, err := parseExpr(in)
if err != nil {
return nil, err
}
if err := e.validate(); err != nil {
return nil, err
}
return &matcher{
e: e,
captures: make([][]byte, 0, e.captureCount()),
names: e.captures(),
}, nil
}
// Matches matches the given line with the provided pattern.
// Matches invalidates the previous returned captures array.
func (m *matcher) Matches(in []byte) [][]byte {
if len(in) == 0 {
return nil
}
if len(m.e) == 0 {
return nil
}
captures := m.captures[:0]
expr := m.e
if ls, ok := expr[0].(literals); ok {
i := bytes.Index(in, ls)
if i != 0 {
return nil
}
in = in[len(ls):]
expr = expr[1:]
}
if len(expr) == 0 {
return nil
}
// from now on the expression alternates capture - literals - capture ... (literals)?
for len(expr) != 0 {
if len(expr) == 1 { // we're ending on a capture.
if !(expr[0].(capture)).isUnamed() {
captures = append(captures, in)
}
return captures
}
cap := expr[0].(capture)
ls := expr[1].(literals)
expr = expr[2:]
i := bytes.Index(in, ls)
if i == -1 {
// if the literals following the capture can't be found, return the rest of the line as the capture.
if !cap.isUnamed() {
captures = append(captures, in)
}
return captures
}
if cap.isUnamed() {
in = in[len(ls)+i:]
continue
}
captures = append(captures, in[:i])
in = in[len(ls)+i:]
}
return captures
}
func (m *matcher) Names() []string {
return m.names
}
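A self-contained usage sketch (not part of this diff), reusing the Common Log Format fixture from the tests below:
```go
package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/logql/log/pattern"
)

func main() {
	m, err := pattern.New(`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`)
	if err != nil {
		panic(err)
	}
	line := []byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
	names := m.Names()
	// Matches returns one []byte per named capture, in order of appearance.
	for i, c := range m.Matches(line) {
		fmt.Printf("%s => %q\n", names[i], c)
	}
}
```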

@ -0,0 +1,162 @@
package pattern
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
var fixtures = []struct {
expr string
in string
expected []string
}{
{
"foo <foo> bar",
"foo buzz bar",
[]string{"buzz"},
},
{
"foo <foo> bar<fuzz>",
"foo buzz bar",
[]string{"buzz", ""},
},
{
"<foo> bar<fuzz>",
" bar",
[]string{"", ""},
},
{
"<path>?<_>",
`/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1`,
[]string{"/api/plugins/versioncheck"},
},
{
"<path>?<_>",
`/api/plugins/status`,
[]string{"/api/plugins/status"},
},
{
// Common Log Format
`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`,
[]string{"127.0.0.1", "user-identifier", "frank", "GET", "/apache_pb.gif", "200", "2326"},
},
{
// Combined Log Format
`<ip> - - [<_>] "<method> <path> <_>" <status> <size> `,
`35.191.8.106 - - [19/May/2021:07:21:49 +0000] "GET /api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1 HTTP/1.1" 200 107 "-" "Go-http-client/2.0" "80.153.74.144, 34.120.177.193" "TLSv1.3" "DE" "DEBW"`,
[]string{"35.191.8.106", "GET", "/api/plugins/versioncheck?slugIn=snuids-trafficlights-panel,input,gel&grafanaVersion=7.0.0-beta1", "200", "107"},
},
{
// MySQL
`<_> <id> [<level>] [<no>] [<component>] `,
`2020-08-06T14:25:02.835618Z 0 [Note] [MY-012487] [InnoDB] DDL log recovery : begin`,
[]string{"0", "Note", "MY-012487", "InnoDB"},
},
{
// MySQL
`<_> <id> [<level>] `,
`2021-05-19T07:40:12.215792Z 42761518 [Note] Aborted connection 42761518 to db: 'hosted_grafana' user: 'hosted_grafana' host: '10.36.4.122' (Got an error reading communication packets)`,
[]string{"42761518", "Note"},
},
{
// Kubernetes api-server
`<id> <_> <_> <line>] `,
`W0519 07:46:47.647050 1 clientconn.go:1223] grpc: addrConn.createTransport failed to connect to {https://kubernetes-etcd-1.kubernetes-etcd:2379 <nil> 0 <nil>}. Err :connection error: desc = "transport: Error while dialing dial tcp 10.32.85.85:2379: connect: connection refused". Reconnecting...`,
[]string{"W0519", "clientconn.go:1223"},
},
{
// Cassandra
`<level> [<component>]<_> in <duration>.<_>`,
`INFO [Service Thread] 2021-05-19 07:40:12,130 GCInspector.java:284 - ParNew GC in 248ms. CMS Old Gen: 5043436640 -> 5091062064; Par Eden Space: 671088640 -> 0; Par Survivor Space: 70188280 -> 60139760`,
[]string{"INFO", "Service Thread", "248ms"},
},
{
// Cortex & Loki distributor
`<_> msg="<method> <path> (<status>) <duration>"`,
`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`,
[]string{"POST", "/loki/api/v1/push", "204", "1.238734ms"},
},
{
// Etcd
`<_> <_> <level> | <component>: <_> peer <peer_id> <_> tcp <ip>:<_>`,
`2021-05-19 08:16:50.181436 W | rafthttp: health check for peer fd8275e521cfb532 could not connect: dial tcp 10.32.85.85:2380: connect: connection refused`,
[]string{"W", "rafthttp", "fd8275e521cfb532", "10.32.85.85"},
},
{
// Kafka
`<_>] <level> [Log partition=<part>, dir=<dir>] `,
`[2021-05-19 08:35:28,681] INFO [Log partition=p-636-L-fs-117, dir=/data/kafka-logs] Deleting segment 455976081 (kafka.log.Log)`,
[]string{"INFO", "p-636-L-fs-117", "/data/kafka-logs"},
},
{
// Elastic
`<_>][<level>][<component>] [<id>] [<index>]`,
`[2021-05-19T06:54:06,994][INFO ][o.e.c.m.MetaDataMappingService] [1f605d47-8454-4bfb-a67f-49f318bf837a] [usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA] update_mapping [report]`,
[]string{"INFO ", "o.e.c.m.MetaDataMappingService", "1f605d47-8454-4bfb-a67f-49f318bf837a", "usage-stats-2021.05.19/O2Je9IbmR8CqFyUvNpTttA"},
},
{
// Envoy
`<_> "<method> <path> <_>" <status> <_> <received_bytes> <sent_bytes> <duration> <upstream_time> "<forward_for>" "<agent>" <_> <_> "<upstream>"`,
`[2016-04-15T20:17:00.310Z] "POST /api/v1/locations HTTP/2" 204 - 154 0 226 100 "10.0.35.28" "nsq2http" "cc21d9b0-cf5c-432b-8c7e-98aeb7988cd2" "locations" "tcp://10.0.2.1:80"`,
[]string{"POST", "/api/v1/locations", "204", "154", "0", "226", "100", "10.0.35.28", "nsq2http", "tcp://10.0.2.1:80"},
},
}
func Test_matcher_Matches(t *testing.T) {
for _, tt := range fixtures {
tt := tt
t.Run(tt.expr, func(t *testing.T) {
t.Parallel()
m, err := New(tt.expr)
require.NoError(t, err)
actual := m.Matches([]byte(tt.in))
var actualStrings []string
for _, a := range actual {
actualStrings = append(actualStrings, string(a))
}
require.Equal(t, tt.expected, actualStrings)
})
}
}
var res [][]byte
func Benchmark_matcher_Matches(b *testing.B) {
for _, tt := range fixtures {
b.Run(tt.expr, func(b *testing.B) {
b.ReportAllocs()
m, err := New(tt.expr)
require.NoError(b, err)
b.ResetTimer()
l := []byte(tt.in)
for n := 0; n < b.N; n++ {
res = m.Matches(l)
}
})
}
}
func Test_Error(t *testing.T) {
for _, tt := range []struct {
name string
err error
}{
{"<f>", nil},
{"<f> <a>", nil},
{"", newParseError("syntax error: unexpected $end, expecting IDENTIFIER or LITERAL", 1, 1)},
{"<_>", ErrNoCapture},
{"foo <_> bar <_>", ErrNoCapture},
{"foo bar buzz", ErrNoCapture},
{"<f><f>", fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)},
{"<f> f<d><b>", fmt.Errorf("found consecutive capture: %w", ErrInvalidExpr)},
{"<f> f<f>", fmt.Errorf("duplicate capture name (f): %w", ErrInvalidExpr)},
} {
t.Run(tt.name, func(t *testing.T) {
_, err := New(tt.name)
require.Equal(t, tt.err, err)
})
}
}

@ -1061,6 +1061,25 @@ func TestParse(t *testing.T) {
},
},
},
{
in: `{app="foo"} |= "bar" | pattern "<foo> bar <buzz>" | (duration > 1s or status!= 200) and method!="POST"`,
exp: &pipelineExpr{
left: newMatcherExpr([]*labels.Matcher{{Type: labels.MatchEqual, Name: "app", Value: "foo"}}),
pipeline: MultiStageExpr{
newLineFilterExpr(nil, labels.MatchEqual, "bar"),
newLabelParserExpr(OpParserTypePattern, "<foo> bar <buzz>"),
&labelFilterExpr{
LabelFilterer: log.NewAndLabelFilter(
log.NewOrLabelFilter(
log.NewDurationLabelFilter(log.LabelFilterGreaterThan, "duration", 1*time.Second),
log.NewNumericLabelFilter(log.LabelFilterNotEqual, "status", 200.0),
),
log.NewStringLabelFilter(mustNewMatcher(labels.MatchNotEqual, "method", "POST")),
),
},
},
},
},
{
in: `{app="foo"} |= "bar" | json | ( status_code < 500 and status_code > 200) or latency >= 250ms `,
exp: &pipelineExpr{
@ -2508,7 +2527,6 @@ func TestParseLogSelectorExpr_equalityMatcher(t *testing.T) {
}
func Test_match(t *testing.T) {
tests := []struct {
name string
input []string
@ -2554,7 +2572,6 @@ func Test_match(t *testing.T) {
} else {
require.Equal(t, tt.want, got)
}
})
}
}
