diff --git a/pkg/logql/log/fmt.go b/pkg/logql/log/fmt.go index e30d92a823..270df15d68 100644 --- a/pkg/logql/log/fmt.go +++ b/pkg/logql/log/fmt.go @@ -245,7 +245,14 @@ func (lf *LabelsFormatter) Process(l []byte, lbs *LabelsBuilder) ([]byte, bool) func (lf *LabelsFormatter) RequiredLabelNames() []string { var names []string - return names + for _, fm := range lf.formats { + if fm.Rename { + names = append(names, fm.Value) + continue + } + names = append(names, listNodeFields(fm.tmpl.Root)...) + } + return uniqueString(names) } func trunc(c int, s string) string { diff --git a/pkg/logql/log/fmt_test.go b/pkg/logql/log/fmt_test.go index 0e9a3e3a84..8903973e65 100644 --- a/pkg/logql/log/fmt_test.go +++ b/pkg/logql/log/fmt_test.go @@ -264,7 +264,6 @@ func Test_trunc(t *testing.T) { } func Test_substring(t *testing.T) { - tests := []struct { start int end int @@ -309,3 +308,20 @@ func TestLineFormatter_RequiredLabelNames(t *testing.T) { }) } } + +func TestLabelFormatter_RequiredLabelNames(t *testing.T) { + tests := []struct { + name string + fmts []LabelFmt + want []string + }{ + {"rename", []LabelFmt{NewRenameLabelFmt("foo", "bar")}, []string{"bar"}}, + {"rename and fmt", []LabelFmt{NewRenameLabelFmt("fuzz", "bar"), NewTemplateLabelFmt("1", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"bar", "foo", "buzz"}}, + {"fmt", []LabelFmt{NewTemplateLabelFmt("1", "{{.blip}}"), NewTemplateLabelFmt("2", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"blip", "foo", "buzz", "bar"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.want, mustNewLabelsFormatter(tt.fmts).RequiredLabelNames()) + }) + } +} diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index 3d6d2bd833..fc6e036f82 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -6,9 +6,7 @@ import ( "github.com/prometheus/prometheus/pkg/labels" ) -var ( - emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash()) -) +var emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash()) // LabelsResult is a computed labels result that contains the labels set with associated string and hash. // The is mainly used for caching and returning labels computations out of pipelines and stages. @@ -68,7 +66,7 @@ type BaseLabelsBuilder struct { err string groups []string - parserKeyHints []string // label key hints for metric queries that allows to limit parser extractions to only this list of labels. + parserKeyHints ParserHint // label key hints for metric queries that allows to limit parser extractions to only this list of labels. without, noLabels bool resultCache map[uint64]LabelsResult @@ -85,7 +83,7 @@ type LabelsBuilder struct { } // NewBaseLabelsBuilderWithGrouping creates a new base labels builder with grouping to compute results. -func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, without, noLabels bool) *BaseLabelsBuilder { +func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint, without, noLabels bool) *BaseLabelsBuilder { return &BaseLabelsBuilder{ del: make([]string, 0, 5), add: make([]labels.Label, 0, 16), @@ -100,7 +98,7 @@ func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, // NewLabelsBuilder creates a new base labels builder. func NewBaseLabelsBuilder() *BaseLabelsBuilder { - return NewBaseLabelsBuilderWithGrouping(nil, nil, false, false) + return NewBaseLabelsBuilderWithGrouping(nil, noParserHints, false, false) } // ForLabels creates a labels builder for a given labels set as base. @@ -133,7 +131,7 @@ func (b *LabelsBuilder) Reset() { // ParserLabelHints returns a limited list of expected labels to extract for metric queries. // Returns nil when it's impossible to hint labels extractions. -func (b *BaseLabelsBuilder) ParserLabelHints() []string { +func (b *BaseLabelsBuilder) ParserLabelHints() ParserHint { return b.parserKeyHints } diff --git a/pkg/logql/log/metrics_extraction.go b/pkg/logql/log/metrics_extraction.go index 1a5b446fdf..9625f3048c 100644 --- a/pkg/logql/log/metrics_extraction.go +++ b/pkg/logql/log/metrics_extraction.go @@ -47,17 +47,13 @@ type lineSampleExtractor struct { // NewLineSampleExtractor creates a SampleExtractor from a LineExtractor. // Multiple log stages are run before converting the log line. -func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without bool, noLabels bool) (SampleExtractor, error) { +func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without, noLabels bool) (SampleExtractor, error) { s := ReduceStages(stages) - var expectedLabels []string - if !without { - expectedLabels = append(expectedLabels, s.RequiredLabelNames()...) - expectedLabels = uniqueString(append(expectedLabels, groups...)) - } + hints := newParserHint(s.RequiredLabelNames(), groups, without, noLabels, "") return &lineSampleExtractor{ Stage: s, LineExtractor: ex, - baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels), + baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels), streamExtractors: make(map[uint64]StreamSampleExtractor), }, nil } @@ -118,7 +114,7 @@ type labelSampleExtractor struct { // to remove sample containing the __error__ label. func LabelExtractorWithStages( labelName, conversion string, - groups []string, without bool, noLabels bool, + groups []string, without, noLabels bool, preStages []Stage, postFilter Stage, ) (SampleExtractor, error) { @@ -139,20 +135,13 @@ func LabelExtractorWithStages( sort.Strings(groups) } preStage := ReduceStages(preStages) - var expectedLabels []string - if !without { - expectedLabels = append(expectedLabels, preStage.RequiredLabelNames()...) - expectedLabels = append(expectedLabels, groups...) - expectedLabels = append(expectedLabels, postFilter.RequiredLabelNames()...) - expectedLabels = append(expectedLabels, labelName) - expectedLabels = uniqueString(expectedLabels) - } + hints := newParserHint(append(preStage.RequiredLabelNames(), postFilter.RequiredLabelNames()...), groups, without, noLabels, labelName) return &labelSampleExtractor{ preStage: preStage, conversionFn: convFn, labelName: labelName, postFilter: postFilter, - baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels), + baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels), streamExtractors: make(map[uint64]StreamSampleExtractor), }, nil } diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 47bd612c53..fe2fe030cb 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "regexp" - "strings" "github.com/grafana/loki/pkg/logql/log/logfmt" @@ -41,6 +40,9 @@ func NewJSONParser() *JSONParser { } func (j *JSONParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) { + if lbs.ParserLabelHints().NoLabels() { + return line, true + } it := jsoniter.ConfigFastest.BorrowIterator(line) defer jsoniter.ConfigFastest.ReturnIterator(it) @@ -91,7 +93,7 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) { // first time we add return the field as prefix. if len(prefix) == 0 { field = sanitizeLabelKey(field, true) - if isValidKeyPrefix(field, j.lbs.ParserLabelHints()) { + if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) { return field, true } return "", false @@ -102,31 +104,17 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) { j.buf = append(j.buf, byte(jsonSpacer)) j.buf = append(j.buf, sanitizeLabelKey(field, false)...) // if matches keep going - if isValidKeyPrefix(string(j.buf), j.lbs.ParserLabelHints()) { + if j.lbs.ParserLabelHints().ShouldExtractPrefix(string(j.buf)) { return string(j.buf), true } return "", false - -} - -// isValidKeyPrefix extract an object if the prefix is valid -func isValidKeyPrefix(objectprefix string, hints []string) bool { - if len(hints) == 0 { - return true - } - for _, k := range hints { - if strings.HasPrefix(k, objectprefix) { - return true - } - } - return false } func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) { // the first time we use the field as label key. if len(prefix) == 0 { field = sanitizeLabelKey(field, true) - if !shouldExtractKey(field, j.lbs.ParserLabelHints()) { + if !j.lbs.ParserLabelHints().ShouldExtract(field) { // we can skip the value iter.Skip() return @@ -147,7 +135,7 @@ func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field stri if j.lbs.BaseHas(string(j.buf)) { j.buf = append(j.buf, duplicateSuffix...) } - if !shouldExtractKey(string(j.buf), j.lbs.ParserLabelHints()) { + if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) { iter.Skip() return } @@ -173,18 +161,6 @@ func readValue(iter *jsoniter.Iterator) string { } } -func shouldExtractKey(key string, hints []string) bool { - if len(hints) == 0 { - return true - } - for _, k := range hints { - if k == key { - return true - } - } - return false -} - func addLabel(lbs *LabelsBuilder, key, value string) { key = sanitizeLabelKey(key, true) if lbs.BaseHas(key) { @@ -263,9 +239,12 @@ func NewLogfmtParser() *LogfmtParser { } func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) { + if lbs.ParserLabelHints().NoLabels() { + return line, true + } l.dec.Reset(line) for l.dec.ScanKeyval() { - if !shouldExtractKey(string(l.dec.Key()), lbs.ParserLabelHints()) { + if !lbs.ParserLabelHints().ShouldExtract(string(l.dec.Key())) { continue } key := string(l.dec.Key()) diff --git a/pkg/logql/log/parser_hints.go b/pkg/logql/log/parser_hints.go new file mode 100644 index 0000000000..eb3264bbd3 --- /dev/null +++ b/pkg/logql/log/parser_hints.go @@ -0,0 +1,86 @@ +package log + +import ( + "strings" +) + +var noParserHints = &parserHint{} + +// ParserHint are hints given to LogQL parsers. +// This is specially useful for parser that extract implicitly all possible label keys. +// This is used only within metric queries since it's rare that you need all label keys. +// For example in the following expression: +// +// sum by (status_code) (rate({app="foo"} | json [5m])) +// +// All we need to extract is the status_code in the json parser. +type ParserHint interface { + // Tells if a label with the given key should be extracted. + ShouldExtract(key string) bool + // Tells if there's any hint that start with the given prefix. + // This allows to speed up key searching in nested structured like json. + ShouldExtractPrefix(prefix string) bool + // Tells if we should not extract any labels. + // For example in : + // sum(rate({app="foo"} | json [5m])) + // We don't need to extract any labels from the log line. + NoLabels() bool +} + +type parserHint struct { + noLabels bool + requiredLabels []string +} + +func (p *parserHint) ShouldExtract(key string) bool { + if len(p.requiredLabels) == 0 { + return true + } + for _, l := range p.requiredLabels { + if l == key { + return true + } + } + return false +} + +func (p *parserHint) ShouldExtractPrefix(prefix string) bool { + if len(p.requiredLabels) == 0 { + return true + } + for _, l := range p.requiredLabels { + if strings.HasPrefix(l, prefix) { + return true + } + } + + return false +} + +func (p *parserHint) NoLabels() bool { + return p.noLabels +} + +// newParserHint creates a new parser hint using the list of labels that are seen and required in a query. +func newParserHint(requiredLabelNames, groups []string, without, noLabels bool, metricLabelName string) *parserHint { + if len(groups) > 0 { + requiredLabelNames = append(requiredLabelNames, groups...) + } + if metricLabelName != "" { + requiredLabelNames = append(requiredLabelNames, metricLabelName) + } + requiredLabelNames = uniqueString(requiredLabelNames) + if noLabels { + if len(requiredLabelNames) > 0 { + return &parserHint{requiredLabels: requiredLabelNames} + } + return &parserHint{noLabels: true} + } + // we don't know what is required when a without clause is used. + // Same is true when there's no grouping. + // no hints available then. + if without || len(groups) == 0 { + return noParserHints + } + return &parserHint{requiredLabels: requiredLabelNames} +} diff --git a/pkg/logql/log/parser_hints_test.go b/pkg/logql/log/parser_hints_test.go new file mode 100644 index 0000000000..bef9238908 --- /dev/null +++ b/pkg/logql/log/parser_hints_test.go @@ -0,0 +1,118 @@ +// uses log_test package to avoid circular dependency between log and logql package. +package log_test + +import ( + "testing" + + "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logql" +) + +var jsonLine = []byte(`{ + "remote_user": "foo", + "upstream_addr": "10.0.0.1:80", + "protocol": "HTTP/2.0", + "request": { + "time": "30.001", + "method": "POST", + "host": "foo.grafana.net", + "uri": "/rpc/v2/stage", + "size": "101" + }, + "response": { + "status": 204, + "latency_seconds": "30.001" + } +}`) + +func Test_ParserHints(t *testing.T) { + lbs := labels.Labels{{Name: "app", Value: "nginx"}} + + t.Parallel() + for _, tt := range []struct { + expr string + expectOk bool + expectVal float64 + expectLbs string + }{ + { + `rate({app="nginx"} | json | response_status = 204 [1m])`, + true, + 1.0, + `{app="nginx", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, + }, + { + `sum without (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`, + true, + 1.0, + `{protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, + }, + { + `sum by (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`, + true, + 1.0, + `{app="nginx", request_host="foo.grafana.net"}`, + }, + { + `sum(rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`, + true, + 1.0, + `{}`, + }, + { + `sum(rate({app="nginx"} | json [1m]))`, + true, + 1.0, + `{}`, + }, + { + `sum(rate({app="nginx"} | json | unwrap response_latency_seconds [1m]))`, + true, + 30.001, + `{}`, + }, + { + `sum(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`, + true, + 30.001, + `{}`, + }, + { + `sum by (request_host,app)(rate({app="nginx"} | json | response_status = 204 and remote_user = "foo" | unwrap response_latency_seconds [1m]))`, + true, + 30.001, + `{app="nginx", request_host="foo.grafana.net"}`, + }, + { + `rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m])`, + true, + 30.001, + `{app="nginx", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`, + }, + { + `sum without (request_host,app)(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`, + true, + 30.001, + `{protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`, + }, + } { + tt := tt + t.Run(tt.expr, func(t *testing.T) { + t.Parallel() + expr, err := logql.ParseSampleExpr(tt.expr) + require.NoError(t, err) + ex, err := expr.Extractor() + require.NoError(t, err) + v, lbsRes, ok := ex.ForStream(lbs).Process(jsonLine) + var lbsResString string + if lbsRes != nil { + lbsResString = lbsRes.String() + } + require.Equal(t, tt.expectOk, ok) + require.Equal(t, tt.expectVal, v) + require.Equal(t, tt.expectLbs, lbsResString) + }) + } +} diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index d2b4494072..1f7d97eaf4 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -88,7 +88,6 @@ func Test_jsonParser_Parse(t *testing.T) { } func Benchmark_Parser(b *testing.B) { - lbs := labels.Labels{ {Name: "cluster", Value: "qa-us-central1"}, {Name: "namespace", Value: "qa"}, @@ -127,7 +126,7 @@ func Benchmark_Parser(b *testing.B) { b.Run("labels hints", func(b *testing.B) { builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) - builder.parserKeyHints = tt.LabelParseHints + builder.parserKeyHints = newParserHint(tt.LabelParseHints, tt.LabelParseHints, false, false, "") for n := 0; n < b.N; n++ { builder.Reset() _, _ = tt.s.Process(line, builder) @@ -135,7 +134,6 @@ func Benchmark_Parser(b *testing.B) { }) }) } - } func TestNewRegexpParser(t *testing.T) {