Fix a bug with metric queries and label_format. (#3263)

* Fix a bug whith metric queries and label_format. In my previous PR where I introduced hints, I forgot to implement label format label required. This would cause label format label to be skipped during parsing. I've also took the opportunity to improve the code. Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com> * Refactor and add more tests. Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
4 years ago · e8cce09de2
parent 287244d110
commit e8cce09de2
8 changed files with 252 additions and 61 deletions
--- a/pkg/logql/log/fmt.go
+++ b/pkg/logql/log/fmt.go
@ -245,7 +245,14 @@ func (lf *LabelsFormatter) Process(l []byte, lbs *LabelsBuilder) ([]byte, bool)

 func (lf *LabelsFormatter) RequiredLabelNames() []string {
 	var names []string
-	return names
+	for _, fm := range lf.formats {
+		if fm.Rename {
+			names = append(names, fm.Value)
+			continue
+		}
+		names = append(names, listNodeFields(fm.tmpl.Root)...)
+	}
+	return uniqueString(names)
 }

 func trunc(c int, s string) string {
--- a/pkg/logql/log/fmt_test.go
+++ b/pkg/logql/log/fmt_test.go
@ -264,7 +264,6 @@ func Test_trunc(t *testing.T) {
 }

 func Test_substring(t *testing.T) {
-
 	tests := []struct {
 		start int
 		end   int
@ -309,3 +308,20 @@ func TestLineFormatter_RequiredLabelNames(t *testing.T) {
 		})
 	}
 }
+
+func TestLabelFormatter_RequiredLabelNames(t *testing.T) {
+	tests := []struct {
+		name string
+		fmts []LabelFmt
+		want []string
+	}{
+		{"rename", []LabelFmt{NewRenameLabelFmt("foo", "bar")}, []string{"bar"}},
+		{"rename and fmt", []LabelFmt{NewRenameLabelFmt("fuzz", "bar"), NewTemplateLabelFmt("1", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"bar", "foo", "buzz"}},
+		{"fmt", []LabelFmt{NewTemplateLabelFmt("1", "{{.blip}}"), NewTemplateLabelFmt("2", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"blip", "foo", "buzz", "bar"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			require.Equal(t, tt.want, mustNewLabelsFormatter(tt.fmts).RequiredLabelNames())
+		})
+	}
+}
--- a/pkg/logql/log/labels.go
+++ b/pkg/logql/log/labels.go
@ -6,9 +6,7 @@ import (
 	"github.com/prometheus/prometheus/pkg/labels"
 )

-var (
-	emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash())
-)
+var emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash())

 // LabelsResult is a computed labels result that contains the labels set with associated string and hash.
 // The is mainly used for caching and returning labels computations out of pipelines and stages.
@ -68,7 +66,7 @@ type BaseLabelsBuilder struct {
 	err string

 	groups            []string
-	parserKeyHints    []string // label key hints for metric queries that allows to limit parser extractions to only this list of labels.
+	parserKeyHints    ParserHint // label key hints for metric queries that allows to limit parser extractions to only this list of labels.
 	without, noLabels bool

 	resultCache map[uint64]LabelsResult
@ -85,7 +83,7 @@ type LabelsBuilder struct {
 }

 // NewBaseLabelsBuilderWithGrouping creates a new base labels builder with grouping to compute results.
-func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, without, noLabels bool) *BaseLabelsBuilder {
+func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint, without, noLabels bool) *BaseLabelsBuilder {
 	return &BaseLabelsBuilder{
 		del:            make([]string, 0, 5),
 		add:            make([]labels.Label, 0, 16),
@ -100,7 +98,7 @@ func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string,

 // NewLabelsBuilder creates a new base labels builder.
 func NewBaseLabelsBuilder() *BaseLabelsBuilder {
-	return NewBaseLabelsBuilderWithGrouping(nil, nil, false, false)
+	return NewBaseLabelsBuilderWithGrouping(nil, noParserHints, false, false)
 }

 // ForLabels creates a labels builder for a given labels set as base.
@ -133,7 +131,7 @@ func (b *LabelsBuilder) Reset() {

 // ParserLabelHints returns a limited list of expected labels to extract for metric queries.
 // Returns nil when it's impossible to hint labels extractions.
-func (b *BaseLabelsBuilder) ParserLabelHints() []string {
+func (b *BaseLabelsBuilder) ParserLabelHints() ParserHint {
 	return b.parserKeyHints
 }

--- a/pkg/logql/log/metrics_extraction.go
+++ b/pkg/logql/log/metrics_extraction.go
@ -47,17 +47,13 @@ type lineSampleExtractor struct {

 // NewLineSampleExtractor creates a SampleExtractor from a LineExtractor.
 // Multiple log stages are run before converting the log line.
-func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without bool, noLabels bool) (SampleExtractor, error) {
+func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without, noLabels bool) (SampleExtractor, error) {
 	s := ReduceStages(stages)
-	var expectedLabels []string
-	if !without {
-		expectedLabels = append(expectedLabels, s.RequiredLabelNames()...)
-		expectedLabels = uniqueString(append(expectedLabels, groups...))
-	}
+	hints := newParserHint(s.RequiredLabelNames(), groups, without, noLabels, "")
 	return &lineSampleExtractor{
 		Stage:            s,
 		LineExtractor:    ex,
-		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels),
+		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels),
 		streamExtractors: make(map[uint64]StreamSampleExtractor),
 	}, nil
 }
@ -118,7 +114,7 @@ type labelSampleExtractor struct {
 // to remove sample containing the __error__ label.
 func LabelExtractorWithStages(
 	labelName, conversion string,
-	groups []string, without bool, noLabels bool,
+	groups []string, without, noLabels bool,
 	preStages []Stage,
 	postFilter Stage,
 ) (SampleExtractor, error) {
@ -139,20 +135,13 @@ func LabelExtractorWithStages(
 		sort.Strings(groups)
 	}
 	preStage := ReduceStages(preStages)
-	var expectedLabels []string
-	if !without {
-		expectedLabels = append(expectedLabels, preStage.RequiredLabelNames()...)
-		expectedLabels = append(expectedLabels, groups...)
-		expectedLabels = append(expectedLabels, postFilter.RequiredLabelNames()...)
-		expectedLabels = append(expectedLabels, labelName)
-		expectedLabels = uniqueString(expectedLabels)
-	}
+	hints := newParserHint(append(preStage.RequiredLabelNames(), postFilter.RequiredLabelNames()...), groups, without, noLabels, labelName)
 	return &labelSampleExtractor{
 		preStage:         preStage,
 		conversionFn:     convFn,
 		labelName:        labelName,
 		postFilter:       postFilter,
-		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels),
+		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels),
 		streamExtractors: make(map[uint64]StreamSampleExtractor),
 	}, nil
 }
--- a/pkg/logql/log/parser.go
+++ b/pkg/logql/log/parser.go
@ -5,7 +5,6 @@ import (
 	"fmt"
 	"io"
 	"regexp"
-	"strings"

 	"github.com/grafana/loki/pkg/logql/log/logfmt"

@ -41,6 +40,9 @@ func NewJSONParser() *JSONParser {
 }

 func (j *JSONParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
+	if lbs.ParserLabelHints().NoLabels() {
+		return line, true
+	}
 	it := jsoniter.ConfigFastest.BorrowIterator(line)
 	defer jsoniter.ConfigFastest.ReturnIterator(it)

@ -91,7 +93,7 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) {
 	// first time we add return the field as prefix.
 	if len(prefix) == 0 {
 		field = sanitizeLabelKey(field, true)
-		if isValidKeyPrefix(field, j.lbs.ParserLabelHints()) {
+		if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) {
 			return field, true
 		}
 		return "", false
@ -102,31 +104,17 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) {
 	j.buf = append(j.buf, byte(jsonSpacer))
 	j.buf = append(j.buf, sanitizeLabelKey(field, false)...)
 	// if matches keep going
-	if isValidKeyPrefix(string(j.buf), j.lbs.ParserLabelHints()) {
+	if j.lbs.ParserLabelHints().ShouldExtractPrefix(string(j.buf)) {
 		return string(j.buf), true
 	}
 	return "", false
-
-}
-
-// isValidKeyPrefix extract an object if the prefix is valid
-func isValidKeyPrefix(objectprefix string, hints []string) bool {
-	if len(hints) == 0 {
-		return true
-	}
-	for _, k := range hints {
-		if strings.HasPrefix(k, objectprefix) {
-			return true
-		}
-	}
-	return false
 }

 func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) {
 	// the first time we use the field as label key.
 	if len(prefix) == 0 {
 		field = sanitizeLabelKey(field, true)
-		if !shouldExtractKey(field, j.lbs.ParserLabelHints()) {
+		if !j.lbs.ParserLabelHints().ShouldExtract(field) {
 			// we can skip the value
 			iter.Skip()
 			return
@ -147,7 +135,7 @@ func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field stri
 	if j.lbs.BaseHas(string(j.buf)) {
 		j.buf = append(j.buf, duplicateSuffix...)
 	}
-	if !shouldExtractKey(string(j.buf), j.lbs.ParserLabelHints()) {
+	if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) {
 		iter.Skip()
 		return
 	}
@ -173,18 +161,6 @@ func readValue(iter *jsoniter.Iterator) string {
 	}
 }

-func shouldExtractKey(key string, hints []string) bool {
-	if len(hints) == 0 {
-		return true
-	}
-	for _, k := range hints {
-		if k == key {
-			return true
-		}
-	}
-	return false
-}
-
 func addLabel(lbs *LabelsBuilder, key, value string) {
 	key = sanitizeLabelKey(key, true)
 	if lbs.BaseHas(key) {
@ -263,9 +239,12 @@ func NewLogfmtParser() *LogfmtParser {
 }

 func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
+	if lbs.ParserLabelHints().NoLabels() {
+		return line, true
+	}
 	l.dec.Reset(line)
 	for l.dec.ScanKeyval() {
-		if !shouldExtractKey(string(l.dec.Key()), lbs.ParserLabelHints()) {
+		if !lbs.ParserLabelHints().ShouldExtract(string(l.dec.Key())) {
 			continue
 		}
 		key := string(l.dec.Key())
--- a/pkg/logql/log/parser_hints.go
+++ b/pkg/logql/log/parser_hints.go
@ -0,0 +1,86 @@
+package log
+
+import (
+	"strings"
+)
+
+var noParserHints = &parserHint{}
+
+// ParserHint are hints given to LogQL parsers.
+// This is specially useful for parser that extract implicitly all possible label keys.
+// This is used only within metric queries since it's rare that you need all label keys.
+// For example in the following expression:
+//
+//		sum by (status_code) (rate({app="foo"} | json [5m]))
+//
+// All we need to extract is the status_code in the json parser.
+type ParserHint interface {
+	// Tells if a label with the given key should be extracted.
+	ShouldExtract(key string) bool
+	// Tells if there's any hint that start with the given prefix.
+	// This allows to speed up key searching in nested structured like json.
+	ShouldExtractPrefix(prefix string) bool
+	// Tells if we should not extract any labels.
+	// For example in :
+	//		 sum(rate({app="foo"} | json [5m]))
+	// We don't need to extract any labels from the log line.
+	NoLabels() bool
+}
+
+type parserHint struct {
+	noLabels       bool
+	requiredLabels []string
+}
+
+func (p *parserHint) ShouldExtract(key string) bool {
+	if len(p.requiredLabels) == 0 {
+		return true
+	}
+	for _, l := range p.requiredLabels {
+		if l == key {
+			return true
+		}
+	}
+	return false
+}
+
+func (p *parserHint) ShouldExtractPrefix(prefix string) bool {
+	if len(p.requiredLabels) == 0 {
+		return true
+	}
+	for _, l := range p.requiredLabels {
+		if strings.HasPrefix(l, prefix) {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (p *parserHint) NoLabels() bool {
+	return p.noLabels
+}
+
+// newParserHint creates a new parser hint using the list of labels that are seen and required in a query.
+func newParserHint(requiredLabelNames, groups []string, without, noLabels bool, metricLabelName string) *parserHint {
+	if len(groups) > 0 {
+		requiredLabelNames = append(requiredLabelNames, groups...)
+	}
+	if metricLabelName != "" {
+		requiredLabelNames = append(requiredLabelNames, metricLabelName)
+	}
+	requiredLabelNames = uniqueString(requiredLabelNames)
+	if noLabels {
+		if len(requiredLabelNames) > 0 {
+			return &parserHint{requiredLabels: requiredLabelNames}
+		}
+		return &parserHint{noLabels: true}
+	}
+	// we don't know what is required when a without clause is used.
+	// Same is true when there's no grouping.
+	// no hints available then.
+	if without || len(groups) == 0 {
+		return noParserHints
+	}
+	return &parserHint{requiredLabels: requiredLabelNames}
+}
--- a/pkg/logql/log/parser_hints_test.go
+++ b/pkg/logql/log/parser_hints_test.go
@ -0,0 +1,118 @@
+// uses log_test package to avoid circular dependency between log and logql package.
+package log_test
+
+import (
+	"testing"
+
+	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/stretchr/testify/require"
+
+	"github.com/grafana/loki/pkg/logql"
+)
+
+var jsonLine = []byte(`{
+	"remote_user": "foo",
+	"upstream_addr": "10.0.0.1:80",
+	"protocol": "HTTP/2.0",
+	"request": {
+		"time": "30.001",
+		"method": "POST",
+		"host": "foo.grafana.net",
+		"uri": "/rpc/v2/stage",
+		"size": "101"
+	},
+	"response": {
+		"status": 204,
+		"latency_seconds": "30.001"
+	}
+}`)
+
+func Test_ParserHints(t *testing.T) {
+	lbs := labels.Labels{{Name: "app", Value: "nginx"}}
+
+	t.Parallel()
+	for _, tt := range []struct {
+		expr      string
+		expectOk  bool
+		expectVal float64
+		expectLbs string
+	}{
+		{
+			`rate({app="nginx"} | json | response_status = 204 [1m])`,
+			true,
+			1.0,
+			`{app="nginx", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`,
+		},
+		{
+			`sum without (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
+			true,
+			1.0,
+			`{protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`,
+		},
+		{
+			`sum by (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
+			true,
+			1.0,
+			`{app="nginx", request_host="foo.grafana.net"}`,
+		},
+		{
+			`sum(rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
+			true,
+			1.0,
+			`{}`,
+		},
+		{
+			`sum(rate({app="nginx"} | json [1m]))`,
+			true,
+			1.0,
+			`{}`,
+		},
+		{
+			`sum(rate({app="nginx"} | json | unwrap response_latency_seconds [1m]))`,
+			true,
+			30.001,
+			`{}`,
+		},
+		{
+			`sum(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`,
+			true,
+			30.001,
+			`{}`,
+		},
+		{
+			`sum by (request_host,app)(rate({app="nginx"} | json | response_status = 204 and  remote_user = "foo" | unwrap response_latency_seconds [1m]))`,
+			true,
+			30.001,
+			`{app="nginx", request_host="foo.grafana.net"}`,
+		},
+		{
+			`rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m])`,
+			true,
+			30.001,
+			`{app="nginx", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`,
+		},
+		{
+			`sum without (request_host,app)(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`,
+			true,
+			30.001,
+			`{protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`,
+		},
+	} {
+		tt := tt
+		t.Run(tt.expr, func(t *testing.T) {
+			t.Parallel()
+			expr, err := logql.ParseSampleExpr(tt.expr)
+			require.NoError(t, err)
+			ex, err := expr.Extractor()
+			require.NoError(t, err)
+			v, lbsRes, ok := ex.ForStream(lbs).Process(jsonLine)
+			var lbsResString string
+			if lbsRes != nil {
+				lbsResString = lbsRes.String()
+			}
+			require.Equal(t, tt.expectOk, ok)
+			require.Equal(t, tt.expectVal, v)
+			require.Equal(t, tt.expectLbs, lbsResString)
+		})
+	}
+}
--- a/pkg/logql/log/parser_test.go
+++ b/pkg/logql/log/parser_test.go
@ -88,7 +88,6 @@ func Test_jsonParser_Parse(t *testing.T) {
 }

 func Benchmark_Parser(b *testing.B) {
-
 	lbs := labels.Labels{
 		{Name: "cluster", Value: "qa-us-central1"},
 		{Name: "namespace", Value: "qa"},
@ -127,7 +126,7 @@ func Benchmark_Parser(b *testing.B) {

 			b.Run("labels hints", func(b *testing.B) {
 				builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
-				builder.parserKeyHints = tt.LabelParseHints
+				builder.parserKeyHints = newParserHint(tt.LabelParseHints, tt.LabelParseHints, false, false, "")
 				for n := 0; n < b.N; n++ {
 					builder.Reset()
 					_, _ = tt.s.Process(line, builder)
@ -135,7 +134,6 @@ func Benchmark_Parser(b *testing.B) {
 			})
 		})
 	}
-
 }

 func TestNewRegexpParser(t *testing.T) {