From 9c6e509a6a656cd222b92461464f4e17cd2ecd5d Mon Sep 17 00:00:00 2001 From: Travis Patterson Date: Wed, 8 Mar 2023 16:53:34 +0100 Subject: [PATCH] Reimplement JsonExpressionParser in terms of jsonparser (#8734) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR reimplements the functionality of `JSONExpressionParser` using the `jsonparser` library rather than `jsoniter` Benchmarks: ``` benchstat json_expression_old.txt json_expression_new.txt name old time/op new time/op delta JsonExpressionParser/json-expression-8 4.15µs ± 4% 0.82µs ± 3% -80.17% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JsonExpressionParser/json-expression-8 1.07kB ± 0% 0.02kB ± 0% -97.76% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JsonExpressionParser/json-expression-8 72.0 ± 0% 2.0 ± 0% -97.22% (p=0.000 n=10+10) ``` --- pkg/logql/log/parser.go | 74 ++++++++++++++++++++++++++++++------ pkg/logql/log/parser_test.go | 44 ++++++++++++++++++++- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 811e7f639b..9c0527663e 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -442,14 +442,14 @@ func (l *LogfmtExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilde func (l *LogfmtExpressionParser) RequiredLabelNames() []string { return []string{} } type JSONExpressionParser struct { - expressions map[string][]interface{} - - keys internedStringSet + ids []string + paths [][]string + keys internedStringSet } func NewJSONExpressionParser(expressions []LabelExtractionExpr) (*JSONExpressionParser, error) { - paths := make(map[string][]interface{}) - + var ids []string + var paths [][]string for _, exp := range expressions { path, err := jsonexpr.Parse(exp.Expression, false) if err != nil { @@ -460,27 +460,51 @@ func NewJSONExpressionParser(expressions []LabelExtractionExpr) (*JSONExpression return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier) } - paths[exp.Identifier] = path + ids = append(ids, exp.Identifier) + paths = append(paths, pathsToString(path)) } return &JSONExpressionParser{ - expressions: paths, - keys: internedStringSet{}, + ids: ids, + paths: paths, + keys: internedStringSet{}, }, nil } +func pathsToString(paths []interface{}) []string { + stingPaths := make([]string, 0, len(paths)) + for _, p := range paths { + switch v := p.(type) { + case int: + stingPaths = append(stingPaths, fmt.Sprintf("[%d]", v)) + case string: + stingPaths = append(stingPaths, v) + } + } + return stingPaths +} + func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { if lbs.ParserLabelHints().NoLabels() { return line, true } - if !jsoniter.ConfigFastest.Valid(line) { + // Check that the line starts correctly + // the parser will pass an error if other + // parts of the line are malformed + if !isValidJSONStart(line) { lbs.SetErr(errJSON) return line, true } - for identifier, paths := range j.expressions { - result := jsoniter.ConfigFastest.Get(line, paths...).ToString() + var matches int + jsonparser.EachKey(line, func(idx int, data []byte, typ jsonparser.ValueType, err error) { + if err != nil { + lbs.SetErr(errJSON) + return + } + + identifier := j.ids[idx] key, _ := j.keys.Get(unsafeGetBytes(identifier), func() (string, bool) { if lbs.BaseHas(identifier) { identifier = identifier + duplicateSuffix @@ -488,11 +512,37 @@ func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) return identifier, true }) - lbs.Set(key, result) + switch typ { + case jsonparser.Null: + lbs.Set(key, "") + default: + lbs.Set(key, unsafeGetString(data)) + } + + matches++ + }, j.paths...) + + // Ensure there's a label for every value + if matches < len(j.ids) { + for _, id := range j.ids { + if _, ok := lbs.Get(id); !ok { + lbs.Set(id, "") + } + } } + return line, true } +func isValidJSONStart(data []byte) bool { + switch data[0] { + case '"', '{', '[': + return true + default: + return false + } +} + func (j *JSONExpressionParser) RequiredLabelNames() []string { return []string{} } type UnpackParser struct { diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index bf6487677f..ae1cc90d11 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -2,13 +2,12 @@ package log import ( "fmt" + "github.com/grafana/loki/pkg/logqlmodel" "sort" "testing" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" - - "github.com/grafana/loki/pkg/logqlmodel" ) func Test_jsonParser_Parse(t *testing.T) { @@ -1049,3 +1048,44 @@ func Test_PatternParser(t *testing.T) { }) } } + +func BenchmarkJsonExpressionParser(b *testing.B) { + simpleJsn := []byte(`{ + "data": "Click Here", + "size": 36, + "style": "bold", + "name": "text1", + "hOffset": 250, + "vOffset": 100, + "alignment": "center", + "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" + }`) + lbs := NewBaseLabelsBuilder().ForLabels(labels.Labels{}, 0) + + benchmarks := []struct { + name string + p Stage + line []byte + }{ + {"json-expression", mustStage(NewJSONExpressionParser([]LabelExtractionExpr{ + NewLabelExtractionExpr("data", "data"), + NewLabelExtractionExpr("size", "size"), + NewLabelExtractionExpr("style", "style"), + NewLabelExtractionExpr("name", "name"), + NewLabelExtractionExpr("hOffset", "hOffset"), + NewLabelExtractionExpr("vOffset", "vOffset"), + NewLabelExtractionExpr("alignment", "alignment"), + NewLabelExtractionExpr("onMouseUp", "onMouseUp"), + })), simpleJsn}, + } + for _, bb := range benchmarks { + b.Run(bb.name, func(b *testing.B) { + b.ResetTimer() + + for n := 0; n < b.N; n++ { + lbs.Reset() + _, result = bb.p.Process(0, bb.line, lbs) + } + }) + } +}