diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 811e7f639b..9c0527663e 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -442,14 +442,14 @@ func (l *LogfmtExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilde func (l *LogfmtExpressionParser) RequiredLabelNames() []string { return []string{} } type JSONExpressionParser struct { - expressions map[string][]interface{} - - keys internedStringSet + ids []string + paths [][]string + keys internedStringSet } func NewJSONExpressionParser(expressions []LabelExtractionExpr) (*JSONExpressionParser, error) { - paths := make(map[string][]interface{}) - + var ids []string + var paths [][]string for _, exp := range expressions { path, err := jsonexpr.Parse(exp.Expression, false) if err != nil { @@ -460,27 +460,51 @@ func NewJSONExpressionParser(expressions []LabelExtractionExpr) (*JSONExpression return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier) } - paths[exp.Identifier] = path + ids = append(ids, exp.Identifier) + paths = append(paths, pathsToString(path)) } return &JSONExpressionParser{ - expressions: paths, - keys: internedStringSet{}, + ids: ids, + paths: paths, + keys: internedStringSet{}, }, nil } +func pathsToString(paths []interface{}) []string { + stingPaths := make([]string, 0, len(paths)) + for _, p := range paths { + switch v := p.(type) { + case int: + stingPaths = append(stingPaths, fmt.Sprintf("[%d]", v)) + case string: + stingPaths = append(stingPaths, v) + } + } + return stingPaths +} + func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { if lbs.ParserLabelHints().NoLabels() { return line, true } - if !jsoniter.ConfigFastest.Valid(line) { + // Check that the line starts correctly + // the parser will pass an error if other + // parts of the line are malformed + if !isValidJSONStart(line) { lbs.SetErr(errJSON) return line, true } - for identifier, paths := range j.expressions { - result := jsoniter.ConfigFastest.Get(line, paths...).ToString() + var matches int + jsonparser.EachKey(line, func(idx int, data []byte, typ jsonparser.ValueType, err error) { + if err != nil { + lbs.SetErr(errJSON) + return + } + + identifier := j.ids[idx] key, _ := j.keys.Get(unsafeGetBytes(identifier), func() (string, bool) { if lbs.BaseHas(identifier) { identifier = identifier + duplicateSuffix @@ -488,11 +512,37 @@ func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) return identifier, true }) - lbs.Set(key, result) + switch typ { + case jsonparser.Null: + lbs.Set(key, "") + default: + lbs.Set(key, unsafeGetString(data)) + } + + matches++ + }, j.paths...) + + // Ensure there's a label for every value + if matches < len(j.ids) { + for _, id := range j.ids { + if _, ok := lbs.Get(id); !ok { + lbs.Set(id, "") + } + } } + return line, true } +func isValidJSONStart(data []byte) bool { + switch data[0] { + case '"', '{', '[': + return true + default: + return false + } +} + func (j *JSONExpressionParser) RequiredLabelNames() []string { return []string{} } type UnpackParser struct { diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index bf6487677f..ae1cc90d11 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -2,13 +2,12 @@ package log import ( "fmt" + "github.com/grafana/loki/pkg/logqlmodel" "sort" "testing" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" - - "github.com/grafana/loki/pkg/logqlmodel" ) func Test_jsonParser_Parse(t *testing.T) { @@ -1049,3 +1048,44 @@ func Test_PatternParser(t *testing.T) { }) } } + +func BenchmarkJsonExpressionParser(b *testing.B) { + simpleJsn := []byte(`{ + "data": "Click Here", + "size": 36, + "style": "bold", + "name": "text1", + "hOffset": 250, + "vOffset": 100, + "alignment": "center", + "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" + }`) + lbs := NewBaseLabelsBuilder().ForLabels(labels.Labels{}, 0) + + benchmarks := []struct { + name string + p Stage + line []byte + }{ + {"json-expression", mustStage(NewJSONExpressionParser([]LabelExtractionExpr{ + NewLabelExtractionExpr("data", "data"), + NewLabelExtractionExpr("size", "size"), + NewLabelExtractionExpr("style", "style"), + NewLabelExtractionExpr("name", "name"), + NewLabelExtractionExpr("hOffset", "hOffset"), + NewLabelExtractionExpr("vOffset", "vOffset"), + NewLabelExtractionExpr("alignment", "alignment"), + NewLabelExtractionExpr("onMouseUp", "onMouseUp"), + })), simpleJsn}, + } + for _, bb := range benchmarks { + b.Run(bb.name, func(b *testing.B) { + b.ResetTimer() + + for n := 0; n < b.N; n++ { + lbs.Reset() + _, result = bb.p.Process(0, bb.line, lbs) + } + }) + } +}