Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/logql/log/parser.go

769 lines
19 KiB

package log
import (
"bytes"
"errors"
"fmt"
"strings"
"unicode/utf8"
"github.com/grafana/jsonparser"
"github.com/grafana/loki/v3/pkg/logql/log/jsonexpr"
"github.com/grafana/loki/v3/pkg/logql/log/logfmt"
"github.com/grafana/loki/v3/pkg/logql/log/pattern"
"github.com/grafana/loki/v3/pkg/logqlmodel"
"github.com/grafana/regexp"
jsoniter "github.com/json-iterator/go"
"github.com/prometheus/common/model"
)
const (
jsonSpacer = '_'
duplicateSuffix = "_extracted"
trueString = "true"
falseString = "false"
// How much stack space to allocate for unescaping JSON strings; if a string longer
// than this needs to be escaped, it will result in a heap allocation
unescapeStackBufSize = 64
)
var (
_ Stage = &JSONParser{}
_ Stage = &RegexpParser{}
_ Stage = &LogfmtParser{}
trueBytes = []byte("true")
errUnexpectedJSONObject = fmt.Errorf("expecting json object(%d), but it is not", jsoniter.ObjectValue)
errMissingCapture = errors.New("at least one named capture must be supplied")
errFoundAllLabels = errors.New("found all required labels")
errLabelDoesNotMatch = errors.New("found a label with a matcher that didn't match")
// the rune error replacement is rejected by Prometheus hence replacing them with space.
removeInvalidUtf = func(r rune) rune {
if r == utf8.RuneError {
return 32 // rune value for space
}
return r
}
)
type JSONParser struct {
prefixBuffer []byte // buffer used to build json keys
lbs *LabelsBuilder
keys internedStringSet
parserHints ParserHint
}
// NewJSONParser creates a log stage that can parse a json log line and add properties as labels.
func NewJSONParser() *JSONParser {
return &JSONParser{
prefixBuffer: make([]byte, 0, 1024),
keys: internedStringSet{},
}
}
func (j *JSONParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
parserHints := lbs.ParserLabelHints()
if parserHints.NoLabels() {
return line, true
}
// reset the state.
j.prefixBuffer = j.prefixBuffer[:0]
j.lbs = lbs
j.parserHints = parserHints
if err := jsonparser.ObjectEach(line, j.parseObject); err != nil {
if errors.Is(err, errFoundAllLabels) {
// Short-circuited
return line, true
}
if errors.Is(err, errLabelDoesNotMatch) {
// one of the label matchers does not match. The whole line can be thrown away
return line, false
}
addErrLabel(errJSON, err, lbs)
return line, true
}
return line, true
}
func (j *JSONParser) parseObject(key, value []byte, dataType jsonparser.ValueType, _ int) error {
var err error
switch dataType {
case jsonparser.String, jsonparser.Number, jsonparser.Boolean:
err = j.parseLabelValue(key, value, dataType)
case jsonparser.Object:
prefixLen := len(j.prefixBuffer)
if ok := j.nextKeyPrefix(key); ok {
err = jsonparser.ObjectEach(value, j.parseObject)
}
// rollback the prefix as we exit the current object.
j.prefixBuffer = j.prefixBuffer[:prefixLen]
return err
}
if j.parserHints.AllRequiredExtracted() {
// Not actually an error. Parsing can be short-circuited
// and this tells jsonparser to stop parsing
return errFoundAllLabels
}
return err
}
// nextKeyPrefix load the next prefix in the buffer and tells if it should be processed based on hints.
func (j *JSONParser) nextKeyPrefix(key []byte) bool {
// first add the spacer if needed.
if len(j.prefixBuffer) != 0 {
j.prefixBuffer = append(j.prefixBuffer, byte(jsonSpacer))
}
j.prefixBuffer = appendSanitized(j.prefixBuffer, key)
return j.lbs.ParserLabelHints().ShouldExtractPrefix(unsafeGetString(j.prefixBuffer))
}
func (j *JSONParser) parseLabelValue(key, value []byte, dataType jsonparser.ValueType) error {
// the first time we use the field as label key.
if len(j.prefixBuffer) == 0 {
key, ok := j.keys.Get(key, func() (string, bool) {
field := sanitizeLabelKey(string(key), true)
if j.lbs.BaseHas(field) {
field = field + duplicateSuffix
}
if !j.lbs.ParserLabelHints().ShouldExtract(field) {
return "", false
}
return field, true
})
if !ok {
return nil
}
j.lbs.Set(ParsedLabel, key, readValue(value, dataType))
if !j.parserHints.ShouldContinueParsingLine(key, j.lbs) {
return errLabelDoesNotMatch
}
return nil
}
// otherwise we build the label key using the buffer
// snapshot the current prefix position
prefixLen := len(j.prefixBuffer)
j.prefixBuffer = append(j.prefixBuffer, byte(jsonSpacer))
j.prefixBuffer = appendSanitized(j.prefixBuffer, key)
keyString, ok := j.keys.Get(j.prefixBuffer, func() (string, bool) {
if j.lbs.BaseHas(string(j.prefixBuffer)) {
j.prefixBuffer = append(j.prefixBuffer, duplicateSuffix...)
}
if !j.parserHints.ShouldExtract(string(j.prefixBuffer)) {
return "", false
}
return string(j.prefixBuffer), true
})
// reset the prefix position
j.prefixBuffer = j.prefixBuffer[:prefixLen]
if !ok {
return nil
}
j.lbs.Set(ParsedLabel, keyString, readValue(value, dataType))
if !j.parserHints.ShouldContinueParsingLine(keyString, j.lbs) {
return errLabelDoesNotMatch
}
return nil
}
func (j *JSONParser) RequiredLabelNames() []string { return []string{} }
func readValue(v []byte, dataType jsonparser.ValueType) string {
switch dataType {
case jsonparser.String:
return unescapeJSONString(v)
case jsonparser.Null:
return ""
case jsonparser.Number:
return string(v)
case jsonparser.Boolean:
if bytes.Equal(v, trueBytes) {
return trueString
}
return falseString
default:
return ""
}
}
func unescapeJSONString(b []byte) string {
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
bU, err := jsonparser.Unescape(b, stackbuf[:])
if err != nil {
return ""
}
res := string(bU)
if strings.ContainsRune(res, utf8.RuneError) {
res = strings.Map(removeInvalidUtf, res)
}
return res
}
type RegexpParser struct {
regex *regexp.Regexp
nameIndex map[int]string
keys internedStringSet
}
// NewRegexpParser creates a new log stage that can extract labels from a log line using a regex expression.
// The regex expression must contains at least one named match. If the regex doesn't match the line is not filtered out.
func NewRegexpParser(re string) (*RegexpParser, error) {
regex, err := regexp.Compile(re)
if err != nil {
return nil, err
}
if regex.NumSubexp() == 0 {
return nil, errMissingCapture
}
nameIndex := map[int]string{}
uniqueNames := map[string]struct{}{}
for i, n := range regex.SubexpNames() {
if n != "" {
if !model.LabelName(n).IsValid() {
return nil, fmt.Errorf("invalid extracted label name '%s'", n)
}
if _, ok := uniqueNames[n]; ok {
return nil, fmt.Errorf("duplicate extracted label name '%s'", n)
}
nameIndex[i] = n
uniqueNames[n] = struct{}{}
}
}
if len(nameIndex) == 0 {
return nil, errMissingCapture
}
return &RegexpParser{
regex: regex,
nameIndex: nameIndex,
keys: internedStringSet{},
}, nil
}
func (r *RegexpParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
parserHints := lbs.ParserLabelHints()
for i, value := range r.regex.FindSubmatch(line) {
if name, ok := r.nameIndex[i]; ok {
key, ok := r.keys.Get(unsafeGetBytes(name), func() (string, bool) {
sanitize := sanitizeLabelKey(name, true)
if len(sanitize) == 0 {
return "", false
}
if lbs.BaseHas(sanitize) {
sanitize = fmt.Sprintf("%s%s", sanitize, duplicateSuffix)
}
if !parserHints.ShouldExtract(sanitize) {
return "", false
}
return sanitize, true
})
if !ok {
continue
}
lbs.Set(ParsedLabel, key, string(value))
if !parserHints.ShouldContinueParsingLine(key, lbs) {
return line, false
}
}
}
return line, true
}
func (r *RegexpParser) RequiredLabelNames() []string { return []string{} }
type LogfmtParser struct {
strict bool
keepEmpty bool
dec *logfmt.Decoder
keys internedStringSet
}
// NewLogfmtParser creates a parser that can extract labels from a logfmt log line.
// Each keyval is extracted into a respective label.
func NewLogfmtParser(strict, keepEmpty bool) *LogfmtParser {
return &LogfmtParser{
strict: strict,
keepEmpty: keepEmpty,
dec: logfmt.NewDecoder(nil),
keys: internedStringSet{},
}
}
func (l *LogfmtParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
parserHints := lbs.ParserLabelHints()
if parserHints.NoLabels() {
return line, true
}
l.dec.Reset(line)
for !l.dec.EOL() {
ok := l.dec.ScanKeyval()
if !ok {
// for strict parsing, do not continue on errs
if l.strict {
break
}
continue
}
key, ok := l.keys.Get(l.dec.Key(), func() (string, bool) {
sanitized := sanitizeLabelKey(string(l.dec.Key()), true)
if len(sanitized) == 0 {
return "", false
}
if lbs.BaseHas(sanitized) {
sanitized = fmt.Sprintf("%s%s", sanitized, duplicateSuffix)
}
if !parserHints.ShouldExtract(sanitized) {
return "", false
}
return sanitized, true
})
if !ok {
continue
}
val := l.dec.Value()
if bytes.ContainsRune(val, utf8.RuneError) {
val = bytes.Map(removeInvalidUtf, val)
}
if !l.keepEmpty && len(val) == 0 {
continue
}
lbs.Set(ParsedLabel, key, string(val))
if !parserHints.ShouldContinueParsingLine(key, lbs) {
return line, false
}
if parserHints.AllRequiredExtracted() {
break
}
}
if l.strict && l.dec.Err() != nil {
addErrLabel(errLogfmt, l.dec.Err(), lbs)
if !parserHints.ShouldContinueParsingLine(logqlmodel.ErrorLabel, lbs) {
return line, false
}
return line, true
}
return line, true
}
func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} }
type PatternParser struct {
matcher *pattern.Matcher
names []string
}
func NewPatternParser(pn string) (*PatternParser, error) {
m, err := pattern.New(pn)
if err != nil {
return nil, err
}
for _, name := range m.Names() {
if !model.LabelName(name).IsValid() {
return nil, fmt.Errorf("invalid capture label name '%s'", name)
}
}
return &PatternParser{
matcher: m,
names: m.Names(),
}, nil
}
func (l *PatternParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
parserHints := lbs.ParserLabelHints()
if parserHints.NoLabels() {
return line, true
}
matches := l.matcher.Matches(line)
names := l.names[:len(matches)]
for i, m := range matches {
name := names[i]
if lbs.BaseHas(name) {
name = name + duplicateSuffix
}
if !parserHints.ShouldExtract(name) {
continue
}
lbs.Set(ParsedLabel, name, string(m))
if !parserHints.ShouldContinueParsingLine(name, lbs) {
return line, false
}
}
return line, true
}
func (l *PatternParser) RequiredLabelNames() []string { return []string{} }
type LogfmtExpressionParser struct {
expressions map[string][]interface{}
dec *logfmt.Decoder
keys internedStringSet
strict bool
}
func NewLogfmtExpressionParser(expressions []LabelExtractionExpr, strict bool) (*LogfmtExpressionParser, error) {
if len(expressions) == 0 {
return nil, fmt.Errorf("no logfmt expression provided")
}
paths := make(map[string][]interface{}, len(expressions))
for _, exp := range expressions {
path, err := logfmt.Parse(exp.Expression, false)
if err != nil {
return nil, fmt.Errorf("cannot parse expression [%s]: %w", exp.Expression, err)
}
if !model.LabelName(exp.Identifier).IsValid() {
return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier)
}
paths[exp.Identifier] = path
}
return &LogfmtExpressionParser{
expressions: paths,
dec: logfmt.NewDecoder(nil),
keys: internedStringSet{},
strict: strict,
}, nil
}
func (l *LogfmtExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
// If there are no expressions, extract common labels
// and add the suffix "_extracted"
if len(l.expressions) == 0 {
return line, false
}
if lbs.ParserLabelHints().NoLabels() {
return line, true
}
// Create a map of every renamed label and its original name
// in order to retrieve it later in the extraction phase
keys := make(map[string]string, len(l.expressions))
for id, paths := range l.expressions {
keys[id] = fmt.Sprintf("%v", paths...)
if !lbs.BaseHas(id) {
lbs.Set(ParsedLabel, id, "")
}
}
l.dec.Reset(line)
var current []byte
for !l.dec.EOL() {
ok := l.dec.ScanKeyval()
if !ok {
// for strict parsing, do not continue on errs
if l.strict {
break
}
continue
}
current = l.dec.Key()
key, ok := l.keys.Get(current, func() (string, bool) {
sanitized := sanitizeLabelKey(string(current), true)
if len(sanitized) == 0 {
return "", false
}
_, alwaysExtract := keys[sanitized]
if !alwaysExtract && !lbs.ParserLabelHints().ShouldExtract(sanitized) {
return "", false
}
return sanitized, true
})
if !ok {
continue
}
val := l.dec.Value()
if bytes.ContainsRune(val, utf8.RuneError) {
val = nil
}
for id, orig := range keys {
if key == orig {
key = id
break
}
}
if _, ok := l.expressions[key]; ok {
if lbs.BaseHas(key) {
key = key + duplicateSuffix
if !lbs.ParserLabelHints().ShouldExtract(key) {
// Don't extract duplicates if we don't have to
break
}
}
lbs.Set(ParsedLabel, key, string(val))
if lbs.ParserLabelHints().AllRequiredExtracted() {
break
}
}
}
if l.strict && l.dec.Err() != nil {
addErrLabel(errLogfmt, l.dec.Err(), lbs)
return line, true
}
return line, true
}
func (l *LogfmtExpressionParser) RequiredLabelNames() []string { return []string{} }
type JSONExpressionParser struct {
ids []string
paths [][]string
keys internedStringSet
}
func NewJSONExpressionParser(expressions []LabelExtractionExpr) (*JSONExpressionParser, error) {
var ids []string
var paths [][]string
for _, exp := range expressions {
path, err := jsonexpr.Parse(exp.Expression, false)
if err != nil {
return nil, fmt.Errorf("cannot parse expression [%s]: %w", exp.Expression, err)
}
if !model.LabelName(exp.Identifier).IsValid() {
return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier)
}
ids = append(ids, exp.Identifier)
paths = append(paths, pathsToString(path))
}
return &JSONExpressionParser{
ids: ids,
paths: paths,
keys: internedStringSet{},
}, nil
}
func pathsToString(paths []interface{}) []string {
stingPaths := make([]string, 0, len(paths))
for _, p := range paths {
switch v := p.(type) {
case int:
stingPaths = append(stingPaths, fmt.Sprintf("[%d]", v))
case string:
stingPaths = append(stingPaths, v)
}
}
return stingPaths
}
func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
if len(line) == 0 || lbs.ParserLabelHints().NoLabels() {
return line, true
}
// Check that the line starts correctly
// the parser will pass an error if other
// parts of the line are malformed
if !isValidJSONStart(line) {
addErrLabel(errJSON, nil, lbs)
return line, true
}
var matches int
jsonparser.EachKey(line, func(idx int, data []byte, typ jsonparser.ValueType, err error) {
if err != nil {
addErrLabel(errJSON, err, lbs)
return
}
identifier := j.ids[idx]
key, _ := j.keys.Get(unsafeGetBytes(identifier), func() (string, bool) {
if lbs.BaseHas(identifier) {
identifier = identifier + duplicateSuffix
}
return identifier, true
})
switch typ {
case jsonparser.Null:
lbs.Set(ParsedLabel, key, "")
case jsonparser.Object:
lbs.Set(ParsedLabel, key, string(data))
default:
lbs.Set(ParsedLabel, key, unescapeJSONString(data))
}
matches++
}, j.paths...)
// Ensure there's a label for every value
if matches < len(j.ids) {
for _, id := range j.ids {
if _, ok := lbs.Get(id); !ok {
lbs.Set(ParsedLabel, id, "")
}
}
}
return line, true
}
func isValidJSONStart(data []byte) bool {
switch data[0] {
case '"', '{', '[':
return true
default:
return false
}
}
func (j *JSONExpressionParser) RequiredLabelNames() []string { return []string{} }
type UnpackParser struct {
lbsBuffer []string
keys internedStringSet
}
// NewUnpackParser creates a new unpack stage.
// The unpack stage will parse a json log line as map[string]string where each key will be translated into labels.
// A special key _entry will also be used to replace the original log line. This is to be used in conjunction with Promtail pack stage.
// see https://grafana.com/docs/loki/latest/clients/promtail/stages/pack/
func NewUnpackParser() *UnpackParser {
return &UnpackParser{
lbsBuffer: make([]string, 0, 16),
keys: internedStringSet{},
}
}
func (UnpackParser) RequiredLabelNames() []string { return []string{} }
func (u *UnpackParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
if len(line) == 0 || lbs.ParserLabelHints().NoLabels() {
return line, true
}
// we only care about object and values.
if line[0] != '{' {
addErrLabel(errJSON, errUnexpectedJSONObject, lbs)
return line, true
}
u.lbsBuffer = u.lbsBuffer[:0]
entry, err := u.unpack(line, lbs)
if err != nil {
if errors.Is(err, errLabelDoesNotMatch) {
return entry, false
}
addErrLabel(errJSON, err, lbs)
return line, true
}
return entry, true
}
func addErrLabel(msg string, err error, lbs *LabelsBuilder) {
lbs.SetErr(msg)
if err != nil {
lbs.SetErrorDetails(err.Error())
}
if lbs.ParserLabelHints().PreserveError() {
lbs.Set(ParsedLabel, logqlmodel.PreserveErrorLabel, "true")
}
}
func (u *UnpackParser) unpack(entry []byte, lbs *LabelsBuilder) ([]byte, error) {
var isPacked bool
err := jsonparser.ObjectEach(entry, func(key, value []byte, typ jsonparser.ValueType, _ int) error {
switch typ {
case jsonparser.String:
if unsafeGetString(key) == logqlmodel.PackedEntryKey {
// Inlined bytes escape to save allocs
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
bU, err := jsonparser.Unescape(value, stackbuf[:])
if err != nil {
return err
}
entry = bU
isPacked = true
return nil
}
key, ok := u.keys.Get(key, func() (string, bool) {
field := string(key)
if lbs.BaseHas(field) {
field = field + duplicateSuffix
}
if !lbs.ParserLabelHints().ShouldExtract(field) {
return "", false
}
return field, true
})
if !ok {
return nil
}
// append to the buffer of labels
u.lbsBuffer = append(u.lbsBuffer, sanitizeLabelKey(key, true), unescapeJSONString(value))
default:
return nil
}
return nil
})
if err != nil {
return nil, err
}
// flush the buffer if we found a packed entry.
if isPacked {
for i := 0; i < len(u.lbsBuffer); i = i + 2 {
lbs.Set(ParsedLabel, u.lbsBuffer[i], u.lbsBuffer[i+1])
if !lbs.ParserLabelHints().ShouldContinueParsingLine(u.lbsBuffer[i], lbs) {
return entry, errLabelDoesNotMatch
}
}
}
return entry, nil
}