Loki: implement decolorize filter (#7602)

Implement a `decolorize` filter that gets rid of ANSI color codes in the
log message.

**Which issue(s) this PR fixes**:
Fixes #7601
pull/6360/head
Max Vorobev 3 years ago committed by GitHub
parent 3bcc7ccd5a
commit c5911d5342
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      CHANGELOG.md
  2. 35
      clients/pkg/logentry/stages/decolorize.go
  3. 52
      clients/pkg/logentry/stages/decolorize_test.go
  4. 6
      clients/pkg/logentry/stages/stage.go
  5. 1
      docs/sources/clients/promtail/stages/_index.md
  6. 39
      docs/sources/clients/promtail/stages/decolorize.md
  7. 10
      docs/sources/logql/log_queries.md
  8. 19
      docs/sources/logql/query_examples.md
  9. 16
      pkg/logql/log/fmt.go
  10. 18
      pkg/logql/log/fmt_test.go
  11. 23
      pkg/logql/syntax/ast.go
  12. 6
      pkg/logql/syntax/expr.y
  13. 1033
      pkg/logql/syntax/expr.y.go
  14. 7
      pkg/logql/syntax/lex.go
  15. 1
      pkg/logql/syntax/lex_test.go
  16. 9
      pkg/logql/syntax/parser_test.go

@ -6,12 +6,16 @@
##### Enhancements
* [7602](https://github.com/grafana/loki/pull/7602) **vmax**: Add decolorize filter to easily parse colored logs.
##### Fixes
##### Changes
#### Promtail
* [7602](https://github.com/grafana/loki/pull/7602) **vmax**: Add decolorize stage to Promtail to easily parse colored logs.
##### Enhancements
##### Fixes

@ -0,0 +1,35 @@
package stages
import (
"github.com/grafana/loki/pkg/logql/log"
)
type decolorizeStage struct{}
func newDecolorizeStage(_ interface{}) (Stage, error) {
return &decolorizeStage{}, nil
}
// Run implements Stage
func (m *decolorizeStage) Run(in chan Entry) chan Entry {
decolorizer, _ := log.NewDecolorizer()
out := make(chan Entry)
go func() {
defer close(out)
for e := range in {
decolorizedLine, _ := decolorizer.Process(
e.Timestamp.Unix(),
[]byte(e.Entry.Line),
nil,
)
e.Entry.Line = string(decolorizedLine)
out <- e
}
}()
return out
}
// Name implements Stage
func (m *decolorizeStage) Name() string {
return StageTypeDecolorize
}

@ -0,0 +1,52 @@
package stages
import (
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
util_log "github.com/grafana/loki/pkg/util/log"
)
// testDecolorizePipeline is a pipeline configuration that contains a single
// decolorize stage with no options.
var testDecolorizePipeline = `
pipeline_stages:
- decolorize:
`
// TestPipeline_Decolorize pushes a single entry through a pipeline built from
// the decolorize configuration and checks the resulting line, for both plain
// and ANSI-colored input.
func TestPipeline_Decolorize(t *testing.T) {
	t.Parallel()

	type testCase struct {
		config        string
		entry         string
		expectedEntry string
	}

	cases := map[string]testCase{
		"successfully run pipeline on non-colored text": {
			config:        testDecolorizePipeline,
			entry:         "sample text",
			expectedEntry: "sample text",
		},
		"successfully run pipeline on colored text": {
			config:        testDecolorizePipeline,
			entry:         "\033[0;32mgreen\033[0m \033[0;31mred\033[0m",
			expectedEntry: "green red",
		},
	}

	for name, tc := range cases {
		// Take a per-iteration copy: the subtest runs in parallel and must not
		// observe the loop variable changing underneath it.
		tc := tc
		t.Run(name, func(t *testing.T) {
			t.Parallel()

			pl, err := NewPipeline(util_log.Logger, loadConfig(tc.config), nil, prometheus.DefaultRegisterer)
			if err != nil {
				t.Fatal(err)
			}

			results := processEntries(pl, newEntry(nil, nil, tc.entry, time.Now()))
			first := results[0]
			assert.Equal(t, tc.expectedEntry, first.Line)
		})
	}
}

@ -36,6 +36,7 @@ const (
StageTypePack = "pack"
StageTypeLabelAllow = "labelallow"
StageTypeStaticLabels = "static_labels"
StageTypeDecolorize = "decolorize"
)
// Processor takes an existing set of labels, timestamp and log entry and returns either a possibly mutated
@ -209,6 +210,11 @@ func New(logger log.Logger, jobName *string, stageType string,
if err != nil {
return nil, err
}
case StageTypeDecolorize:
s, err = newDecolorizeStage(cfg)
if err != nil {
return nil, err
}
default:
return nil, errors.Errorf("Unknown stage type: %s", stageType)
}

@ -20,6 +20,7 @@ Transform stages:
- [template](template/): Use Go templates to modify extracted data.
- [pack](pack/): Packs a log line in a JSON object allowing extracted values and labels to be placed inside the log line.
- [decolorize](decolorize/): Strips ANSI color sequences from the log line.
Action stages:

@ -0,0 +1,39 @@
---
title: decolorize
---
# `decolorize` stage
The `decolorize` stage is a transform stage that lets you strip
ANSI color codes from the log line, thus making it easier to
parse logs further.
There are examples below to help explain.
## Decolorize stage schema
```yaml
decolorize:
# Currently this stage has no configurable options
```
## Examples
The following is an example showing the use of the `decolorize` stage.
Given the pipeline:
```yaml
- decolorize:
```
The stage turns each line that contains color codes into a non-colored one, e.g.
```
[2022-11-04 22:17:57.811] \033[0;32mhttp\033[0m: GET /_health (0 ms) 204
```
is turned into
```
[2022-11-04 22:17:57.811] http: GET /_health (0 ms) 204
```

@ -183,6 +183,16 @@ log stream selectors have been applied.
Line filter expressions support matching IP addresses. See [Matching IP addresses](../ip/) for details.
### Removing color codes
Line filter expressions support stripping ANSI sequences (color codes) from
the line:
```
{job="example"} | decolorize
```
### Label filter expression
Label filter expression allows filtering log line using their original and extracted labels. It can contain multiple predicates.

@ -155,6 +155,25 @@ The result would be:
2020-10-23T20:32:18.068866235Z 624.008132ms traceID = 1980d41501b57b68 {cluster="ops-tools1", job="loki-ops/query-frontend"} |= "query_range"
```
It's possible to strip ANSI sequences from the log line, making it easier
to parse it further:
```
{job="example"} | decolorize
```
This way this log line:
```
[2022-11-04 22:17:57.811] \033[0;32mhttp\033[0m: GET /_health (0 ms) 204
```
turns into:
```
[2022-11-04 22:17:57.811] http: GET /_health (0 ms) 204
```
## Unwrap examples
- Calculate the p99 of the nginx-ingress latency by path:

@ -353,6 +353,22 @@ func trunc(c int, s string) string {
return s
}
// Decolorizer is a stage that removes ANSI escape sequences, such as color
// codes, from log lines. It holds no state.
type Decolorizer struct{}

// ansiPattern is a regular expression selecting ANSI escape sequences,
// courtesy of https://github.com/acarl005/stripansi
const ansiPattern = "[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))"

// ansiRegex is ansiPattern compiled once at package initialization so that
// Process never pays the compilation cost.
var ansiRegex = regexp.MustCompile(ansiPattern)

// NewDecolorizer returns a ready-to-use Decolorizer. The returned error is
// always nil.
func NewDecolorizer() (*Decolorizer, error) {
	d := new(Decolorizer)
	return d, nil
}

// Process returns line with every match of ansiRegex removed. The timestamp
// and labels builder arguments are unused, and the boolean result is always
// true.
func (Decolorizer) Process(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) {
	stripped := ansiRegex.ReplaceAll(line, nil)
	return stripped, true
}

// RequiredLabelNames returns an empty, non-nil slice: the decolorizer does not
// read any labels.
func (Decolorizer) RequiredLabelNames() []string {
	return make([]string, 0)
}
// substring creates a substring of the given string.
//
// If start is < 0, this calls string[:end].

@ -573,3 +573,21 @@ func TestLabelFormatter_RequiredLabelNames(t *testing.T) {
})
}
}
// TestDecolorizer checks that Process leaves plain text untouched and removes
// ANSI color sequences from colored text.
func TestDecolorizer(t *testing.T) {
	decolorizer, _ := NewDecolorizer()

	type testCase struct {
		name     string
		src      []byte
		expected []byte
	}

	cases := []testCase{
		{
			name:     "uncolored text remains the same",
			src:      []byte("sample text"),
			expected: []byte("sample text"),
		},
		{
			name:     "colored text loses color",
			src:      []byte("\033[0;32mgreen\033[0m \033[0;31mred\033[0m"),
			expected: []byte("green red"),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, _ := decolorizer.Process(0, tc.src, nil)
			require.Equal(t, tc.expected, got)
		})
	}
}

@ -409,6 +409,24 @@ func newLineFmtExpr(value string) *LineFmtExpr {
}
}
// DecolorizeExpr is the syntax tree node for the `| decolorize` pipeline
// stage. It carries no parameters.
type DecolorizeExpr struct {
	implicit
}

// newDecolorizeExpr returns an empty DecolorizeExpr.
func newDecolorizeExpr() *DecolorizeExpr {
	return new(DecolorizeExpr)
}

// Shardable always reports true for a decolorize expression.
func (e *DecolorizeExpr) Shardable() bool {
	return true
}

// Stage returns the log stage built by log.NewDecolorizer, together with any
// error it reports.
func (e *DecolorizeExpr) Stage() (log.Stage, error) {
	return log.NewDecolorizer()
}

// String renders the expression as the pipe operator followed by the
// decolorize keyword, separated by a single space.
func (e *DecolorizeExpr) String() string {
	const layout = "%s %s"
	return fmt.Sprintf(layout, OpPipe, OpDecolorize)
}

// Walk calls f once, with the expression itself.
func (e *DecolorizeExpr) Walk(f WalkFn) {
	f(e)
}
// Shardable always reports true for a LineFmtExpr.
func (e *LineFmtExpr) Shardable() bool { return true }
// Walk calls f once, with the expression itself.
func (e *LineFmtExpr) Walk(f WalkFn) { f(e) }
@ -663,8 +681,9 @@ const (
OpParserTypeUnpack = "unpack"
OpParserTypePattern = "pattern"
OpFmtLine = "line_format"
OpFmtLabel = "label_format"
OpFmtLine = "line_format"
OpFmtLabel = "label_format"
OpDecolorize = "decolorize"
OpPipe = "|"
OpUnwrap = "unwrap"

@ -57,6 +57,7 @@ import (
JSONExpression log.JSONExpression
JSONExpressionList []log.JSONExpression
UnwrapExpr *UnwrapExpr
DecolorizeExpr *DecolorizeExpr
OffsetExpr *OffsetExpr
}
@ -96,6 +97,7 @@ import (
%type <LineFilters> lineFilters
%type <LineFilter> lineFilter
%type <LineFormatExpr> lineFormatExpr
%type <DecolorizeExpr> decolorizeExpr
%type <LabelFormatExpr> labelFormatExpr
%type <LabelFormat> labelFormat
%type <LabelsFormat> labelsFormat
@ -115,6 +117,7 @@ import (
BYTES_OVER_TIME BYTES_RATE BOOL JSON REGEXP LOGFMT PIPE LINE_FMT LABEL_FMT UNWRAP AVG_OVER_TIME SUM_OVER_TIME MIN_OVER_TIME
MAX_OVER_TIME STDVAR_OVER_TIME STDDEV_OVER_TIME QUANTILE_OVER_TIME BYTES_CONV DURATION_CONV DURATION_SECONDS_CONV
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME VECTOR LABEL_REPLACE UNPACK OFFSET PATTERN IP ON IGNORING GROUP_LEFT GROUP_RIGHT
DECOLORIZE
// Operators are listed with increasing precedence.
%left <binOp> OR
@ -249,6 +252,7 @@ pipelineStage:
| PIPE jsonExpressionParser { $$ = $2 }
| PIPE labelFilter { $$ = &LabelFilterExpr{LabelFilterer: $2 }}
| PIPE lineFormatExpr { $$ = $2 }
| PIPE decolorizeExpr { $$ = $2 }
| PIPE labelFormatExpr { $$ = $2 }
;
@ -279,6 +283,8 @@ jsonExpressionParser:
lineFormatExpr: LINE_FMT STRING { $$ = newLineFmtExpr($2) };
decolorizeExpr: DECOLORIZE { $$ = newDecolorizeExpr() };
labelFormat:
IDENTIFIER EQ IDENTIFIER { $$ = log.NewRenameLabelFmt($1, $3)}
| IDENTIFIER EQ STRING { $$ = log.NewTemplateLabelFmt($1, $3)}

File diff suppressed because it is too large Load Diff

@ -66,11 +66,12 @@ var tokens = map[string]int{
OpParserTypePattern: PATTERN,
// fmt
OpFmtLabel: LABEL_FMT,
OpFmtLine: LINE_FMT,
OpFmtLabel: LABEL_FMT,
OpFmtLine: LINE_FMT,
// filter functions
OpFilterIP: IP,
OpFilterIP: IP,
OpDecolorize: DECOLORIZE,
}
// functionTokens are tokens that need to be suffixed with parentheses

@ -85,6 +85,7 @@ func TestLex(t *testing.T) {
# |~ "\\w+"
| json`, []int{OPEN_BRACE, IDENTIFIER, EQ, STRING, CLOSE_BRACE, PIPE, JSON}},
{`{foo="bar"} | json code="response.code", param="request.params[0]"`, []int{OPEN_BRACE, IDENTIFIER, EQ, STRING, CLOSE_BRACE, PIPE, JSON, IDENTIFIER, EQ, STRING, COMMA, IDENTIFIER, EQ, STRING}},
{`decolorize`, []int{DECOLORIZE}},
} {
t.Run(tc.input, func(t *testing.T) {
actual := []int{}

@ -43,6 +43,15 @@ func TestParse(t *testing.T) {
},
},
},
{
in: `{ foo = "bar" } | decolorize`,
exp: newPipelineExpr(
newMatcherExpr([]*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}),
MultiStageExpr{
newDecolorizeExpr(),
},
),
},
{
// test [12h] before filter expr
in: `count_over_time({foo="bar"}[12h] |= "error")`,

Loading…
Cancel
Save