Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
loki/pkg/logql/log/parser_hints_test.go

286 lines
8.3 KiB

// uses log_test package to avoid circular dependency between log and logql package.
package log_test
import (
"testing"
Loki: Add a limit for the [range] value on range queries (#8343) Signed-off-by: Edward Welch <edward.welch@grafana.com> **What this PR does / why we need it**: Loki does not currently split queries by time to a value smaller than what's in the [range] of a range query. Example ``` sum(rate({job="foo"}[2d])) ``` Imagine now this query being executed over a longer window of a few days with a step of something like 30m. Every step evaluation would query the last [2d] of data. There are use cases where this is desired, specifically if you force the step to match the value in the range, however what is more common is someone accidentally uses `[$__range]` in here instead of `[$__interval]` within Grafana and then sets the query time selector to a large value like 7 days. This PR adds a limit which will fail queries that set the [range] value higher than the configured limit. It's disabled by default. In the future it may be possible for Loki to perform splits within the [range] and remove the need for this limit, but until then this can be an important safeguard in clusters with a lot of data. **Which issue(s) this PR fixes**: Fixes #8746 **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` --------- Signed-off-by: Edward Welch <edward.welch@grafana.com> Co-authored-by: Karsten Jeschkies <karsten.jeschkies@grafana.com> Co-authored-by: Vladyslav Diachenko <82767850+vlad-diachenko@users.noreply.github.com>
3 years ago
"github.com/grafana/loki/pkg/logql/log"
"github.com/prometheus/prometheus/model/labels"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/pkg/logql/syntax"
)
var (
jsonLine = []byte(`{
"remote_user": "foo",
"upstream_addr": "10.0.0.1:80",
"protocol": "HTTP/2.0",
"cluster": "us-east-west",
"request": {
"time": "30.001",
"method": "POST",
"host": "foo.grafana.net",
"uri": "/rpc/v2/stage",
"size": "101"
},
"response": {
"status": 204,
"latency_seconds": "30.001"
}
}`)
packedLine = []byte(`{
"remote_user": "foo",
"upstream_addr": "10.0.0.1:80",
"protocol": "HTTP/2.0",
"cluster": "us-east-west",
"_entry":"foo"
}`)
logfmtLine = []byte(`ts=2021-02-02T14:35:05.983992774Z caller=spanlogger.go:79 org_id=3677 traceID=2e5c7234b8640997 Ingester.TotalReached=15 Ingester.TotalChunksMatched=0 Ingester.TotalBatches=0`)
)
func Test_ParserHints(t *testing.T) {
lbs := labels.FromStrings("app", "nginx", "cluster", "us-central-west")
t.Parallel()
for _, tt := range []struct {
expr string
line []byte
expectOk bool
expectVal float64
expectLbs string
}{
{
`rate({app="nginx"} | json | response_status = 204 [1m])`,
jsonLine,
true,
1.0,
`{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`,
},
{
`sum without (request_host,app,cluster) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
jsonLine,
true,
1.0,
`{cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`,
},
{
`sum by (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
jsonLine,
true,
1.0,
`{app="nginx", request_host="foo.grafana.net"}`,
},
{
`sum(rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`,
jsonLine,
true,
1.0,
`{}`,
},
{
`sum(rate({app="nginx"} | json [1m]))`,
jsonLine,
true,
1.0,
`{}`,
},
{
`sum(rate({app="nginx"} | json | unwrap response_latency_seconds [1m]))`,
jsonLine,
true,
30.001,
`{}`,
},
{
`sum(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`,
jsonLine,
true,
30.001,
`{}`,
},
{
`sum by (request_host,app)(rate({app="nginx"} | json | response_status = 204 and remote_user = "foo" | unwrap response_latency_seconds [1m]))`,
jsonLine,
true,
30.001,
`{app="nginx", request_host="foo.grafana.net"}`,
},
{
`rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m])`,
jsonLine,
true,
30.001,
`{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`,
},
{
`sum without (request_host,app,cluster)(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`,
jsonLine,
true,
30.001,
`{cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`,
},
{
`sum(rate({app="nginx"} | logfmt | org_id=3677 | unwrap Ingester_TotalReached[1m]))`,
logfmtLine,
true,
15.0,
`{}`,
},
{
`sum by (org_id,app) (rate({app="nginx"} | logfmt | org_id=3677 | unwrap Ingester_TotalReached[1m]))`,
logfmtLine,
true,
15.0,
`{app="nginx", org_id="3677"}`,
},
{
`rate({app="nginx"} | logfmt | org_id=3677 | unwrap Ingester_TotalReached[1m])`,
logfmtLine,
true,
15.0,
`{Ingester_TotalBatches="0", Ingester_TotalChunksMatched="0", app="nginx", caller="spanlogger.go:79", cluster="us-central-west", org_id="3677", traceID="2e5c7234b8640997", ts="2021-02-02T14:35:05.983992774Z"}`,
},
{
`sum without (org_id,app,cluster)(rate({app="nginx"} | logfmt | org_id=3677 | unwrap Ingester_TotalReached[1m]))`,
logfmtLine,
true,
15.0,
`{Ingester_TotalBatches="0", Ingester_TotalChunksMatched="0", caller="spanlogger.go:79", traceID="2e5c7234b8640997", ts="2021-02-02T14:35:05.983992774Z"}`,
},
{
`sum(rate({app="nginx"} | json | remote_user="foo" [1m]))`,
jsonLine,
true,
1.0,
`{}`,
},
{
`sum(rate({app="nginx"} | json | nonexistant_field="foo" [1m]))`,
jsonLine,
false,
0,
``,
},
{
`absent_over_time({app="nginx"} | json [1m])`,
jsonLine,
true,
1.0,
`{}`,
},
{
`absent_over_time({app="nginx"} | json | nonexistant_field="foo" [1m])`,
jsonLine,
false,
0,
``,
},
{
`absent_over_time({app="nginx"} | json | remote_user="foo" [1m])`,
jsonLine,
true,
1.0,
`{}`,
},
{
`sum by (cluster_extracted)(count_over_time({app="nginx"} | json | cluster_extracted="us-east-west" [1m]))`,
jsonLine,
true,
1.0,
`{cluster_extracted="us-east-west"}`,
},
{
`sum by (cluster_extracted)(count_over_time({app="nginx"} | unpack | cluster_extracted="us-east-west" [1m]))`,
packedLine,
true,
1.0,
`{cluster_extracted="us-east-west"}`,
},
{
`sum by (cluster_extracted)(count_over_time({app="nginx"} | unpack[1m]))`,
packedLine,
true,
1.0,
`{cluster_extracted="us-east-west"}`,
},
{
`sum(rate({app="nginx"} | unpack | nonexistant_field="foo" [1m]))`,
packedLine,
false,
0,
``,
},
} {
tt := tt
t.Run(tt.expr, func(t *testing.T) {
t.Parallel()
expr, err := syntax.ParseSampleExpr(tt.expr)
require.NoError(t, err)
ex, err := expr.Extractor()
require.NoError(t, err)
v, lbsRes, ok := ex.ForStream(lbs).Process(0, append([]byte{}, tt.line...))
var lbsResString string
if lbsRes != nil {
lbsResString = lbsRes.String()
}
require.Equal(t, tt.expectOk, ok)
require.Equal(t, tt.expectVal, v)
require.Equal(t, tt.expectLbs, lbsResString)
})
}
}
Short circuit parsers (#8724) In many cases we only need a few labels from a line. Because most of our parsers parse lines incrementally, we can stop parsing a line after we have all the labels we want from it. This pr uses `ParserHints` to keep track of the number of extracted labels. It also provides a way for parsers to know when they should stop parsing. Notes: - parsers had inconsistent ordering between the `ShouldExtract` call and adding the `_extracted` label to duplicates. This PR makes appending `_extracted` always happen before `ShouldExtract` to keep counts of what is extracted compared to expected labels consistent. Next Steps: - When the user specifies a query with a grouping containing the `_extracted` label but there is no duplicate between the passed line and labels, short circuiting will not work. I'll address this in a follow up PR. Benchmarks: To try and show a balanced view of what this buys us, this pr picks a label our of the middle of a line. In a best case this might be much better. In the worst case, we have to parse the whole line. ``` benchstat short_circuit_old.txt short_circuit_new.txt name old time/op new time/op delta KeyExtraction/json-8 456ns ± 3% 256ns ± 2% -43.84% (p=0.000 n=9+10) KeyExtraction/logfmt-8 347ns ± 4% 171ns ± 2% -50.86% (p=0.000 n=10+10) KeyExtraction/logfmt-expression-8 552ns ± 2% 368ns ± 2% -33.23% (p=0.000 n=9+10) name old alloc/op new alloc/op delta KeyExtraction/json-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) KeyExtraction/logfmt-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) KeyExtraction/logfmt-expression-8 16.0B ± 0% 16.0B ± 0% ~ (all equal) name old allocs/op new allocs/op delta KeyExtraction/json-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) KeyExtraction/logfmt-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) KeyExtraction/logfmt-expression-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) ``` --------- Co-authored-by: J Stickler <julie.stickler@grafana.com>
3 years ago
func TestRecordingExtractedLabels(t *testing.T) {
Short circuit parsing when label matchers are present (#8890) This PR makes parsers aware of any downstream label-matcher stages at parse time. As labels are parsed, if one has a matcher, the matcher is checked at parse time. If the label does not match it's matcher, parsing is halted on that log line. **ex 1:** consider the log: `foo=1 bar=2 baz=3` And the query `{} | logfmt | bar=3` When `bar` is parsed it is immediately checked against it's matcher. The match fails so we the parser never spends time parsing the rest of the line. **ex 2:** consider the log: `foo=1 baz=3 bletch=4` And the query `{} | logfmt | bar=3` `bar` is never seen in the log so the whole line is parsed. **Benchmarks:** ``` │ parsers__old_2.txt │ parsers__new_3.txt │ │ sec/op │ sec/op vs base │ _Parser/json/inline_stages-8 3413.5n ± 5% 766.4n ± 4% -77.55% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 101.5n ± 6% 103.1n ± 8% ~ (p=0.645 n=10) _Parser/unpack/inline_stages-8 383.8n ± 4% 388.0n ± 9% ~ (p=0.954 n=10) _Parser/unpack-not_json_line/inline_stages-8 13.30n ± 2% 13.11n ± 1% ~ (p=0.247 n=10) _Parser/logfmt/inline_stages-8 2105.5n ± 16% 727.7n ± 4% -65.44% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 4.220µ ± 4% 4.175µ ± 4% ~ (p=0.739 n=10) _Parser/regex_status_digits/inline_stages-8 319.8n ± 5% 326.4n ± 8% ~ (p=0.481 n=10) _Parser/pattern/inline_stages-8 185.2n ± 7% 154.2n ± 3% -16.74% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ B/op │ B/op vs base │ _Parser/json/inline_stages-8 280.00 ± 0% 64.00 ± 0% -77.14% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 80.00 ± 0% 80.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 336.00 ± 0% 74.00 ± 0% -77.98% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 193.0 ± 1% 192.0 ± 1% ~ (p=0.656 n=10) _Parser/regex_status_digits/inline_stages-8 51.00 ± 0% 51.00 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 35.000 ± 0% 3.000 ± 0% -91.43% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ allocs/op │ allocs/op vs base │ _Parser/json/inline_stages-8 18.000 ± 0% 4.000 ± 0% -77.78% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/regex_status_digits/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) ``` --------- Co-authored-by: Owen Diehl <ow.diehl@gmail.com>
3 years ago
p := log.NewParserHint([]string{"1", "2", "3"}, nil, false, true, "", nil)
Short circuit parsers (#8724) In many cases we only need a few labels from a line. Because most of our parsers parse lines incrementally, we can stop parsing a line after we have all the labels we want from it. This pr uses `ParserHints` to keep track of the number of extracted labels. It also provides a way for parsers to know when they should stop parsing. Notes: - parsers had inconsistent ordering between the `ShouldExtract` call and adding the `_extracted` label to duplicates. This PR makes appending `_extracted` always happen before `ShouldExtract` to keep counts of what is extracted compared to expected labels consistent. Next Steps: - When the user specifies a query with a grouping containing the `_extracted` label but there is no duplicate between the passed line and labels, short circuiting will not work. I'll address this in a follow up PR. Benchmarks: To try and show a balanced view of what this buys us, this pr picks a label our of the middle of a line. In a best case this might be much better. In the worst case, we have to parse the whole line. ``` benchstat short_circuit_old.txt short_circuit_new.txt name old time/op new time/op delta KeyExtraction/json-8 456ns ± 3% 256ns ± 2% -43.84% (p=0.000 n=9+10) KeyExtraction/logfmt-8 347ns ± 4% 171ns ± 2% -50.86% (p=0.000 n=10+10) KeyExtraction/logfmt-expression-8 552ns ± 2% 368ns ± 2% -33.23% (p=0.000 n=9+10) name old alloc/op new alloc/op delta KeyExtraction/json-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) KeyExtraction/logfmt-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) KeyExtraction/logfmt-expression-8 16.0B ± 0% 16.0B ± 0% ~ (all equal) name old allocs/op new allocs/op delta KeyExtraction/json-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) KeyExtraction/logfmt-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) KeyExtraction/logfmt-expression-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) ``` --------- Co-authored-by: J Stickler <julie.stickler@grafana.com>
3 years ago
p.RecordExtracted("1")
p.RecordExtracted("2")
require.False(t, p.AllRequiredExtracted())
require.False(t, p.NoLabels())
p.RecordExtracted("3")
require.True(t, p.AllRequiredExtracted())
require.True(t, p.NoLabels())
p.Reset()
require.False(t, p.AllRequiredExtracted())
require.False(t, p.NoLabels())
}
Short circuit parsing when label matchers are present (#8890) This PR makes parsers aware of any downstream label-matcher stages at parse time. As labels are parsed, if one has a matcher, the matcher is checked at parse time. If the label does not match it's matcher, parsing is halted on that log line. **ex 1:** consider the log: `foo=1 bar=2 baz=3` And the query `{} | logfmt | bar=3` When `bar` is parsed it is immediately checked against it's matcher. The match fails so we the parser never spends time parsing the rest of the line. **ex 2:** consider the log: `foo=1 baz=3 bletch=4` And the query `{} | logfmt | bar=3` `bar` is never seen in the log so the whole line is parsed. **Benchmarks:** ``` │ parsers__old_2.txt │ parsers__new_3.txt │ │ sec/op │ sec/op vs base │ _Parser/json/inline_stages-8 3413.5n ± 5% 766.4n ± 4% -77.55% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 101.5n ± 6% 103.1n ± 8% ~ (p=0.645 n=10) _Parser/unpack/inline_stages-8 383.8n ± 4% 388.0n ± 9% ~ (p=0.954 n=10) _Parser/unpack-not_json_line/inline_stages-8 13.30n ± 2% 13.11n ± 1% ~ (p=0.247 n=10) _Parser/logfmt/inline_stages-8 2105.5n ± 16% 727.7n ± 4% -65.44% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 4.220µ ± 4% 4.175µ ± 4% ~ (p=0.739 n=10) _Parser/regex_status_digits/inline_stages-8 319.8n ± 5% 326.4n ± 8% ~ (p=0.481 n=10) _Parser/pattern/inline_stages-8 185.2n ± 7% 154.2n ± 3% -16.74% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ B/op │ B/op vs base │ _Parser/json/inline_stages-8 280.00 ± 0% 64.00 ± 0% -77.14% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 80.00 ± 0% 80.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 336.00 ± 0% 74.00 ± 0% -77.98% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 193.0 ± 1% 192.0 ± 1% ~ (p=0.656 n=10) _Parser/regex_status_digits/inline_stages-8 51.00 ± 0% 51.00 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 35.000 ± 0% 3.000 ± 0% -91.43% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ allocs/op │ allocs/op vs base │ _Parser/json/inline_stages-8 18.000 ± 0% 4.000 ± 0% -77.78% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/regex_status_digits/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) ``` --------- Co-authored-by: Owen Diehl <ow.diehl@gmail.com>
3 years ago
func TestLabelFiltersInParseHints(t *testing.T) {
t.Run("it rejects the line when label matchers don't match the label", func(t *testing.T) {
s := []log.Stage{log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "protocol", "nothing"))}
h := log.NewParserHint(nil, nil, true, true, "metric", s)
lb := log.NewBaseLabelsBuilder().ForLabels(labels.FromStrings("protocol", "HTTP/2.0"), 0)
Short circuit parsing when label matchers are present (#8890) This PR makes parsers aware of any downstream label-matcher stages at parse time. As labels are parsed, if one has a matcher, the matcher is checked at parse time. If the label does not match it's matcher, parsing is halted on that log line. **ex 1:** consider the log: `foo=1 bar=2 baz=3` And the query `{} | logfmt | bar=3` When `bar` is parsed it is immediately checked against it's matcher. The match fails so we the parser never spends time parsing the rest of the line. **ex 2:** consider the log: `foo=1 baz=3 bletch=4` And the query `{} | logfmt | bar=3` `bar` is never seen in the log so the whole line is parsed. **Benchmarks:** ``` │ parsers__old_2.txt │ parsers__new_3.txt │ │ sec/op │ sec/op vs base │ _Parser/json/inline_stages-8 3413.5n ± 5% 766.4n ± 4% -77.55% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 101.5n ± 6% 103.1n ± 8% ~ (p=0.645 n=10) _Parser/unpack/inline_stages-8 383.8n ± 4% 388.0n ± 9% ~ (p=0.954 n=10) _Parser/unpack-not_json_line/inline_stages-8 13.30n ± 2% 13.11n ± 1% ~ (p=0.247 n=10) _Parser/logfmt/inline_stages-8 2105.5n ± 16% 727.7n ± 4% -65.44% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 4.220µ ± 4% 4.175µ ± 4% ~ (p=0.739 n=10) _Parser/regex_status_digits/inline_stages-8 319.8n ± 5% 326.4n ± 8% ~ (p=0.481 n=10) _Parser/pattern/inline_stages-8 185.2n ± 7% 154.2n ± 3% -16.74% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ B/op │ B/op vs base │ _Parser/json/inline_stages-8 280.00 ± 0% 64.00 ± 0% -77.14% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 80.00 ± 0% 80.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 336.00 ± 0% 74.00 ± 0% -77.98% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 193.0 ± 1% 192.0 ± 1% ~ (p=0.656 n=10) _Parser/regex_status_digits/inline_stages-8 51.00 ± 0% 51.00 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 35.000 ± 0% 3.000 ± 0% -91.43% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ allocs/op │ allocs/op vs base │ _Parser/json/inline_stages-8 18.000 ± 0% 4.000 ± 0% -77.78% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/regex_status_digits/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) ``` --------- Co-authored-by: Owen Diehl <ow.diehl@gmail.com>
3 years ago
require.False(t, h.ShouldContinueParsingLine("protocol", lb))
})
t.Run("it returns true when the label doesn't have a matcher", func(t *testing.T) {
s := []log.Stage{log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "protocol", "nothing"))}
h := log.NewParserHint(nil, nil, true, true, "metric", s)
lb := log.NewBaseLabelsBuilder().ForLabels(labels.FromStrings("response", "200"), 0)
Short circuit parsing when label matchers are present (#8890) This PR makes parsers aware of any downstream label-matcher stages at parse time. As labels are parsed, if one has a matcher, the matcher is checked at parse time. If the label does not match it's matcher, parsing is halted on that log line. **ex 1:** consider the log: `foo=1 bar=2 baz=3` And the query `{} | logfmt | bar=3` When `bar` is parsed it is immediately checked against it's matcher. The match fails so we the parser never spends time parsing the rest of the line. **ex 2:** consider the log: `foo=1 baz=3 bletch=4` And the query `{} | logfmt | bar=3` `bar` is never seen in the log so the whole line is parsed. **Benchmarks:** ``` │ parsers__old_2.txt │ parsers__new_3.txt │ │ sec/op │ sec/op vs base │ _Parser/json/inline_stages-8 3413.5n ± 5% 766.4n ± 4% -77.55% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 101.5n ± 6% 103.1n ± 8% ~ (p=0.645 n=10) _Parser/unpack/inline_stages-8 383.8n ± 4% 388.0n ± 9% ~ (p=0.954 n=10) _Parser/unpack-not_json_line/inline_stages-8 13.30n ± 2% 13.11n ± 1% ~ (p=0.247 n=10) _Parser/logfmt/inline_stages-8 2105.5n ± 16% 727.7n ± 4% -65.44% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 4.220µ ± 4% 4.175µ ± 4% ~ (p=0.739 n=10) _Parser/regex_status_digits/inline_stages-8 319.8n ± 5% 326.4n ± 8% ~ (p=0.481 n=10) _Parser/pattern/inline_stages-8 185.2n ± 7% 154.2n ± 3% -16.74% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ B/op │ B/op vs base │ _Parser/json/inline_stages-8 280.00 ± 0% 64.00 ± 0% -77.14% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 80.00 ± 0% 80.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 336.00 ± 0% 74.00 ± 0% -77.98% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 193.0 ± 1% 192.0 ± 1% ~ (p=0.656 n=10) _Parser/regex_status_digits/inline_stages-8 51.00 ± 0% 51.00 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 35.000 ± 0% 3.000 ± 0% -91.43% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ allocs/op │ allocs/op vs base │ _Parser/json/inline_stages-8 18.000 ± 0% 4.000 ± 0% -77.78% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/regex_status_digits/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) ``` --------- Co-authored-by: Owen Diehl <ow.diehl@gmail.com>
3 years ago
require.True(t, h.ShouldContinueParsingLine("response", lb))
})
t.Run("it ignores BinaryMatchers", func(t *testing.T) {
s := []log.Stage{
log.ReduceAndLabelFilter([]log.LabelFilterer{
log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "protocol", "nothing")),
log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "protocol", "something")),
}),
}
h := log.NewParserHint(nil, nil, true, true, "metric", s)
lb := log.NewBaseLabelsBuilder().ForLabels(labels.FromStrings("protocol", "HTTP/2.0"), 0)
Short circuit parsing when label matchers are present (#8890) This PR makes parsers aware of any downstream label-matcher stages at parse time. As labels are parsed, if one has a matcher, the matcher is checked at parse time. If the label does not match it's matcher, parsing is halted on that log line. **ex 1:** consider the log: `foo=1 bar=2 baz=3` And the query `{} | logfmt | bar=3` When `bar` is parsed it is immediately checked against it's matcher. The match fails so we the parser never spends time parsing the rest of the line. **ex 2:** consider the log: `foo=1 baz=3 bletch=4` And the query `{} | logfmt | bar=3` `bar` is never seen in the log so the whole line is parsed. **Benchmarks:** ``` │ parsers__old_2.txt │ parsers__new_3.txt │ │ sec/op │ sec/op vs base │ _Parser/json/inline_stages-8 3413.5n ± 5% 766.4n ± 4% -77.55% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 101.5n ± 6% 103.1n ± 8% ~ (p=0.645 n=10) _Parser/unpack/inline_stages-8 383.8n ± 4% 388.0n ± 9% ~ (p=0.954 n=10) _Parser/unpack-not_json_line/inline_stages-8 13.30n ± 2% 13.11n ± 1% ~ (p=0.247 n=10) _Parser/logfmt/inline_stages-8 2105.5n ± 16% 727.7n ± 4% -65.44% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 4.220µ ± 4% 4.175µ ± 4% ~ (p=0.739 n=10) _Parser/regex_status_digits/inline_stages-8 319.8n ± 5% 326.4n ± 8% ~ (p=0.481 n=10) _Parser/pattern/inline_stages-8 185.2n ± 7% 154.2n ± 3% -16.74% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ B/op │ B/op vs base │ _Parser/json/inline_stages-8 280.00 ± 0% 64.00 ± 0% -77.14% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 80.00 ± 0% 80.00 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 336.00 ± 0% 74.00 ± 0% -77.98% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 193.0 ± 1% 192.0 ± 1% ~ (p=0.656 n=10) _Parser/regex_status_digits/inline_stages-8 51.00 ± 0% 51.00 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 35.000 ± 0% 3.000 ± 0% -91.43% (p=0.000 n=10) │ parsers__old_2.txt │ parsers__new_3.txt │ │ allocs/op │ allocs/op vs base │ _Parser/json/inline_stages-8 18.000 ± 0% 4.000 ± 0% -77.78% (p=0.000 n=10) _Parser/jsonParser-not_json_line/inline_stages-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack/inline_stages-8 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) _Parser/unpack-not_json_line/inline_stages-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) _Parser/logfmt/inline_stages-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) _Parser/regex_greedy/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/regex_status_digits/inline_stages-8 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) _Parser/pattern/inline_stages-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) ``` --------- Co-authored-by: Owen Diehl <ow.diehl@gmail.com>
3 years ago
require.True(t, h.ShouldContinueParsingLine("protocol", lb))
})
}