fix: prevent fast ascii comparison if char is not letter (#15774)

pull/15784/head^2
Sven Grossmann 12 months ago committed by GitHub
parent 1a9f382e6f
commit 9182addea6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 38
      pkg/logql/log/filter.go
  2. 229
      pkg/logql/log/filter_test.go
  3. 14
      pkg/logql/syntax/ast_test.go

@ -421,6 +421,9 @@ func (l equalFilter) String() string {
}
// newEqualFilter wraps match in an equalFilter and returns it as a
// MatcherFilterer.
//
// When caseInsensitive is true the stored pattern is a lowercased copy of
// match (bytes.ToLower allocates a new slice, so the caller's bytes are left
// untouched); otherwise match is stored as given.
// NOTE(review): presumably the pattern is normalised here once so the per-line
// comparison can rely on a lowercase pattern — confirm against
// equalFilter.Filter.
func newEqualFilter(match []byte, caseInsensitive bool) MatcherFilterer {
	pattern := match
	if caseInsensitive {
		pattern = bytes.ToLower(pattern)
	}
	return equalFilter{pattern, caseInsensitive}
}
@ -441,7 +444,7 @@ func contains(line, substr []byte, caseInsensitive bool) bool {
}
// containsLower verifies if substr is a substring of line, with case insensitive comparison.
// substr is expected to be in lowercase.
// substr MUST be in lowercase before calling this function.
func containsLower(line, substr []byte) bool {
if len(substr) == 0 {
return true
@ -458,7 +461,11 @@ func containsLower(line, substr []byte) bool {
for i <= maxIndex {
// Find potential first byte match
c := line[i]
if c != firstByte && c+'a'-'A' != firstByte && c != firstByte+'a'-'A' {
// Fast path for ASCII - if c is uppercase letter, convert to lowercase
if c >= 'A' && c <= 'Z' {
c += 'a' - 'A'
}
if c != firstByte {
i++
continue
}
@ -472,9 +479,13 @@ func containsLower(line, substr []byte) bool {
c := line[linePos]
s := substr[substrPos]
// Fast ASCII comparison
// Fast path for ASCII
if c < utf8.RuneSelf && s < utf8.RuneSelf {
if c != s && c+'a'-'A' != s && c != s+'a'-'A' {
// Convert line char to lowercase if needed
if c >= 'A' && c <= 'Z' {
c += 'a' - 'A'
}
if c != s {
matched = false
break
}
@ -485,13 +496,28 @@ func containsLower(line, substr []byte) bool {
// Slower Unicode path only when needed
lr, lineSize := utf8.DecodeRune(line[linePos:])
mr, substrSize := utf8.DecodeRune(substr[substrPos:])
if lr == utf8.RuneError && lineSize == 1 {
// Invalid UTF-8, treat as raw bytes
if c >= 'A' && c <= 'Z' {
c += 'a' - 'A'
}
if c != s {
matched = false
break
}
linePos++
substrPos++
continue
}
if lr == utf8.RuneError || mr == utf8.RuneError {
mr, substrSize := utf8.DecodeRune(substr[substrPos:])
if mr == utf8.RuneError && substrSize == 1 {
// Invalid UTF-8 in pattern (shouldn't happen as substr should be valid)
matched = false
break
}
// Compare line rune converted to lowercase with pattern (which is already lowercase)
if unicode.ToLower(lr) != mr {
matched = false
break

@ -220,87 +220,158 @@ func Test_rune(t *testing.T) {
require.True(t, newContainsFilter([]byte("foo"), true).Filter([]byte("foo")))
}
func BenchmarkContainsLower(b *testing.B) {
cases := []struct {
name string
line string
substr string
expected bool
}{
{
name: "short_line_no_match",
line: "this is a short log line",
substr: "missing",
expected: false,
},
{
name: "short_line_with_match",
line: "this is a short log line",
substr: "SHORT",
expected: true,
},
{
name: "long_line_no_match",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "nonexistent",
expected: false,
},
{
name: "long_line_match_start",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "2023",
expected: true,
},
{
name: "long_line_match_middle",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "LEVELS",
expected: true,
},
{
name: "long_line_match_end",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "status",
expected: true,
},
{
name: "short_unicode_line_no_match",
line: "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
substr: "missing",
expected: false,
},
{
name: "short_unicode_line_with_match",
line: "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
substr: "EMOJIS",
expected: true,
},
{
name: "long_unicode_line_no_match",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "nonexistent",
expected: false,
},
{
name: "long_unicode_line_match_start",
line: "2023-06-14T12:34:56.789Z 🚀[МИКРОСервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "микросервис",
expected: true,
},
{
name: "long_unicode_line_match_middle",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "UNICODE",
expected: true,
},
{
name: "long_unicode_line_match_end",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "τέλος",
expected: true,
},
// cases is the table of containsLower inputs shared by Test_containsLower and
// BenchmarkContainsLower. Every substr is already lowercase, because
// containsLower requires a lowercase pattern; any case variation is therefore
// placed in line.
var cases = []struct {
name string // subtest / sub-benchmark name
line string // text that is searched
substr string // lowercase pattern to look for
expected bool // expected result of containsLower(line, substr)
}{
{
name: "short_line_no_match",
line: "this is a short log line",
substr: "missing",
expected: false,
},
// '\\' (0x5C) and '|' (0x7C) differ by exactly 'a'-'A' (0x20). A comparison
// that adds 'a'-'A' without first checking that the byte is a letter would
// wrongly treat them as the same character, so they must not match.
{
name: "short_line_no_match_special_chars",
line: "this contains a \\ character",
substr: "|",
expected: false,
},
// Counterpart of the case above: a literal '|' in the line must still match.
{
name: "short_line_no_match_special_chars_match",
line: "this contains a | character",
substr: "|",
expected: true,
},
{
name: "short_line_with_match",
line: "this is a shorT log line",
substr: "short",
expected: true,
},
{
name: "long_line_no_match",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "nonexistent",
expected: false,
},
{
name: "long_line_match_start",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "2023",
expected: true,
},
{
name: "long_line_match_middle",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, leVelS and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "levels",
expected: true,
},
{
name: "long_line_match_end",
line: "2023-06-14T12:34:56.789Z INFO [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
substr: "status",
expected: true,
},
{
name: "short_unicode_line_no_match",
line: "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
substr: "missing",
expected: false,
},
{
name: "short_unicode_line_with_match",
line: "🌟 Unicode line with eMojiS 🎉 and special chars ñ é ß",
substr: "emojis",
expected: true,
},
{
name: "long_unicode_line_no_match",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "nonexistent",
expected: false,
},
{
name: "long_unicode_line_match_start",
line: "2023-06-14T12:34:56.789Z 🚀[МИКРОСервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "микросервис",
expected: true,
},
{
name: "long_unicode_line_match_middle",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
substr: "unicode",
expected: true,
},
{
name: "long_unicode_line_match_end",
line: "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέلος",
substr: "τέλος",
expected: true,
},
{
name: "utf8_case_insensitive_match_middle",
line: "ΣΑΣ ΓΕΙΑ ΚΟΣΜΕ", // uppercase Greek words; "ΓΕΙΑ" is the middle word
substr: "γεια", // "hello" in Greek lowercase
expected: true,
},
{
name: "utf8_case_insensitive_no_match",
line: "ΣΑΣ ΚΟΣΜΕ", // same uppercase Greek line without "ΓΕΙΑ"
substr: "γεια", // "hello" in Greek lowercase
expected: false,
},
{
name: "empty_substr",
line: "any line",
substr: "",
expected: true,
},
{
name: "empty_line",
line: "",
substr: "something",
expected: false,
},
{
name: "both_empty",
line: "",
substr: "",
expected: true,
},
{
name: "substr_longer_than_line",
line: "short",
substr: "longer than line",
expected: false,
},
{
name: "invalid_utf8_in_line",
line: string([]byte{0xFF, 0xFE, 0xFD}),
substr: "test",
expected: false,
},
{
name: "partial_utf8_match",
line: "Hello 世界", // "Hello World" with CJK characters
substr: "世", // only the first of the two CJK characters
expected: true,
},
}
// Test_containsLower runs containsLower against every entry of the shared
// cases table, one subtest per entry (named after the entry), and requires the
// result to equal the entry's expected value.
func Test_containsLower(t *testing.T) {
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := containsLower([]byte(tc.line), []byte(tc.substr))
			require.Equal(t, tc.expected, got, "line: %s substr: %s", tc.line, tc.substr)
		})
	}
}
func BenchmarkContainsLower(b *testing.B) {
var m bool
for _, c := range cases {
b.Run(c.name, func(b *testing.B) {

@ -586,6 +586,20 @@ func Test_FilterMatcher(t *testing.T) {
},
[]linecheck{{"counter=1", false}, {"counter=0", false}, {"counter=-1", true}, {"counter=-2", true}},
},
{
`{app="foo"} |~ "\\|"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"\\", false}, {"|", true}},
},
{
`{app="foo"} |~ "(?i)\\|"`,
[]*labels.Matcher{
mustNewMatcher(labels.MatchEqual, "app", "foo"),
},
[]linecheck{{"\\", false}, {"|", true}},
},
} {
t.Run(tt.q, func(t *testing.T) {
t.Parallel()

Loading…
Cancel
Save