chore(storage/bloom): support simplifiable regexp matchers (#14622)

This adds support for basic regexps which can be simplified into a sequence of
OR matchers, such as:

* `key=~"value"` becomes `key="value"`.
* `key=~"value1|value2"` becomes `key="value1" or key="value2"`.
* `key=~".+"` checks for the presence of `key`. This is currently the only way
  to check if a key exists.

Only the cases above are "officially" supported. However, we technically
support basic concatenations and character classes due to how regexp/syntax
parses and simplifies expressions such as `value1|value2` into `value[12]`.
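
For illustration, a minimal standalone sketch (not part of this change) of that
factoring, using the standard library `regexp/syntax`, which mirrors the
Parse/Simplify API of the vendored `github.com/grafana/regexp/syntax` package
used by the extractor:

```go
package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	// Parse with the same flags the extractor uses (Perl syntax).
	re, err := syntax.Parse(`value1|value2`, syntax.Perl)
	if err != nil {
		panic(err)
	}

	// The parser factors out the shared "value" prefix and Simplify keeps
	// that shape, leaving a concatenation of the literal "value" and a
	// character class covering '1' and '2' (printed as value[1-2]).
	fmt.Println(re.Simplify().String())
}
```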

To prevent unbounded cardinality, we limit regexp expansion to 200 matchers;
otherwise a regexp like `value[0-9][0-9][0-9][0-9]` would expand into 10,000
matchers (too many!).

Closes grafana/loki-private#1106.

Co-authored-by: J Stickler <julie.stickler@grafana.com>
Robert Fratto 7 months ago committed by GitHub
parent 7b53f20f70
commit 8eca826795
6 changed files:

  1. docs/sources/query/query_accceleration.md (5)
  2. pkg/bloomgateway/processor_test.go (6)
  3. pkg/storage/bloom/v1/ast_extractor.go (202)
  4. pkg/storage/bloom/v1/ast_extractor_test.go (157)
  5. pkg/storage/bloom/v1/bloom_tester.go (83)
  6. pkg/storage/bloom/v1/bloom_tester_test.go (10)

@ -26,6 +26,11 @@ If [bloom filters][] are enabled, you can write LogQL queries using [structured
Queries will be accelerated for any [label filter expression][] that satisfies _all_ of the following criteria:
* The label filter expression uses **string equality**, such as `| key="value"`.
* `or` and `and` operators can be used to match multiple values, such as `| detected_level="error" or detected_level="warn"`.
* _Basic_ regular expressions are automatically simplified into a supported expression:
  * `| key=~"value"` is converted to `| key="value"`.
  * `| key=~"value1|value2"` is converted to `| key="value1" or key="value2"`.
  * `| key=~".+"` checks for existence of `key`. `.*` is not supported.
* The label filter expression is querying for structured metadata and not a stream label.
* The label filter expression is placed before any [parser expression][], [labels format expression][], [drop labels expression][], or [keep labels expression][].

@ -141,7 +141,7 @@ func TestProcessor(t *testing.T) {
}
matchers := []v1.LabelMatcher{
v1.PlainLabelMatcher{
v1.KeyValueMatcher{
Key: "trace_id",
Value: "nomatch",
},
@ -191,7 +191,7 @@ func TestProcessor(t *testing.T) {
day: config.NewDayTime(truncateDay(now)),
}
matchers := []v1.LabelMatcher{
v1.PlainLabelMatcher{
v1.KeyValueMatcher{
Key: "trace_id",
Value: "nomatch",
},
@ -238,7 +238,7 @@ func TestProcessor(t *testing.T) {
day: config.NewDayTime(truncateDay(now)),
}
matchers := []v1.LabelMatcher{
v1.PlainLabelMatcher{
v1.KeyValueMatcher{
Key: "trace_id",
Value: "nomatch",
},

@ -1,12 +1,24 @@
package v1
import (
regexsyn "github.com/grafana/regexp/syntax"
"github.com/prometheus/prometheus/model/labels"
"github.com/grafana/loki/v3/pkg/logql/log"
"github.com/grafana/loki/v3/pkg/logql/syntax"
"github.com/grafana/loki/v3/pkg/util"
)
// Simplifiable regexp expressions can quickly expand into very high
// cardinality; we limit the number of matchers to prevent this. However,
// since bloom tests are relatively cheap to test, we can afford to be a little
// generous while still preventing excessive cardinality.
//
// For example, the regex `[0-9]` expands to 10 matchers (0, 1, .. 9), while
// `[0-9][0-9][0-9]` expands to 1000 matchers (000, 001, .., 999).
const maxRegexMatchers = 200
// LabelMatcher represents bloom tests for key-value pairs, mapped from
// LabelFilterExprs from the AST.
type LabelMatcher interface{ isLabelMatcher() }
@ -15,9 +27,13 @@ type LabelMatcher interface{ isLabelMatcher() }
// mapped. Bloom tests for UnsupportedLabelMatchers must always pass.
type UnsupportedLabelMatcher struct{}
// PlainLabelMatcher represents a direct key-value matcher. Bloom tests
// must only pass if the key-value pair exists in the bloom.
type PlainLabelMatcher struct{ Key, Value string }
// KeyValueMatcher represents a direct key-value matcher. Bloom tests must only
// pass if the key-value pair exists in the bloom.
type KeyValueMatcher struct{ Key, Value string }
// KeyMatcher represents a key matcher. Bloom tests must only pass if the key
// exists in the bloom.
type KeyMatcher struct{ Key string }
// OrLabelMatcher represents a logical OR test. Bloom tests must only pass if
// one of the Left or Right label matcher bloom tests pass.
@ -54,21 +70,27 @@ func buildLabelMatcher(filter log.LabelFilterer) LabelMatcher {
switch filter := filter.(type) {
case *log.LineFilterLabelFilter:
if filter.Type != labels.MatchEqual {
return UnsupportedLabelMatcher{}
if filter.Type == labels.MatchEqual {
return KeyValueMatcher{
Key: filter.Name,
Value: filter.Value,
}
} else if filter.Type == labels.MatchRegexp {
reg, err := regexsyn.Parse(filter.Value, regexsyn.Perl)
if err != nil {
return UnsupportedLabelMatcher{}
}
return buildSimplifiedRegexMatcher(filter.Name, reg.Simplify())
}
return PlainLabelMatcher{
Key: filter.Name,
Value: filter.Value,
}
return UnsupportedLabelMatcher{}
case *log.StringLabelFilter:
if filter.Type != labels.MatchEqual {
return UnsupportedLabelMatcher{}
}
return PlainLabelMatcher{
return KeyValueMatcher{
Key: filter.Name,
Value: filter.Value,
}
@ -89,11 +111,169 @@ func buildLabelMatcher(filter log.LabelFilterer) LabelMatcher {
}
}
// buildSimplifiedRegexMatcher builds a simplified label matcher from a regex.
// reg may be mutated.
func buildSimplifiedRegexMatcher(key string, reg *regexsyn.Regexp) LabelMatcher {
switch reg.Op {
case regexsyn.OpAlternate:
util.ClearCapture(reg)
left := buildSimplifiedRegexMatcher(key, reg.Sub[0])
if len(reg.Sub) == 1 {
// This shouldn't be possible (even `warn|` has two subexpressions, where
// the latter matches an empty string), but we have a length check here
// anyway just to avoid a potential panic.
return left
}
for _, sub := range reg.Sub[1:] {
right := buildSimplifiedRegexMatcher(key, sub)
left = OrLabelMatcher{Left: left, Right: right}
}
return left
case regexsyn.OpConcat:
// OpConcat checks for the concatenation of two or more subexpressions. For
// example, value1|value2 simplifies to value[12], with the two
// subexpressions value and [12].
//
// We expand subexpressions back out into full matchers where possible, so
// value[12] becomes value1 OR value2, and value[1-9] becomes value1 OR
// value2 .. OR value9.
util.ClearCapture(reg)
matchers, ok := expandSubexpr(reg)
if !ok || len(matchers) == 0 {
return UnsupportedLabelMatcher{}
}
var left LabelMatcher = KeyValueMatcher{Key: key, Value: matchers[0]}
for _, matcher := range matchers[1:] {
right := KeyValueMatcher{Key: key, Value: matcher}
left = OrLabelMatcher{Left: left, Right: right}
}
return left
case regexsyn.OpCapture:
util.ClearCapture(reg)
return buildSimplifiedRegexMatcher(key, reg)
case regexsyn.OpLiteral:
return KeyValueMatcher{
Key: key,
Value: string(reg.Rune),
}
case regexsyn.OpPlus:
if reg.Sub[0].Op == regexsyn.OpAnyChar || reg.Sub[0].Op == regexsyn.OpAnyCharNotNL { // .+
return KeyMatcher{Key: key}
}
return UnsupportedLabelMatcher{}
default:
return UnsupportedLabelMatcher{}
}
}
func expandSubexpr(reg *regexsyn.Regexp) (prefixes []string, ok bool) {
switch reg.Op {
case regexsyn.OpAlternate:
util.ClearCapture(reg)
for _, sub := range reg.Sub {
subPrefixes, ok := expandSubexpr(sub)
if !ok {
return nil, false
} else if len(prefixes)+len(subPrefixes) > maxRegexMatchers {
return nil, false
}
prefixes = append(prefixes, subPrefixes...)
}
return prefixes, true
case regexsyn.OpCharClass:
// OpCharClass stores ranges of characters, so [12] is stored as the rune
// range []rune{'1', '2'}, while [15] is represented as []rune{'1', '1', '5',
// '5'}.
//
// To expand OpCharClass, we iterate over each pair of runes.
if len(reg.Rune)%2 != 0 {
// Invalid regexp; sequences should be even.
return nil, false
}
for i := 0; i < len(reg.Rune); i += 2 {
start, end := reg.Rune[i+0], reg.Rune[i+1]
for r := start; r <= end; r++ {
prefixes = append(prefixes, string(r))
if len(prefixes) > maxRegexMatchers {
return nil, false
}
}
}
return prefixes, true
case regexsyn.OpConcat:
if len(reg.Sub) == 0 {
return nil, false
}
// We get the prefixes for each subexpression and then iteratively combine
// them together.
//
// For the regexp [12][34]value (which concatenates [12], [34], and value):
//
// 1. We get the prefixes for [12], which are 1 and 2.
// 2. We get the prefixes for [34], which are 3 and 4.
// 3. We add the prefixes together to get 13, 14, 23, and 24.
// 4. We get the prefixes for value, which is value.
// 5. Finally, we add the prefixes together to get 13value, 14value, 23value, and 24value.
curPrefixes, ok := expandSubexpr(reg.Sub[0])
if !ok {
return nil, false
}
for _, sub := range reg.Sub[1:] {
subPrefixes, ok := expandSubexpr(sub)
if !ok {
return nil, false
} else if len(curPrefixes)*len(subPrefixes) > maxRegexMatchers {
return nil, false
}
newPrefixes := make([]string, 0, len(curPrefixes)*len(subPrefixes))
for _, curPrefix := range curPrefixes {
for _, subPrefix := range subPrefixes {
newPrefixes = append(newPrefixes, curPrefix+subPrefix)
}
}
curPrefixes = newPrefixes
}
return curPrefixes, true
case regexsyn.OpCapture:
util.ClearCapture(reg)
return expandSubexpr(reg)
case regexsyn.OpLiteral:
prefixes = append(prefixes, string(reg.Rune))
return prefixes, true
default:
return nil, false
}
}
//
// Implement marker types:
//
func (UnsupportedLabelMatcher) isLabelMatcher() {}
func (PlainLabelMatcher) isLabelMatcher() {}
func (KeyValueMatcher) isLabelMatcher() {}
func (KeyMatcher) isLabelMatcher() {}
func (OrLabelMatcher) isLabelMatcher() {}
func (AndLabelMatcher) isLabelMatcher() {}

@ -20,7 +20,7 @@ func TestExtractLabelMatchers(t *testing.T) {
name: "basic label matcher",
input: `{app="foo"} | key="value"`,
expect: []v1.LabelMatcher{
v1.PlainLabelMatcher{Key: "key", Value: "value"},
v1.KeyValueMatcher{Key: "key", Value: "value"},
},
},
@ -29,8 +29,8 @@ func TestExtractLabelMatchers(t *testing.T) {
input: `{app="foo"} | key1="value1" or key2="value2"`,
expect: []v1.LabelMatcher{
v1.OrLabelMatcher{
Left: v1.PlainLabelMatcher{Key: "key1", Value: "value1"},
Right: v1.PlainLabelMatcher{Key: "key2", Value: "value2"},
Left: v1.KeyValueMatcher{Key: "key1", Value: "value1"},
Right: v1.KeyValueMatcher{Key: "key2", Value: "value2"},
},
},
},
@ -40,8 +40,8 @@ func TestExtractLabelMatchers(t *testing.T) {
input: `{app="foo"} | key1="value1" and key2="value2"`,
expect: []v1.LabelMatcher{
v1.AndLabelMatcher{
Left: v1.PlainLabelMatcher{Key: "key1", Value: "value1"},
Right: v1.PlainLabelMatcher{Key: "key2", Value: "value2"},
Left: v1.KeyValueMatcher{Key: "key1", Value: "value1"},
Right: v1.KeyValueMatcher{Key: "key2", Value: "value2"},
},
},
},
@ -50,14 +50,136 @@ func TestExtractLabelMatchers(t *testing.T) {
name: "multiple label matchers",
input: `{app="foo"} | key1="value1" | key2="value2"`,
expect: []v1.LabelMatcher{
v1.PlainLabelMatcher{Key: "key1", Value: "value1"},
v1.PlainLabelMatcher{Key: "key2", Value: "value2"},
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
v1.KeyValueMatcher{Key: "key2", Value: "value2"},
},
},
{
name: "unsupported label matchers",
name: "basic regex matcher",
input: `{app="foo"} | key1=~"value1"`,
expect: []v1.LabelMatcher{
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
},
},
{
name: "regex matcher short", // simplifies to value[15].
input: `{app="foo"} | key1=~"value1|value5"`,
expect: []v1.LabelMatcher{
v1.OrLabelMatcher{
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
v1.KeyValueMatcher{Key: "key1", Value: "value5"},
},
},
},
{
name: "regex matcher range",
input: `{app="foo"} | key1=~"value[0-9]"`,
expect: []v1.LabelMatcher{
buildOrMatchers(
v1.KeyValueMatcher{Key: "key1", Value: "value0"},
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
v1.KeyValueMatcher{Key: "key1", Value: "value2"},
v1.KeyValueMatcher{Key: "key1", Value: "value3"},
v1.KeyValueMatcher{Key: "key1", Value: "value4"},
v1.KeyValueMatcher{Key: "key1", Value: "value5"},
v1.KeyValueMatcher{Key: "key1", Value: "value6"},
v1.KeyValueMatcher{Key: "key1", Value: "value7"},
v1.KeyValueMatcher{Key: "key1", Value: "value8"},
v1.KeyValueMatcher{Key: "key1", Value: "value9"},
),
},
},
{
name: "regex matcher ignore high cardinality",
input: `{app="foo"} | key1=~"value[0-9][0-9][0-9]"`, // This would expand to 1000 matchers. Too many!
expect: []v1.LabelMatcher{
v1.UnsupportedLabelMatcher{},
},
},
{
name: "regex matcher",
input: `{app="foo"} | key1=~"value123|value456"`,
expect: []v1.LabelMatcher{
v1.OrLabelMatcher{
v1.KeyValueMatcher{Key: "key1", Value: "value123"},
v1.KeyValueMatcher{Key: "key1", Value: "value456"},
},
},
},
{
name: "regex multiple expands",
input: `{app="foo"} | detected_level=~"debug|info|warn|error"`,
expect: []v1.LabelMatcher{
buildOrMatchers(
v1.KeyValueMatcher{Key: "detected_level", Value: "debug"},
v1.KeyValueMatcher{Key: "detected_level", Value: "info"},
v1.KeyValueMatcher{Key: "detected_level", Value: "warn"},
v1.KeyValueMatcher{Key: "detected_level", Value: "error"},
),
},
},
{
name: "regex matcher with ignored capture groups",
input: `{app="foo"} | key1=~"value1|(value2)"`,
expect: []v1.LabelMatcher{
v1.OrLabelMatcher{
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
v1.KeyValueMatcher{Key: "key1", Value: "value2"},
},
},
},
{
name: "advanced regex matcher",
input: `{app="foo"} | key1=~"(warn|info[0-3])"`,
expect: []v1.LabelMatcher{
v1.OrLabelMatcher{
v1.KeyValueMatcher{Key: "key1", Value: "warn"},
buildOrMatchers(
v1.KeyValueMatcher{Key: "key1", Value: "info0"},
v1.KeyValueMatcher{Key: "key1", Value: "info1"},
v1.KeyValueMatcher{Key: "key1", Value: "info2"},
v1.KeyValueMatcher{Key: "key1", Value: "info3"},
),
},
},
},
{
name: "regex .+ matcher",
input: `{app="foo"} | key1=~".+"`,
expect: []v1.LabelMatcher{
v1.KeyMatcher{Key: "key1"},
},
},
{
// This should also be unsupported for suffix or substring regexes.
name: "regex .+ prefix matcher",
input: `{app="foo"} | key1=~".+foo"`,
expect: []v1.LabelMatcher{
v1.UnsupportedLabelMatcher{},
},
},
{
name: "regex .* matcher",
input: `{app="foo"} | key1=~".*"`,
expect: []v1.LabelMatcher{
v1.UnsupportedLabelMatcher{},
},
},
{
name: "unsupported label matchers",
input: `{app="foo"} | key1!="value1"`,
expect: []v1.LabelMatcher{
v1.UnsupportedLabelMatcher{},
},
@ -73,6 +195,23 @@ func TestExtractLabelMatchers(t *testing.T) {
}
}
func buildOrMatchers(matchers ...v1.LabelMatcher) v1.LabelMatcher {
if len(matchers) == 1 {
return matchers[0]
}
left := matchers[0]
for _, right := range matchers[1:] {
left = v1.OrLabelMatcher{
Left: left,
Right: right,
}
}
return left
}
func TestExtractLabelMatchers_IgnoreAfterParse(t *testing.T) {
tt := []struct {
name string
@ -92,7 +231,7 @@ func TestExtractLabelMatchers_IgnoreAfterParse(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
fullInput := fmt.Sprintf(`{app="foo"} | key1="value1" | %s | key2="value2"`, tc.expr)
expect := []v1.LabelMatcher{
v1.PlainLabelMatcher{Key: "key1", Value: "value1"},
v1.KeyValueMatcher{Key: "key1", Value: "value1"},
// key2="value2" should be ignored following tc.expr
}

@ -119,8 +119,11 @@ func matcherToBloomTest(matcher LabelMatcher) BloomTest {
case UnsupportedLabelMatcher:
return matchAllTest{}
case PlainLabelMatcher:
return newStringMatcherTest(matcher)
case KeyValueMatcher:
return newKeyValueMatcherTest(matcher)
case KeyMatcher:
return newKeyMatcherTest(matcher)
case OrLabelMatcher:
return newOrTest(
@ -140,15 +143,15 @@ func matcherToBloomTest(matcher LabelMatcher) BloomTest {
}
}
type stringMatcherTest struct {
matcher PlainLabelMatcher
type keyValueMatcherTest struct {
matcher KeyValueMatcher
}
func newStringMatcherTest(matcher PlainLabelMatcher) stringMatcherTest {
return stringMatcherTest{matcher: matcher}
func newKeyValueMatcherTest(matcher KeyValueMatcher) keyValueMatcherTest {
return keyValueMatcherTest{matcher: matcher}
}
func (sm stringMatcherTest) Matches(series labels.Labels, bloom filter.Checker) bool {
func (kvm keyValueMatcherTest) Matches(series labels.Labels, bloom filter.Checker) bool {
// TODO(rfratto): reintroduce the use of a shared tokenizer here to avoid
// desyncing between how tokens are passed during building vs passed during
// querying.
@ -159,24 +162,24 @@ func (sm stringMatcherTest) Matches(series labels.Labels, bloom filter.Checker)
// 2. It should be possible to test for just the key
var (
combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
combined = fmt.Sprintf("%s=%s", kvm.matcher.Key, kvm.matcher.Value)
rawCombined = unsafe.Slice(unsafe.StringData(combined), len(combined))
)
return sm.match(series, bloom, rawCombined)
return kvm.match(series, bloom, rawCombined)
}
func (sm stringMatcherTest) MatchesWithPrefixBuf(series labels.Labels, bloom filter.Checker, buf []byte, prefixLen int) bool {
func (kvm keyValueMatcherTest) MatchesWithPrefixBuf(series labels.Labels, bloom filter.Checker, buf []byte, prefixLen int) bool {
var (
combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
combined = fmt.Sprintf("%s=%s", kvm.matcher.Key, kvm.matcher.Value)
prefixedCombined = appendToBuf(buf, prefixLen, combined)
)
return sm.match(series, bloom, prefixedCombined)
return kvm.match(series, bloom, prefixedCombined)
}
// match returns true if the series matches the matcher or is in the bloom filter.
func (sm stringMatcherTest) match(series labels.Labels, bloom filter.Checker, combined []byte) bool {
func (kvm keyValueMatcherTest) match(series labels.Labels, bloom filter.Checker, combined []byte) bool {
// If we don't have the series labels, we cannot disambiguate which labels come from the series in which case
// we may filter out chunks for queries like `{env="prod"} | env="prod"` if env=prod is not structured metadata
if len(series) == 0 {
@ -186,8 +189,8 @@ func (sm stringMatcherTest) match(series labels.Labels, bloom filter.Checker, co
// It's in the series if the key is set and has the same value.
// By checking val != "" we handle `{env="prod"} | user=""`.
val := series.Get(sm.matcher.Key)
inSeries := val != "" && val == sm.matcher.Value
val := series.Get(kvm.matcher.Key)
inSeries := val != "" && val == kvm.matcher.Value
inBloom := bloom.Test(combined)
return inSeries || inBloom
@ -199,3 +202,53 @@ func appendToBuf(buf []byte, prefixLen int, str string) []byte {
rawString := unsafe.Slice(unsafe.StringData(str), len(str))
return append(buf[:prefixLen], rawString...)
}
type keyMatcherTest struct {
matcher KeyMatcher
}
func newKeyMatcherTest(matcher KeyMatcher) keyMatcherTest {
return keyMatcherTest{matcher: matcher}
}
func (km keyMatcherTest) Matches(series labels.Labels, bloom filter.Checker) bool {
// TODO(rfratto): reintroduce the use of a shared tokenizer here to avoid
// desyncing between how tokens are passed during building vs passed during
// querying.
//
// For a shared tokenizer to be ergonomic:
//
// 1. A prefix shouldn't be required until MatchesWithPrefixBuf is called
// 2. It should be possible to test for just the key
var (
key = km.matcher.Key
rawKey = unsafe.Slice(unsafe.StringData(key), len(key))
)
return km.match(series, bloom, rawKey)
}
func (km keyMatcherTest) MatchesWithPrefixBuf(series labels.Labels, bloom filter.Checker, buf []byte, prefixLen int) bool {
var (
key = km.matcher.Key
prefixedKey = appendToBuf(buf, prefixLen, key)
)
return km.match(series, bloom, prefixedKey)
}
// match returns true if the series matches the matcher or is in the bloom
// filter.
func (km keyMatcherTest) match(series labels.Labels, bloom filter.Checker, key []byte) bool {
// If we don't have the series labels, we cannot disambiguate which labels come from the series in which case
// we may filter out chunks for queries like `{env="prod"} | env="prod"` if env=prod is not structured metadata
if len(series) == 0 {
level.Warn(util_log.Logger).Log("msg", "series has no labels, cannot filter out chunks")
return true
}
inSeries := series.Get(km.matcher.Key) != ""
inBloom := bloom.Test(key)
return inSeries || inBloom
}
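
For context, a minimal standalone sketch (not part of the diff) of the two
token shapes these tests hash into the bloom filter: the key-value test probes
for the combined `key=value` string, while the new key test behind `key=~".+"`
probes for the key alone. The values below are hypothetical:

```go
package main

import "fmt"

func main() {
	// Hypothetical structured metadata pair, for illustration only.
	key, value := "trace_id", "abc123"

	// keyValueMatcherTest builds its bloom token as "key=value"...
	kvToken := fmt.Sprintf("%s=%s", key, value)

	// ...while keyMatcherTest (backing key=~".+") uses just the key bytes,
	// so it only asserts that some value for the key was indexed.
	keyToken := key

	fmt.Println(kvToken, keyToken) // trace_id=abc123 trace_id
}
```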

@ -116,6 +116,16 @@ func TestLabelMatchersToBloomTest(t *testing.T) {
query: `{app="fake"} | trace_id="exists_1" and trace_id="noexist"`,
match: false,
},
{
name: "presence test pass",
query: `{app="fake"} | trace_id=~".+"`,
match: true,
},
{
name: "presence test fail",
query: `{app="fake"} | noexist=~".+"`,
match: false,
},
}
for _, tc := range tt {
