chore: remove unused pattern tokenization from Loki (#13783)

pull/13906/head
Travis Patterson authored 1 year ago, committed by GitHub
parent a1f7d0a654
commit feee240f46
  1. pkg/pattern/tokenization/replacer.go (900 lines removed)
  2. pkg/pattern/tokenization/tokenization.go (198 lines removed)
  3. pkg/pattern/tokenization/tokenization_benchmark_test.go (26 lines removed)
  4. pkg/pattern/tokenization/tokenization_test.go (387 lines removed)

@@ -1,900 +0,0 @@
package tokenization
import (
"bytes"
"slices"
)
var (
placeholderNumber = []byte("<NUM>")
placeholderHex = []byte("<HEX>")
placeholderUUID = []byte("<UUID>")
placeholderTimestamp = []byte("<TIMESTAMP>")
placeholderDuration = []byte("<DURATION>")
placeholderBytesize = []byte("<BYTESIZE>")
placeholderIP = []byte("<IP>")
)
// Integer numbers after these words won't be replaced by `<NUM>`.
var numPreservingKeys = [][]byte{[]byte("status"), []byte("status_code"), []byte("httpStatus")}
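// For example, the literal value in `status=200` is preserved, while a
// number after any other key (e.g. `latency=200`) is replaced with `<NUM>`
// (see the test cases).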
const (
maxYear = 2100 // anything above that is unlikely to be a timestamp...
minHexLen = 12 // 48 bits min for free-standing hex strings (e.g "0123456789ab") or 42 bits for "0xABCDEF1234" strings
)
var boundaryChars = [256]bool{}
func init() {
for i := 0; i < 128; i++ { // for now, we don't consider non-ASCII chars as boundary
if i < '0' || (i > '9' && i < 'A') || (i > 'Z' && i < 'a') || i > 'z' {
boundaryChars[i] = true
}
}
// We need these keys sorted in ascending length order to compare them efficiently.
slices.SortStableFunc(numPreservingKeys, func(a, b []byte) int {
return len(a) - len(b)
})
}
type replacer struct {
source, dest []byte
// This is the last position that was copied to the destination buffer.
tail int
// This is the current position we are working with.
cur int
// This is essentially used for lookahead operations.
head int
// Here's some ASCII art to visualize that (the long string of dashes
// visualizes the log line):
//
// 0 <already in dest, copied or replaced > `tail` <about to be copied or replaced> `cur` <lookahead> `head` <remaining unprocessed log> `len(source)`
// |---------------------------------------------|------------------------------------------|---------------------|------------------------------------------|
// A somewhat hacky solution that allows us to preserve specific numeric
// values we care about, like status and status_code.
preserveNextNumbers bool
}
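// To illustrate with a hypothetical snapshot (not taken from a real
// trace): while processing `took 123ms`, `tail` could point right after
// `took `, `cur` at the `1`, and `head` just past the `ms` after a
// successful duration lookahead; emit() then commits the lookahead,
// appends `<DURATION>` to dest and consumes everything up to the new
// current position.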
// commit advances the current position marker to the lookahead position, i.e.
// we are committing to consume everything we've looked ahead so far.
func (r *replacer) commit() {
r.cur = r.head
}
func (r *replacer) resetHead() {
r.head = r.cur
}
func (r *replacer) resetHeadExpr() bool {
r.head = r.cur
return true // useful when included in boolean expressions with advance()
}
func (r *replacer) backtrack() {
r.head--
}
func (r *replacer) consumeUpToCurrent() {
r.tail = r.cur
}
// advanceUintRet returns the actual value of the number we read, as well as its
// length. The value might overflow for large numbers, so it's also important to
// check the length!
func (r *replacer) advanceUintRet() (val uint, length uint) {
var c byte
for ; r.head < len(r.source); r.head++ {
c = r.source[r.head]
if c < '0' || '9' < c {
break
}
length++
val = val*10 + uint(c-'0') // TODO: use bitwise trick?
}
return val, length
}
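// advanceUintOrRangeRet advances the head over characters that are either
// decimal digits or in the [lc, hc] range (e.g. 'a'..'f' for lower case
// hex) and returns the number of characters consumed.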
func (r *replacer) advanceUintOrRangeRet(lc, hc byte) (length uint) {
var c byte
for ; r.head < len(r.source); r.head++ {
c = r.source[r.head]
if !(('0' <= c && c <= '9') || (lc <= c && c <= hc)) {
break
}
length++
}
return length
}
func (r *replacer) advanceUint() bool {
_, l := r.advanceUintRet()
return l > 0
}
func (r *replacer) advanceUintUpTo(val uint, length uint) bool {
foundVal, foundLength := r.advanceUintRet()
return foundLength > 0 && foundVal <= val && foundLength <= length
}
func (r *replacer) advanceYear() bool {
return r.advanceUintUpTo(maxYear, 4)
}
func (r *replacer) advanceChar(c byte) bool {
if r.head < len(r.source) && r.source[r.head] == c {
r.head++
return true
}
return false
}
func (r *replacer) peekNextIsBoundary() bool {
if r.head >= len(r.source) {
return true // we are at the end of the line
}
return boundaryChars[r.source[r.head]]
}
func (r *replacer) peekFirstNonInt() (c byte) {
for i := r.head; i < len(r.source); i++ {
c = r.source[i]
if c < '0' || '9' < c {
return c
}
}
return 0 // we can return the 0 value here!
}
func (r *replacer) peekNext4() (result [4]byte) {
overhead := len(r.source) - r.head
switch {
case overhead > 3:
result[3] = r.source[r.head+3]
fallthrough
case overhead > 2:
result[2] = r.source[r.head+2]
fallthrough
case overhead > 1:
result[1] = r.source[r.head+1]
fallthrough
case overhead > 0:
result[0] = r.source[r.head+0]
}
return result
}
func (r *replacer) advanceTimeZoneLetters() bool {
UCLetters := 0
for {
nc, ok := r.advance()
if !ok {
break
}
if nc < 'A' || nc > 'Z' {
r.backtrack()
break
}
UCLetters++
}
return UCLetters >= 2 && UCLetters <= 5
}
func (r *replacer) advanceNumericTimeZone() bool {
// See https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
return r.advanceOneOf('+', '-') && r.advanceUintUpTo(14000, 5)
}
// helper for handleWeirdTimestamp()
func (r *replacer) advanceStringOrNumericTimeZone(isOptional bool) bool {
curHead := r.head
if r.advanceChar(' ') && r.advanceNumericTimeZone() {
return true
}
r.head = curHead
if r.advanceChar(' ') && r.advanceTimeZoneLetters() {
return true
}
r.head = curHead
return isOptional
}
func (r *replacer) advanceOneOf(chars ...byte) bool {
if r.head >= len(r.source) {
return false
}
c := r.source[r.head]
for _, ec := range chars {
if c == ec {
r.head++
return true
}
}
return false
}
func (r *replacer) advanceTime(secondsOptional bool) bool {
return r.advanceUintUpTo(24, 2) && r.advanceChar(':') && r.advanceUintUpTo(60, 2) && (secondsOptional || (r.advanceChar(':') && r.advanceUintUpTo(60, 2)))
}
// Mon|Tue|Wed|Thu|Fri|Sat|Sun
func (r *replacer) advanceDayOfTheWeek() bool {
c1, ok1 := r.advance()
c2, ok2 := r.advance()
c3, ok3 := r.advance()
if !ok1 || !ok2 || !ok3 {
return false
}
return (c1 == 'S' && ((c2 == 'a' && c3 == 't') || (c2 == 'u' && c3 == 'n'))) || // Sat, Sun
(c1 == 'M' && c2 == 'o' && c3 == 'n') ||
(c1 == 'T' && c2 == 'u' && c3 == 'e') ||
(c1 == 'W' && c2 == 'e' && c3 == 'd') ||
(c1 == 'T' && c2 == 'h' && c3 == 'u') ||
(c1 == 'F' && c2 == 'r' && c3 == 'i')
}
// Jan|Feb|Mar|Apr|May|Jul|Jun|Aug|Sep|Oct|Nov|Dec
func (r *replacer) advanceMonthName() bool {
c1, ok1 := r.advance()
c2, ok2 := r.advance()
c3, ok3 := r.advance()
if !ok1 || !ok2 || !ok3 {
return false
}
return (c1 == 'J' && ((c2 == 'u' && (c3 == 'n' || c3 == 'l')) || // Jun, Jul
(c2 == 'a' && c3 == 'n'))) || // Jan
(c1 == 'M' && c2 == 'a' && (c3 == 'r' || c3 == 'y')) || // Mar, May
(c2 == 'e' && ((c1 == 'F' && c3 == 'b') || (c1 == 'S' && c3 == 'p') || (c1 == 'D' && c3 == 'c'))) || // Feb, Sep, Dec
(c1 == 'A' && ((c2 == 'p' && c3 == 'r') || // Apr
(c2 == 'u' && c3 == 'g'))) || // Aug
(c1 == 'O' && c2 == 'c' && c3 == 't') || // Oct
(c1 == 'N' && c2 == 'o' && c3 == 'v') // Nov
}
// Check if we're in the middle of a UUID, exactly after the first 8
// characters and the dash after them, e.g. after "abcd0123-":
func (r *replacer) advanceUUIDAfterFirstDash(lc, hc byte) (endsWithBoundary bool) {
return (r.advanceUintOrRangeRet(lc, hc) == 4) && r.advanceChar('-') &&
(r.advanceUintOrRangeRet(lc, hc) == 4) && r.advanceChar('-') &&
(r.advanceUintOrRangeRet(lc, hc) == 4) && r.advanceChar('-') &&
(r.advanceUintOrRangeRet(lc, hc) == 12) && r.peekNextIsBoundary()
}
// Only moves the head forward if it successfully matches a duration
func (r *replacer) advanceDuration() (matched bool) {
curHead := r.head
var secsLen int
n := r.peekNext4()
if n[0] == 'h' {
r.head++
if boundaryChars[n[1]] {
return true // e.g. "1h", "123h"
}
if !r.advanceUintUpTo(60, 2) {
goto restore
}
n = r.peekNext4()
}
if n[0] == 'm' && (boundaryChars[n[1]] || n[1] != 's') { // we don't want to match 'ms' here
r.head++
if boundaryChars[n[1]] {
return true // e.g. "2h21m", "121m"
}
if !(r.advanceUintUpTo(60, 2) && ((r.advanceChar('.') && r.advanceUint()) || true)) {
goto restore
}
n = r.peekNext4()
}
if n[0] == 's' && boundaryChars[n[1]] {
secsLen = 1
} else if n[1] == 's' && (n[0] == 'm' || n[0] == 'n' || n[0] == 'u') && boundaryChars[n[2]] {
secsLen = 2
} else if n[2] == 's' && ((n[0] == 0xC2 && n[1] == 0xB5) || (n[0] == 0xCE && n[1] == 0xBC)) && boundaryChars[n[3]] {
// This checks for the unicode "µs" (U+00B5 = micro symbol) and "μs" (U+03BC = Greek letter mu)
secsLen = 3
} else {
goto restore
}
r.head += secsLen
return true
restore: // should be faster than a defer
r.head = curHead
return false
}
// 'b' and 'B' are not present here because of the way we check for byte size
// units below. If they were present, then suffixes like 'Bb', 'bb', etc. would
// be considered valid byte sizes. Also, only integer numbers are accepted as
// valid bytesizes in bytes, so we handle bytes with special cases instead.
var byteSizes = [256]bool{'k': true, 'K': true, 'm': true, 'M': true, 'g': true, 'G': true, 't': true, 'T': true, 'p': true, 'P': true}
// Only moves the head forward if it successfully matches a byte size unit
func (r *replacer) advanceBytesize(c1 byte) (matched bool) {
if !byteSizes[c1] {
return false
}
n := r.peekNext4()
var unitLen int // not counting the first character c1, which the head has already advanced past
if (n[0] == 'b' || n[0] == 'B') && boundaryChars[n[1]] {
unitLen = 1
} else if n[0] == 'i' && (n[1] == 'b' || n[1] == 'B') && boundaryChars[n[2]] {
unitLen = 2
} else if ((n[0] == 'b' && n[1] == 'p' && n[2] == 's') || (n[0] == 'b' && n[1] == 'i' && n[2] == 't')) && boundaryChars[n[3]] {
unitLen = 3
}
if unitLen > 0 {
r.head += unitLen
return true
}
return false
}
func (r *replacer) advanceSpacedBytesize(canBeBytes bool) (matched bool) {
// Get the next character after the space
c1, hasNext := r.advance()
if !hasNext {
return false
}
if canBeBytes && (c1 == 'b' || c1 == 'B') && r.peekNextIsBoundary() {
return true
}
if r.advanceBytesize(c1) {
return true
}
r.backtrack()
return false
}
func (r *replacer) advance() (c byte, advanced bool) {
if r.head >= len(r.source) {
return 0, false
}
c = r.source[r.head]
r.head++
return c, true
}
func (r *replacer) emitNumber() {
r.commit()
r.dest = append(r.dest, placeholderNumber...)
r.consumeUpToCurrent()
}
func (r *replacer) emitNumberOrCopyText(hasMinusPrefix bool) {
r.commit()
if !r.preserveNextNumbers {
r.dest = append(r.dest, placeholderNumber...)
r.consumeUpToCurrent()
} else {
r.maybeEmitDash(hasMinusPrefix)
r.copyUpToCurrent()
}
}
func (r *replacer) emit(hasMinusPrefix bool, placeholder []byte) {
r.commit()
r.maybeEmitDash(hasMinusPrefix)
r.dest = append(r.dest, placeholder...)
r.consumeUpToCurrent()
}
func (r *replacer) maybeEmitDash(hasMinusPrefix bool) {
// This minus was actually a dash, so we just copy it to the result
if hasMinusPrefix {
r.dest = append(r.dest, '-')
}
}
func (r *replacer) copyUpToCurrent() {
r.dest = append(r.dest, r.source[r.tail:r.cur]...)
r.consumeUpToCurrent()
}
func (r *replacer) handleHexOrUnit(hasMinusPrefix bool, n1, l1 uint, c1 byte) (endsWithBoundary bool) {
// Special case that is likely a hex string of the format "0x", but we don't
// know whether the rest is upper case or lower case yet.
zeroHex := false
if n1 == 0 && l1 == 1 && c1 == 'x' {
zeroHex = true // these cannot be the start of a UUID
c1 = r.peekFirstNonInt()
}
// Special case, this might be a byte size
if (c1 == 'b' || c1 == 'B') && r.peekNextIsBoundary() {
// We do not subsume a minus sign - byte sizes are unlikely to be
// negative, it's more likely this is a dash as a part of a range
r.emit(hasMinusPrefix, placeholderBytesize)
return true
}
// Maybe we are at the start of a hex string, either something like
// "[0-9]+[a-f]", "[0-9]+[A-F]", or "0x". We support both lower and upper
// case letters, but to avoid false positives, we want hex replacements to
// happen only if the strings are fully lower case or fully upper case.
if 'a' <= c1 && c1 <= 'f' {
return r.handleHex(hasMinusPrefix, l1+1, 'a', 'f', !zeroHex)
} else if 'A' <= c1 && c1 <= 'F' {
return r.handleHex(hasMinusPrefix, l1+1, 'A', 'F', !zeroHex)
} else if zeroHex {
// Well, it probably wasn't a zero-hex after all, or it contained only
// digits, so try to handle that or emit what we absorbed
_, l2 := r.advanceUintRet()
if l2+2 >= minHexLen { // We consider "0x" to be part of the hex string when comparing with minHexLen
r.emit(hasMinusPrefix, placeholderHex)
} else {
r.maybeEmitDash(hasMinusPrefix)
r.commit()
r.copyUpToCurrent()
}
return r.peekNextIsBoundary()
}
return r.handlePotentialUnitWithDecimal(hasMinusPrefix, c1)
}
func (r *replacer) handleHex(hasMinusPrefix bool, l1 uint, lc, hc byte, canBeUUID bool) (endsWithBoundary bool) {
totalLen := l1 + r.advanceUintOrRangeRet(lc, hc)
r.commit()
if totalLen == 8 && canBeUUID {
// We might be at the first part of a UUID, right before the first dash
if r.advanceChar('-') && r.advanceUUIDAfterFirstDash(lc, hc) {
r.emit(hasMinusPrefix, placeholderUUID)
return true
}
r.resetHead()
}
if totalLen >= minHexLen && r.peekNextIsBoundary() {
r.emit(hasMinusPrefix, placeholderHex)
return true
}
r.copyUpToCurrent()
return r.peekNextIsBoundary()
}
func (r *replacer) handlePotentialUnitWithDecimal(hasMinusPrefix bool, c1 byte) (endsWithBoundary bool) {
if r.advanceBytesize(c1) {
// We do not subsume a minus sign - byte sizes are unlikely to be
// negative, it's more likely this is a dash as a part of a range
r.emit(hasMinusPrefix, placeholderBytesize)
return true
}
r.backtrack()
if r.advanceDuration() {
// We subsume hasMinusPrefix, since durations can be negative
r.emit(false, placeholderDuration)
return true
}
// We couldn't match anything, so just copy what existed.
r.maybeEmitDash(hasMinusPrefix)
r.copyUpToCurrent()
return false
}
func (r *replacer) handleNumberWithDecimal(hasMinusPrefix bool, n1 uint, l1 uint) (endsWithBoundary bool) {
n2, l2 := r.advanceUintRet()
if l2 == 0 {
// The dot wasn't followed by another number, so emit everything before it.
r.backtrack()
r.emitNumberOrCopyText(hasMinusPrefix)
return false
}
// See if the number after the decimal is also followed by a boundary
b2, hasNext := r.advance()
// We are at the end of the string, which is a boundary, so replace
// everything up to now with a number
if !hasNext {
r.emitNumber() // this also subsumes any minus sign we had
return true
}
// The decimal number isn't followed by a boundary char (which includes
// things like '.', ':', '/', etc.), so the part after the decimal is either
// not a real number, or it's some sort of a unit that can support decimals
// like size (e.g. 12KB, 3MiB) or durations (e.g. 3.5124s), etc.
if !boundaryChars[b2] {
return r.handlePotentialUnitWithDecimal(hasMinusPrefix, b2)
}
// This can be a byte size with a space, e.g. "3.14 GiB"
if b2 == ' ' && r.advanceSpacedBytesize(false) {
// We do not subsume a minus sign - byte sizes are unlikely to be
// negative, it's more likely this is a dash as a part of a range
r.emit(hasMinusPrefix, placeholderBytesize)
return true
}
// We have a decimal number followed by a non-dot boundary, so this is not
// an IP or a version number or anything like that.
if b2 != '.' {
r.backtrack()
r.emitNumber()
return true
}
n3, l3 := r.advanceUintRet()
if l3 == 0 || !r.peekNextIsBoundary() {
// The second dot wasn't followed by another number and a boundary, so
// emit just the first number.
r.resetHead()
r.emitNumber()
return true
}
// We have "<NUM>.<NUM>.<NUM>" at this point, so we either have something
// like a version number, or an IP address, but certainly not a simple
// decimal number we can just emit.
r.commit()
// Check if this is an IP address...
if n1 <= 255 && l1 <= 3 && n2 <= 255 && l2 <= 3 && n3 <= 255 && l3 <= 3 && r.advanceChar('.') && r.advanceUintUpTo(255, 3) && r.peekNextIsBoundary() {
r.emit(hasMinusPrefix, placeholderIP)
return true
}
// This wasn't an IP after all, so just emit "<NUM>.<NUM>.<NUM>". We don't
// want to assume this is a simple decimal number because of the second dot,
// e.g. this might be something like a version number.
r.resetHead()
r.maybeEmitDash(hasMinusPrefix) // we preserve the dashes here, since it's likely not a minus
r.dest = append(r.dest, placeholderNumber...)
r.dest = append(r.dest, '.')
r.dest = append(r.dest, placeholderNumber...)
r.dest = append(r.dest, '.')
r.dest = append(r.dest, placeholderNumber...)
r.consumeUpToCurrent()
return true
}
func (r *replacer) handleSaneTimestamp(hasMinusPrefix bool, n1 uint, delim byte) (endsWithBoundary bool) {
if r.advanceUintUpTo(12, 2) && r.advanceChar(delim) && r.advanceUintUpTo(31, 2) && r.advanceOneOf('T', ' ') && r.advanceTime(false) {
r.commit()
// continue down to parsing sub-second and timezone values
} else if r.resetHeadExpr() && n1 <= 31 && r.advanceChar(delim) && r.advanceMonthName() && r.advanceChar(delim) && r.advanceYear() && r.advanceChar(':') && r.advanceTime(false) && r.advanceChar(' ') && r.advanceNumericTimeZone() && r.peekNextIsBoundary() {
// We don't have a sane timestamp, but apparently we have a not-so-sane
// timestamp format instead, e.g. "27/Mar/2024:14:34:37 +0000"...
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
} else {
// Apparently that wasn't it either, we probably just have a dash or
// forward slash after a number, so we backtrack and emit the number.
r.resetHead()
r.emitNumberOrCopyText(hasMinusPrefix)
return true
}
// We have a date that looks like `YY[YY]-MM-DD hh:mm:ss` or
// `YY[YY]/MM/DD hh:mm:ss`, with either 'T' or ' ' between the date and
// the time
r.commit() // we want to keep this
// Now we need to also check for sub-second precision and time zones:
c, canAdvance := r.advance()
if !canAdvance {
// End of the string
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
if c == '.' {
_, intl := r.advanceUintRet()
if intl == 0 {
// No sub-second precision, the dot was not part of the timestamp
r.backtrack()
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
// We had sub-second precision, capture that too
r.commit()
// Get the next char to see if we have time zone
c, canAdvance = r.advance()
if !canAdvance {
// We are at the end of the string after we captured the
// sub-second value.
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
}
if c == 'Z' || c == 'z' {
// UTC string, nothing to do after that
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
// See https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
if (c == '+' || c == '-') && r.advanceUintUpTo(14, 2) && r.advanceChar(':') && r.advanceUintUpTo(60, 2) {
// e.g. "2020-09-30T00:00:59.9999+03:00"
r.commit()
} else if r.resetHeadExpr() && r.advanceChar(' ') && r.advanceNumericTimeZone() && r.advanceChar(' ') && r.advanceTimeZoneLetters() && r.peekNextIsBoundary() {
// e.g. "2023-09-05 23:20:28.030285153 +0000 UTC"
r.commit()
} else {
r.resetHead()
}
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
func (r *replacer) handleNumberStart(hasMinusPrefix bool) (endsWithBoundary bool) {
r.resetHead() // keep the head pos in sync with the current pos
// We know we are at a digit due to how handleNumberStart() is called
n1, l1 := r.advanceUintRet()
r.commit() // we will consume this one way or another for sure
// See if the number is followed by a boundary
b1, hasNext := r.advance()
switch {
// We are at the end of the string, which is a boundary, so replace
// everything up to now with a number
case !hasNext:
r.emitNumberOrCopyText(hasMinusPrefix) // this also may subsume any minus sign we had
return true
// The number isn't followed by a boundary char (which includes things like
// '.', ' ', '/', etc.), so it's either not a real number or date, or it's
// some sort of a unit like a duration (e.g. 3h5m2s), size, (e.g. 12KB,
// 3MiB), etc.
case !boundaryChars[b1]:
return r.handleHexOrUnit(hasMinusPrefix, n1, l1, b1)
// We have a decimal point, so this can either be a plain number, a unit
// that can have a float value, or an IP address.
case b1 == '.':
return r.handleNumberWithDecimal(hasMinusPrefix, n1, l1)
// This might be a timestamp that looks like '2024-04-01...' or
// `2024/03/27...`; timestamps in the far future are not supported :)
case n1 <= maxYear && l1 <= 4 && (b1 == '-' || b1 == '/'):
return r.handleSaneTimestamp(hasMinusPrefix, n1, b1)
// This might be a byte size with a space, e.g. "2 b", "3 GiB"
case b1 == ' ' && r.advanceSpacedBytesize(true):
r.emit(hasMinusPrefix, placeholderBytesize)
return true
// Weird RFC822 dates like "02 Jan 06 15:04 MST"
case n1 <= 31 && l1 <= 2 && b1 == ' ':
if r.advanceMonthName() && r.advanceChar(' ') && r.advanceYear() && r.advanceChar(' ') && r.advanceTime(true) && r.advanceStringOrNumericTimeZone(false) {
r.commit()
r.emit(hasMinusPrefix, placeholderTimestamp)
return true
}
// if not, go to default handler after switch statement
// It could be a UUID that starts with 8 digits
case l1 == 8 && b1 == '-':
if r.advanceUUIDAfterFirstDash('a', 'f') || (r.resetHeadExpr() && r.advanceChar('-') && r.advanceUUIDAfterFirstDash('A', 'F')) {
r.emit(hasMinusPrefix, placeholderUUID)
return true
}
// if not, go to default handler after switch statement
}
// Number with an unknown boundary - emit the number and leave the boundary
// for the following passes.
r.resetHead()
r.emitNumberOrCopyText(hasMinusPrefix)
return true
}
var longDayNames = [...][]byte{
[]byte("Sunday"),
[]byte("Monday"),
[]byte("Tuesday"),
[]byte("Wednesday"),
[]byte("Thursday"),
[]byte("Friday"),
[]byte("Saturday"),
}
func (r *replacer) handleWeirdTimestamp() (endsWithBoundary bool) {
r.resetHead()
if r.advanceDayOfTheWeek() {
r.commit() // we will always consume this
// RFC1123 and RFC1123Z, e.g.:
// - "Mon, 02 Jan 2006 15:04:05 MST"
// - "Mon, 02 Jan 2006 15:04:05 -0700"
if r.advanceChar(',') && r.advanceChar(' ') && r.advanceUintUpTo(31, 2) && r.advanceChar(' ') && r.advanceMonthName() && r.advanceChar(' ') && r.advanceYear() && r.advanceChar(' ') && r.advanceTime(false) && r.advanceStringOrNumericTimeZone(false) {
r.emit(false, placeholderTimestamp)
return true
}
r.resetHead()
// ANSIC, UnixDate, RubyDate, e.g.:
// - "Mon Jan 2 15:04:05 2006"
// - "Mon Jan 2 15:04:05 MST 2006"
// - "Mon Jan 02 15:04:05 -0700 2006"
if r.advanceChar(' ') && r.advanceMonthName() && r.advanceChar(' ') && (r.advanceChar(' ') || true) && r.advanceUintUpTo(31, 2) && r.advanceChar(' ') && r.advanceTime(false) && r.advanceStringOrNumericTimeZone(true) && r.advanceChar(' ') && r.advanceYear() {
r.emit(false, placeholderTimestamp)
return true
}
r.resetHead()
// Linux, e.g.
// - "Mon 2 Jan 15:04:05 MST 2006"
// - "Tue 23 Jan 15:04:05 -0700 2023"
if r.advanceChar(' ') && (r.advanceChar(' ') || true) && r.advanceUintUpTo(31, 2) && r.advanceChar(' ') && r.advanceMonthName() && r.advanceChar(' ') && r.advanceTime(false) && r.advanceStringOrNumericTimeZone(false) && r.advanceChar(' ') && r.advanceYear() {
r.emit(false, placeholderTimestamp)
return true
}
r.resetHead()
// RFC850, e.g.
// - "Monday, 02-Jan-06 15:04:05 MST"
backtrackedSlice := r.source[r.head-3:]
var matchedDay []byte
for _, dw := range longDayNames {
if bytes.HasPrefix(backtrackedSlice, dw) {
matchedDay = dw
break
}
}
if matchedDay != nil {
r.head += len(matchedDay) - 3
if r.advanceChar(',') && r.advanceChar(' ') && r.advanceUintUpTo(31, 2) && r.advanceChar('-') && r.advanceMonthName() && r.advanceChar('-') && r.advanceUintUpTo(99, 2) && r.advanceChar(' ') && r.advanceTime(false) && r.advanceStringOrNumericTimeZone(true) {
r.emit(false, placeholderTimestamp)
return true
}
}
r.cur -= 3 // unconsume
r.resetHead()
return false
}
r.resetHead()
if r.advanceMonthName() {
r.commit() // provisionally consume this
// Linux journald logs and other similar ones, like these:
// - Feb 29 23:00:14
// - Apr-10 23:43:46.807
// - Jul 1 00:21:28
if (r.advanceChar('-') || (r.advanceChar(' ') && (r.advanceChar(' ') || true))) && r.advanceUintUpTo(31, 2) && r.advanceChar(' ') && r.advanceTime(false) {
r.commit()
// This is already a timestamp, but let's try to match subseconds as well
if r.advanceChar('.') && r.advanceUint() {
r.commit()
} else {
r.resetHead()
}
r.emit(false, placeholderTimestamp)
return true
}
r.cur -= 3 // unconsume
r.resetHead()
return false
}
r.resetHead()
return false
}
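// wasNumPreservingKeyword reports whether the text immediately before the
// current position ends with one of numPreservingKeys (e.g. "status"),
// preceded by a boundary char or the start of the line. The keys are
// sorted by ascending length, so we can bail out as soon as a key no
// longer fits before the current position.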
func (r *replacer) wasNumPreservingKeyword() bool {
for _, key := range numPreservingKeys {
pPos := r.cur - 1 - len(key)
if pPos < -1 {
return false // all subsequent keys are longer
}
if pPos != -1 && !boundaryChars[r.source[pPos]] {
continue
}
if bytes.HasPrefix(r.source[pPos+1:], key) {
return true
}
}
return false
}
func (r *replacer) replaceWithPlaceholders() {
lineLen := len(r.source)
var c byte
onBoundary := true
for r.cur = 0; r.cur < lineLen; r.cur++ {
c = r.source[r.cur]
switch {
// If we're currently not at a boundary, the only thing we need to check
// is whether the current char would create a boundary in the next iteration.
case !onBoundary:
onBoundary = boundaryChars[c]
// We weren't at a boundary and now we are, so check if we match one
// of the special keywords that will preserve numbers
if onBoundary {
r.preserveNextNumbers = r.wasNumPreservingKeyword()
}
// If we've reached this far, it means we're currently at a boundary!
// There's a lot of very complex logic when we encounter a number at a
// boundary, so we move that to a helper function.
case '0' <= c && c <= '9':
r.copyUpToCurrent()
onBoundary = r.handleNumberStart(false)
// Handle negative numbers, potentially
case c == '-':
next := r.cur + 1
// This might be a number, a date, an IP address, etc. So we don't
// know if this is a minus sign to replace or a dash to copy yet.
if next < lineLen && '0' <= r.source[next] && r.source[next] <= '9' {
// Copy everything before the dash, but mark it as consumed.
r.copyUpToCurrent()
r.cur++
r.consumeUpToCurrent()
onBoundary = r.handleNumberStart(true)
} else {
onBoundary = true
}
// Try to match weird timestamps. They require a lot of remaining
// length, generally start with a capitalized day of the week or month
// name (1 upper case letter followed by 2 lower case letters).
//
// We are basically looking for something that may match this here:
// Mon|Tue|Wed|Thu|Fri|Sat|Sun|Jan|Feb|Mar|Apr|May|Jul|Jun|Aug|Sep|Oct|Nov|Dec
//
// The detailed check would be performed by the actual handler:
case 'A' <= c && c <= 'W' && lineLen-r.cur >= 14 &&
'a' <= r.source[r.cur+1] && r.source[r.cur+1] <= 'u' &&
'b' <= r.source[r.cur+2] && r.source[r.cur+2] <= 'y':
r.copyUpToCurrent()
onBoundary = r.handleWeirdTimestamp()
// This could be the start of a lower case hex string:
case 'a' <= c && c <= 'f':
r.copyUpToCurrent()
r.resetHead()
onBoundary = r.handleHex(false, 0, 'a', 'f', true)
// This could be the start of an upper case hex string:
case 'A' <= c && c <= 'F':
r.copyUpToCurrent()
r.resetHead()
onBoundary = r.handleHex(false, 0, 'A', 'F', true)
// If we haven't actually matched anything, update whether we're still
// on a boundary character and continue onto the next one.
default:
onBoundary = boundaryChars[c]
}
}
if r.cur > r.tail {
r.dest = append(r.dest, r.source[r.tail:]...)
r.consumeUpToCurrent()
}
}
func Preprocess(content []byte) []byte {
// Preallocate ~120% of the content length, to allow for some expansion
// from replacements, hopefully without needing to allocate more memory
r := replacer{source: content, dest: make([]byte, 0, len(content)*120/100)}
r.replaceWithPlaceholders()
return r.dest
}
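
For reference, the removed Preprocess API was self-contained and could be exercised directly before this removal. A minimal usage sketch (a hypothetical standalone program; the v3 module path is an assumption, and the expected output is taken from the test cases below):

package main

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/pattern/tokenization" // assumed pre-removal import path
)

func main() {
	line := []byte("duration=13.926955ms status=200 ts=2023-09-06T00:59:59.982171323Z")
	// Per the test expectations, this prints:
	// duration=<DURATION> status=200 ts=<TIMESTAMP>
	fmt.Println(string(tokenization.Preprocess(line)))
}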

@@ -1,198 +0,0 @@
package tokenization
import (
"unsafe"
)
const placeholderEndOfLine = "<...>"
// Outside of quoted strings, these are the delimiters between tokens. However,
// they are not going to be a part of the tokens themselves and will be replaced
// by spaces in the actual log template.
var delimiters = [256]bool{0: true, '\t': true, '\n': true, '\v': true, '\f': true, '\r': true, ' ': true}
type tokenizer struct {
// Input
rawLine []byte
maxTokens int
// State and position iterators
buf []byte
tpos int
tokenCount int
maybeJSON bool
// Result values; the strings in the `tokens` slice reference `line` and
// shouldn't allocate new memory.
line string
tokens []string
}
func (t *tokenizer) countOrSaveToken(endTokenPos int) {
if t.tokens != nil {
// Intentionally written like this and not with append(), so this can
// panic if we ever exceed the preallocated slice size, since that means
// we have a nasty bug in handleNextToken() below.
t.tokens[t.tokenCount] = t.line[t.tpos:endTokenPos]
}
t.tokenCount++
t.tpos = endTokenPos
}
func (t *tokenizer) handleNextToken() bool {
escaped := false
var c, curQuoteChar byte
curQuotePos := -1
lineLen := len(t.line)
for p := t.tpos; p < lineLen; p++ {
c = t.line[p]
switch {
// If the previous character was a backslash, we ignore the character
// after it, unless it is a non-token delimiter (space, tab, etc.)
// outside of a quoted string.
case escaped:
if curQuotePos < 0 && delimiters[c] {
t.countOrSaveToken(p + 1)
return true
} else {
escaped = false
}
// If we weren't already escaped and we encounter a backslash, set
// the escaped flag and ignore the current byte.
case c == '\\':
escaped = true
// Non-ASCII / UTF8 / invalid character, consider it a part of the
// current token, for lack of a better efficient option...
case c > 127:
// Intentionally blank, continues to the next char
// If we are currently in a quoted part of the string, the current
// character is also part of the current token. The only special case
// here is if the current character is a matching quote, that means
// we'll no longer be quoted.
case curQuotePos >= 0:
if c == curQuoteChar { // end quote
curQuotePos = -1
}
// If we encounter a quote character and we were not already in a quoted
// part of the line, mark that we are now in a quote of that type.
case c == '"' || c == '\'' || c == '`':
curQuoteChar = c
curQuotePos = p
// If we encounter a delimiter outside of a quote, count or save the
// token and skip the delimiter.
case delimiters[c]:
t.countOrSaveToken(p + 1)
return true
// Handle likely JSON object keys that have been serialized without
// spaces. For example, something like this:
// `{"context":{"taskId":1},"message":"starting",...}`
//
// If the line starts or ends with curly braces, we consider it might be
// a JSON log and try to detect the `":` part of the message that isn't
// followed by a delimiter character. If we find that pattern, we
// consider everything up to and including the `:` character as a
// separate token.
//
// Similarly, we try to detect the `,"` pattern and also split a token
// before the comma value. The `p > t.tpos` check is crucial here,
// because it ensures that we're not at the start of a token, i.e. there
// wasn't a delimiter right before the comma.
case t.maybeJSON && p > t.tpos && (c == ':' || c == ',') && p+1 < lineLen:
if c == ':' && t.line[p-1] == '"' && !delimiters[t.line[p+1]] {
t.countOrSaveToken(p + 1)
return true
}
if c == ',' && t.line[p+1] == '"' {
t.countOrSaveToken(p)
return true
}
}
// By default we do nothing, simply advance one character forward
// because the current character was a part of the current token.
}
// We have an unterminated single quote at position `curQuotePos`. To handle
// this edge case somewhat gracefully, we can emit everything up to that
// unterminated quote and the quote itself as a single token, and continue
// fairly normally from there.
if curQuotePos > 0 {
t.countOrSaveToken(curQuotePos + 1)
return true
}
if t.tpos < len(t.line) {
t.countOrSaveToken(len(t.line))
return true
}
return false
}
// This function is called twice! The first time it counts the tokens but
// doesn't save them. Afterwards we allocate the tokens return slice with
// exactly the correct capacity and we call it again, this time to save them.
func (t *tokenizer) process() {
// We want to handle the end of the string as a single token, so we start
// the loop from 1.
for i := 1; i < t.maxTokens; i++ {
if !t.handleNextToken() {
break
}
}
if t.tpos >= len(t.line) {
return
}
// We have a token count greater than or equal to maxTokens, so add one
// last token containing placeholderEndOfLine to signal that.
if t.tokens != nil {
t.tokens[t.tokenCount] = placeholderEndOfLine
}
t.tokenCount++
t.tpos += len(placeholderEndOfLine)
}
func (t *tokenizer) tokenize() []string {
t.buf = Preprocess(t.rawLine)
// We use unsafe to convert buf to a string without any new allocations.
// This is safe because t.buf won't be used or referenced anywhere else
// besides here from now on.
t.line = unsafe.String(unsafe.SliceData(t.buf), len(t.buf))
if len(t.line) >= 2 && (t.line[0] == '{' || t.line[len(t.line)-1] == '}') {
t.maybeJSON = true
}
t.process()
// If we have super long lines (more than twice the size we need to get the
// maxTokens we want), copy just the part we need so the tokens don't hold a
// reference to the original huge []byte slice.
if t.tokenCount == t.maxTokens && t.tpos*2 < len(t.line) {
t.line = string(t.buf[0 : t.tpos+1])
}
t.tokens = make([]string, t.tokenCount) // intentionally like this, see comment in countOrSaveToken()
t.tokenCount = 0
t.tpos = 0
t.process()
return t.tokens
}
func PreprocessAndTokenize(content []byte) []string {
t := tokenizer{rawLine: content, maxTokens: 100} // TODO: parametrize maxTokens
return t.tokenize()
}
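
For context, PreprocessAndTokenize was the package's public entry point; a minimal usage sketch (hypothetical caller, with the result taken from the test expectations below — note that each token keeps its trailing delimiter):

tokens := tokenization.PreprocessAndTokenize([]byte(`level=info duration=13.926955ms status=200`))
// tokens == []string{"level=info ", "duration=<DURATION> ", "status=200"}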

@@ -1,26 +0,0 @@
package tokenization
import (
"fmt"
"testing"
)
var sampleLogBenchCount = 0
func BenchmarkTokenizationTestCases(b *testing.B) {
for i, tc := range tokenizationRealisticTestCases {
line := []byte(tc.line)
b.Run(fmt.Sprintf("test-case-%d", i), func(b *testing.B) {
for i := 0; i < b.N; i++ {
PreprocessAndTokenize(line)
}
})
}
}
func BenchmarkTokenizationPlayground(b *testing.B) {
line := []byte(`level=info ts=2023-09-06T00:59:59.982171323Z caller=metrics.go:160 component=frontend org_id=29 traceID=4b93729ff3efabd0 latency=fast query="{stream=\"stdout\",pod=\"loki-canary-nl54q\"} " query_hash=1280418884 query_type=limited range_type=range length=20s start_delta=2h54m30.690801022s end_delta=2h54m10.690801238s step=1s duration=13.926955ms status=200 limit=1000 returned_lines=0 throughput=16MB total_bytes=219kB total_bytes_non_indexed_labels=2.1kB lines_per_second=14935 total_lines=208 post_filter_lines=208 total_entries=41 store_chunks_download_time=1.592805ms queue_time=127µs splits=0 shards=0 chunk_refs_fetch_time=3.599883ms cache_chunk_req=1 cache_chunk_hit=1 cache_chunk_bytes_stored=0 cache_chunk_bytes_fetched=480079 cache_chunk_download_time=1.307396ms cache_index_req=0 cache_index_hit=0 cache_index_download_time=0s cache_stats_results_req=1 cache_stats_results_hit=1 cache_stats_results_download_time=361.913µs cache_result_req=0 cache_result_hit=0 cache_result_download_time=0s token_id=gcom-1234`)
for i := 0; i < b.N; i++ {
PreprocessAndTokenize(line)
}
}
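
Before the removal, these benchmarks could be run with the standard Go tooling, e.g.:

go test -bench=BenchmarkTokenization -benchmem ./pkg/pattern/tokenization/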

@@ -1,387 +0,0 @@
package tokenization
import (
"fmt"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type tokenizationTestCase struct {
line string
expResult []string
}
var tokenizationCornerTestCases = []tokenizationTestCase{
{
"",
[]string{},
},
{
" foo ",
// TOFIX: maybe this is not exactly what we want? should we try to combine empty space into a single token?
[]string{" ", "foo ", " "},
},
{
"foo bar baz",
[]string{"foo ", "bar ", "baz"},
},
{
"\nfoo\t bar baz\r\n",
// TOFIX: same as above - should we combine empty space into a single token?
[]string{"\n", "foo\t", " ", " ", "bar ", "baz\r", "\n"},
},
{
"ends single char C",
[]string{"ends ", "single ", "char ", "C"},
},
{
"0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Z Y X W V U T S R Q P O N M L K J I H G F E D C B A z y x w v u t s r q p o n m l k j i h g f e d c b a 9 8 7 6 5 4 3 2 1 0",
// The tail end of strings longer than maxTokens is returned as a single token
[]string{"<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "<NUM> ", "a ", "b ", "c ", "d ", "e ", "f ", "g ", "h ", "i ", "j ", "k ", "l ", "m ", "n ", "o ", "p ", "q ", "r ", "s ", "t ", "u ", "v ", "w ", "x ", "y ", "z ", "A ", "B ", "C ", "D ", "E ", "F ", "G ", "H ", "I ", "J ", "K ", "L ", "M ", "N ", "O ", "P ", "Q ", "R ", "S ", "T ", "U ", "V ", "W ", "X ", "Y ", "Z ", "Z ", "Y ", "X ", "W ", "V ", "U ", "T ", "S ", "R ", "Q ", "P ", "O ", "N ", "M ", "L ", "K ", "J ", "I ", "H ", "G ", "F ", "E ", "D ", "C ", "B ", "A ", "z ", "y ", "x ", "w ", "v ", "u ", "t ", "s ", "r ", "q ", "p ", "<...>"},
},
{
`a "quoted string"`,
[]string{"a ", `"quoted string"`},
},
{
`a "quoted string with \" escapes!"`,
[]string{"a ", `"quoted string with \" escapes!"`},
},
{
`a 'singly quoted string"'`,
[]string{"a ", `'singly quoted string"'`},
},
{
`a 'singly quoted string" \''`,
[]string{"a ", `'singly quoted string" \''`},
},
{
`a 'singly quoted string" \\\''`,
[]string{"a ", `'singly quoted string" \\\''`},
},
{
`a'twisted excappe\\' "with an unterminated quote" 'at_the_end`,
[]string{`a'twisted excappe\\' `, `"with an unterminated quote" `, `'`, "at_the_end"},
},
{
`a "quoted string 'inception'"!` + "`woot`'much`wow'",
[]string{"a ", `"quoted string 'inception'"!` + "`woot`'much`wow'"},
},
{
`unterminated"quote`,
[]string{`unterminated"`, `quote`},
},
{
"`mix`" + ` "and" 'match'`,
[]string{"`mix` ", `"and" `, `'match'`},
},
{
"`mix`" + ` "and" 'match'`,
[]string{"`mix` ", `"and" `, `'match'`},
},
{
`{"json":"uninterrupted \"logline\"","foo":"bar"}`,
[]string{`{"json":`, `"uninterrupted \"logline\""`, `,"foo":`, `"bar"}`},
},
{
`{"weirdo1":`,
[]string{`{"weirdo1":`},
},
{
// This and the corner case below are not optimal, though they're
// unlikely to be encountered in the real world, so it's hopefully a
// nice tradeoff for the performance of not needing a full JSON parser.
`{"weirdo2":}`,
[]string{`{"weirdo2":`, `}`},
},
{
`,"weirdo3":}`,
[]string{`,"weirdo3":`, `}`},
},
{
// We deliberately do treat "escaped" whitespaces outside of quotes as
// delimiters, i.e. whitespaces outside of strings cannot be escaped.
`weird\ escape`,
[]string{`weird\ `, `escape`},
},
{
"-3.14-foo 0.0.0.0/24-0.0.0.1-255.255.255.255-256.255.255.255 1337-ber 0.12-ber n0tnumb3er 12faux -123.0.1.123 -123 -1231.11 333. 123.456. 123.45-",
[]string{"<NUM>-foo ", "<IP>/<NUM>-<IP>-<IP>-<NUM>.<NUM>.<NUM>.<NUM> ", "<NUM>-ber ", "<NUM>-ber ", "n0tnumb3er ", "12faux ", "-<IP> ", "<NUM> ", "<NUM> ", "<NUM>. ", "<NUM>.<NUM>. ", "<NUM>-"},
},
{
"2022-12-31 12:12:31 3022-12-31 12:12:31-Jul 1 00:21:28",
[]string{"<TIMESTAMP> ", "<NUM>-<NUM>-<NUM> ", "<NUM>:<NUM>:<NUM>-<TIMESTAMP>"},
},
{
"2022/12/01 12:12:31 - 2022/13/32 12:12:31",
[]string{"<TIMESTAMP> ", "- ", "<NUM>/<NUM>/<NUM> ", "<NUM>:<NUM>:<NUM>"},
},
{
"UUIDS: 123e4567-e89b-12d3-a456-426614174000, 550E8400-E29B-41D4-A716-446655440000, -00000000-0000-0000-0000-000000000000, 12345678-dead-beef-1337-000000000000 {c6ad1a63-10b5-460e-ab2c-05c13604539d} ''<A3AE4842-E9AA-4C27-9509-772DB3CC3190>''",
[]string{"UUIDS: ", "<UUID>, ", "<UUID>, ", "-<UUID>, ", "<UUID> ", "{<UUID>} ", "''<<UUID>>''"},
},
// Mixed case UUID and hex strings are ignored, to limit false positives
{
"Not UUIDS: 123e4567-E89B-12d3-a456-426614174000, 1234567-dead-beef-1337-00000000000a",
[]string{"Not ", "UUIDS: ", "123e4567-E89B-12d3-a456-<NUM>, ", "<NUM>-dead-beef-<NUM>-<HEX>"},
},
{
"Hexes: 0x0123456789 0xabcdef0123 deadbeef1337-ABCDEF0123456?0123456789ab:FFFFAAAAFFFF Curses: 0x012345678 dEaDbeef1337 abcdefabcde ABCDEFABCDE 0xASDFASDFASDF abcdef0123456NOT",
[]string{"Hexes: ", "<HEX> ", "<HEX> ", "<HEX>-<HEX>?<HEX>:<HEX> ", "Curses: ", "0x012345678 ", "dEaDbeef1337 ", "abcdefabcde ", "ABCDEFABCDE ", "0xASDFASDFASDF ", "abcdef0123456NOT"},
},
{
"30546354_3313121680 0_123_456_789 foo_123",
[]string{"<NUM>_<NUM> ", "<NUM>_<NUM>_<NUM>_<NUM> ", "foo_<NUM>"},
},
{
`3.31ms/1h2m|-12h2m6.1s 31m "165m2.1s(6h0m12.05us)" -451325.31µs 6m23μs 123h21m3.4124561s/0s/-0.0123ms`,
[]string{"<DURATION>/<DURATION>|<DURATION> ", "<DURATION> ", `"<DURATION>(<DURATION>)" `, "<DURATION> ", "<DURATION> ", "<DURATION>/<DURATION>/<DURATION>"},
},
{
// Invalid duration values
"3.31.1ms 3h121m3.4124561s 1h0.12s 100usa 0.12msa",
[]string{"<NUM>.<DURATION> ", "3h121m3.<DURATION> ", "1h0.<DURATION> ", "100usa ", "0.12msa"},
},
{
// We only consider integers to be valid bytesizes in bytes (0.2B doesn't make sense)
"2Mib 0.12KB-5GB 3.12kb 123Gbps 124mbit:512Tbit 5 B;124.1 KB/3b - 2b or 2 BeNot 13.37 b 3 b",
[]string{
"<BYTESIZE> ", "<BYTESIZE>-<BYTESIZE> ", "<BYTESIZE> ", "<BYTESIZE> ", "<BYTESIZE>:<BYTESIZE> ",
"<BYTESIZE>;<BYTESIZE>/<BYTESIZE> ", "- ", "<BYTESIZE> ", "or ", "<NUM> ", "BeNot ", "<NUM> ", "b ", "<BYTESIZE>"},
},
{
`status=123 status_code:500 status 200 status="-1" status_code:"404" httpStatus=200`,
[]string{"status=123 ", "status_code:500 ", "status ", "200 ", `status="-1" `, `status_code:"404" `, "httpStatus=200"},
},
{
`status_code_foo=123 status_code:500.1 status 2023-09-06T00:59:59.98 status:"404KiB"`,
[]string{"status_code_foo=<NUM> ", "status_code:<NUM> ", "status ", "<TIMESTAMP> ", `status:"<BYTESIZE>"`},
},
}
// TODO: add these to the benchmark tests
var tokenizationRealisticTestCases = []tokenizationTestCase{
// logfmt from loki, metrics.go with a lot of number values of various types
{
`level=info ts=2023-09-06T00:59:59.982171323Z caller=metrics.go:160 component=frontend org_id=29 traceID=4b93729ff3efabd0 latency=fast query="{stream=\"stdout\",pod=\"loki-canary-nl54q\"} " query_hash=1280418884 query_type=limited range_type=range length=20s start_delta=2h54m30.690801022s end_delta=2h54m10.690801238s step=1s duration=13.926955ms status=200 limit=1000 returned_lines=0 throughput=16MB total_bytes=219kB total_bytes_non_indexed_labels=2.1kB lines_per_second=14935 total_lines=208 post_filter_lines=208 total_entries=41 store_chunks_download_time=1.592805ms queue_time=127µs splits=0 shards=0 chunk_refs_fetch_time=3.599883ms cache_chunk_req=1 cache_chunk_hit=1 cache_chunk_bytes_stored=0 cache_chunk_bytes_fetched=480079 cache_chunk_download_time=1.307396ms cache_index_req=0 cache_index_hit=0 cache_index_download_time=0s cache_stats_results_req=1 cache_stats_results_hit=1 cache_stats_results_download_time=361.913µs cache_result_req=0 cache_result_hit=0 cache_result_download_time=0s token_id=gcom-1234`,
[]string{
"level=info ", "ts=<TIMESTAMP> ", "caller=metrics.go:<NUM> ", "component=frontend ", "org_id=<NUM> ", "traceID=<HEX> ", "latency=fast ", `query="{stream=\"stdout\",pod=\"loki-canary-nl54q\"} " `, "query_hash=<NUM> ", "query_type=limited ", "range_type=range ", "length=<DURATION> ", "start_delta=<DURATION> ", "end_delta=<DURATION> ", "step=<DURATION> ", "duration=<DURATION> ", "status=200 ", "limit=<NUM> ", "returned_lines=<NUM> ", "throughput=<BYTESIZE> ", "total_bytes=<BYTESIZE> ", "total_bytes_non_indexed_labels=<BYTESIZE> ", "lines_per_second=<NUM> ", "total_lines=<NUM> ", "post_filter_lines=<NUM> ", "total_entries=<NUM> ", "store_chunks_download_time=<DURATION> ", "queue_time=<DURATION> ", "splits=<NUM> ", "shards=<NUM> ", "chunk_refs_fetch_time=<DURATION> ", "cache_chunk_req=<NUM> ", "cache_chunk_hit=<NUM> ", "cache_chunk_bytes_stored=<NUM> ", "cache_chunk_bytes_fetched=<NUM> ", "cache_chunk_download_time=<DURATION> ", "cache_index_req=<NUM> ", "cache_index_hit=<NUM> ", "cache_index_download_time=<DURATION> ", "cache_stats_results_req=<NUM> ", "cache_stats_results_hit=<NUM> ", "cache_stats_results_download_time=<DURATION> ", "cache_result_req=<NUM> ", "cache_result_hit=<NUM> ", "cache_result_download_time=<DURATION> ", "token_id=gcom-<NUM>",
},
},
// logfmt from loki, with string multi-word messages
{
`level=debug ts=2023-09-06T00:59:59.98214402Z caller=shard_resolver.go:114 bytes=205kB chunks=2 streams=2 entries=200 msg="queried index" type=single matchers="{stream=\"stdout\", pod=\"loki-canary-v75j4\"}" duration=9.498885ms from=2023-09-06T00:48:53.138Z through=2023-09-06T00:49:43.138Z length=50s`,
[]string{
"level=debug ", "ts=<TIMESTAMP> ", "caller=shard_resolver.go:<NUM> ", "bytes=<BYTESIZE> ", "chunks=<NUM> ", "streams=<NUM> ", "entries=<NUM> ", `msg="queried index" `, "type=single ", `matchers="{stream=\"stdout\", pod=\"loki-canary-v75j4\"}" `, "duration=<DURATION> ", "from=<TIMESTAMP> ", "through=<TIMESTAMP> ", "length=<DURATION>",
},
},
// tricky loki distributor message:
{
`level=debug ts=2024-07-12T12:25:06.175464934Z caller=push.go:146 org_id=29 traceID=7af4f918eab1c80f msg="push request parsed" path=/loki/api/v1/push contentType=application/x-protobuf contentEncoding= bodySize="8.8 kB" streams=11 entries=43 streamLabelsSize="3.4 kB" entriesSize="19 kB" structuredMetadataSize="71 B" totalSize="22 kB" mostRecentLagMs=167 adaptiveLogsDroppedLines=10 adaptiveLogsDroppedSize=4965 adaptiveLogsMatchedLines=37`,
[]string{
"level=debug ", "ts=<TIMESTAMP> ", "caller=push.go:<NUM> ", "org_id=<NUM> ", "traceID=<HEX> ", `msg="push request parsed" `, "path=/loki/api/v1/push ", "contentType=application/x-protobuf ", "contentEncoding= ", `bodySize="<BYTESIZE>" `, "streams=<NUM> ", "entries=<NUM> ", `streamLabelsSize="<BYTESIZE>" `, `entriesSize="<BYTESIZE>" `, `structuredMetadataSize="<BYTESIZE>" `, `totalSize="<BYTESIZE>" `, "mostRecentLagMs=<NUM> ", "adaptiveLogsDroppedLines=<NUM> ", "adaptiveLogsDroppedSize=<NUM> ", "adaptiveLogsMatchedLines=<NUM>",
},
},
// random JSON logs
{
`{"timestamp": "2022-12-23T12:34:56Z", "level": "debug", "message": "Server starting", "server_id": "abcdefghij", "start_time": "2022-12-23T12:30:00Z"}`,
[]string{`{"timestamp": `, `"<TIMESTAMP>", `, `"level": `, `"debug", `, `"message": `, `"Server starting", `, `"server_id": `, `"abcdefghij", `, `"start_time": `, `"<TIMESTAMP>"}`},
},
{
// JSON logs without spaces between elements, like how JavaScript's JSON.Stringify() produces:
`{"context":{"taskId":1},"message":"starting task ID 1","sequence":0,"time":1506776210000,"version":"1.0.0"}`,
[]string{`{"context":`, `{"taskId":`, `<NUM>}`, `,"message":`, `"starting task ID <NUM>"`, `,"sequence":`, `<NUM>`, `,"time":`, `<NUM>`, `,"version":`, `"<NUM>.<NUM>.<NUM>"}`},
},
// Android logs from https://github.com/logpai/logparser/blob/main/data/loghub_2k/Android/Android_2k.log
{
// This test case has an unterminated double quote
//
// TOFIX:
// - timestamp is not correctly detected
// - empty "" token?
`03-17 16:13:40.345 1702 14638 D PowerManagerService: release:lock=166121161, flg=0x0, tag="RILJ_ACK_WL", name=com.android.phone", ws=null, uid=1001, pid=2626`,
[]string{"<NUM>-<NUM> ", "<NUM>:<NUM>:<NUM> ", " ", "<NUM> ", "<NUM> ", "D ", "PowerManagerService: ", "release:lock=<NUM>, ", "flg=0x0, ", `tag="RILJ_ACK_WL", `, `name=com.android.phone"`, ", ", "ws=null, ", "uid=<NUM>, ", "pid=<NUM>"},
},
{
// TOFIX:
// - timestamp is not correctly detected
// - empty "" tokens
`03-17 16:13:47.518 1702 8671 D ActivityManager: Skipping, withExcluded: false, tr.intent:Intent { act=android.intent.action.VIEW dat=file:///storage/emulated/0/Tencent/QQfile_recv/b.apk typ=application/vnd.android.package-archive flg=0x10800000 cmp=com.android.packageinstaller/.PackageInstallerActivity (has extras) }`,
[]string{
"<NUM>-<NUM> ", "<NUM>:<NUM>:<NUM> ", " ", "<NUM> ", " ", "<NUM> ", "D ", "ActivityManager: ", "Skipping, ", "withExcluded: ", "false, ", "tr.intent:Intent ", "{ ", "act=android.intent.action.VIEW ", "dat=file:///storage/emulated/<NUM>/Tencent/QQfile_recv/b.apk ", "typ=application/vnd.android.package-archive ", "flg=0x10800000 ", "cmp=com.android.packageinstaller/.PackageInstallerActivity ", "(has ", "extras) ", "}",
},
},
// Apache logs from https://github.com/logpai/logparser/blob/main/data/loghub_2k/Apache/Apache_2k.log
{
`[Mon Dec 05 13:16:27 2005] [notice] jk2_init() Found child 5877 in scoreboard slot 9`,
[]string{"[<TIMESTAMP>] ", "[notice] ", "jk2_init() ", "Found ", "child ", "<NUM> ", "in ", "scoreboard ", "slot ", "<NUM>"},
},
{
`[Mon Dec 05 19:14:11 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties`,
[]string{"[<TIMESTAMP>] ", "[notice] ", "workerEnv.init() ", "ok ", "/etc/httpd/conf/workers2.properties"},
},
// nginx logs by running `docker run -p 80:80 -v $(pwd):/usr/share/nginx/html nginx` locally
{
`2024/03/27 14:31:42 [error] 29#29: *1 directory index of "/usr/share/nginx/html/" is forbidden, client: 172.17.0.1, server: localhost, request: "GET / HTTP/1.1", host: "127.0.0.1"`,
[]string{"<TIMESTAMP> ", "[error] ", "<NUM>#<NUM>: ", "*<NUM> ", "directory ", "index ", "of ", "\"/usr/share/nginx/html/\" ", "is ", "forbidden, ", "client: ", "<IP>, ", "server: ", "localhost, ", "request: ", `"GET / HTTP/<NUM>", `, "host: ", "\"<IP>\""},
},
{
// TOFIX:
// - probably not all numbers should be replaced with <NUM>, e.g. for "*1", "(2:", "HTTP/1.1" it's definitely a worse UX
`2024/03/27 14:34:37 [error] 29#29: *1 open() "/usr/share/nginx/html/test url with spaces" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "GET /test%20url%20with%20spaces HTTP/1.1", host: "127.0.0.1"
172.17.0.1 - - [31/Mar/2024:14:34:37 +0000] "GET /test%20url%20with%20spaces HTTP/1.1" 404 153 "-" "Mozilla/5.0 (X11; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0" "-"`,
[]string{"<TIMESTAMP> ", "[error] ", "<NUM>#<NUM>: ", "*<NUM> ", "open() ", `"/usr/share/nginx/html/test url with spaces" `, "failed ", "(<NUM>: ", "No ", "such ", "file ", "or ", "directory), ", "client: ", "<IP>, ", "server: ", "localhost, ", "request: ", `"GET /test%20url%20with%20spaces HTTP/<NUM>", `, "host: ", "\"<IP>\"\n", "\t", "\t", "<IP> ", "- ", "- ", "[<TIMESTAMP>] ", `"GET /test%20url%20with%20spaces HTTP/<NUM>" `, "<NUM> ", "<NUM> ", "\"-\" ", `"Mozilla/<NUM> (X11; Linux x86_<NUM>; rv:<NUM>) Gecko/<NUM> Firefox/<NUM>" `, `"-"`},
},
// Linux systemd (journalctl) logs
{
`Mar 27 11:52:21 hostname systemd-networkd[2043]: enp6s0: LLDP Rx: Invoking callback for 'refreshed' event.`,
[]string{"<TIMESTAMP> ", "hostname ", "systemd-networkd[<NUM>]: ", "enp6s0: ", "LLDP ", "Rx: ", "Invoking ", "callback ", "for ", "'refreshed' ", "event."},
},
{
`Feb 29 23:00:14 nixos dbus-daemon[11432]: [system] Activating via systemd: service name='org.opensuse.Snapper' unit='snapperd.service' requested by ':1.324' (uid=0 pid=22089 comm="/nix/store/7rgimysvkczzyiaq4fkfymyjad4vbd9c-snappe" label="kernel")`,
[]string{"<TIMESTAMP> ", "nixos ", "dbus-daemon[<NUM>]: ", "[system] ", "Activating ", "via ", "systemd: ", "service ", "name='org.opensuse.Snapper' ", "unit='snapperd.service' ", "requested ", "by ", "':<NUM>' ", "(uid=<NUM> ", "pid=<NUM> ", "comm=\"/nix/store/7rgimysvkczzyiaq4fkfymyjad4vbd9c-snappe\" ", "label=\"kernel\")"},
},
// Random slack logs:
{
`Apr-10 23:43:46.807 [API-Q] (T02S4RCS0) c37dfd20-1712781826.804 conversations.suggestions is ACTIVE`,
[]string{"<TIMESTAMP> ", "[API-Q] ", "(T02S4RCS0) ", "c37dfd20-<NUM> ", "conversations.suggestions ", "is ", "ACTIVE"},
},
{
`Apr-11 00:01:57.743 [DEVICE-PERMISSIONS-MA] Permissions saved to local storage: {"permissions":{"microphone":"granted","camera":"prompt","screen":"prompt"},"timestamp":1712782917742}`,
[]string{"<TIMESTAMP> ", "[DEVICE-PERMISSIONS-MA] ", "Permissions ", "saved ", "to ", "local ", "storage: ", `{"permissions":`, `{"microphone":`, `"granted"`, `,"camera":`, `"prompt"`, `,"screen":`, `"prompt"}`, `,"timestamp":`, `<NUM>}`},
},
// Another weird log from loki:
{
`ts=2023-09-06T00:59:59.900879737Z caller=spanlogger.go:86 user=29 level=debug msg="querying ingester" params="selector={stream=\"stdout\", pod=\"loki-canary-t98wq\"}, direction=BACKWARD, start=2023-09-05 23:20:28.030285153 +0000 UTC, end=2023-09-05 23:20:48.030285153 +0000 UTC, limit=1000, shards="`,
[]string{"ts=<TIMESTAMP> ", "caller=spanlogger.go:<NUM> ", "user=<NUM> ", "level=debug ", `msg="querying ingester" `, `params="selector={stream=\"stdout\", pod=\"loki-canary-t98wq\"}, direction=BACKWARD, start=<TIMESTAMP>, end=<TIMESTAMP>, limit=<NUM>, shards="`},
},
// {``, []string{}},
}
func getTimestampTests(t *testing.T) []tokenizationTestCase {
timeStamps := []string{
"2023-01-03T15:04:05.999999999Z",
"2024-02-29T11:12:03.33+02:00",
"2022-03-02T23:59:59Z",
"2024-04-01T01:33:59+03:00",
"2021-05-07T00:00:00-04:30",
"1999-06-12T13:37:31.123+03:00",
"2005-07-17T12:00:00Z",
"1988-08-03T00:00:00Z",
"2020-09-30T00:00:59.9999+03:00",
"2034-10-31T12:34:56.7890+11:23",
"2034-11-01T01:23:45.67890Z",
"2025-12-31T01:23:45.67890Z",
}
timestampConst := string(placeholderTimestamp)
layouts := []struct {
layout string
expResult []string
}{
// TOFIX: all of the commented out lines.
//{time.Layout, []string{timestampConst}},
{time.ANSIC, []string{timestampConst}},
{time.UnixDate, []string{timestampConst}},
{"Mon _2 Jan 15:04:05 MST 2006", []string{timestampConst}}, // linux `date`
{time.RubyDate, []string{timestampConst}},
{time.RFC822, []string{timestampConst}},
{time.RFC822Z, []string{timestampConst}},
{time.RFC850, []string{timestampConst}},
{time.RFC1123, []string{timestampConst}},
{time.RFC1123Z, []string{timestampConst}},
{time.RFC3339, []string{timestampConst}},
{time.RFC3339Nano, []string{timestampConst}},
//{time.Stamp, []string{timestampConst}},
//{time.StampMilli, []string{timestampConst}},
//{time.StampMicro, []string{timestampConst}},
//{time.StampNano, []string{timestampConst}},
{time.DateTime, []string{timestampConst}},
// TOFIX: maybe consider these timestamps?
{time.DateOnly, []string{"<NUM>-<NUM>-<NUM>"}},
{time.TimeOnly, []string{"<NUM>:<NUM>:<NUM>"}},
}
result := make([]tokenizationTestCase, 0, len(timeStamps)*len(layouts))
for _, tss := range timeStamps {
ts, err := time.Parse(time.RFC3339Nano, tss)
require.NoError(t, err)
for _, layout := range layouts {
result = append(result, tokenizationTestCase{
line: ts.Format(layout.layout),
expResult: layout.expResult,
})
}
}
return result
}
type tokenizationTestCasePack struct {
name string
cases []tokenizationTestCase
}
func TestTokenization(t *testing.T) {
packs := []tokenizationTestCasePack{
{"cornerTestCases", tokenizationCornerTestCases},
{"timestamps", getTimestampTests(t)},
{"realisticTestCases", tokenizationRealisticTestCases},
}
for i, pack := range packs {
pack := pack
t.Run(fmt.Sprintf("test-pack-%d-%s", i, pack.name), func(t *testing.T) {
for j, tc := range pack.cases {
tc := tc
t.Run(fmt.Sprintf("case-%d", j), func(t *testing.T) {
result := PreprocessAndTokenize([]byte(tc.line))
assert.Equal(
t, tc.expResult, result,
fmt.Sprintf("Log line: %q\nActual result slice: %#v", tc.line, result),
)
})
}
})
}
}
func TestTokenizationMemcpy(t *testing.T) {
buf := make([]byte, 0, 1000)
for i := 0; i < 300; i++ {
buf = append(buf, 'a', ' ')
}
tokenizer := tokenizer{rawLine: buf, maxTokens: 10}
tokenizer.tokenize()
require.Less(t, len(tokenizer.line), 100)
}
// Useful for running single test cases in isolation
func TestTokenizationPlayground(t *testing.T) {
tc := tokenizationTestCase{
"foo 121113.21231 bar 123.0.1.123 -123 -1231.11",
[]string{"foo ", "<NUM> ", "bar ", "<IP> ", "<NUM> ", "<NUM>"},
}
result := PreprocessAndTokenize([]byte(tc.line))
assert.Equal(
t, tc.expResult, result,
fmt.Sprintf("Log line: %q\nActual result slice: %#v", tc.line, result),
)
}
func TestReplacementPlayground(t *testing.T) {
line := "Jul 1 00:21:28"
exp := "<TIMESTAMP>"
r := replacer{source: []byte(line)}
r.replaceWithPlaceholders()
assert.Equal(
t, exp, string(r.dest),
fmt.Sprintf("Log line: %q", line),
)
}