Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/engine/executor/parse_logfmt.go

158 lines
4.7 KiB

package executor
import (
	"sort"
	"strings"
	"unsafe"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"

	"github.com/grafana/loki/v3/pkg/engine/internal/types"
	"github.com/grafana/loki/v3/pkg/logql/log/logfmt"
)
// BuildLogfmtColumns parses each logfmt line in input and materializes the
// extracted key/value pairs as Arrow string columns.
//
// requestedKeys and allocator are forwarded to parseKeys, which creates one
// string builder per discovered column. The function returns two parallel
// slices: the column names, in the (sorted) order produced by parseKeys, and
// the finished Arrow array for each name. No error is returned.
//
// Every builder is released once its array has been produced; the arrays
// themselves are handed to the caller.
func BuildLogfmtColumns(input *array.String, requestedKeys []string, allocator memory.Allocator) ([]string, []arrow.Array) {
	builders := map[string]*array.StringBuilder{}
	names := parseKeys(input, requestedKeys, builders, allocator)

	// Finalize the builders in column order, keeping headers and columns aligned.
	headers := make([]string, len(names))
	columns := make([]arrow.Array, len(names))
	for idx, name := range names {
		b := builders[name]
		headers[idx] = name
		columns[idx] = b.NewArray()
		b.Release()
	}
	return headers, columns
}
// parseKeys parses every line of input as logfmt and appends the extracted
// values to per-key string builders, discovering columns as lines are parsed.
//
// Builders are created lazily with allocator and stored in columnBuilders
// under their column name. A column that first appears on row i is backfilled
// with i NULLs, and every known column receives a NULL for each row that does
// not supply a value for it, so that each builder gains one entry per row.
//
// If tokenizing a line reports an error, the types.ColumnNameParsedError and
// types.ColumnNameParsedErrorDetails columns are created (on the first such
// row) and populated for that row; key/value pairs decoded before the error
// are still recorded.
//
// When requestedKeys is non-empty, only those keys become columns. The
// returned slice holds the names of all created columns in sorted order.
//
// NOTE(review): a logfmt key whose name equals one of the two error column
// names would share (or have its builder replaced by) the error column's
// builder, leaving columns with mismatched lengths. Confirm whether callers
// guarantee such keys cannot occur.
func parseKeys(input *array.String, requestedKeys []string, columnBuilders map[string]*array.StringBuilder, allocator memory.Allocator) []string {
	columnOrder := []string{}

	// The key filter is the same for every line, so build it once here
	// rather than once per row.
	wanted := newLogfmtKeyFilter(requestedKeys)

	// Both error builders are created together; errorBuilder == nil means
	// "no error column exists yet".
	var errorBuilder, errorDetailsBuilder *array.StringBuilder

	for i := 0; i < input.Len(); i++ {
		parsed, err := tokenizeLogfmtWithFilter(input.Value(i), wanted)

		switch {
		case err != nil:
			if errorBuilder == nil {
				// First error seen: create the error columns and backfill
				// NULLs for all previous rows.
				errorBuilder = array.NewStringBuilder(allocator)
				errorDetailsBuilder = array.NewStringBuilder(allocator)
				columnBuilders[types.ColumnNameParsedError] = errorBuilder
				columnBuilders[types.ColumnNameParsedErrorDetails] = errorDetailsBuilder
				columnOrder = append(columnOrder, types.ColumnNameParsedError, types.ColumnNameParsedErrorDetails)
				errorBuilder.AppendNulls(i)
				errorDetailsBuilder.AppendNulls(i)
			}
			errorBuilder.Append(types.LogfmtParserErrorType)
			errorDetailsBuilder.Append(err.Error())
		case errorBuilder != nil:
			// This row parsed cleanly but error columns already exist.
			errorBuilder.AppendNull()
			errorDetailsBuilder.AppendNull()
		}

		// Record the values this row provides. parsed may be partially
		// populated when err != nil; those pairs are kept on purpose.
		for key, value := range parsed {
			builder, exists := columnBuilders[key]
			if !exists {
				// key is an unsafeString view over the decoder's bytes
				// (presumably a sub-slice of the input line). Copy it before
				// retaining it as a column name so the returned headers
				// neither pin nor dangle into input's buffers.
				key = strings.Clone(key)
				builder = array.NewStringBuilder(allocator)
				columnBuilders[key] = builder
				columnOrder = append(columnOrder, key)
				// New column discovered: backfill NULLs for previous rows.
				builder.AppendNulls(i)
			}
			builder.Append(value)
		}

		// After row i every column must hold i+1 entries. Any builder that
		// is still short was not touched by this row and gets a NULL.
		for _, name := range columnOrder {
			if b := columnBuilders[name]; b.Len() <= i {
				b.AppendNull()
			}
		}
	}

	// Map iteration order is random, so sort for a deterministic column order.
	sort.Strings(columnOrder)
	return columnOrder
}

// tokenizeLogfmt parses a single logfmt line using the standard decoder.
//
// It returns a map of key/value pairs with last-wins semantics for duplicate
// keys. Pairs with an empty value are skipped. If requestedKeys is non-empty,
// only those keys are included in the result.
//
// When the decoder reports an error, that error is returned together with the
// pairs decoded before it. The returned strings are created with unsafeString
// and therefore share memory with the decoder's byte slices; copy them if they
// must outlive input.
func tokenizeLogfmt(input string, requestedKeys []string) (map[string]string, error) {
	return tokenizeLogfmtWithFilter(input, newLogfmtKeyFilter(requestedKeys))
}

// newLogfmtKeyFilter turns requestedKeys into a set for constant-time lookups.
// It returns nil when requestedKeys is empty, which means "keep every key".
func newLogfmtKeyFilter(requestedKeys []string) map[string]struct{} {
	if len(requestedKeys) == 0 {
		return nil
	}
	wanted := make(map[string]struct{}, len(requestedKeys))
	for _, key := range requestedKeys {
		wanted[key] = struct{}{}
	}
	return wanted
}

// tokenizeLogfmtWithFilter is tokenizeLogfmt with a pre-built key filter, so
// callers processing many lines can construct the filter once. A nil wanted
// set disables filtering.
func tokenizeLogfmtWithFilter(input string, wanted map[string]struct{}) (map[string]string, error) {
	result := make(map[string]string)

	decoder := logfmt.NewDecoder(unsafeBytes(input))
	for !decoder.EOL() && decoder.ScanKeyval() {
		key := unsafeString(decoder.Key())
		if wanted != nil {
			if _, ok := wanted[key]; !ok {
				continue
			}
		}

		val := decoder.Value()
		if len(val) == 0 {
			// TODO: retain empty values if --keep-empty is set
			continue
		}

		// Last-wins semantics for duplicates.
		result[key] = unsafeString(val)
	}

	// Surface decoder errors alongside whatever was parsed before them.
	if err := decoder.Err(); err != nil {
		return result, err
	}
	return result, nil
}
// unsafeBytes returns a []byte view of s without copying or allocating.
// The slice shares memory with s, so it must be treated as read-only.
func unsafeBytes(s string) []byte {
	data := unsafe.StringData(s)
	return unsafe.Slice(data, len(s))
}
// unsafeString returns a string view of b without copying or allocating.
// The string shares memory with b, so b must not be modified while the
// returned string is in use.
func unsafeString(b []byte) string {
	data := unsafe.SliceData(b)
	return unsafe.String(data, len(b))
}