package executor

import (
	"fmt"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/cespare/xxhash/v2"

	"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
)

var (
	// separator is the single NUL byte written in front of every field name
	// when computing cache keys in fieldsCache.getFields.
	separator = []byte{0}
)

// columnForIdent returns the column ([arrow.Array]) and its column index in the schema of the given input batch ([arrow.RecordBatch]).
// It returns an error if the column with the fully qualified name of the identifier could not be found,
// or if there are multiple columns with the same name in the schema.
// In case of an error, the returned column is nil and the returned column index is -1.
func columnForIdent(ident *semconv.Identifier, batch arrow.RecordBatch) (arrow.Array, int, error) {
	return columnForFQN(ident.FQN(), batch)
}

// columnForFQN looks up the column named fqn in the schema of batch and returns the column together with its index.
// It returns a nil column, index -1, and an error if no column or more than one column carries that name.
func columnForFQN(fqn string, batch arrow.RecordBatch) (arrow.Array, int, error) {
	indices := batch.Schema().FieldIndices(fqn)
	switch len(indices) {
	case 0:
		return nil, -1, fmt.Errorf("column not found for %s", fqn)
	case 1:
		return batch.Column(indices[0]), indices[0], nil
	default:
		return nil, -1, fmt.Errorf("multiple columns found for %s", fqn)
	}
}

// labelValuesCache returns label values for a given row in range and vector aggregators, and caches them in order
// to reduce object allocations for repeated label sets. Strings are copied to avoid referencing the Arrow data buffer.
type labelValuesCache struct {
	digest *xxhash.Digest
	cache  map[uint64][]string
}

// newLabelValuesCache returns an empty labelValuesCache that is ready for use.
func newLabelValuesCache() *labelValuesCache {
	return &labelValuesCache{
		digest: xxhash.New(),
		cache:  make(map[uint64][]string),
	}
}

// writeLenPrefixed feeds s into d, preceded by the byte length of s encoded as 8 little-endian bytes.
// Unlike a delimiter byte, a length prefix keeps the hashed byte stream unambiguous even when s itself
// contains the delimiter: a sequence of length-prefixed strings can only be decoded in one way.
func writeLenPrefixed(d *xxhash.Digest, s string) {
	var prefix [8]byte
	n := uint64(len(s))
	for i := range prefix {
		prefix[i] = byte(n >> (8 * i))
	}
	// Digest.Write and Digest.WriteString are documented to always return a nil error.
	_, _ = d.Write(prefix[:])
	_, _ = d.WriteString(s)
}

// getLabelValues returns the non-null values found at the given row across arrays, in array order.
//
// The result is cached under an xxhash of those values, so rows with the same values share one slice.
// The returned slice is the one stored in the cache and must therefore not be modified by the caller.
//
// Each value is hashed with a length prefix rather than a NUL delimiter. With a delimiter, the single
// value "a\x00b" and the two values "a", "b" would produce the same byte stream, hence the same key,
// and the second row would be handed the label values cached for the first one.
// Genuine 64-bit hash collisions are still not detected.
func (c *labelValuesCache) getLabelValues(arrays []*array.String, row int) []string {
	c.digest.Reset()
	for _, arr := range arrays {
		if arr.IsNull(row) {
			continue
		}
		writeLenPrefixed(c.digest, arr.Value(row))
	}
	key := c.digest.Sum64()

	if cached, ok := c.cache[key]; ok {
		return cached
	}

	// Cache miss: scan the row again and copy the label values so that the cached
	// strings do not reference the Arrow data buffer.
	labelValues := make([]string, 0, len(arrays))
	for _, arr := range arrays {
		if arr.IsNull(row) {
			continue
		}
		labelValues = append(labelValues, strings.Clone(arr.Value(row)))
	}
	c.cache[key] = labelValues
	return labelValues
}

// fieldsCache returns labels for a given row in range and vector aggregators, and caches them in order
// to reduce object allocations for repeated label sets. It first scans the row for non-null labels and computes an xxhash
// over their names. In case of a cache miss it scans the row again and allocates a slice holding the matching fields.
type fieldsCache struct {
	digest *xxhash.Digest
	cache  map[uint64][]arrow.Field
}

// newFieldsCache returns an empty fieldsCache that is ready for use.
func newFieldsCache() *fieldsCache {
	return &fieldsCache{
		digest: xxhash.New(),
		cache:  make(map[uint64][]arrow.Field),
	}
}

// getFields returns the entries of fields whose corresponding array (same index in arrays) is non-null at the given row.
//
// fields[i] is read for every non-null arrays[i], so fields must be at least as long as arrays.
// The result is cached under an xxhash of the selected field names; the returned slice is the one
// stored in the cache and must not be modified by the caller.
//
// NOTE(review): names are delimited by a NUL byte when hashing, which presumably relies on field names
// never containing a NUL byte - confirm against the schema naming rules.
func (c *fieldsCache) getFields(arrays []*array.String, fields []arrow.Field, row int) []arrow.Field {
	c.digest.Reset()
	for i, arr := range arrays {
		if arr.IsNull(row) {
			continue
		}
		// Digest.Write and Digest.WriteString are documented to always return a nil error.
		_, _ = c.digest.Write(separator)
		_, _ = c.digest.WriteString(fields[i].Name)
	}
	key := c.digest.Sum64()

	if cached, ok := c.cache[key]; ok {
		return cached
	}

	// Cache miss: scan the row again and collect the fields of all non-null labels.
	labels := make([]arrow.Field, 0, len(arrays))
	for i, arr := range arrays {
		if arr.IsNull(row) {
			continue
		}
		labels = append(labels, fields[i])
	}
	c.cache[key] = labels
	return labels
}