loki/pkg/engine/executor/pipeline_test.go

package executor

import (
	"errors"
	"strings"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/csv"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/stretchr/testify/require"
)

// CSVToArrow converts a CSV string to an Arrow record based on the provided schema.
// It uses the Arrow CSV reader for parsing.
func CSVToArrow(fields []arrow.Field, csvData string) (arrow.Record, error) {
	return CSVToArrowWithAllocator(memory.NewGoAllocator(), fields, csvData)
}

// CSVToArrowWithAllocator converts a CSV string to an Arrow record based on the provided schema
// using the specified memory allocator. It reads all rows from the CSV into a single record,
// which the caller is responsible for releasing.
func CSVToArrowWithAllocator(allocator memory.Allocator, fields []arrow.Field, csvData string) (arrow.Record, error) {
	// Create schema
	schema := arrow.NewSchema(fields, nil)

	// Set up CSV reader over the in-memory data
	input := strings.NewReader(csvData)
	reader := csv.NewReader(
		input,
		schema,
		csv.WithAllocator(allocator),
		csv.WithNullReader(true),
		csv.WithComma(','),
		csv.WithChunk(-1), // Read all rows into a single record
	)
	defer reader.Release()

	if !reader.Next() {
		return nil, errors.New("failed to read CSV data")
	}

	// The reader releases its current record when it is released, so retain
	// the record here to keep it valid for the caller.
	record := reader.Record()
	record.Retain()
	return record, nil
}
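
// TestCSVToArrowUsage is a minimal usage sketch for the helper above: it
// converts a small two-column CSV payload into a single Arrow record. The
// schema fields and CSV values here are arbitrary examples chosen for
// illustration, not fixtures used elsewhere in this package.
func TestCSVToArrowUsage(t *testing.T) {
	fields := []arrow.Field{
		{Name: "level", Type: arrow.BinaryTypes.String},
		{Name: "count", Type: arrow.PrimitiveTypes.Int64},
	}

	record, err := CSVToArrow(fields, "info,10\nerror,2")
	require.NoError(t, err)
	defer record.Release()

	// WithChunk(-1) reads everything into one record: two rows, two columns.
	require.Equal(t, int64(2), record.NumRows())
	require.Equal(t, int64(2), record.NumCols())
}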

// BufferedPipeline is a pipeline implementation that reads from a fixed set of Arrow records.
// It implements the Pipeline interface and serves as a simple source for testing and data injection.
type BufferedPipeline struct {
	records []arrow.Record
	current int
	state   state
}

// NewBufferedPipeline creates a new BufferedPipeline from a set of Arrow records.
// The pipeline returns these records in sequence, retaining each one until it is consumed or closed.
func NewBufferedPipeline(records ...arrow.Record) *BufferedPipeline {
	for _, rec := range records {
		if rec != nil {
			rec.Retain()
		}
	}
	return &BufferedPipeline{
		records: records,
		current: -1, // Start before the first record
		state:   failureState(EOF),
	}
}

// Read implements Pipeline.
// It advances to the next record and returns EOF when all records have been read.
func (p *BufferedPipeline) Read() error {
	// Release previous record if it exists
	if p.state.batch != nil {
		p.state.batch.Release()
	}

	p.current++
	if p.current >= len(p.records) {
		p.state = failureState(EOF)
		return EOF
	}

	p.state = successState(p.records[p.current])
	return nil
}

// Value implements Pipeline.
// It returns the current record and error state.
func (p *BufferedPipeline) Value() (arrow.Record, error) {
	return p.state.Value()
}

// Close implements Pipeline.
// It releases all records being held.
func (p *BufferedPipeline) Close() {
	for _, rec := range p.records {
		if rec != nil {
			rec.Release()
		}
	}
	if p.state.batch != nil {
		p.state.batch.Release()
		p.state.batch = nil
	}
	p.records = nil
}

// Inputs implements Pipeline.
// BufferedPipeline is a source, so it has no inputs.
func (p *BufferedPipeline) Inputs() []Pipeline {
	return nil
}

// Transport implements Pipeline.
// BufferedPipeline is always considered a Local transport.
func (p *BufferedPipeline) Transport() Transport {
	return Local
}
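
// drainPipeline sketches the read loop implied by the Pipeline contract: call
// Read until it returns EOF, fetching the current batch with Value after each
// successful read. It is a hypothetical helper written for illustration here,
// not an API used elsewhere in this package.
func drainPipeline(p Pipeline) (int64, error) {
	var rows int64
	for {
		if err := p.Read(); err != nil {
			if errors.Is(err, EOF) {
				// EOF signals a clean end of the record stream.
				return rows, nil
			}
			return rows, err
		}
		batch, err := p.Value()
		if err != nil {
			return rows, err
		}
		rows += batch.NumRows()
	}
}

// TestBufferedPipelineDrain exercises drainPipeline against an empty
// BufferedPipeline: with no records, the first Read must return EOF and the
// drained row count must be zero.
func TestBufferedPipelineDrain(t *testing.T) {
	pipeline := NewBufferedPipeline()
	defer pipeline.Close()

	rows, err := drainPipeline(pipeline)
	require.NoError(t, err)
	require.Equal(t, int64(0), rows)
}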

func TestBufferedPipeline(t *testing.T) {
	// Define test schema
	fields := []arrow.Field{
		{Name: "name", Type: arrow.BinaryTypes.String},
		{Name: "age", Type: arrow.PrimitiveTypes.Int32},
	}
	schema := arrow.NewSchema(fields, nil)

	// Create test data
	csvData1 := "Alice,30\nBob,25"
	csvData2 := "Charlie,35\nDave,40"

	// Convert to Arrow records
	record1, err := CSVToArrow(fields, csvData1)
	require.NoError(t, err)
	defer record1.Release()

	record2, err := CSVToArrow(fields, csvData2)
	require.NoError(t, err)
	defer record2.Release()

	// Create a BufferedPipeline with the test records
	pipeline := NewBufferedPipeline(record1, record2)
	defer pipeline.Close()

	// Test pipeline behavior
	t.Run("should have correct transport type", func(t *testing.T) {
		require.Equal(t, Local, pipeline.Transport())
	})

	t.Run("should have no inputs", func(t *testing.T) {
		require.Empty(t, pipeline.Inputs())
	})

	t.Run("should return records in order", func(t *testing.T) {
		// First read should return the first record
		err := pipeline.Read()
		require.NoError(t, err)

		batch, err := pipeline.Value()
		require.NoError(t, err)
		require.Equal(t, record1.NumRows(), batch.NumRows())
		require.Equal(t, schema, batch.Schema())

		// Second read should return the second record
		err = pipeline.Read()
		require.NoError(t, err)

		batch, err = pipeline.Value()
		require.NoError(t, err)
		require.Equal(t, record2.NumRows(), batch.NumRows())
		require.Equal(t, schema, batch.Schema())

		// Third read should return EOF
		err = pipeline.Read()
		require.Equal(t, EOF, err)

		_, err = pipeline.Value()
		require.Equal(t, EOF, err)
	})
}
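
// TestBufferedPipelineValueBeforeRead documents the constructor's initial
// state: NewBufferedPipeline seeds the pipeline with failureState(EOF), so
// calling Value before the first Read reports EOF rather than a record. The
// single-row CSV payload is an arbitrary example.
func TestBufferedPipelineValueBeforeRead(t *testing.T) {
	record, err := CSVToArrow([]arrow.Field{
		{Name: "name", Type: arrow.BinaryTypes.String},
	}, "Eve")
	require.NoError(t, err)
	defer record.Release()

	pipeline := NewBufferedPipeline(record)
	defer pipeline.Close()

	_, err = pipeline.Value()
	require.Equal(t, EOF, err)
}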