// loki/pkg/dataobj/internal/dataset/reader_basic.go

package dataset

import (
	"context"
	"errors"
	"fmt"
	"io"
	"iter"
	"slices"
)

// basicReader is a low-level reader that reads rows from a set of columns.
//
// basicReader lazily reads pages from columns as they are iterated over; see
// [Reader] for a higher-level implementation that supports predicates and
// batching page downloads.
type basicReader struct {
	columns      []Column
	readers      []*columnReader
	columnLookup map[Column]int // Index into columns and readers
	buf          []Value        // Buffer for reading values from columns
	nextRow      int64
}

// newBasicReader returns a new basicReader that reads rows from the given set
// of columns.
func newBasicReader(set []Column) *basicReader {
	var br basicReader
	br.Reset(set)
	return &br
}

// Read is a convenience wrapper around [basicReader.ReadColumns] that reads up
// to the next len(s) rows across the entire column set owned by [basicReader].
func (pr *basicReader) Read(ctx context.Context, s []Row) (n int, err error) {
	return pr.ReadColumns(ctx, pr.columns, s)
}

// ReadColumns reads up to the next len(s) rows from a subset of columns and
// stores them into s. It returns the number of rows read and any error
// encountered. At the end of the column set used by basicReader, ReadColumns
// returns 0, [io.EOF].
//
// Row.Values will be populated with one element per column in the order of the
// overall column set owned by basicReader.
//
// After calling ReadColumns, additional columns in s can be filled using
// [basicReader.Fill].
func (pr *basicReader) ReadColumns(ctx context.Context, columns []Column, s []Row) (n int, err error) {
	if len(columns) == 0 {
		return 0, fmt.Errorf("no columns to read")
	}

	// The implementation of ReadColumns can be expressed as a fill from
	// pr.nextRow to pr.nextRow + len(s).
	//
	// For simplicity, we initialize the row numbers of the entire slice of
	// rows, even if we only fill a subset of them.
	for i := range s {
		s[i].Index = int(pr.nextRow + int64(i))
	}

	n, err = pr.fill(ctx, columns, s)
	pr.nextRow += int64(n)
	return n, err
}

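// exampleReadLoop is an illustrative sketch (not part of the upstream file)
// showing how a caller might drain a basicReader with Read. It assumes the
// caller owns buf and inspects Row.Values, which holds one Value per column
// in column-set order.
func exampleReadLoop(ctx context.Context, br *basicReader) error {
	buf := make([]Row, 128)
	for {
		n, err := br.Read(ctx, buf)
		for _, row := range buf[:n] {
			_ = row.Values // one element per column in the overall column set
		}
		if errors.Is(err, io.EOF) {
			return nil // all rows consumed
		} else if err != nil {
			return err
		}
	}
}
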
// Fill fills values for the given columns into the provided rows. It returns
// the number of rows filled and any error encountered.
//
// s must be initialized such that s[i].Index specifies which row to fill
// values for.
//
// s[i].Values will be populated with one element per column in the order of
// the column set provided to [newBasicReader] or [basicReader.Reset].
//
// This allows callers to use Fill to implement efficient filtering (see the
// sketch after this method):
//
//  1. Fill is called with the columns to use for filtering.
//  2. The caller applies filters to the filled rows, removing any row that
//     does not pass the filter.
//  3. The caller calls Fill again with the remaining columns.
//
// Fill is most efficient when calls to Fill move each column in columns
// forward: that is, each filled row is in sorted order with no repeats across
// calls.
//
// Fill does not advance the offset of the basicReader.
func (pr *basicReader) Fill(ctx context.Context, columns []Column, s []Row) (n int, err error) {
	if len(columns) == 0 {
		return 0, fmt.Errorf("no columns to fill")
	}

	for partition := range partitionRows(s) {
		pn, err := pr.fill(ctx, columns, partition)
		n += pn
		if err != nil {
			return n, err
		} else if pn == 0 {
			break
		}
	}
	return n, nil
}

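// exampleFilterFill is an illustrative sketch of the two-phase filtering
// pattern described in Fill's doc comment; it is not part of the upstream
// file. keep is a hypothetical predicate, and columns is assumed to be the
// full column set passed to newBasicReader with the filter column first.
func exampleFilterFill(ctx context.Context, br *basicReader, columns []Column, keep func(Value) bool) ([]Row, error) {
	rows := make([]Row, 64)
	n, err := br.ReadColumns(ctx, columns[:1], rows)
	if err != nil && !errors.Is(err, io.EOF) {
		return nil, err
	}

	// Drop rows whose filter column fails the predicate. Row indices set by
	// ReadColumns are preserved, so Fill knows which rows to complete.
	kept := rows[:0]
	for _, row := range rows[:n] {
		if keep(row.Values[0]) {
			kept = append(kept, row)
		}
	}

	// Fill the remaining columns only for rows that passed the filter.
	if len(columns) > 1 {
		if _, err := br.Fill(ctx, columns[1:], kept); err != nil && !errors.Is(err, io.EOF) {
			return nil, err
		}
	}
	return kept, nil
}
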
// partitionRows returns an iterator over a slice of rows that partitions the
// slice into groups of consecutive, non-repeating row indices. Gaps between
// rows are treated as two different partitions.
func partitionRows(s []Row) iter.Seq[[]Row] {
	return func(yield func([]Row) bool) {
		if len(s) == 0 {
			return
		}

		start := 0
		for i := 1; i < len(s); i++ {
			if s[i].Index != s[i-1].Index+1 {
				if !yield(s[start:i]) {
					return
				}
				start = i
			}
		}
		yield(s[start:])
	}
}

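// examplePartitions is an illustrative sketch of partitionRows (not part of
// the upstream file): row indices 4, 5, 6, 9, 10 split into two partitions at
// the gap between 6 and 9.
func examplePartitions() {
	rows := []Row{{Index: 4}, {Index: 5}, {Index: 6}, {Index: 9}, {Index: 10}}
	for part := range partitionRows(rows) {
		fmt.Println(part[0].Index, len(part)) // prints "4 3", then "9 2"
	}
}
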
// fill implements the core fill logic for a single run of rows with
// consecutive indices and no gaps between them.
func (pr *basicReader) fill(ctx context.Context, columns []Column, s []Row) (n int, err error) {
	if len(s) == 0 {
		return 0, nil
	}

	pr.buf = slices.Grow(pr.buf, len(s))
	pr.buf = pr.buf[:len(s)]

	startRow := int64(s[0].Index)

	// Ensure that each Row.Values slice has enough capacity to store all values.
	for i := range s {
		s[i].Values = slices.Grow(s[i].Values, len(pr.columns))
		s[i].Values = s[i].Values[:len(pr.columns)]
	}

	for n < len(s) {
		var (
			// maxRead tracks the maximum number of rows read across all columns.
			// This is required because columns are not guaranteed to have the
			// same number of rows, and we want to advance startRow by the maximum
			// number of rows read.
			maxRead int

			// atEOF is true if all columns report EOF. We default to true and set
			// it to false whenever any column reads successfully without
			// returning EOF.
			atEOF = true
		)
		for _, column := range columns {
			columnIndex, ok := pr.columnLookup[column]
			if !ok {
				return n, fmt.Errorf("column %v is not owned by basicReader", column)
			}

			// We want to allow readers to reuse memory of [Value]s in s while
			// allowing the caller to retain ownership over that memory; to do
			// this safely, we copy memory from s into pr.buf (for the given
			// column index) for our decoders to use.
			//
			// If we didn't do this, the memory backing [Value]s would be owned by
			// both basicReader and the caller, which can lead to memory reuse
			// bugs.
			pr.buf = reuseRowsBuffer(pr.buf, s[n:], columnIndex)

			r := pr.readers[columnIndex]
			if _, err := r.Seek(startRow, io.SeekStart); err != nil {
				return n, fmt.Errorf("seeking to row %d in column %d: %w", startRow, columnIndex, err)
			}

			cn, err := r.Read(ctx, pr.buf[:len(s)-n])
			if err != nil && !errors.Is(err, io.EOF) {
				// If reading a column fails, we return immediately without
				// advancing our row offset for this batch. This retains the state
				// of the reader and ensures that every call to Read reads every
				// column.
				//
				// However, callers that choose to retry failed reads will suffer
				// performance penalties: all columns up to and including the
				// failing column will seek backwards to startRow, which requires
				// starting over from the top of a page.
				return n, fmt.Errorf("reading column %d: %w", columnIndex, err)
			} else if err == nil {
				atEOF = false
			}

			maxRead = max(maxRead, cn)
			for i := range cn {
				s[n+i].Values[columnIndex] = pr.buf[i]
			}
		}
		// We check for atEOF here instead of maxRead == 0 to preserve the
		// pattern of io.Reader: readers may return 0, nil even when they're not
		// at EOF.
		if maxRead == 0 && atEOF {
			return n, io.EOF
		}

		// Some columns may have read fewer rows than maxRead. These columns need
		// to fill in the remainder of the rows (up to maxRead) with NULL values;
		// otherwise, stale non-NULL values from a previous call to Read could
		// leak through and corrupt results.
		for _, column := range columns {
			columnIndex := pr.columnLookup[column]

			r := pr.readers[columnIndex]
			columnRow, err := r.Seek(0, io.SeekCurrent)
			if err != nil {
				// Seeking to the current offset should never fail. If it somehow
				// does, we can't determine how many rows this column read, so any
				// column that read fewer than maxRead rows would be left with
				// stale values. We can't recover from this state, so we panic.
				panic(fmt.Sprintf("seeking to current row in column %d: %v", columnIndex, err))
			}

			columnRead := columnRow - startRow
			for i := columnRead; i < int64(maxRead); i++ {
				s[n+int(i)].Values[columnIndex] = Value{}
			}
		}

		n += maxRead
		startRow += int64(maxRead)
	}

	return n, nil
}

// reuseRowsBuffer prepares dst for reading up to len(src) values. Non-NULL
// values are appended to dst, with the remainder of the slice set to NULL.
//
// The resulting slice has length len(src).
func reuseRowsBuffer(dst []Value, src []Row, columnIndex int) []Value {
	dst = slices.Grow(dst, len(src))
	dst = dst[:0]

	for _, row := range src {
		if columnIndex >= len(row.Values) {
			continue
		}
		value := row.Values[columnIndex]
		if value.IsNil() {
			continue
		}
		dst = append(dst, value)
	}

	filledLength := len(dst)
	dst = dst[:len(src)]
	clear(dst[filledLength:])
	return dst
}

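// exampleBufferReuse is an illustrative sketch of reuseRowsBuffer (not part
// of the upstream file): values the rows already hold for the requested
// column are compacted to the front of the returned buffer so decoders can
// reuse their memory, and the tail is cleared to NULL (the zero Value).
func exampleBufferReuse(rows []Row) []Value {
	buf := reuseRowsBuffer(nil, rows, 0)
	// len(buf) == len(rows). buf[:k] holds the k non-NULL values previously
	// stored for column 0; buf[k:] is NULL.
	return buf
}
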
// Seek sets the row offset for the next Read call, interpreted according to
// whence:
//
//   - [io.SeekStart] seeks relative to the start of the column set,
//   - [io.SeekCurrent] seeks relative to the current offset, and
//   - [io.SeekEnd] seeks relative to the end (for example, offset = -2
//     specifies the penultimate row of the column set).
//
// Seek returns the new offset relative to the start of the column set or an
// error, if any.
//
// To retrieve the current offset without modification, call Seek with 0 and
// [io.SeekCurrent].
//
// Seeking to an offset before the start of the column set is an error. Seeking
// to beyond the end of the column set will cause the next Read or ReadColumns
// to return io.EOF.
func (pr *basicReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		if offset < 0 {
			return 0, errors.New("invalid offset")
		}
		pr.nextRow = offset
	case io.SeekCurrent:
		if pr.nextRow+offset < 0 {
			return 0, errors.New("invalid offset")
		}
		pr.nextRow += offset
	case io.SeekEnd:
		lastRow := int64(pr.maxRows())
		if lastRow+offset < 0 {
			return 0, errors.New("invalid offset")
		}
		pr.nextRow = lastRow + offset
	default:
		return 0, fmt.Errorf("invalid whence value %d", whence)
	}
	return pr.nextRow, nil
}

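// exampleSeek is an illustrative sketch of Seek's whence semantics; it is not
// part of the upstream file.
func exampleSeek(br *basicReader) {
	_, _ = br.Seek(10, io.SeekStart)     // next Read starts at row 10
	cur, _ := br.Seek(0, io.SeekCurrent) // query the offset without moving it
	_, _ = br.Seek(-2, io.SeekEnd)       // position at the penultimate row
	_ = cur
}
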
// maxRows returns the total number of rows across the column set, determined
// by the column with the most rows.
func (pr *basicReader) maxRows() int {
	var rows int
	for _, c := range pr.columns {
		rows = max(rows, c.ColumnInfo().RowsCount)
	}
	return rows
}

// Reset resets the basicReader to read from the start of the provided columns.
// This permits reusing a basicReader rather than allocating a new one.
func (pr *basicReader) Reset(columns []Column) {
	if pr.columnLookup == nil {
		pr.columnLookup = make(map[Column]int, len(columns))
	} else {
		clear(pr.columnLookup)
	}

	// Reset existing readers in place rather than closing them outright, so
	// their state can be reused for the new column set.
	pr.columns = columns
	for i := 0; i < len(pr.readers) && i < len(columns); i++ {
		pr.readers[i].Reset(columns[i])
		pr.columnLookup[columns[i]] = i
	}

	// Create new readers for any additional columns.
	for i := len(pr.readers); i < len(columns); i++ {
		pr.readers = append(pr.readers, newColumnReader(columns[i]))
		pr.columnLookup[columns[i]] = i
	}

	// Close and clear out remaining readers. This needs to clear beyond the
	// final length of the pr.readers slice (up to its full capacity) so that
	// elements beyond the length can be garbage collected.
	pr.readers = pr.readers[:len(columns)]
	closeAndClear(pr.readers[len(columns):cap(pr.readers)])

	pr.nextRow = 0
}

// closeAndClear closes every non-nil reader in r, then clears the slice so
// the readers can be garbage collected.
func closeAndClear(r []*columnReader) {
	for _, c := range r {
		if c != nil {
			c.Close()
		}
	}
	clear(r)
}

// Close closes the basicReader. Closed basicReaders can be reused by calling
// [basicReader.Reset].
func (pr *basicReader) Close() error {
	for _, r := range pr.readers {
		if err := r.Close(); err != nil {
			return err
		}
	}
	return nil
}

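// exampleReuse is an illustrative sketch of the Reset/Close lifecycle
// described above (not part of the upstream file): a basicReader can be
// reused across column sets instead of allocating a new reader per set.
// colsA and colsB are hypothetical column sets.
func exampleReuse(colsA, colsB []Column) {
	br := newBasicReader(colsA)
	defer br.Close()

	// ... read rows from colsA ...

	br.Reset(colsB) // rewinds to row 0 and swaps in the new column set
	// ... read rows from colsB ...
}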