mirror of https://github.com/grafana/loki
chore(dataobj): column building (#15634)
This commit adds the ability to accumulate sequences of dataset.Value into a column, which is split up across multiple pages. Each page is broken down into two parts:

* A bitmap-encoded sequence of booleans, where 1 indicates a row has a value and 0 indicates the row is NULL, and
* the encoded sequence of non-NULL values, whose encoding is determined by the column options.

The sequence of non-NULL values is then optionally compressed. This commit also includes initial support for reading these columns, starting with internal-only helper utilities for unit tests.
parent 672f91c3f9
commit f2fc0c26c2
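For orientation before the diff: each page this commit produces is framed as a uvarint length prefix, the uncompressed presence bitmap, then the values region (see the PageData doc comment below). A minimal sketch of that framing, standard library only; framePage is a hypothetical helper, not part of the diff:

package main

import (
	"encoding/binary"
	"fmt"
)

// framePage lays out a page body as the diff below describes: a uvarint
// holding the presence bitmap's length, then the uncompressed bitmap itself,
// then the encoded (and optionally compressed) values.
func framePage(bitmap, values []byte) []byte {
	out := binary.AppendUvarint(nil, uint64(len(bitmap)))
	out = append(out, bitmap...)
	return append(out, values...)
}

func main() {
	page := framePage([]byte{0b1011}, []byte("encoded-values"))
	fmt.Printf("% x\n", page) // 01 0b 65 6e ...
}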
@ -0,0 +1,25 @@
package dataset

import "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"

// Helper types.
type (
	// ColumnInfo describes a column.
	ColumnInfo struct {
		Name        string                    // Name of the column, if any.
		Type        datasetmd.ValueType       // Type of values in the column.
		Compression datasetmd.CompressionType // Compression used for the column.

		RowsCount        int // Total number of rows in the column.
		CompressedSize   int // Total size of all pages in the column after compression.
		UncompressedSize int // Total size of all pages in the column before compression.

		Statistics *datasetmd.Statistics // Optional statistics for the column.
	}
)

// MemColumn holds a set of pages of a common type.
type MemColumn struct {
	Info  ColumnInfo // Information about the column.
	Pages []*MemPage // The set of pages in the column.
}
@ -0,0 +1,173 @@
package dataset

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

// BuilderOptions configures common settings for building pages.
type BuilderOptions struct {
	// PageSizeHint is the soft limit for the size of the page. Builders try to
	// fill pages as close to this size as possible, but the actual size may be
	// slightly larger or smaller.
	PageSizeHint int

	// Value is the value type of data to write.
	Value datasetmd.ValueType

	// Encoding is the encoding algorithm to use for values.
	Encoding datasetmd.EncodingType

	// Compression is the compression algorithm to use for values.
	Compression datasetmd.CompressionType
}

// A ColumnBuilder builds a sequence of [Value] entries of a common type into a
// column. Values are accumulated into a buffer and then flushed into
// [MemPage]s once the size of data exceeds a configurable limit.
type ColumnBuilder struct {
	name string
	opts BuilderOptions

	rows int // Total number of rows in the column.

	pages   []*MemPage
	builder *pageBuilder
}

// NewColumnBuilder creates a new ColumnBuilder from the optional name and
// provided options. NewColumnBuilder returns an error if the options are
// invalid.
func NewColumnBuilder(name string, opts BuilderOptions) (*ColumnBuilder, error) {
	builder, err := newPageBuilder(opts)
	if err != nil {
		return nil, fmt.Errorf("creating page builder: %w", err)
	}

	return &ColumnBuilder{
		name: name,
		opts: opts,

		builder: builder,
	}, nil
}

// Append adds a new value into cb with the given zero-indexed row number. If
// the row number is higher than the current number of rows in cb, null values
// are added up to the new row.
//
// Append returns an error if the row number is out-of-order.
func (cb *ColumnBuilder) Append(row int, value Value) error {
	if row < cb.rows {
		return fmt.Errorf("row %d is older than current row %d", row, cb.rows)
	}

	// We give two attempts to append the data to the buffer; if the buffer is
	// full, we cut a page and then append to the newly reset buffer.
	//
	// The second iteration should never fail, as the buffer will always be
	// empty then.
	for range 2 {
		if cb.append(row, value) {
			cb.rows = row + 1
			return nil
		}

		cb.flushPage()
	}

	panic("ColumnBuilder.Append: failed to append value to fresh buffer")
}

// Backfill adds NULLs into cb up to (but not including) the provided row
// number. If values exist up to the provided row number, Backfill does
// nothing.
func (cb *ColumnBuilder) Backfill(row int) {
	// We give two attempts to append the data to the buffer; if the buffer is
	// full, we cut a page and then append again to the newly reset buffer.
	//
	// The second iteration should never fail, as the buffer will always be
	// empty.
	for range 2 {
		if cb.backfill(row) {
			return
		}
		cb.flushPage()
	}

	panic("ColumnBuilder.Backfill: failed to backfill buffer")
}

func (cb *ColumnBuilder) backfill(row int) bool {
	for row > cb.rows {
		if !cb.builder.AppendNull() {
			return false
		}
		cb.rows++
	}

	return true
}

func (cb *ColumnBuilder) append(row int, value Value) bool {
	// Backfill up to row.
	if !cb.backfill(row) {
		return false
	}
	return cb.builder.Append(value)
}

// Flush converts data in cb into a [MemColumn]. Afterwards, cb is reset to a
// fresh state and can be reused.
func (cb *ColumnBuilder) Flush() (*MemColumn, error) {
	cb.flushPage()

	info := ColumnInfo{
		Name: cb.name,
		Type: cb.opts.Value,

		Compression: cb.opts.Compression,
	}

	// TODO(rfratto): Should we compute column-wide statistics if they're
	// available in pages?
	//
	// That would potentially work for min/max values, but not for count
	// distinct, unless we had a way to pass sketches around.

	for _, page := range cb.pages {
		info.RowsCount += page.Info.RowCount
		info.CompressedSize += page.Info.CompressedSize
		info.UncompressedSize += page.Info.UncompressedSize
	}

	column := &MemColumn{
		Info:  info,
		Pages: cb.pages,
	}

	cb.Reset()
	return column, nil
}

func (cb *ColumnBuilder) flushPage() {
	if cb.builder.Rows() == 0 {
		return
	}

	page, err := cb.builder.Flush()
	if err != nil {
		// Flush should only return an error when it's empty, which we already
		// ensure it's not in the lines above.
		panic(fmt.Sprintf("failed to flush page: %s", err))
	}
	cb.pages = append(cb.pages, page)
}

// Reset clears all data in cb and resets it to a fresh state.
func (cb *ColumnBuilder) Reset() {
	cb.rows = 0
	cb.pages = nil
	cb.builder.Reset()
}
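To make the Append/Backfill contract above concrete, here is a short in-package sketch (test-style, in the spirit of the tests later in this commit; fmt and datasetmd imports assumed) of how row-number gaps turn into NULLs:

func ExampleColumnBuilder_sparseRows() {
	cb, _ := NewColumnBuilder("example", BuilderOptions{
		PageSizeHint: 1024,
		Value:        datasetmd.VALUE_TYPE_STRING,
		Encoding:     datasetmd.ENCODING_TYPE_PLAIN,
		Compression:  datasetmd.COMPRESSION_TYPE_NONE,
	})

	_ = cb.Append(0, StringValue("first"))
	_ = cb.Append(5, StringValue("sixth")) // rows 1-4 are backfilled as NULL
	cb.Backfill(10)                        // rows 6-9 become NULL; row 10 itself is not created

	col, _ := cb.Flush()
	fmt.Println(col.Info.RowsCount)
	// Output: 10
}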
@ -0,0 +1,20 @@
package dataset

import "github.com/grafana/loki/v3/pkg/dataobj/internal/result"

func iterMemColumn(col *MemColumn) result.Seq[Value] {
	return result.Iter(func(yield func(Value) bool) error {
		for _, page := range col.Pages {
			for result := range iterMemPage(page, col.Info.Type, col.Info.Compression) {
				val, err := result.Value()
				if err != nil {
					return err
				} else if !yield(val) {
					return nil
				}
			}
		}

		return nil
	})
}
@ -0,0 +1,61 @@
package dataset

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

func TestColumnBuilder_ReadWrite(t *testing.T) {
	in := []string{
		"hello, world!",
		"",
		"this is a test of the emergency broadcast system",
		"this is only a test",
		"if this were a real emergency, you would be instructed to panic",
		"but it's not, so don't",
		"",
		"this concludes the test",
		"thank you for your cooperation",
		"goodbye",
	}

	opts := BuilderOptions{
		// Set the size hint to 0 so each page holds exactly one value.
		PageSizeHint: 0,
		Value:        datasetmd.VALUE_TYPE_STRING,
		Compression:  datasetmd.COMPRESSION_TYPE_ZSTD,
		Encoding:     datasetmd.ENCODING_TYPE_PLAIN,
	}
	b, err := NewColumnBuilder("", opts)
	require.NoError(t, err)

	for i, s := range in {
		require.NoError(t, b.Append(i, StringValue(s)))
	}

	col, err := b.Flush()
	require.NoError(t, err)
	require.Equal(t, datasetmd.VALUE_TYPE_STRING, col.Info.Type)
	require.Greater(t, len(col.Pages), 1)

	t.Log("Uncompressed size: ", col.Info.UncompressedSize)
	t.Log("Compressed size: ", col.Info.CompressedSize)
	t.Log("Pages: ", len(col.Pages))

	var actual []string
	for result := range iterMemColumn(col) {
		val, err := result.Value()
		require.NoError(t, err)

		if val.IsNil() || val.IsZero() {
			actual = append(actual, "")
		} else {
			require.Equal(t, datasetmd.VALUE_TYPE_STRING, val.Type())
			actual = append(actual, val.String())
		}
	}
	require.Equal(t, in, actual)
}
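One subtlety the test above works around: because pageBuilder (below) treats zero values as NULL, empty strings are stored as absent rows and decode back as nil values. A tiny hypothetical helper, mirroring the normalization the test does inline:

// stringOrEmpty maps NULL (and therefore zero) values back to the empty
// string, as TestColumnBuilder_ReadWrite does when rebuilding its input.
func stringOrEmpty(v Value) string {
	if v.IsNil() || v.IsZero() {
		return ""
	}
	return v.String()
}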
@ -0,0 +1,99 @@
package dataset

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"io"

	"github.com/golang/snappy"
	"github.com/klauspost/compress/zstd"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

// Helper types.
type (
	// PageData holds the raw data for a page. Data is formatted as:
	//
	//	<uvarint(presence-bitmap-size)> <presence-bitmap> <values-data>
	//
	// The presence-bitmap is a bitmap-encoded sequence of booleans, where
	// values describe which rows are present (1) or nil (0). The presence
	// bitmap is always stored uncompressed.
	//
	// values-data is then the encoded and optionally compressed sequence of
	// non-NULL values.
	PageData []byte

	// PageInfo describes a page.
	PageInfo struct {
		UncompressedSize int    // UncompressedSize is the size of a page before compression.
		CompressedSize   int    // CompressedSize is the size of a page after compression.
		CRC32            uint32 // CRC32 checksum of the page after encoding and compression.
		RowCount         int    // RowCount is the number of rows in the page, including NULLs.

		Encoding datasetmd.EncodingType // Encoding used for values in the page.
		Stats    *datasetmd.Statistics  // Optional statistics for the page.
	}
)

// MemPage holds an encoded (and optionally compressed) sequence of [Value]
// entries of a common type. Use [ColumnBuilder] to construct sets of pages.
type MemPage struct {
	Info PageInfo // Information about the page.
	Data PageData // Data for the page.
}

var checksumTable = crc32.MakeTable(crc32.Castagnoli)

// reader returns a reader for decompressed page data. reader returns an error
// if the CRC32 fails to validate.
func (p *MemPage) reader(compression datasetmd.CompressionType) (presence io.Reader, values io.ReadCloser, err error) {
	if actual := crc32.Checksum(p.Data, checksumTable); p.Info.CRC32 != actual {
		return nil, nil, fmt.Errorf("invalid CRC32 checksum %x, expected %x", actual, p.Info.CRC32)
	}

	bitmapSize, n := binary.Uvarint(p.Data)
	if n <= 0 {
		return nil, nil, fmt.Errorf("malformed page: failed to read presence bitmap size")
	}

	var (
		bitmapReader         = bytes.NewReader(p.Data[n : n+int(bitmapSize)])
		compressedDataReader = bytes.NewReader(p.Data[n+int(bitmapSize):])
	)

	switch compression {
	case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE:
		return bitmapReader, io.NopCloser(compressedDataReader), nil

	case datasetmd.COMPRESSION_TYPE_SNAPPY:
		sr := snappy.NewReader(compressedDataReader)
		return bitmapReader, io.NopCloser(sr), nil

	case datasetmd.COMPRESSION_TYPE_ZSTD:
		zr, err := zstd.NewReader(compressedDataReader)
		if err != nil {
			return nil, nil, fmt.Errorf("opening zstd reader: %w", err)
		}
		return bitmapReader, newZstdReader(zr), nil
	}

	panic(fmt.Sprintf("dataset.MemPage.reader: unknown compression type %q", compression.String()))
}

// zstdReader implements [io.ReadCloser] for a [zstd.Decoder].
type zstdReader struct{ *zstd.Decoder }

// newZstdReader returns a new [io.ReadCloser] for a [zstd.Decoder].
func newZstdReader(dec *zstd.Decoder) io.ReadCloser {
	return &zstdReader{Decoder: dec}
}

// Close implements [io.Closer].
func (r *zstdReader) Close() error {
	r.Decoder.Close()
	return nil
}
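The reader above is the supported way to open a page, but the framing is simple enough to split by hand. A minimal sketch (splitPageData is a hypothetical helper; it skips the CRC check that MemPage.reader performs, and assumes the encoding/binary and errors imports):

// splitPageData separates raw PageData into its presence bitmap and values
// regions, following the uvarint length prefix described in the PageData
// doc comment.
func splitPageData(data PageData) (bitmap, values []byte, err error) {
	size, n := binary.Uvarint(data)
	if n <= 0 {
		return nil, nil, errors.New("malformed page: bad presence bitmap size")
	}
	if uint64(len(data)-n) < size {
		return nil, nil, errors.New("malformed page: bitmap size exceeds page data")
	}
	return data[n : n+int(size)], data[n+int(size):], nil
}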
@ -0,0 +1,240 @@
package dataset

import (
	"bytes"
	"fmt"
	"hash/crc32"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
)

// pageBuilder accumulates sequences of [Value] in memory until reaching a
// configurable size limit. A [MemPage] can then be created from a pageBuilder
// by calling [pageBuilder.Flush].
type pageBuilder struct {
	// Each pageBuilder writes two sets of data.
	//
	// The first set of data is a presence bitmap which tells readers which
	// rows are present. We use 1 to indicate presence and 0 to indicate
	// absence (NULL). This bitmap always uses bitmap encoding regardless of
	// the encoding type used for values.
	//
	// The second set of data is the encoded set of non-NULL values. As an
	// optimization, the zero value is treated as NULL.
	//
	// The two sets of data are accumulated into separate buffers, with the
	// presence bitmap being written uncompressed and the values being written
	// with the configured compression type, if any.
	//
	// To orchestrate building two sets of data, we have a few components:
	//
	//   * The final buffers which hold encoded and potentially compressed data.
	//   * The writer performing compression for values.
	//   * The encoders that write values.

	opts BuilderOptions

	presenceBuffer *bytes.Buffer // presenceBuffer holds the encoded presence bitmap.
	valuesBuffer   *bytes.Buffer // valuesBuffer holds encoded and optionally compressed values.

	valuesWriter *compressWriter // Compresses data and writes to valuesBuffer.

	presenceEnc *bitmapEncoder
	valuesEnc   valueEncoder

	rows int // Number of rows appended to the builder.
}

// newPageBuilder creates a new pageBuilder that stores a sequence of [Value]s.
// newPageBuilder returns an error if there is no encoder available for the
// combination of opts.Value and opts.Encoding.
func newPageBuilder(opts BuilderOptions) (*pageBuilder, error) {
	var (
		presenceBuffer = bytes.NewBuffer(nil)
		valuesBuffer   = bytes.NewBuffer(make([]byte, 0, opts.PageSizeHint))

		valuesWriter = newCompressWriter(valuesBuffer, opts.Compression)
	)

	presenceEnc := newBitmapEncoder(presenceBuffer)
	valuesEnc, ok := newValueEncoder(opts.Value, opts.Encoding, valuesWriter)
	if !ok {
		return nil, fmt.Errorf("no encoder available for %s/%s", opts.Value, opts.Encoding)
	}

	return &pageBuilder{
		opts: opts,

		presenceBuffer: presenceBuffer,
		valuesBuffer:   valuesBuffer,

		valuesWriter: valuesWriter,

		presenceEnc: presenceEnc,
		valuesEnc:   valuesEnc,
	}, nil
}

// Append appends value into the pageBuilder. Append returns true if the data
// was appended; false if the pageBuilder is full.
func (b *pageBuilder) Append(value Value) bool {
	if value.IsNil() || value.IsZero() {
		return b.AppendNull()
	}

	// We can't accurately know whether adding value would tip us over the page
	// size: we don't know the current state of the encoders and we don't know
	// for sure how much space value will fill.
	//
	// We use a rough estimate which will tend to overshoot the page size,
	// making sure we rarely go over.
	if sz := b.estimatedSize(); sz > 0 && sz+valueSize(value) > b.opts.PageSizeHint {
		return false
	}

	// The following calls won't fail; they only return errors when the
	// underlying writers fail, which ours cannot.
	if err := b.presenceEnc.Encode(Uint64Value(1)); err != nil {
		panic(fmt.Sprintf("pageBuilder.Append: encoding presence bitmap entry: %v", err))
	}
	if err := b.valuesEnc.Encode(value); err != nil {
		panic(fmt.Sprintf("pageBuilder.Append: encoding value: %v", err))
	}

	b.rows++
	return true
}

// AppendNull appends a NULL value to the pageBuilder. AppendNull returns true
// if the NULL was appended, or false if the pageBuilder is full.
func (b *pageBuilder) AppendNull() bool {
	// See comment in Append for why we can only estimate the cost of appending
	// a value.
	//
	// Here we assume appending a NULL costs one byte, but in reality most
	// NULLs have no cost depending on the state of our bitmap encoder.
	if sz := b.estimatedSize(); sz > 0 && sz+1 > b.opts.PageSizeHint {
		return false
	}

	// The following call won't fail; it only returns an error when the
	// underlying writer fails, which ours cannot.
	if err := b.presenceEnc.Encode(Uint64Value(0)); err != nil {
		panic(fmt.Sprintf("pageBuilder.AppendNull: encoding presence bitmap entry: %v", err))
	}

	b.rows++
	return true
}

func valueSize(v Value) int {
	switch v.Type() {
	case datasetmd.VALUE_TYPE_INT64:
		// Assuming that int64s are written as varints.
		return streamio.VarintSize(v.Int64())

	case datasetmd.VALUE_TYPE_UINT64:
		// Assuming that uint64s are written as uvarints.
		return streamio.UvarintSize(v.Uint64())

	case datasetmd.VALUE_TYPE_STRING:
		// Assuming that strings are PLAIN encoded as a uvarint length prefix
		// followed by the raw bytes.
		str := v.String()
		return streamio.UvarintSize(uint64(len(str))) + len(str)
	}

	return 0
}

// estimatedSize returns the estimated uncompressed size of the builder in
// bytes.
func (b *pageBuilder) estimatedSize() int {
	// This estimate doesn't account for any values in encoders which haven't
	// been flushed yet. However, encoder buffers are usually small enough that
	// we wouldn't massively overshoot our estimate.
	return b.presenceBuffer.Len() + b.valuesWriter.BytesWritten()
}

// Rows returns the number of rows appended to the pageBuilder.
func (b *pageBuilder) Rows() int { return b.rows }

// Flush converts data in pageBuilder into a [MemPage], and returns it.
// Afterwards, pageBuilder is reset to a fresh state and can be reused. Flush
// returns an error if the pageBuilder is empty.
//
// To avoid computing useless stats, the Stats field of the returned Page is
// unset. If stats are needed for a page, callers should compute them by
// iterating over the returned Page.
func (b *pageBuilder) Flush() (*MemPage, error) {
	if b.rows == 0 {
		return nil, fmt.Errorf("no data to flush")
	}

	// Before we can build the page we need to finish flushing our encoders
	// and writers.
	if err := b.presenceEnc.Flush(); err != nil {
		return nil, fmt.Errorf("flushing presence encoder: %w", err)
	} else if err := b.valuesEnc.Flush(); err != nil {
		return nil, fmt.Errorf("flushing values encoder: %w", err)
	} else if err := b.valuesWriter.Flush(); err != nil {
		return nil, fmt.Errorf("flushing values writer: %w", err)
	}

	// The final data of our page is the combination of the presence bitmap and
	// the values. To denote when one ends and the other begins, we prepend the
	// data with the size of the presence bitmap as a uvarint. See the doc
	// comment of [PageData] for more information.
	var (
		headerSize   = streamio.UvarintSize(uint64(b.presenceBuffer.Len()))
		presenceSize = b.presenceBuffer.Len()
		valuesSize   = b.valuesBuffer.Len()

		finalData = bytes.NewBuffer(make([]byte, 0, headerSize+presenceSize+valuesSize))
	)

	if err := streamio.WriteUvarint(finalData, uint64(b.presenceBuffer.Len())); err != nil {
		return nil, fmt.Errorf("writing presence buffer size: %w", err)
	} else if _, err := b.presenceBuffer.WriteTo(finalData); err != nil {
		return nil, fmt.Errorf("writing presence buffer: %w", err)
	} else if _, err := b.valuesBuffer.WriteTo(finalData); err != nil {
		return nil, fmt.Errorf("writing values buffer: %w", err)
	}

	checksum := crc32.Checksum(finalData.Bytes(), checksumTable)

	page := MemPage{
		Info: PageInfo{
			UncompressedSize: headerSize + presenceSize + b.valuesWriter.BytesWritten(),
			CompressedSize:   finalData.Len(),
			CRC32:            checksum,
			RowCount:         b.rows,

			Encoding: b.opts.Encoding,

			// TODO(rfratto): At the moment we don't compute stats because
			// they're not going to be valuable in every scenario: the min/max
			// values for log lines is less useful compared to the min/max
			// values for timestamps.
			//
			// In the future, we may wish to add more options to pageBuilder
			// to tell it to compute a subset of stats to avoid needing a
			// second iteration over the page to compute them.
			Stats: nil,
		},

		Data: finalData.Bytes(),
	}

	b.Reset() // Reset state before returning.
	return &page, nil
}

// Reset resets the pageBuilder to a fresh state, allowing it to be reused.
func (b *pageBuilder) Reset() {
	b.presenceBuffer.Reset()
	b.valuesBuffer.Reset()
	b.valuesWriter.Reset(b.valuesBuffer)
	b.presenceEnc.Reset(b.presenceBuffer)
	b.valuesEnc.Reset(b.valuesWriter)
	b.rows = 0
}
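Since page cuts are driven entirely by these estimates, it helps to have a feel for what valueSize charges; an in-package sketch (fmt import assumed), under the assumption that streamio's varint sizes follow encoding/binary's rules, including zig-zag encoding for signed varints:

func Example_valueSizeEstimates() {
	fmt.Println(valueSize(Uint64Value(1)))     // 1 (uvarint)
	fmt.Println(valueSize(Uint64Value(300)))   // 2 (uvarint)
	fmt.Println(valueSize(Int64Value(300)))    // 2 (zig-zag varint of 600)
	fmt.Println(valueSize(StringValue("abc"))) // 4 (1-byte length prefix + 3 bytes)
}

Note that AppendNull charges a flat 1 byte per NULL, even though long runs of NULLs typically cost far less once bitmap-encoded, so pages of sparse columns tend to come in under the hint.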
@ -0,0 +1,123 @@
package dataset

import (
	"bufio"
	"fmt"
	"io"

	"github.com/golang/snappy"
	"github.com/klauspost/compress/zstd"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
)

// A compressWriter is a [streamio.Writer] that compresses data passed to it.
type compressWriter struct {
	// To be able to implement [io.ByteWriter], we always write directly to
	// buf, which then flushes to w once it's full.

	w   io.WriteCloser // Compressing writer.
	buf *bufio.Writer  // Buffered writer in front of w to be able to call WriteByte.

	compression datasetmd.CompressionType // Compression type being used.
	rawBytes    int                       // Number of uncompressed bytes written.
}

var _ streamio.Writer = (*compressWriter)(nil)

func newCompressWriter(w io.Writer, ty datasetmd.CompressionType) *compressWriter {
	c := compressWriter{compression: ty}
	c.Reset(w)
	return &c
}

// Write writes p to c.
func (c *compressWriter) Write(p []byte) (n int, err error) {
	n, err = c.buf.Write(p)
	c.rawBytes += n
	return
}

// WriteByte writes a single byte to c.
func (c *compressWriter) WriteByte(b byte) error {
	if err := c.buf.WriteByte(b); err != nil {
		return err
	}
	c.rawBytes++
	return nil
}

// Flush compresses any pending uncompressed data in the buffer.
func (c *compressWriter) Flush() error {
	// Flush our buffer first so c.w is up to date.
	if err := c.buf.Flush(); err != nil {
		return fmt.Errorf("flushing buffer: %w", err)
	}

	// c.w may not support Flush (such as when using no compression), so we
	// check first.
	if f, ok := c.w.(interface{ Flush() error }); ok {
		if err := f.Flush(); err != nil {
			return fmt.Errorf("flushing compressing writer: %w", err)
		}
	}

	return nil
}

// Reset discards the writer's state and switches the compressor to write to w.
// This permits reusing a compressWriter rather than allocating a new one.
func (c *compressWriter) Reset(w io.Writer) {
	if resetter, ok := c.w.(interface{ Reset(io.Writer) }); ok {
		resetter.Reset(w)
	} else {
		// c.w is unset or doesn't support Reset; build a new writer.
		var compressedWriter io.WriteCloser

		switch c.compression {
		case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE:
			compressedWriter = nopCloseWriter{w}

		case datasetmd.COMPRESSION_TYPE_SNAPPY:
			compressedWriter = snappy.NewBufferedWriter(w)

		case datasetmd.COMPRESSION_TYPE_ZSTD:
			zw, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
			if err != nil {
				panic(fmt.Sprintf("compressWriter.Reset: creating zstd writer: %v", err))
			}
			compressedWriter = zw

		default:
			panic(fmt.Sprintf("compressWriter.Reset: unknown compression type %v", c.compression))
		}

		c.w = compressedWriter
	}

	if c.buf != nil {
		c.buf.Reset(c.w)
	} else {
		c.buf = bufio.NewWriter(c.w)
	}
	c.rawBytes = 0
}

// BytesWritten returns the number of uncompressed bytes written to c.
func (c *compressWriter) BytesWritten() int { return c.rawBytes }

// Close flushes and then closes c.
func (c *compressWriter) Close() error {
	if err := c.Flush(); err != nil {
		return err
	}
	return c.w.Close()
}

type nopCloseWriter struct{ w io.Writer }

func (w nopCloseWriter) Write(p []byte) (n int, err error) { return w.w.Write(p) }
func (w nopCloseWriter) Close() error                      { return nil }
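The bufio layer above exists so compressWriter can satisfy [io.ByteWriter], which streaming varint encoders want. The classic uvarint loop below (standard LEB128, standard library only, not part of the diff) shows why byte-at-a-time writes are the natural unit:

// writeUvarint emits v in LEB128 form, one byte at a time, to any
// io.ByteWriter — e.g. a *bufio.Writer fronting a compressor.
func writeUvarint(w io.ByteWriter, v uint64) error {
	for v >= 0x80 {
		if err := w.WriteByte(byte(v) | 0x80); err != nil {
			return err
		}
		v >>= 7
	}
	return w.WriteByte(byte(v))
}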
@ -0,0 +1,53 @@
package dataset

import (
	"bufio"
	"errors"
	"fmt"
	"io"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
)

func iterMemPage(p *MemPage, valueType datasetmd.ValueType, compressionType datasetmd.CompressionType) result.Seq[Value] {
	return result.Iter(func(yield func(Value) bool) error {
		presenceReader, valuesReader, err := p.reader(compressionType)
		if err != nil {
			return fmt.Errorf("opening page for reading: %w", err)
		}
		defer valuesReader.Close()

		presenceDec := newBitmapDecoder(bufio.NewReader(presenceReader))
		valuesDec, ok := newValueDecoder(valueType, p.Info.Encoding, bufio.NewReader(valuesReader))
		if !ok {
			return fmt.Errorf("no decoder available for %s/%s", valueType, p.Info.Encoding)
		}

		for {
			var value Value

			present, err := presenceDec.Decode()
			if errors.Is(err, io.EOF) {
				return nil
			} else if err != nil {
				return fmt.Errorf("decoding presence bitmap: %w", err)
			} else if present.Type() != datasetmd.VALUE_TYPE_UINT64 {
				return fmt.Errorf("unexpected presence type %s", present.Type())
			}

			// value is currently nil. If the presence bitmap says our row has
			// a value, we decode it into value.
			if present.Uint64() == 1 {
				value, err = valuesDec.Decode()
				if err != nil {
					return fmt.Errorf("decoding value: %w", err)
				}
			}

			if !yield(value) {
				return nil
			}
		}
	})
}
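iterMemPage is push-style; when a caller needs to step two pages in lockstep (say, merging columns), the result package's Pull adapter (later in this commit) converts it. A hypothetical in-package sketch, assuming a page of plain-encoded strings compressed with ZSTD:

// drainPage reads every value out of a page via the pull-style adapter.
func drainPage(page *MemPage) ([]Value, error) {
	next, stop := result.Pull(iterMemPage(page, datasetmd.VALUE_TYPE_STRING, datasetmd.COMPRESSION_TYPE_ZSTD))
	defer stop()

	var vals []Value
	for {
		res, ok := next()
		if !ok {
			return vals, nil // page exhausted
		}
		val, err := res.Value()
		if err != nil {
			return vals, err
		}
		vals = append(vals, val)
	}
}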
@ -0,0 +1,83 @@
package dataset

import (
	"math/rand"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

func Test_pageBuilder_WriteRead(t *testing.T) {
	in := []string{
		"hello, world!",
		"",
		"this is a test of the emergency broadcast system",
		"this is only a test",
		"if this were a real emergency, you would be instructed to panic",
		"but it's not, so don't",
		"",
		"this concludes the test",
		"thank you for your cooperation",
		"goodbye",
	}

	opts := BuilderOptions{
		PageSizeHint: 1024,
		Value:        datasetmd.VALUE_TYPE_STRING,
		Compression:  datasetmd.COMPRESSION_TYPE_ZSTD,
		Encoding:     datasetmd.ENCODING_TYPE_PLAIN,
	}
	b, err := newPageBuilder(opts)
	require.NoError(t, err)

	for _, s := range in {
		require.True(t, b.Append(StringValue(s)))
	}

	page, err := b.Flush()
	require.NoError(t, err)

	t.Log("Uncompressed size: ", page.Info.UncompressedSize)
	t.Log("Compressed size: ", page.Info.CompressedSize)

	var actual []string
	for result := range iterMemPage(page, opts.Value, opts.Compression) {
		val, err := result.Value()
		require.NoError(t, err)

		if val.IsNil() || val.IsZero() {
			actual = append(actual, "")
		} else {
			require.Equal(t, datasetmd.VALUE_TYPE_STRING, val.Type())
			actual = append(actual, val.String())
		}
	}
	require.Equal(t, in, actual)
}

func Test_pageBuilder_Fill(t *testing.T) {
	opts := BuilderOptions{
		PageSizeHint: 1_500_000,
		Value:        datasetmd.VALUE_TYPE_INT64,
		Compression:  datasetmd.COMPRESSION_TYPE_NONE,
		Encoding:     datasetmd.ENCODING_TYPE_DELTA,
	}
	buf, err := newPageBuilder(opts)
	require.NoError(t, err)

	ts := time.Now().UTC()
	for buf.Append(Int64Value(ts.UnixNano())) {
		ts = ts.Add(time.Duration(rand.Intn(5000)) * time.Millisecond)
	}

	page, err := buf.Flush()
	require.NoError(t, err)
	require.Equal(t, page.Info.UncompressedSize, page.Info.CompressedSize)

	t.Log("Uncompressed size: ", page.Info.UncompressedSize)
	t.Log("Compressed size: ", page.Info.CompressedSize)
	t.Log("Row count: ", page.Info.RowCount)
}
@ -0,0 +1,101 @@
// Package result provides utilities for dealing with iterators that can fail
// during iteration.
//
// Result is useful to make it harder for callers to ignore errors. Using
// iter.Seq2[V, error] can make it easy to accidentally ignore errors:
//
//	func myIter() iter.Seq2[V, error] { ... }
//
//	func main() {
//		for v := range myIter() { /* errors are ignored! */ }
//	}
package result

import (
	"errors"
	"iter"
)

// Result is a type used for representing a result from an operation that can
// fail.
type Result[V any] struct {
	value V // Valid only if err is nil.
	err   error
}

// Value returns a successful result with the given value.
func Value[V any](v V) Result[V] {
	return Result[V]{value: v}
}

// Error returns a failed result with the given error.
func Error[V any](err error) Result[V] {
	return Result[V]{err: err}
}

// Value returns r's value and error.
func (r Result[V]) Value() (V, error) {
	return r.value, r.err
}

// MustValue returns r's value. If r is an error, MustValue panics.
func (r Result[V]) MustValue() V {
	if r.err != nil {
		panic(r.err)
	}
	return r.value
}

// Err returns r's error, if any.
func (r Result[V]) Err() error {
	return r.err
}

// Seq is an iterator over sequences of result values. When called as
// seq(yield), seq calls yield(r) for each value r in the sequence, stopping
// early if yield returns false.
//
// See the [iter] package for more information on iterators.
type Seq[V any] func(yield func(Result[V]) bool)

// Iter produces a new Seq[V] from a given function that can fail. Values
// passed to yield are wrapped in a call to [Value], while a non-nil error is
// wrapped in a call to [Error].
//
// Iter makes it easier to write failable iterators and removes the need to
// manually wrap values and errors into a [Result].
func Iter[V any](seq func(yield func(V) bool) error) Seq[V] {
	return func(yield func(Result[V]) bool) {
		err := seq(func(v V) bool { return yield(Value(v)) })
		if err != nil {
			yield(Error[V](err))
		}
	}
}

// Pull converts the "push-style" Result iterator sequence seq into a
// "pull-style" iterator accessed by the two functions next and stop.
//
// Pull is a wrapper around [iter.Pull].
func Pull[V any](seq Seq[V]) (next func() (Result[V], bool), stop func()) {
	iseq := iter.Seq[Result[V]](seq)
	return iter.Pull(iseq)
}

// Collect collects values from seq into a new slice and returns it. Any errors
// from seq are joined and returned as the second value.
func Collect[V any](seq Seq[V]) ([]V, error) {
	var (
		vals []V
		errs []error
	)
	for res := range seq {
		val, err := res.Value()
		if err != nil {
			errs = append(errs, err)
		} else {
			vals = append(vals, val)
		}
	}
	return vals, errors.Join(errs...)
}
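A small end-to-end sketch of the two halves of this package (countTo is a hypothetical iterator, not part of the diff): Iter wraps a failable push function, and Collect surfaces the error instead of letting range silently drop it:

package main

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
)

// countTo yields 1..n but simulates a failure after 3.
func countTo(n int) result.Seq[int] {
	return result.Iter(func(yield func(int) bool) error {
		for i := 1; i <= n; i++ {
			if i > 3 {
				return fmt.Errorf("simulated failure at %d", i)
			}
			if !yield(i) {
				return nil
			}
		}
		return nil
	})
}

func main() {
	vals, err := result.Collect(countTo(5))
	fmt.Println(vals, err) // [1 2 3] simulated failure at 4
}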