mirror of https://github.com/grafana/loki
chore(dataobj): Reintroduce sorting of the logs section (#15906)
parent 5e4df21e67
commit 948f5c5f3e
@ -0,0 +1,312 @@
package logs

import (
    "cmp"
    "context"
    "fmt"
    "slices"

    "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/result"
)

// A table is a collection of columns that form a logs section.
type table struct {
    StreamID  *tableColumn
    Timestamp *tableColumn
    Metadatas []*tableColumn
    Message   *tableColumn
}

type tableColumn struct {
    *dataset.MemColumn

    Type logsmd.ColumnType
}

var _ dataset.Dataset = (*table)(nil)

// ListColumns implements [dataset.Dataset].
func (t *table) ListColumns(_ context.Context) result.Seq[dataset.Column] {
    return result.Iter(func(yield func(dataset.Column) bool) error {
        if !yield(t.StreamID) {
            return nil
        }
        if !yield(t.Timestamp) {
            return nil
        }
        for _, metadata := range t.Metadatas {
            if !yield(metadata) {
                return nil
            }
        }
        if !yield(t.Message) {
            return nil
        }

        return nil
    })
}

// ListPages implements [dataset.Dataset].
func (t *table) ListPages(ctx context.Context, columns []dataset.Column) result.Seq[dataset.Pages] {
    return result.Iter(func(yield func(dataset.Pages) bool) error {
        for _, c := range columns {
            pages, err := result.Collect(c.ListPages(ctx))
            if err != nil {
                return err
            } else if !yield(dataset.Pages(pages)) {
                return nil
            }
        }

        return nil
    })
}

// ReadPages implements [dataset.Dataset].
func (t *table) ReadPages(ctx context.Context, pages []dataset.Page) result.Seq[dataset.PageData] {
    return result.Iter(func(yield func(dataset.PageData) bool) error {
        for _, p := range pages {
            data, err := p.ReadPage(ctx)
            if err != nil {
                return err
            } else if !yield(data) {
                return nil
            }
        }

        return nil
    })
}

// Size returns the total size of the table in bytes.
func (t *table) Size() int {
    var size int

    size += t.StreamID.ColumnInfo().CompressedSize
    size += t.Timestamp.ColumnInfo().CompressedSize
    for _, metadata := range t.Metadatas {
        size += metadata.ColumnInfo().CompressedSize
    }
    size += t.Message.ColumnInfo().CompressedSize

    return size
}

// A tableBuffer holds a set of column builders used for constructing tables.
// The zero value is ready for use.
type tableBuffer struct {
    streamID  *dataset.ColumnBuilder
    timestamp *dataset.ColumnBuilder

    metadatas      []*dataset.ColumnBuilder
    metadataLookup map[string]int                    // map of metadata key to index in metadatas
    usedMetadatas  map[*dataset.ColumnBuilder]string // metadata with its name.

    message *dataset.ColumnBuilder
}

// StreamID gets or creates a stream ID column for the buffer.
func (b *tableBuffer) StreamID(pageSize int) *dataset.ColumnBuilder {
    if b.streamID != nil {
        return b.streamID
    }

    col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{
        PageSizeHint: pageSize,
        Value:        datasetmd.VALUE_TYPE_INT64,
        Encoding:     datasetmd.ENCODING_TYPE_DELTA,
        Compression:  datasetmd.COMPRESSION_TYPE_NONE,
    })
    if err != nil {
        // We control the Value/Encoding tuple so this can't fail; if it does,
        // we're left in an unrecoverable state where nothing can be encoded
        // properly so we panic.
        panic(fmt.Sprintf("creating stream ID column: %v", err))
    }

    b.streamID = col
    return col
}

// Timestamp gets or creates a timestamp column for the buffer.
func (b *tableBuffer) Timestamp(pageSize int) *dataset.ColumnBuilder {
    if b.timestamp != nil {
        return b.timestamp
    }

    col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{
        PageSizeHint: pageSize,
        Value:        datasetmd.VALUE_TYPE_INT64,
        Encoding:     datasetmd.ENCODING_TYPE_DELTA,
        Compression:  datasetmd.COMPRESSION_TYPE_NONE,
    })
    if err != nil {
        // We control the Value/Encoding tuple so this can't fail; if it does,
        // we're left in an unrecoverable state where nothing can be encoded
        // properly so we panic.
        panic(fmt.Sprintf("creating timestamp column: %v", err))
    }

    b.timestamp = col
    return col
}

// Metadata gets or creates a metadata column for the buffer. To remove created
// metadata columns, call [tableBuffer.CleanupMetadatas].
func (b *tableBuffer) Metadata(key string, pageSize int, compressionOpts dataset.CompressionOptions) *dataset.ColumnBuilder {
    if b.usedMetadatas == nil {
        b.usedMetadatas = make(map[*dataset.ColumnBuilder]string)
    }

    index, ok := b.metadataLookup[key]
    if ok {
        builder := b.metadatas[index]
        b.usedMetadatas[builder] = key
        return builder
    }

    col, err := dataset.NewColumnBuilder(key, dataset.BuilderOptions{
        PageSizeHint:       pageSize,
        Value:              datasetmd.VALUE_TYPE_STRING,
        Encoding:           datasetmd.ENCODING_TYPE_PLAIN,
        Compression:        datasetmd.COMPRESSION_TYPE_ZSTD,
        CompressionOptions: compressionOpts,
    })
    if err != nil {
        // We control the Value/Encoding tuple so this can't fail; if it does,
        // we're left in an unrecoverable state where nothing can be encoded
        // properly so we panic.
        panic(fmt.Sprintf("creating metadata column: %v", err))
    }

    b.metadatas = append(b.metadatas, col)

    if b.metadataLookup == nil {
        b.metadataLookup = make(map[string]int)
    }
    b.metadataLookup[key] = len(b.metadatas) - 1
    b.usedMetadatas[col] = key
    return col
}

// Message gets or creates a message column for the buffer.
func (b *tableBuffer) Message(pageSize int, compressionOpts dataset.CompressionOptions) *dataset.ColumnBuilder {
    if b.message != nil {
        return b.message
    }

    col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{
        PageSizeHint:       pageSize,
        Value:              datasetmd.VALUE_TYPE_STRING,
        Encoding:           datasetmd.ENCODING_TYPE_PLAIN,
        Compression:        datasetmd.COMPRESSION_TYPE_ZSTD,
        CompressionOptions: compressionOpts,
    })
    if err != nil {
        // We control the Value/Encoding tuple so this can't fail; if it does,
        // we're left in an unrecoverable state where nothing can be encoded
        // properly so we panic.
        panic(fmt.Sprintf("creating messages column: %v", err))
    }

    b.message = col
    return col
}

// Reset resets the buffer to its initial state.
func (b *tableBuffer) Reset() {
    if b.streamID != nil {
        b.streamID.Reset()
    }
    if b.timestamp != nil {
        b.timestamp.Reset()
    }
    if b.message != nil {
        b.message.Reset()
    }
    for _, md := range b.metadatas {
        md.Reset()
    }

    // We don't want to keep all metadata columns around forever, so we only
    // retain the columns that were used in the last Flush.
    var (
        newMetadatas      = make([]*dataset.ColumnBuilder, 0, len(b.metadatas))
        newMetadataLookup = make(map[string]int, len(b.metadatas))
    )
    for _, md := range b.metadatas {
        if b.usedMetadatas == nil {
            break // Nothing was used.
        }

        key, used := b.usedMetadatas[md]
        if !used {
            continue
        }

        newMetadatas = append(newMetadatas, md)
        newMetadataLookup[key] = len(newMetadatas) - 1
    }
    b.metadatas = newMetadatas
    b.metadataLookup = newMetadataLookup
    clear(b.usedMetadatas) // Reset the used cache for next time.
}

// Flush flushes the buffer into a table. Flush returns an error if the stream,
// timestamp, or message column was never appended to.
//
// Only metadata columns that were appended to since the last flush are
// included in the table.
func (b *tableBuffer) Flush() (*table, error) {
    defer b.Reset()

    if b.streamID == nil {
        return nil, fmt.Errorf("no stream column")
    } else if b.timestamp == nil {
        return nil, fmt.Errorf("no timestamp column")
    } else if b.message == nil {
        return nil, fmt.Errorf("no message column")
    }

    var (
        // Flush never returns an error so we ignore it here to keep the code simple.
        //
        // TODO(rfratto): remove error return from Flush to clean up code.
        streamID, _  = b.streamID.Flush()
        timestamp, _ = b.timestamp.Flush()
        messages, _  = b.message.Flush()

        metadatas = make([]*tableColumn, 0, len(b.metadatas))
    )

    for _, metadataBuilder := range b.metadatas {
        if b.usedMetadatas == nil {
            continue
        } else if _, ok := b.usedMetadatas[metadataBuilder]; !ok {
            continue
        }

        // Each metadata column may have a different number of rows compared to
        // other columns. Since adding NULLs isn't free, we don't call Backfill
        // here.
        metadata, _ := metadataBuilder.Flush()
        metadatas = append(metadatas, &tableColumn{metadata, logsmd.COLUMN_TYPE_METADATA})
    }

    // Sort metadata columns by name for consistency.
    slices.SortFunc(metadatas, func(a, b *tableColumn) int {
        return cmp.Compare(a.ColumnInfo().Name, b.ColumnInfo().Name)
    })

    return &table{
        StreamID:  &tableColumn{streamID, logsmd.COLUMN_TYPE_STREAM_ID},
        Timestamp: &tableColumn{timestamp, logsmd.COLUMN_TYPE_TIMESTAMP},
        Metadatas: metadatas,
        Message:   &tableColumn{messages, logsmd.COLUMN_TYPE_MESSAGE},
    }, nil
}
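For illustration only, a minimal sketch (not part of this commit) of reading rows back out of a flushed table through its dataset.Dataset implementation. It assumes a caller inside package logs with imports of "context", "fmt", and the dataset/result packages used above.

// dumpTable is a hypothetical helper that prints every row of a table.
func dumpTable(tbl *table) error {
    columns, err := result.Collect(tbl.ListColumns(context.Background()))
    if err != nil {
        return err
    }

    // dataset.Iter yields one Row per record, with values in column order:
    // stream ID, timestamp, metadata columns, then the message.
    for res := range dataset.Iter(context.Background(), columns) {
        row, err := res.Value()
        if err != nil {
            return err
        }
        fmt.Println(row.Values)
    }
    return nil
}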
@ -0,0 +1,54 @@
package logs

import (
    "cmp"
    "slices"

    "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
)

// buildTable builds a table from the set of provided records. The records are
// sorted with [sortRecords] prior to building the table.
func buildTable(buf *tableBuffer, pageSize int, compressionOpts dataset.CompressionOptions, records []Record) *table {
    sortRecords(records)

    buf.Reset()

    var (
        streamIDBuilder  = buf.StreamID(pageSize)
        timestampBuilder = buf.Timestamp(pageSize)
        messageBuilder   = buf.Message(pageSize, compressionOpts)
    )

    for i, record := range records {
        // Append only fails if given out-of-order data, where the provided row
        // number is less than the previous row number. That can't happen here, so
        // to keep the code readable we ignore the error values.
        _ = streamIDBuilder.Append(i, dataset.Int64Value(record.StreamID))
        _ = timestampBuilder.Append(i, dataset.Int64Value(record.Timestamp.UnixNano()))
        _ = messageBuilder.Append(i, dataset.StringValue(record.Line))

        for _, md := range record.Metadata {
            metadataBuilder := buf.Metadata(md.Name, pageSize, compressionOpts)
            _ = metadataBuilder.Append(i, dataset.StringValue(md.Value))
        }
    }

    table, err := buf.Flush()
    if err != nil {
        // Unreachable; we always ensure every required column is created.
        panic(err)
    }
    return table
}

// sortRecords sorts the set of records by stream ID and timestamp.
func sortRecords(records []Record) {
    slices.SortFunc(records, func(a, b Record) int {
        if res := cmp.Compare(a.StreamID, b.StreamID); res != 0 {
            return res
        }
        return a.Timestamp.Compare(b.Timestamp)
    })
}
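For context, a minimal sketch of how buildTable might be called. The Record fields (StreamID, Timestamp, Line) follow the usage above; the caller itself, its values, and the "fmt"/"time" imports are assumptions for illustration, not part of this commit.

// exampleBuildTable is a hypothetical caller inside package logs.
func exampleBuildTable() {
    var buf tableBuffer

    records := []Record{
        {StreamID: 2, Timestamp: time.Unix(5, 0), Line: "later line"},
        {StreamID: 1, Timestamp: time.Unix(3, 0), Line: "earlier line"},
    }

    // buildTable sorts by (stream ID, timestamp) before flushing, so the
    // resulting table starts with stream 1's record.
    tbl := buildTable(&buf, 4096, dataset.CompressionOptions{}, records)
    fmt.Printf("table size: %d bytes\n", tbl.Size())
}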
@ -0,0 +1,151 @@
package logs

import (
    "cmp"
    "context"
    "fmt"
    "math"

    "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/result"
    "github.com/grafana/loki/v3/pkg/util/loser"
)

// mergeTables merges the provided sorted tables into a new single sorted table
// using k-way merge.
func mergeTables(buf *tableBuffer, pageSize int, compressionOpts dataset.CompressionOptions, tables []*table) (*table, error) {
    buf.Reset()

    var (
        streamIDBuilder  = buf.StreamID(pageSize)
        timestampBuilder = buf.Timestamp(pageSize)
        messageBuilder   = buf.Message(pageSize, compressionOpts)
    )

    var (
        tableSequences = make([]*tableSequence, 0, len(tables))
    )
    for _, t := range tables {
        dsetColumns, err := result.Collect(t.ListColumns(context.Background()))
        if err != nil {
            return nil, err
        }

        seq := dataset.Iter(context.Background(), dsetColumns)
        next, stop := result.Pull(seq)
        defer stop()

        tableSequences = append(tableSequences, &tableSequence{
            columns: dsetColumns,

            pull: next, stop: stop,
        })
    }

    maxValue := result.Value(dataset.Row{
        Index: math.MaxInt,
        Values: []dataset.Value{
            dataset.Int64Value(math.MaxInt64),
            dataset.Int64Value(math.MaxInt64),
        },
    })

    var rows int

    tree := loser.New(tableSequences, maxValue, tableSequenceValue, rowResultLess, tableSequenceStop)
    for tree.Next() {
        seq := tree.Winner()

        row, err := tableSequenceValue(seq).Value()
        if err != nil {
            return nil, err
        }

        for i, column := range seq.columns {
            // column is guaranteed to be a *tableColumn since we got it from *table.
            column := column.(*tableColumn)

            // dataset.Iter yields values in the same order as the provided columns.
            value := row.Values[i]

            switch column.Type {
            case logsmd.COLUMN_TYPE_STREAM_ID:
                _ = streamIDBuilder.Append(rows, value)
            case logsmd.COLUMN_TYPE_TIMESTAMP:
                _ = timestampBuilder.Append(rows, value)
            case logsmd.COLUMN_TYPE_METADATA:
                columnBuilder := buf.Metadata(column.Info.Name, pageSize, compressionOpts)
                _ = columnBuilder.Append(rows, value)
            case logsmd.COLUMN_TYPE_MESSAGE:
                _ = messageBuilder.Append(rows, value)
            default:
                return nil, fmt.Errorf("unknown column type %s", column.Type)
            }
        }

        rows++
    }

    return buf.Flush()
}

type tableSequence struct {
    curValue result.Result[dataset.Row]

    columns []dataset.Column

    pull func() (result.Result[dataset.Row], bool)
    stop func()
}

var _ loser.Sequence = (*tableSequence)(nil)

func (seq *tableSequence) Next() bool {
    val, ok := seq.pull()
    seq.curValue = val
    return ok
}

func tableSequenceValue(seq *tableSequence) result.Result[dataset.Row] { return seq.curValue }

func tableSequenceStop(seq *tableSequence) { seq.stop() }

func rowResultLess(a, b result.Result[dataset.Row]) bool {
    var (
        aRow, aErr = a.Value()
        bRow, bErr = b.Value()
    )

    // Put errors first so we return errors early.
    if aErr != nil {
        return true
    } else if bErr != nil {
        return false
    }

    return compareRows(aRow, bRow) < 0
}

// compareRows compares two rows by their first two columns. compareRows panics
// if a or b doesn't have at least two columns, if the first column isn't an
// int64-encoded stream ID, or if the second column isn't an int64-encoded
// timestamp.
func compareRows(a, b dataset.Row) int {
    // The first two columns of each row are *always* stream ID and timestamp.
    //
    // TODO(rfratto): Can we find a safer way of doing this?
    var (
        aStreamID = a.Values[0].Int64()
        bStreamID = b.Values[0].Int64()

        aTimestamp = a.Values[1].Int64()
        bTimestamp = b.Values[1].Int64()
    )

    if res := cmp.Compare(aStreamID, bStreamID); res != 0 {
        return res
    }
    return cmp.Compare(aTimestamp, bTimestamp)
}
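To make the ordering rule concrete, here is a simplified, self-contained sketch of the same k-way merge that mergeTables performs, written without the loser tree or dataset types. The entry struct and mergeSorted function are illustrative assumptions only; the real code delegates the "pick the smallest head" step to the loser package for efficiency.

package main

import (
    "cmp"
    "fmt"
)

type entry struct {
    streamID  int64
    timestamp int64
}

// less orders entries by stream ID, then timestamp — the same rule compareRows
// applies to the first two columns of each row.
func less(a, b entry) bool {
    if res := cmp.Compare(a.streamID, b.streamID); res != 0 {
        return res < 0
    }
    return a.timestamp < b.timestamp
}

// mergeSorted repeatedly takes the smallest head among the sorted inputs.
func mergeSorted(inputs [][]entry) []entry {
    var out []entry
    for {
        best := -1
        for i, in := range inputs {
            if len(in) == 0 {
                continue
            }
            if best == -1 || less(in[0], inputs[best][0]) {
                best = i
            }
        }
        if best == -1 {
            return out
        }
        out = append(out, inputs[best][0])
        inputs[best] = inputs[best][1:]
    }
}

func main() {
    merged := mergeSorted([][]entry{
        {{1, 1}, {3, 3}},
        {{1, 2}, {2, 1}},
    })
    fmt.Println(merged) // [{1 1} {1 2} {2 1} {3 3}]
}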
@ -0,0 +1,81 @@
package logs

import (
    "context"
    "strings"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
    "github.com/grafana/loki/v3/pkg/dataobj/internal/result"
)

func Test_table_metadataCleanup(t *testing.T) {
    var buf tableBuffer
    initBuffer(&buf)

    _ = buf.Metadata("foo", 1024, dataset.CompressionOptions{})
    _ = buf.Metadata("bar", 1024, dataset.CompressionOptions{})

    table, err := buf.Flush()
    require.NoError(t, err)
    require.Equal(t, 2, len(table.Metadatas))

    initBuffer(&buf)
    _ = buf.Metadata("bar", 1024, dataset.CompressionOptions{})

    table, err = buf.Flush()
    require.NoError(t, err)
    require.Equal(t, 1, len(table.Metadatas))
    require.Equal(t, "bar", table.Metadatas[0].Info.Name)
}

func initBuffer(buf *tableBuffer) {
    buf.StreamID(1024)
    buf.Timestamp(1024)
    buf.Message(1024, dataset.CompressionOptions{})
}

func Test_mergeTables(t *testing.T) {
    var buf tableBuffer

    var (
        tableA = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{
            {StreamID: 1, Timestamp: time.Unix(1, 0), Line: "hello"},
            {StreamID: 2, Timestamp: time.Unix(2, 0), Line: "are"},
            {StreamID: 3, Timestamp: time.Unix(3, 0), Line: "goodbye"},
        })

        tableB = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{
            {StreamID: 1, Timestamp: time.Unix(2, 0), Line: "world"},
            {StreamID: 3, Timestamp: time.Unix(1, 0), Line: "you"},
        })

        tableC = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{
            {StreamID: 2, Timestamp: time.Unix(1, 0), Line: "how"},
            {StreamID: 3, Timestamp: time.Unix(2, 0), Line: "doing?"},
        })
    )

    mergedTable, err := mergeTables(&buf, 1024, dataset.CompressionOptions{}, []*table{tableA, tableB, tableC})
    require.NoError(t, err)

    mergedColumns, err := result.Collect(mergedTable.ListColumns(context.Background()))
    require.NoError(t, err)

    var actual []string

    for result := range dataset.Iter(context.Background(), mergedColumns) {
        row, err := result.Value()
        require.NoError(t, err)
        require.Len(t, row.Values, 3)
        require.Equal(t, datasetmd.VALUE_TYPE_STRING, row.Values[2].Type())

        actual = append(actual, row.Values[2].String())
    }

    require.Equal(t, "hello world how are you doing? goodbye", strings.Join(actual, " "))
}
@ -0,0 +1,72 @@
package bufpool

import (
    "bytes"
    "math"
    "sync"
)

type bucket struct {
    size uint64
    pool sync.Pool
}

var buckets []*bucket

// Bucket sizes grow exponentially from 1 KiB to 64 GiB. The max boundary is
// picked arbitrarily.
const (
    bucketMin uint64 = 1024
    bucketMax uint64 = 1 << 36 /* 64 GiB */
)

func init() {
    nextBucket := bucketMin

    for {
        // Capture the size so New refers to the correct size per bucket.
        buckets = append(buckets, &bucket{
            size: nextBucket,
            pool: sync.Pool{
                New: func() any {
                    // We don't preallocate the buffer here; this helps a bucket's pool
                    // fill with buffers of varying sizes within that bucket.
                    //
                    // If we *did* preallocate the buffer, then any call to
                    // [bytes.Buffer.Grow] beyond the bucket size would immediately cause
                    // it to double in size, placing it in the next bucket.
                    return bytes.NewBuffer(nil)
                },
            },
        })

        // Exponentially grow the bucket size up to bucketMax.
        nextBucket *= 2
        if nextBucket > bucketMax {
            break
        }
    }

    // Catch-all for buffers bigger than bucketMax.
    buckets = append(buckets, &bucket{
        size: math.MaxUint64,
        pool: sync.Pool{
            New: func() any {
                return bytes.NewBuffer(nil)
            },
        },
    })
}

// findBucket returns the first bucket that is large enough to hold size.
func findBucket(size uint64) *bucket {
    for _, b := range buckets {
        if b.size >= size {
            return b
        }
    }

    // We shouldn't be able to reach this point, since the final bucket is sized
    // to hold anything; if we somehow do, return the last bucket anyway.
    return buckets[len(buckets)-1]
}
@ -0,0 +1,41 @@
// Package bufpool offers a pool of [*bytes.Buffer] objects that are placed
// into exponentially sized buckets.
//
// Bucketing prevents the memory cost of a pool from permanently increasing
// when a large buffer is placed into the pool.
package bufpool

import (
    "bytes"
)

// Get returns a buffer from the pool for the given size. Returned buffers are
// reset and ready for writes.
//
// The capacity of the returned buffer is guaranteed to be at least size.
func Get(size int) *bytes.Buffer {
    if size < 0 {
        size = 0
    }

    b := findBucket(uint64(size))

    buf := b.pool.Get().(*bytes.Buffer)
    buf.Reset()
    buf.Grow(size)
    return buf
}

// Put returns a buffer to the pool. The buffer is placed into an appropriate
// bucket based on its current capacity.
func Put(buf *bytes.Buffer) {
    if buf == nil {
        return
    }

    b := findBucket(uint64(buf.Cap()))
    if b == nil {
        return
    }
    b.pool.Put(buf)
}
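A hypothetical usage sketch for bufpool (not part of this commit): borrow a buffer sized for the data, use it, and return it so the bucket's pool can reuse the allocation. It assumes a caller in another package importing bufpool and "fmt".

// encodeWithPool is an illustrative caller of Get/Put.
func encodeWithPool(payload []byte) {
    buf := bufpool.Get(len(payload)) // capacity is at least len(payload)
    defer bufpool.Put(buf)           // return the buffer to its bucket when done

    buf.Write(payload)
    fmt.Printf("wrote %d bytes with capacity %d\n", buf.Len(), buf.Cap())
}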
@ -0,0 +1,36 @@
package bufpool

import (
    "fmt"
    "math"
    "testing"

    "github.com/stretchr/testify/require"
)

func Test_findBucket(t *testing.T) {
    tt := []struct {
        size   uint64
        expect uint64
    }{
        {size: 0, expect: 1024},
        {size: 512, expect: 1024},
        {size: 1024, expect: 1024},
        {size: 1025, expect: 2048},
        {size: (1 << 36), expect: (1 << 36)},
        {size: (1 << 37), expect: math.MaxUint64},
    }

    for _, tc := range tt {
        t.Run(fmt.Sprintf("size=%d", tc.size), func(t *testing.T) {
            got := findBucket(tc.size).size
            require.Equal(t, tc.expect, got)
        })
    }
}

func Test(t *testing.T) {
    buf := Get(1_500_000)
    require.NotNil(t, buf)
    require.Less(t, buf.Cap(), 2<<20, "buffer should not have grown to next bucket size")
}
@ -0,0 +1,11 @@
// Package sliceclear provides a way to clear and truncate the length of a
// slice.
package sliceclear

// Clear zeroes out all values in s and returns s[:0]. Clear allows memory of
// previous elements in the slice to be reclaimed by the garbage collector
// while still allowing the underlying slice memory to be reused.
func Clear[Slice ~[]E, E any](s Slice) Slice {
    clear(s)
    return s[:0]
}
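A hypothetical sketch of the intended usage pattern (not part of this commit): reuse a batch slice across loop iterations, where Clear drops references so old elements can be collected while the backing array keeps its capacity for the next batch. The record type and the nextBatch/process helpers are assumptions for illustration; sliceclear must be imported by the caller.

type record struct{ line string }

func consume(nextBatch func() []*record, process func([]*record)) {
    var batch []*record
    for {
        batch = append(batch, nextBatch()...)
        if len(batch) == 0 {
            return
        }
        process(batch)

        // Drop references to processed records but keep the allocated capacity.
        batch = sliceclear.Clear(batch)
    }
}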
@ -0,0 +1,28 @@
package sliceclear_test

import (
    "testing"

    "github.com/stretchr/testify/require"

    "github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear"
)

func Test(t *testing.T) {
    s := make([]*int, 0, 10)
    for i := 0; i < 10; i++ {
        s = append(s, new(int))
    }

    s = sliceclear.Clear(s)
    require.Equal(t, 10, cap(s))
    require.Equal(t, 0, len(s))

    // Reexpand s to its full capacity and ensure that all elements have been
    // zeroed out.
    full := s[:cap(s)]
    require.Equal(t, 10, len(full))
    for i := 0; i < 10; i++ {
        require.Nil(t, full[i], "element %d was not zeroed; this can cause memory leaks", i)
    }
}