feat: convert WalSegmentWriter to io.ReadSeeker (#13340)

Signed-off-by: Vladyslav Diachenko <vlad.diachenko@grafana.com>
pull/13342/head
Vladyslav Diachenko 11 months ago committed by GitHub
parent d0f56eeb0a
commit 19c050926e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 54
      pkg/storage/wal/segment.go
  2. 55
      pkg/storage/wal/segment_test.go

@ -19,6 +19,7 @@ import (
"github.com/grafana/loki/v3/pkg/storage/wal/chunks"
"github.com/grafana/loki/v3/pkg/storage/wal/index"
"github.com/grafana/loki/v3/pkg/util/encoding"
"github.com/grafana/loki/v3/pkg/util/pool"
)
// LOKW is the magic number for the Loki WAL format.
@ -32,6 +33,8 @@ var (
}
},
}
// Pool of buffers for encoded WAL segments; buffer sizes range from 512 KiB to 20 MiB.
encodedWalSegmentBufferPool = pool.NewBuffer(512*1024, 20*1024*1024, 2)
)
func init() {
@ -60,6 +63,10 @@ func (s *streamSegment) Reset() {
s.entries = s.entries[:0]
}
// WriteTo writes the segment's entries to w as a single chunk, passing
// chunks.EncodingSnappy as the encoding. It returns whatever
// chunks.WriteChunk reports: the byte count and any error.
func (s *streamSegment) WriteTo(w io.Writer) (n int64, err error) {
	n, err = chunks.WriteChunk(w, s.entries, chunks.EncodingSnappy)
	return n, err
}
// NewWalSegmentWriter creates a new WalSegmentWriter.
func NewWalSegmentWriter() (*SegmentWriter, error) {
idxWriter, err := index.NewWriter()
@ -212,6 +219,7 @@ func (b *SegmentWriter) WriteTo(w io.Writer) (int64, error) {
// write index len 4b
b.buf1.PutBE32int(n)
n, err = w.Write(b.buf1.Get())
b.buf1.Reset()
if err != nil {
return total, err
}
@ -234,10 +242,6 @@ func (b *SegmentWriter) WriteTo(w io.Writer) (int64, error) {
return total, nil
}
// WriteTo writes the segment's entries to w as a single chunk, passing
// chunks.EncodingSnappy as the encoding. The returned count and error are
// those reported by chunks.WriteChunk.
func (s *streamSegment) WriteTo(w io.Writer) (n int64, err error) {
return chunks.WriteChunk(w, s.entries, chunks.EncodingSnappy)
}
// Reset clears the writer.
// After calling Reset, the writer can be reused.
func (b *SegmentWriter) Reset() {
@ -246,10 +250,50 @@ func (b *SegmentWriter) Reset() {
streamSegmentPool.Put(s)
}
b.streams = make(map[streamID]*streamSegment, 64)
b.buf1.Reset()
b.inputSize = 0
}
// ToReader encodes the segment into a buffer taken from
// encodedWalSegmentBufferPool and returns a reader positioned at the start of
// the encoded bytes.
//
// The returned reader owns the pooled buffer; callers must Close it so the
// buffer is handed back to the pool. If encoding fails, no reader is created,
// the buffer is returned to the pool here, and the wrapped error is returned.
func (b *SegmentWriter) ToReader() (io.ReadSeekCloser, error) {
	// The snappy compression ratio is roughly 5x but cannot be predicted, so
	// ask for a buffer of a third of the input size to make regrowth unlikely.
	buffer := encodedWalSegmentBufferPool.Get(int(b.inputSize / 3))
	if _, err := b.WriteTo(buffer); err != nil {
		// Nothing will ever Close this buffer, so release it explicitly
		// instead of letting it drop out of the pool.
		encodedWalSegmentBufferPool.Put(buffer)
		return nil, fmt.Errorf("failed to write segment to create a reader: %w", err)
	}
	return NewEncodedSegmentReader(buffer), nil
}
// Compile-time check that *EncodedSegmentReader satisfies io.ReadSeekCloser.
var _ io.ReadSeekCloser = (*EncodedSegmentReader)(nil)
// EncodedSegmentReader exposes the bytes held in a *bytes.Buffer through
// Read, Seek and Close. Close passes the buffer to
// encodedWalSegmentBufferPool and clears both fields.
type EncodedSegmentReader struct {
// delegate serves the Read and Seek calls.
delegate io.ReadSeeker
// encodedContent is the buffer whose bytes delegate reads; it is handed to the pool on Close.
encodedContent *bytes.Buffer
}
// NewEncodedSegmentReader wraps encodedContent in an EncodedSegmentReader.
// Reads and seeks are served by a bytes.Reader created over
// encodedContent.Bytes(); the buffer itself is kept so that Close can pass it
// to the pool.
func NewEncodedSegmentReader(encodedContent *bytes.Buffer) *EncodedSegmentReader {
	reader := new(EncodedSegmentReader)
	reader.encodedContent = encodedContent
	reader.delegate = bytes.NewReader(encodedContent.Bytes())
	return reader
}
// Read reads up to len(p) bytes of the encoded segment into p by forwarding
// to the underlying reader.
//
// Close sets the underlying reader to nil, so a Read after Close (or on a
// zero-value EncodedSegmentReader) returns io.ErrClosedPipe rather than
// panicking on a nil interface.
func (e *EncodedSegmentReader) Read(p []byte) (n int, err error) {
	if e.delegate == nil {
		return 0, io.ErrClosedPipe
	}
	return e.delegate.Read(p)
}
// Seek repositions the read offset by forwarding offset and whence to the
// underlying reader and returns its result.
//
// Close sets the underlying reader to nil, so a Seek after Close (or on a
// zero-value EncodedSegmentReader) returns io.ErrClosedPipe rather than
// panicking on a nil interface.
func (e *EncodedSegmentReader) Seek(offset int64, whence int) (int64, error) {
	if e.delegate == nil {
		return 0, io.ErrClosedPipe
	}
	return e.delegate.Seek(offset, whence)
}
// Close passes the held buffer to encodedWalSegmentBufferPool and clears the
// reader's references to both the buffer and the underlying reader.
// It always returns nil.
func (e *EncodedSegmentReader) Close() error {
	released := e.encodedContent
	// Drop our references first so this reader no longer points at memory
	// that now belongs to the pool.
	e.encodedContent, e.delegate = nil, nil
	encodedWalSegmentBufferPool.Put(released)
	return nil
}
// InputSize returns the total size of the input data written to the writer.
// It doesn't account for timestamps and labels.
func (b *SegmentWriter) InputSize() int64 {

@ -5,7 +5,6 @@ import (
"context"
"fmt"
"sort"
"sync"
"testing"
"time"
@ -333,30 +332,40 @@ func BenchmarkWrites(b *testing.B) {
dst := bytes.NewBuffer(make([]byte, 0, inputSize))
pool := sync.Pool{
New: func() interface{} {
writer, err := NewWalSegmentWriter()
if err != nil {
panic(err)
}
return writer
},
}
writer, err := NewWalSegmentWriter()
require.NoError(b, err)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
writer := pool.Get().(*SegmentWriter)
for _, d := range data {
writer.Append(d.tenant, d.labels, d.lbls, d.entries)
}
dst.Reset()
writer.Reset()
encodedLength, err := writer.WriteTo(dst)
require.NoError(b, err)
b.Run("WriteTo", func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
dst.Reset()
n, err := writer.WriteTo(dst)
require.NoError(b, err)
require.EqualValues(b, encodedLength, n)
}
})
for _, d := range data {
writer.Append(d.tenant, d.labels, d.lbls, d.entries)
bytesBuf := make([]byte, inputSize)
b.Run("Reader", func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
var err error
reader, err := writer.ToReader()
require.NoError(b, err)
n, err := reader.Read(bytesBuf)
require.NoError(b, err)
require.EqualValues(b, encodedLength, n)
require.NoError(b, reader.Close())
}
n, err := writer.WriteTo(dst)
require.NoError(b, err)
require.True(b, n > 0)
pool.Put(writer)
}
})
}

Loading…
Cancel
Save