package chunkenc

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"hash"
	"math"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/pkg/push"

	"github.com/grafana/loki/v3/pkg/chunkenc/testdata"
	"github.com/grafana/loki/v3/pkg/compression"
	"github.com/grafana/loki/v3/pkg/iter"
	"github.com/grafana/loki/v3/pkg/logproto"
	"github.com/grafana/loki/v3/pkg/logql/log"
	"github.com/grafana/loki/v3/pkg/logql/syntax"
	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
	"github.com/grafana/loki/v3/pkg/storage/chunk"
	"github.com/grafana/loki/v3/pkg/util/constants"
	"github.com/grafana/loki/v3/pkg/util/filter"
)

var testEncodings = []compression.Codec{
	compression.None,
	compression.GZIP,
	compression.LZ4_64k,
	compression.LZ4_256k,
	compression.LZ4_1M,
	compression.LZ4_4M,
	compression.Snappy,
	compression.Flate,
	compression.Zstd,
}

var (
	testBlockSize  = 256 * 1024
	testTargetSize = 1500 * 1024
	testBlockSizes = []int{64 * 1024, 256 * 1024, 512 * 1024}
	countExtractor = func() log.StreamSampleExtractor {
		ex, err := log.NewLineSampleExtractor(log.CountExtractor, nil, nil, false, false)
		if err != nil {
			panic(err)
		}
		return ex.ForStream(labels.Labels{})
	}()
	bytesExtractor = func() log.StreamSampleExtractor {
		ex, err := log.NewLineSampleExtractor(log.BytesExtractor, nil, nil, false, false)
		if err != nil {
			panic(err)
		}
		return ex.ForStream(labels.Labels{})
	}()
	allPossibleFormats = []struct {
		headBlockFmt HeadBlockFmt
		chunkFormat  byte
	}{
		{
			headBlockFmt: OrderedHeadBlockFmt,
			chunkFormat:  ChunkFormatV2,
		},
		{
			headBlockFmt: OrderedHeadBlockFmt,
			chunkFormat:  ChunkFormatV3,
		},
		{
			headBlockFmt: UnorderedHeadBlockFmt,
			chunkFormat:  ChunkFormatV3,
		},
		{
			headBlockFmt: UnorderedWithStructuredMetadataHeadBlockFmt,
			chunkFormat:  ChunkFormatV4,
		},
	}
)

const (
	DefaultTestHeadBlockFmt = UnorderedWithStructuredMetadataHeadBlockFmt
	lblPing                 = "ping"
	lblPong                 = "pong"
)
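
// TestBlocksInclusive verifies that Blocks treats its from/through bounds as
// inclusive: querying exactly at an entry's timestamp returns the cut block
// containing that entry.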
func TestBlocksInclusive(t *testing.T) {
	for _, enc := range testEncodings {
		for _, format := range allPossibleFormats {
			chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt
			chk := NewMemChunk(chunkfmt, enc, headfmt, testBlockSize, testTargetSize)
			dup, err := chk.Append(logprotoEntry(1, "1"))
			require.False(t, dup)
			require.Nil(t, err)
			err = chk.cut()
			require.Nil(t, err)

			blocks := chk.Blocks(time.Unix(0, 1), time.Unix(0, 1))
			require.Equal(t, 1, len(blocks))
			require.Equal(t, 1, blocks[0].Entries())
		}
	}
}
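
// TestBlock appends entries (with and without structured metadata) across the
// head block and a cut block, then verifies entry iteration, sample
// extraction with one and multiple extractors, and bounded iteration.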
func TestBlock(t *testing.T) {
	for _, enc := range testEncodings {
		for _, format := range allPossibleFormats {
			chunkFormat, headBlockFmt := format.chunkFormat, format.headBlockFmt
			t.Run(fmt.Sprintf("encoding:%v chunkFormat:%v headBlockFmt:%v", enc, chunkFormat, headBlockFmt), func(t *testing.T) {
				t.Parallel()
				chk := newMemChunkWithFormat(chunkFormat, enc, headBlockFmt, testBlockSize, testTargetSize)
				cases := []struct {
					ts    int64
					str   string
					bytes float64
					lbs   []logproto.LabelAdapter
					cut   bool
				}{
					{
						ts:    1,
						str:   "hello, world!",
						bytes: float64(len("hello, world!")),
					},
					{
						ts:    2,
						str:   "hello, world2!",
						bytes: float64(len("hello, world2!")),
						lbs: []logproto.LabelAdapter{
							{Name: "app", Value: "myapp"},
						},
					},
					{
						ts:    3,
						str:   "hello, world3!",
						bytes: float64(len("hello, world3!")),
						lbs: []logproto.LabelAdapter{
							{Name: "a", Value: "a"},
							{Name: "b", Value: "b"},
						},
					},
					{
						ts:    4,
						str:   "hello, world4!",
						bytes: float64(len("hello, world4!")),
					},
					{
						ts:    5,
						str:   "hello, world5!",
						bytes: float64(len("hello, world5!")),
					},
					{
						ts:    6,
						str:   "hello, world6!",
						bytes: float64(len("hello, world6!")),
						cut:   true,
					},
					{
						ts:    7,
						str:   "hello, world7!",
						bytes: float64(len("hello, world7!")),
					},
					{
						ts:    8,
						str:   "hello, worl\nd8!",
						bytes: float64(len("hello, worl\nd8!")),
					},
					{
						ts:    8,
						str:   "hello, world 8, 2!",
						bytes: float64(len("hello, world 8, 2!")),
					},
					{
						ts:    8,
						str:   "hello, world 8, 3!",
						bytes: float64(len("hello, world 8, 3!")),
					},
					{
						ts:    9,
						str:   "",
						bytes: float64(len("")),
					},
					{
						ts:    10,
						str:   "hello, world10!",
						bytes: float64(len("hello, world10!")),
						lbs: []logproto.LabelAdapter{
							{Name: "a", Value: "a2"},
							{Name: "b", Value: "b"},
						},
					},
				}

				for _, c := range cases {
					dup, err := chk.Append(logprotoEntryWithStructuredMetadata(c.ts, c.str, c.lbs))
					require.False(t, dup)
					require.NoError(t, err)
					if c.cut {
						require.NoError(t, chk.cut())
					}
				}

				noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})

				it, err := chk.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
				require.NoError(t, err)

				idx := 0
				for it.Next() {
					e := it.At()
					require.Equal(t, cases[idx].ts, e.Timestamp.UnixNano())
					require.Equal(t, cases[idx].str, e.Line)
					if chunkFormat < ChunkFormatV4 {
						require.Equal(t, labels.EmptyLabels().String(), it.Labels())
						require.Empty(t, e.StructuredMetadata)
					} else {
						if len(cases[idx].lbs) > 0 {
							require.Equal(t, push.LabelsAdapter(cases[idx].lbs), e.StructuredMetadata)
						}

						expectedLabels := logproto.FromLabelAdaptersToLabels(cases[idx].lbs).String()
						require.Equal(t, expectedLabels, it.Labels())
					}
					idx++
				}

				require.NoError(t, it.Err())
				require.NoError(t, it.Close())
				require.Equal(t, len(cases), idx)

				sampleIt := chk.SampleIterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), countExtractor)
				idx = 0
				for sampleIt.Next() {
					s := sampleIt.At()
					require.Equal(t, cases[idx].ts, s.Timestamp)
					require.Equal(t, 1., s.Value)
					require.NotEmpty(t, s.Hash)
					idx++
				}

				require.NoError(t, sampleIt.Err())
				require.NoError(t, sampleIt.Close())
				require.Equal(t, len(cases), idx)
				t.Run("multi-extractor", func(t *testing.T) {
					// Wrap extractors in variant extractors so they get a variant index we can use later for differentiating counts and bytes
					extractors := []log.StreamSampleExtractor{
						log.NewVariantsStreamSampleExtractorWrapper(0, countExtractor),
						log.NewVariantsStreamSampleExtractorWrapper(1, bytesExtractor),
					}
					sampleIt = chk.SampleIterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), extractors...)
					idx = 0

					// variadic arguments can't guarantee order, so we're going to store the expected and actual values
					// and do an ElementsMatch on them.
					var actualCounts = make([]float64, 0, len(cases))
					var actualBytes = make([]float64, 0, len(cases))

					var expectedCounts = make([]float64, 0, len(cases))
					var expectedBytes = make([]float64, 0, len(cases))
					for _, c := range cases {
						expectedCounts = append(expectedCounts, 1.)
						expectedBytes = append(expectedBytes, c.bytes)
					}

					// 2 extractors, expect 2 samples per original timestamp
					for sampleIt.Next() {
						s := sampleIt.At()
						require.Equal(t, cases[idx].ts, s.Timestamp)
						require.NotEmpty(t, s.Hash)
						lbls := sampleIt.Labels()
						if strings.Contains(lbls, fmt.Sprintf(`%s="0"`, constants.VariantLabel)) {
							actualCounts = append(actualCounts, s.Value)
						} else {
							actualBytes = append(actualBytes, s.Value)
						}

						require.True(t, sampleIt.Next())
						s = sampleIt.At()
						require.Equal(t, cases[idx].ts, s.Timestamp)
						require.NotEmpty(t, s.Hash)
						lbls = sampleIt.Labels()
						if strings.Contains(lbls, fmt.Sprintf(`%s="0"`, constants.VariantLabel)) {
							actualCounts = append(actualCounts, s.Value)
						} else {
							actualBytes = append(actualBytes, s.Value)
						}

						idx++
					}

					require.ElementsMatch(t, expectedCounts, actualCounts)
					require.ElementsMatch(t, expectedBytes, actualBytes)

					require.NoError(t, sampleIt.Err())
					require.NoError(t, sampleIt.Close())
					require.Equal(t, len(cases), idx)
				})

				t.Run("bounded-iteration", func(t *testing.T) {
					it, err := chk.Iterator(context.Background(), time.Unix(0, 3), time.Unix(0, 7), logproto.FORWARD, noopStreamPipeline)
					require.NoError(t, err)

					idx := 2
					for it.Next() {
						e := it.At()
						require.Equal(t, cases[idx].ts, e.Timestamp.UnixNano())
						require.Equal(t, cases[idx].str, e.Line)
						idx++
					}
					require.NoError(t, it.Err())
					require.Equal(t, 6, idx)
				})
			})

		}
	}
}
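
// TestCorruptChunk replaces a chunk's blocks with undecodable data and
// verifies the error is surfaced through the iterator's Err method.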
func TestCorruptChunk(t *testing.T) {
	for _, enc := range testEncodings {
		for _, format := range allPossibleFormats {
			chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt

			t.Run(enc.String(), func(t *testing.T) {
				t.Parallel()

				chk := NewMemChunk(chunkfmt, enc, headfmt, testBlockSize, testTargetSize)
				cases := []struct {
					data []byte
				}{
					// Data that should not decode as lines from a chunk in any encoding.
					{data: []byte{0}},
					{data: []byte{1}},
					{data: []byte("asdfasdfasdfqwyteqwtyeq")},
				}

				ctx, start, end := context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64)
				for i, c := range cases {
					chk.blocks = []block{{b: c.data}}
					noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
					it, err := chk.Iterator(ctx, start, end, logproto.FORWARD, noopStreamPipeline)
					require.NoError(t, err, "case %d", i)

					idx := 0
					for it.Next() {
						idx++
					}
					require.Error(t, it.Err(), "case %d", i)
					require.NoError(t, it.Close())
				}
			})
		}
	}
}

func TestReadFormatV1(t *testing.T) {
	t.Parallel()

	c := NewMemChunk(ChunkFormatV3, compression.GZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize)
	fillChunk(c)
	// Override the format to v1 to test reading that specific version.
	c.format = ChunkFormatV1

	b, err := c.Bytes()
	if err != nil {
		t.Fatal(err)
	}

	r, err := NewByteChunk(b, testBlockSize, testTargetSize)
	if err != nil {
		t.Fatal(err)
	}

	noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
	it, err := r.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
	if err != nil {
		t.Fatal(err)
	}

	i := int64(0)
	for it.Next() {
		require.Equal(t, i, it.At().Timestamp.UnixNano())
		require.Equal(t, testdata.LogString(i), it.At().Line)

		i++
	}
}

// Test all encodings by populating a memchunk, serializing it,
// re-loading it with NewByteChunk, serializing it again, and re-loading it via NewByteChunk once more.
// This tests the integrity of transfer between the following:
// 1) memory populated chunks <-> []byte loaded chunks
// 2) []byte loaded chunks <-> []byte loaded chunks
func TestRoundtripV2(t *testing.T) {
	for _, testData := range allPossibleFormats {
		for _, enc := range testEncodings {
			t.Run(testNameWithFormats(enc, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) {
				t.Parallel()

				c := newMemChunkWithFormat(testData.chunkFormat, enc, testData.headBlockFmt, testBlockSize, testTargetSize)
				populated := fillChunk(c)

				assertLines := func(c *MemChunk) {
					require.Equal(t, enc, c.Encoding())
					noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
					it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
					if err != nil {
						t.Fatal(err)
					}

					i := int64(0)
					var data int64
					for it.Next() {
						require.Equal(t, i, it.At().Timestamp.UnixNano())
						require.Equal(t, testdata.LogString(i), it.At().Line)

						data += int64(len(it.At().Line))
						i++
					}
					require.Equal(t, populated, data)
				}

				assertLines(c)

				// test MemChunk -> NewByteChunk loading
				b, err := c.Bytes()
				if err != nil {
					t.Fatal(err)
				}

				r, err := NewByteChunk(b, testBlockSize, testTargetSize)
				if err != nil {
					t.Fatal(err)
				}
				assertLines(r)

				// test NewByteChunk -> NewByteChunk loading
				rOut, err := r.Bytes()
				require.Nil(t, err)

				loaded, err := NewByteChunk(rOut, testBlockSize, testTargetSize)
				require.Nil(t, err)

				assertLines(loaded)
			})
		}
	}
}

func testNameWithFormats(enc compression.Codec, chunkFormat byte, headBlockFmt HeadBlockFmt) string {
	return fmt.Sprintf("encoding:%v chunkFormat:%v headBlockFmt:%v", enc, chunkFormat, headBlockFmt)
}

func TestRoundtripV3(t *testing.T) {
	for _, enc := range testEncodings {
		for _, format := range allPossibleFormats {
			chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt
			t.Run(fmt.Sprintf("%v-%v", format, enc), func(t *testing.T) {
				t.Parallel()

				c := NewMemChunk(chunkfmt, enc, headfmt, testBlockSize, testTargetSize)
				_ = fillChunk(c)

				b, err := c.Bytes()
				require.Nil(t, err)
				r, err := NewByteChunk(b, testBlockSize, testTargetSize)
				require.Nil(t, err)

				b2, err := r.Bytes()
				require.Nil(t, err)
				require.Equal(t, b, b2)
			})
		}
	}
}
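
// TestSerialization round-trips a chunk through Bytes and NewByteChunk and
// verifies entries, structured metadata, and extracted samples survive intact.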
func TestSerialization(t *testing.T) {
	for _, testData := range allPossibleFormats {
		for _, enc := range testEncodings {
			// run tests with and without structured metadata since it is optional
			for _, appendWithStructuredMetadata := range []bool{false, true} {
				testName := testNameWithFormats(enc, testData.chunkFormat, testData.headBlockFmt)
				if appendWithStructuredMetadata {
					testName = fmt.Sprintf("%s - append structured metadata", testName)
				} else {
					testName = fmt.Sprintf("%s - without structured metadata", testName)
				}
				t.Run(testName, func(t *testing.T) {
					t.Parallel()

					chk := NewMemChunk(testData.chunkFormat, enc, testData.headBlockFmt, testBlockSize, testTargetSize)
					chk.format = testData.chunkFormat
					numSamples := 50000
					var entry *logproto.Entry

					for i := 0; i < numSamples; i++ {
						entry = logprotoEntry(int64(i), strconv.Itoa(i))
						if appendWithStructuredMetadata {
							entry.StructuredMetadata = []logproto.LabelAdapter{{Name: "foo", Value: strconv.Itoa(i)}}
						}
						dup, err := chk.Append(entry)
						require.False(t, dup)
						require.NoError(t, err)
					}
					require.NoError(t, chk.Close())

					byt, err := chk.Bytes()
					require.NoError(t, err)

					bc, err := NewByteChunk(byt, testBlockSize, testTargetSize)
					require.NoError(t, err)

					it, err := bc.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
					require.NoError(t, err)
					for i := 0; i < numSamples; i++ {
						require.True(t, it.Next())

						e := it.At()
						require.Equal(t, int64(i), e.Timestamp.UnixNano())
						require.Equal(t, strconv.Itoa(i), e.Line)
						if appendWithStructuredMetadata && testData.chunkFormat >= ChunkFormatV4 {
							require.Equal(t, labels.FromStrings("foo", strconv.Itoa(i)).String(), it.Labels())
							require.Equal(t, labels.FromStrings("foo", strconv.Itoa(i)), logproto.FromLabelAdaptersToLabels(e.StructuredMetadata))
						} else {
							require.Equal(t, labels.EmptyLabels().String(), it.Labels())
							require.Nil(t, e.StructuredMetadata)
						}
					}
					require.NoError(t, it.Err())

					countExtractor := func() log.StreamSampleExtractor {
						ex, err := log.NewLineSampleExtractor(log.CountExtractor, nil, nil, false, false)
						if err != nil {
							panic(err)
						}
						return ex.ForStream(labels.Labels{})
					}()
					extractors := []log.StreamSampleExtractor{countExtractor, countExtractor}

					sampleIt := bc.SampleIterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), extractors...)
					for i := 0; i < numSamples; i++ {
						require.True(t, sampleIt.Next(), i)

						s := sampleIt.At()
						require.Equal(t, int64(i), s.Timestamp)
						require.Equal(t, 1., s.Value)
						if appendWithStructuredMetadata && testData.chunkFormat >= ChunkFormatV4 {
							require.Equal(t, labels.FromStrings("foo", strconv.Itoa(i)).String(), sampleIt.Labels())
						} else {
							require.Equal(t, labels.EmptyLabels().String(), sampleIt.Labels())
						}

						// check that the second extractor is returning samples as well
						require.True(t, sampleIt.Next())
						s = sampleIt.At()
						require.Equal(t, int64(i), s.Timestamp)
						require.Equal(t, 1., s.Value)
					}
					require.NoError(t, sampleIt.Err())

					byt2, err := chk.Bytes()
					require.NoError(t, err)

					require.True(t, bytes.Equal(byt, byt2))
				})
			}
		}
	}
}
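
// TestChunkFilling fills a chunk to capacity and verifies both how many
// entries fit and that iteration returns all of them.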
func TestChunkFilling(t *testing.T) {
	for _, testData := range allPossibleFormats {
		for _, enc := range testEncodings {
			t.Run(testNameWithFormats(enc, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) {
				t.Parallel()

				chk := newMemChunkWithFormat(testData.chunkFormat, enc, testData.headBlockFmt, testBlockSize, 0)
				chk.blockSize = 1024

				// We should be able to append only 10KB of logs.
				maxBytes := chk.blockSize * blocksPerChunk
				lineSize := 512
				lines := maxBytes / lineSize

				logLine := string(make([]byte, lineSize))
				entry := &logproto.Entry{
					Timestamp: time.Unix(0, 0),
					Line:      logLine,
				}

				i := int64(0)
				for ; chk.SpaceFor(entry) && i < 30; i++ {
					entry.Timestamp = time.Unix(0, i)
					dup, err := chk.Append(entry)
					require.False(t, dup)
					require.NoError(t, err)
				}

				require.Equal(t, int64(lines), i)

				noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
				it, err := chk.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, 100), logproto.FORWARD, noopStreamPipeline)
				require.NoError(t, err)
				i = 0
				for it.Next() {
					entry := it.At()
					require.Equal(t, i, entry.Timestamp.UnixNano())
					i++
				}

				require.Equal(t, int64(lines), i)
			})
		}
	}
}
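
// TestGZIPChunkTargetSize fills a GZIP chunk with random, poorly compressible
// lines and checks the compressed size lands within 10% of the target size.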
func TestGZIPChunkTargetSize(t *testing.T) {
	t.Parallel()

	chk := NewMemChunk(ChunkFormatV3, compression.GZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize)

	lineSize := 512
	entry := &logproto.Entry{
		Timestamp: time.Unix(0, 0),
		Line:      "",
	}

	// Use random log data; otherwise the gzip compression is way too good
	// and the following loop has to run far too many times.
	// Using the same seed should guarantee the same random numbers and same test data.
	r := rand.New(rand.NewSource(99))

	i := int64(0)

	for ; chk.SpaceFor(entry) && i < 5000; i++ {
		logLine := make([]byte, lineSize)
		for j := range logLine {
			logLine[j] = byte(r.Int())
		}
		entry = &logproto.Entry{
			Timestamp: time.Unix(0, 0),
			Line:      string(logLine),
		}
		entry.Timestamp = time.Unix(0, i)
		dup, err := chk.Append(entry)
		require.False(t, dup)
		require.NoError(t, err)
	}

	// 5000 is a limit to make sure the test doesn't run away; we shouldn't need this many log lines to make a 1MB chunk
	require.NotEqual(t, 5000, i)

	require.NoError(t, chk.Close())

	require.Equal(t, 0, chk.head.UncompressedSize())

	// Even though the seed is static above and results should be deterministic,
	// we will allow +/- 10% variance
	minSize := int(float64(testTargetSize) * 0.9)
	maxSize := int(float64(testTargetSize) * 1.1)
	require.Greater(t, chk.CompressedSize(), minSize)
	require.Less(t, chk.CompressedSize(), maxSize)

	// Also verify our utilization is close to 1.0
	ut := chk.Utilization()
	require.Greater(t, ut, 0.99)
	require.Less(t, ut, 1.01)
}
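
// TestMemChunk_AppendOutOfOrder verifies that ordered head blocks reject
// out-of-order appends with ErrOutOfOrder while unordered head blocks accept
// them, both within a block and across cuts.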
func TestMemChunk_AppendOutOfOrder(t *testing.T) {
	t.Parallel()

	type tester func(t *testing.T, chk *MemChunk)

	tests := map[string]tester{
		"append out of order in the same block": func(t *testing.T, chk *MemChunk) {
			dup, err := chk.Append(logprotoEntry(5, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)
			dup, err = chk.Append(logprotoEntry(6, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)

			if chk.headFmt == OrderedHeadBlockFmt {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.EqualError(t, err, ErrOutOfOrder.Error())
				assert.False(t, dup)
			} else {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.False(t, dup)
				assert.NoError(t, err)
			}
		},
		"append out of order in a new block right after cutting the previous one": func(t *testing.T, chk *MemChunk) {
			dup, err := chk.Append(logprotoEntry(5, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)
			dup, err = chk.Append(logprotoEntry(6, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)
			assert.NoError(t, chk.cut())

			if chk.headFmt == OrderedHeadBlockFmt {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.False(t, dup)
				assert.EqualError(t, err, ErrOutOfOrder.Error())
			} else {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.False(t, dup)
				assert.NoError(t, err)
			}
		},
		"append out of order in a new block after multiple cuts": func(t *testing.T, chk *MemChunk) {
			dup, err := chk.Append(logprotoEntry(5, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)
			assert.NoError(t, chk.cut())

			dup, err = chk.Append(logprotoEntry(6, "test"))
			assert.False(t, dup)
			assert.NoError(t, err)
			assert.NoError(t, chk.cut())

			if chk.headFmt == OrderedHeadBlockFmt {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.False(t, dup)
				assert.EqualError(t, err, ErrOutOfOrder.Error())
			} else {
				dup, err = chk.Append(logprotoEntry(1, "test"))
				assert.False(t, dup)
				assert.NoError(t, err)
			}
		},
	}

	for _, f := range HeadBlockFmts {
		for testName, tester := range tests {
			t.Run(testName, func(t *testing.T) {
				t.Parallel()

				tester(t, NewMemChunk(ChunkFormatV3, compression.GZIP, f, testBlockSize, testTargetSize))
			})
		}
	}
}

func BenchmarkEncodingsAndChunkSize(b *testing.B) {
	type res struct {
		name           string
		count          uint64
		size           uint64
		compressedSize uint64
		ratio          float64
	}
	var result []res

	resBuffer := make([]byte, 0, 50*1024*1024)
	for _, enc := range testEncodings {
		for _, bs := range testBlockSizes {
			for fi, f := range allPossibleFormats {
				name := fmt.Sprintf("%s_block_size_%s_format_%d", enc.String(), humanize.Bytes(uint64(bs)), fi)
				b.Run(name, func(b *testing.B) {
					var insertedTotal, compressedTotal, count uint64
					for range b.N {
						c := newMemChunkWithFormat(f.chunkFormat, enc, f.headBlockFmt, bs, testTargetSize)
						inserted := fillChunk(c)
						insertedTotal += uint64(inserted)
						cb, err := c.BytesWith(resBuffer)
						if err != nil {
							b.Fatal(err)
						}
						compressedTotal += uint64(len(cb))
						count++
					}

					averageRatio := float64(insertedTotal) / float64(compressedTotal)
					result = append(result, res{
						name:           name,
						count:          count,
						size:           insertedTotal,
						compressedSize: compressedTotal,
						ratio:          averageRatio,
					})
					b.ReportMetric(averageRatio, "compression_ratio")
					b.ReportMetric(float64(insertedTotal)/float64(count*1024), "avg_size_kb")
					b.ReportMetric(float64(compressedTotal)/float64(count*1024), "avg_compressed_size_kb")
				})
			}
		}
	}

	sort.Slice(result, func(i, j int) bool {
		return result[i].ratio > result[j].ratio
	})
	fmt.Printf("%s\t%s\t%s\t%s\t%s\n", "name", "count", "uncompressed", "compressed", "ratio")
	for _, r := range result {
		fmt.Printf("%s\t(count %d)\n%s\t%s\t%f\n", r.name, r.count, humanize.Bytes(r.size/r.count), humanize.Bytes(r.compressedSize/r.count), r.ratio)
	}
}
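
// TestChunkStats verifies that iterating a chunk records the expected
// processed bytes and lines in the stats context, both for the in-memory
// chunk and after a Bytes/NewByteChunk round trip.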
func TestChunkStats(t *testing.T) {
	c := NewMemChunk(ChunkFormatV4, compression.Snappy, DefaultTestHeadBlockFmt, testBlockSize, 0)
	first := time.Now()
	entry := &logproto.Entry{
		Timestamp: first,
		Line:      `ts=2020-03-16T13:58:33.459Z caller=dedupe.go:112 component=remote level=debug remote_name=3ea44a url=https:/blan.goo.net/api/prom/push msg=QueueManager.updateShardsLoop lowerBound=45.5 desiredShards=56.724401194003136 upperBound=84.5`,
	}
	inserted := 0
	// fill the chunk with known data size.
	for {
		if !c.SpaceFor(entry) {
			break
		}
		if _, err := c.Append(entry); err != nil {
			t.Fatal(err)
		}
		inserted++
		entry.Timestamp = entry.Timestamp.Add(time.Nanosecond)
	}
	// For each entry: timestamp <varint>, line size <varint>, line <bytes>, num of labels in structured metadata <varint>
	expectedSize := inserted * (len(entry.Line) + 3*binary.MaxVarintLen64)
	statsCtx, ctx := stats.NewContext(context.Background())

	noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
	it, err := c.Iterator(ctx, first.Add(-time.Hour), entry.Timestamp.Add(time.Hour), logproto.BACKWARD, noopStreamPipeline)
	if err != nil {
		t.Fatal(err)
	}
	//nolint:revive
	for it.Next() {
	}
	if err := it.Close(); err != nil {
		t.Fatal(err)
	}
	// test on a chunk filling up
	s := statsCtx.Result(time.Since(first), 0, 0)
	require.Equal(t, int64(expectedSize), s.Summary.TotalBytesProcessed)
	require.Equal(t, int64(inserted), s.Summary.TotalLinesProcessed)

	require.Equal(t, int64(expectedSize), s.TotalDecompressedBytes())
	require.Equal(t, int64(inserted), s.TotalDecompressedLines())

	b, err := c.Bytes()
	if err != nil {
		t.Fatal(err)
	}

	// test on a new chunk.
	cb, err := NewByteChunk(b, testBlockSize, testTargetSize)
	if err != nil {
		t.Fatal(err)
	}
	statsCtx, ctx = stats.NewContext(context.Background())
	it, err = cb.Iterator(ctx, first.Add(-time.Hour), entry.Timestamp.Add(time.Hour), logproto.BACKWARD, noopStreamPipeline)
	if err != nil {
		t.Fatal(err)
	}
	//nolint:revive
	for it.Next() {
	}
	if err := it.Close(); err != nil {
		t.Fatal(err)
	}
	s = statsCtx.Result(time.Since(first), 0, 0)
	require.Equal(t, int64(expectedSize), s.Summary.TotalBytesProcessed)
	require.Equal(t, int64(inserted), s.Summary.TotalLinesProcessed)

	require.Equal(t, int64(expectedSize), s.TotalDecompressedBytes())
	require.Equal(t, int64(inserted), s.TotalDecompressedLines())
}

func TestIteratorClose(t *testing.T) {
	for _, f := range allPossibleFormats {
		for _, enc := range testEncodings {
			t.Run(enc.String(), func(t *testing.T) {
				for _, test := range []func(iter iter.EntryIterator, t *testing.T){
					func(iter iter.EntryIterator, t *testing.T) {
						// close without iterating
						if err := iter.Close(); err != nil {
							t.Fatal(err)
						}
					},
					func(iter iter.EntryIterator, t *testing.T) {
						// close after iterating
						for iter.Next() {
							_ = iter.At()
						}
						if err := iter.Close(); err != nil {
							t.Fatal(err)
						}
					},
					func(iter iter.EntryIterator, t *testing.T) {
						// close after a single iteration
						iter.Next()
						_ = iter.At()
						if err := iter.Close(); err != nil {
							t.Fatal(err)
						}
					},
				} {
					c := newMemChunkWithFormat(f.chunkFormat, enc, f.headBlockFmt, testBlockSize, testTargetSize)
					inserted := fillChunk(c)
					noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
					iter, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, inserted), logproto.BACKWARD, noopStreamPipeline)
					if err != nil {
						t.Fatal(err)
					}
					test(iter, t)
				}
			})
		}
	}
}

func BenchmarkWrite(b *testing.B) {
	entry := &logproto.Entry{
		Timestamp: time.Unix(0, 0),
		Line:      testdata.LogString(0),
	}
	i := int64(0)

	for _, f := range HeadBlockFmts {
		for _, enc := range testEncodings {
			for _, withStructuredMetadata := range []bool{false, true} {
				name := fmt.Sprintf("%v-%v", f, enc)
				if withStructuredMetadata {
					name += "-withStructuredMetadata"
				}
				b.Run(name, func(b *testing.B) {
					uncompressedBytes, compressedBytes := 0, 0
					for n := 0; n < b.N; n++ {
						c := NewMemChunk(ChunkFormatV3, enc, f, testBlockSize, testTargetSize)
						// append until full so we trigger a cut, which serializes using the configured encoding
						for c.SpaceFor(entry) {
							_, _ = c.Append(entry)
							entry.Timestamp = time.Unix(0, i)
							entry.Line = testdata.LogString(i)
							if withStructuredMetadata {
								entry.StructuredMetadata = []logproto.LabelAdapter{
									{Name: "foo", Value: fmt.Sprint(i)},
								}
							}
							i++
						}
						uncompressedBytes += c.UncompressedSize()
						compressedBytes += c.CompressedSize()
					}
					b.SetBytes(int64(uncompressedBytes) / int64(b.N))
					b.ReportMetric(float64(compressedBytes)/float64(uncompressedBytes)*100, "%compressed")
				})
			}
		}
	}
}

type nomatchPipeline struct{}

func (nomatchPipeline) BaseLabels() log.LabelsResult { return log.EmptyLabelsResult }
func (nomatchPipeline) Process(_ int64, line []byte, _ labels.Labels) ([]byte, log.LabelsResult, bool) {
	return line, nil, false
}

func (nomatchPipeline) ProcessString(_ int64, line string, _ labels.Labels) (string, log.LabelsResult, bool) {
	return line, nil, false
}

func (nomatchPipeline) ReferencedStructuredMetadata() bool {
	return false
}

func BenchmarkRead(b *testing.B) {
	for _, bs := range testBlockSizes {
		for _, enc := range testEncodings {
			name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
			b.Run(name, func(b *testing.B) {
				chunks, size := generateData(enc, 5, bs, testTargetSize)
				_, ctx := stats.NewContext(context.Background())
				b.ResetTimer()
				for n := 0; n < b.N; n++ {
					for _, c := range chunks {
						// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
						iterator, err := c.Iterator(ctx, time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
						if err != nil {
							panic(err)
						}
						for iterator.Next() {
							_ = iterator.At()
						}
						if err := iterator.Close(); err != nil {
							b.Fatal(err)
						}
					}
				}
				b.SetBytes(int64(size))
			})
		}
	}

	for _, bs := range testBlockSizes {
		for _, enc := range testEncodings {
			name := fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
			b.Run(name, func(b *testing.B) {
				chunks, size := generateData(enc, 5, bs, testTargetSize)
				_, ctx := stats.NewContext(context.Background())
				b.ResetTimer()
				bytesRead := uint64(0)
				for n := 0; n < b.N; n++ {
					for _, c := range chunks {
						iterator := c.SampleIterator(ctx, time.Unix(0, 0), time.Now(), countExtractor)
						for iterator.Next() {
							_ = iterator.At()
						}
						if err := iterator.Close(); err != nil {
							b.Fatal(err)
						}
					}
					bytesRead += size
				}
				b.SetBytes(int64(bytesRead) / int64(b.N))
			})
		}
	}
}

type noopTestPipeline struct{}

func (noopTestPipeline) BaseLabels() log.LabelsResult { return log.EmptyLabelsResult }
func (noopTestPipeline) Process(_ int64, line []byte, _ labels.Labels) ([]byte, log.LabelsResult, bool) {
	return line, nil, false
}

func (noopTestPipeline) ProcessString(_ int64, line string, _ labels.Labels) (string, log.LabelsResult, bool) {
	return line, nil, false
}

func (noopTestPipeline) ReferencedStructuredMetadata() bool {
	return false
}

func BenchmarkBackwardIterator(b *testing.B) {
	for _, bs := range testBlockSizes {
		b.Run(humanize.Bytes(uint64(bs)), func(b *testing.B) {
			b.ReportAllocs()
			c := NewMemChunk(ChunkFormatV4, compression.Snappy, DefaultTestHeadBlockFmt, bs, testTargetSize)
			_ = fillChunk(c)
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				noop := noopTestPipeline{}
				iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.BACKWARD, noop)
				if err != nil {
					panic(err)
				}
				for iterator.Next() {
					_ = iterator.At()
				}
				if err := iterator.Close(); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

func TestGenerateDataSize(t *testing.T) {
	for _, enc := range testEncodings {
		t.Run(enc.String(), func(t *testing.T) {
			chunks, size := generateData(enc, 50, testBlockSize, testTargetSize)

			bytesRead := uint64(0)
			for _, c := range chunks {
				noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
				// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
				iterator, err := c.Iterator(context.TODO(), time.Unix(0, 0), time.Now(), logproto.FORWARD, noopStreamPipeline)
				if err != nil {
					panic(err)
				}
				for iterator.Next() {
					e := iterator.At()
					bytesRead += uint64(len(e.Line))
				}
				if err := iterator.Close(); err != nil {
					t.Fatal(err)
				}
			}

			require.Equal(t, size, bytesRead)
		})
	}
}

func BenchmarkHeadBlockIterator(b *testing.B) {
	for _, j := range []int{100000, 50000, 15000, 10000} {
		for _, withStructuredMetadata := range []bool{false, true} {
			b.Run(fmt.Sprintf("size=%d structuredMetadata=%v", j, withStructuredMetadata), func(b *testing.B) {
				h := headBlock{}

				var structuredMetadata labels.Labels
				if withStructuredMetadata {
					structuredMetadata = labels.FromStrings("foo", "foo")
				}

				for i := 0; i < j; i++ {
					if _, err := h.Append(int64(i), "this is the append string", structuredMetadata); err != nil {
						b.Fatal(err)
					}
				}

				b.ResetTimer()

				for n := 0; n < b.N; n++ {
					noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
					iter := h.Iterator(context.Background(), logproto.BACKWARD, 0, math.MaxInt64, noopStreamPipeline)

					for iter.Next() {
						_ = iter.At()
					}
				}
			})
		}
	}
}

func BenchmarkHeadBlockSampleIterator(b *testing.B) {
	for _, j := range []int{20000, 10000, 8000, 5000} {
		for _, withStructuredMetadata := range []bool{false, true} {
			b.Run(fmt.Sprintf("size=%d structuredMetadata=%v", j, withStructuredMetadata), func(b *testing.B) {
				h := headBlock{}

				var structuredMetadata labels.Labels
				if withStructuredMetadata {
					structuredMetadata = labels.FromStrings("foo", "foo")
				}

				for i := 0; i < j; i++ {
					if _, err := h.Append(int64(i), "this is the append string", structuredMetadata); err != nil {
						b.Fatal(err)
					}
				}

				b.ResetTimer()

				for n := 0; n < b.N; n++ {
					iter := h.SampleIterator(context.Background(), 0, math.MaxInt64, countExtractor)

					for iter.Next() {
						_ = iter.At()
					}
					iter.Close()
				}
			})
		}
	}
}

func BenchmarkHeadBlockSampleIterator_WithMultipleExtractors(b *testing.B) {
	for _, j := range []int{20000, 10000, 8000, 5000} {
		for _, withStructuredMetadata := range []bool{false, true} {
			b.Run(fmt.Sprintf("size=%d structuredMetadata=%v", j, withStructuredMetadata), func(b *testing.B) {
				h := headBlock{}

				var structuredMetadata labels.Labels
				if withStructuredMetadata {
					structuredMetadata = labels.FromStrings("foo", "foo")
				}

				for i := 0; i < j; i++ {
					if _, err := h.Append(int64(i), "this is the append string", structuredMetadata); err != nil {
						b.Fatal(err)
					}
				}

				b.ResetTimer()

				for n := 0; n < b.N; n++ {
					iter := h.SampleIterator(context.Background(), 0, math.MaxInt64, countExtractor, bytesExtractor)

					for iter.Next() {
						_ = iter.At()
					}
					iter.Close()
				}
			})
		}
	}
}
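
// TestMemChunk_IteratorBounds checks that iterators treat mint as inclusive
// and maxt as exclusive in both directions, against both the head block and
// cut blocks.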
func TestMemChunk_IteratorBounds(t *testing.T) {
	createChunk := func() *MemChunk {
		t.Helper()
		c := NewMemChunk(ChunkFormatV3, compression.None, DefaultTestHeadBlockFmt, 1e6, 1e6)

		if _, err := c.Append(&logproto.Entry{
			Timestamp: time.Unix(0, 1),
			Line:      "1",
		}); err != nil {
			t.Fatal(err)
		}
		if _, err := c.Append(&logproto.Entry{
			Timestamp: time.Unix(0, 2),
			Line:      "2",
		}); err != nil {
			t.Fatal(err)
		}
		return c
	}

	for _, tt := range []struct {
		mint, maxt time.Time
		direction  logproto.Direction
		expect     []bool // array of expected values for next call in sequence
	}{
		{time.Unix(0, 0), time.Unix(0, 1), logproto.FORWARD, []bool{false}},
		{time.Unix(0, 1), time.Unix(0, 2), logproto.FORWARD, []bool{true, false}},
		{time.Unix(0, 1), time.Unix(0, 3), logproto.FORWARD, []bool{true, true, false}},
		{time.Unix(0, 2), time.Unix(0, 3), logproto.FORWARD, []bool{true, false}},

		{time.Unix(0, 0), time.Unix(0, 1), logproto.BACKWARD, []bool{false}},
		{time.Unix(0, 1), time.Unix(0, 2), logproto.BACKWARD, []bool{true, false}},
		{time.Unix(0, 1), time.Unix(0, 3), logproto.BACKWARD, []bool{true, true, false}},
		{time.Unix(0, 2), time.Unix(0, 3), logproto.BACKWARD, []bool{true, false}},
	} {
		t.Run(
			fmt.Sprintf("mint:%d,maxt:%d,direction:%s", tt.mint.UnixNano(), tt.maxt.UnixNano(), tt.direction),
			func(t *testing.T) {
				c := createChunk()

				noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
				// testing headchunk
				it, err := c.Iterator(context.Background(), tt.mint, tt.maxt, tt.direction, noopStreamPipeline)
				require.NoError(t, err)
				for idx, expected := range tt.expect {
					require.Equal(t, expected, it.Next(), "idx: %d", idx)
				}
				require.NoError(t, it.Close())

				// testing chunk blocks
				require.NoError(t, c.cut())
				it, err = c.Iterator(context.Background(), tt.mint, tt.maxt, tt.direction, noopStreamPipeline)
				require.NoError(t, err)
				for i := range tt.expect {
					require.Equal(t, tt.expect[i], it.Next())
				}
				require.NoError(t, it.Close())
			})
	}
}

func TestMemchunkLongLine(t *testing.T) {
	for _, enc := range testEncodings {
		t.Run(enc.String(), func(t *testing.T) {
			t.Parallel()

			c := NewMemChunk(ChunkFormatV3, enc, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize)
			for i := 1; i <= 10; i++ {
				dup, err := c.Append(&logproto.Entry{Timestamp: time.Unix(0, int64(i)), Line: strings.Repeat("e", 200000)})
				require.False(t, dup)
				require.NoError(t, err)
			}
			noopStreamPipeline := log.NewNoopPipeline().ForStream(labels.Labels{})
			it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, 100), logproto.FORWARD, noopStreamPipeline)
			require.NoError(t, err)
			for i := 1; i <= 10; i++ {
				require.True(t, it.Next())
			}
			require.False(t, it.Next())
		})
	}
}

// Ensure passing a reusable []byte doesn't affect output
func TestBytesWith(t *testing.T) {
	t.Parallel()

	exp, err := NewMemChunk(ChunkFormatV3, compression.None, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith(nil)
	require.Nil(t, err)
	out, err := NewMemChunk(ChunkFormatV3, compression.None, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith([]byte{1, 2, 3})
	require.Nil(t, err)

	require.Equal(t, exp, out)
}
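
// TestCheckpointEncoding serializes a chunk for checkpointing and verifies
// MemchunkFromCheckpoint reconstructs an equal chunk, with both an empty and
// a populated head block.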
func TestCheckpointEncoding(t *testing.T) {
	t.Parallel()

	blockSize, targetSize := 256*1024, 1500*1024
	for _, f := range allPossibleFormats {
		t.Run(testNameWithFormats(compression.Snappy, f.chunkFormat, f.headBlockFmt), func(t *testing.T) {
			c := newMemChunkWithFormat(f.chunkFormat, compression.Snappy, f.headBlockFmt, blockSize, targetSize)

			// add a few entries
			for i := 0; i < 5; i++ {
				entry := &logproto.Entry{
					Timestamp: time.Unix(int64(i), 0),
					Line:      fmt.Sprintf("hi there - %d", i),
					StructuredMetadata: push.LabelsAdapter{{
						Name:  fmt.Sprintf("name%d", i),
						Value: fmt.Sprintf("val%d", i),
					}},
				}
				require.Equal(t, true, c.SpaceFor(entry))
				dup, err := c.Append(entry)
				require.False(t, dup)
				require.Nil(t, err)
			}

			// cut it
			require.Nil(t, c.cut())

			// ensure we have cut a block and head block is empty
			require.Equal(t, 1, len(c.blocks))
			require.True(t, c.head.IsEmpty())

			// check entries with empty head
			var chk, head bytes.Buffer
			var err error
			var cpy *MemChunk
			err = c.SerializeForCheckpointTo(&chk, &head)
			require.Nil(t, err)

			cpy, err = MemchunkFromCheckpoint(chk.Bytes(), head.Bytes(), f.headBlockFmt, blockSize, targetSize)
			require.Nil(t, err)

			if f.chunkFormat <= ChunkFormatV2 {
				for i := range c.blocks {
					c.blocks[i].uncompressedSize = 0
				}
			}

			require.Equal(t, c, cpy)

			// add a few more to head
			for i := 5; i < 10; i++ {
				entry := &logproto.Entry{
					Timestamp: time.Unix(int64(i), 0),
					Line:      fmt.Sprintf("hi there - %d", i),
				}
				require.Equal(t, true, c.SpaceFor(entry))
				dup, err := c.Append(entry)
				require.False(t, dup)
				require.Nil(t, err)
			}

			// ensure new blocks are not cut
			require.Equal(t, 1, len(c.blocks))

			chk.Reset()
			head.Reset()
			err = c.SerializeForCheckpointTo(&chk, &head)
			require.Nil(t, err)

			cpy, err = MemchunkFromCheckpoint(chk.Bytes(), head.Bytes(), f.headBlockFmt, blockSize, targetSize)
			require.Nil(t, err)

			if f.chunkFormat <= ChunkFormatV2 {
				for i := range c.blocks {
					c.blocks[i].uncompressedSize = 0
				}
			}

			require.Equal(t, c, cpy)
		})
	}
}
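
// Benchmark sinks: results are appended here so the compiler cannot optimize
// the iteration loops away.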
var (
	streams = []logproto.Stream{}
	series  = []logproto.Series{}
)

func BenchmarkBufferedIteratorLabels(b *testing.B) {
	for _, f := range HeadBlockFmts {
		b.Run(f.String(), func(b *testing.B) {
			c := NewMemChunk(ChunkFormatV3, compression.Snappy, f, testBlockSize, testTargetSize)
			_ = fillChunk(c)

			labelsSet := []labels.Labels{
				labels.FromStrings(
					"cluster", "us-central1",
					"stream", "stdout",
					"filename", "/var/log/pods/loki-prod_query-frontend-6894f97b98-89q2n_eac98024-f60f-44af-a46f-d099bc99d1e7/query-frontend/0.log",
					"namespace", "loki-dev",
					"job", "loki-prod/query-frontend",
					"container", "query-frontend",
					"pod", "query-frontend-6894f97b98-89q2n",
				),
				labels.FromStrings(
					"cluster", "us-central2",
					"stream", "stderr",
					"filename", "/var/log/pods/loki-prod_querier-6894f97b98-89q2n_eac98024-f60f-44af-a46f-d099bc99d1e7/query-frontend/0.log",
					"namespace", "loki-dev",
					"job", "loki-prod/querier",
					"container", "querier",
					"pod", "querier-6894f97b98-89q2n",
				),
			}
			for _, test := range []string{
				`{app="foo"}`,
				`{app="foo"} != "foo"`,
				`{app="foo"} != "foo" | logfmt `,
				`{app="foo"} != "foo" | logfmt | duration > 10ms`,
				`{app="foo"} != "foo" | logfmt | duration > 10ms and component="tsdb"`,
			} {
				b.Run(test, func(b *testing.B) {
					b.ReportAllocs()
					expr, err := syntax.ParseLogSelector(test, true)
					if err != nil {
						b.Fatal(err)
					}
					p, err := expr.Pipeline()
					if err != nil {
						b.Fatal(err)
					}
					var iters []iter.EntryIterator
					for _, lbs := range labelsSet {
						it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, p.ForStream(lbs))
						if err != nil {
							b.Fatal(err)
						}
						iters = append(iters, it)
					}
					b.ResetTimer()
					for n := 0; n < b.N; n++ {
						for _, it := range iters {
							for it.Next() {
								streams = append(streams, logproto.Stream{Labels: it.Labels(), Entries: []logproto.Entry{it.At()}})
							}
						}
					}
					streams = streams[:0]
				})
			}

			for _, test := range []string{
				`rate({app="foo"}[1m])`,
				`sum by (cluster) (rate({app="foo"}[10s]))`,
				`sum by (cluster) (rate({app="foo"} != "foo" [10s]))`,
				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt[10s]))`,
				`sum by (caller) (rate({app="foo"} != "foo" | logfmt[10s]))`,
				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt | duration > 10ms[10s]))`,
				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt | duration > 10ms and component="tsdb"[1m]))`,
			} {
				b.Run(test, func(b *testing.B) {
					b.ReportAllocs()
					expr, err := syntax.ParseSampleExpr(test)
					if err != nil {
						b.Fatal(err)
					}
					ex, err := expr.Extractors()
					if err != nil {
						b.Fatal(err)
					}
					var iters []iter.SampleIterator
					for _, lbs := range labelsSet {
						streamExtractors := make([]log.StreamSampleExtractor, 0, len(ex))
						for _, extractor := range ex {
							streamExtractors = append(streamExtractors, extractor.ForStream(lbs))
						}
						iters = append(
							iters,
							c.SampleIterator(
								context.Background(),
								time.Unix(0, 0),
								time.Now(),
								streamExtractors...),
						)
					}
					b.ResetTimer()
					for n := 0; n < b.N; n++ {
						for _, it := range iters {
							for it.Next() {
								series = append(series, logproto.Series{Labels: it.Labels(), Samples: []logproto.Sample{it.At()}})
							}
						}
					}
					series = series[:0]
				})
			}
		})
	}
}
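
// Test_HeadIteratorReverse fills a chunk and asserts BACKWARD iteration
// yields strictly descending timestamps, with and without the head block.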
func Test_HeadIteratorReverse(t *testing.T) {
	for _, testData := range allPossibleFormats {
		t.Run(testNameWithFormats(compression.Snappy, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) {
			c := newMemChunkWithFormat(testData.chunkFormat, compression.Snappy, testData.headBlockFmt, testBlockSize, testTargetSize)
			genEntry := func(i int64) *logproto.Entry {
				return &logproto.Entry{
					Timestamp: time.Unix(0, i),
					Line:      fmt.Sprintf(`msg="%d"`, i),
				}
			}
			var i int64
			for e := genEntry(i); c.SpaceFor(e); e, i = genEntry(i+1), i+1 {
				dup, err := c.Append(e)
				require.False(t, dup)
				require.NoError(t, err)
			}

			assertOrder := func(t *testing.T, total int64) {
				expr, err := syntax.ParseLogSelector(`{app="foo"} | logfmt`, true)
				require.NoError(t, err)
				p, err := expr.Pipeline()
				require.NoError(t, err)
				it, err := c.Iterator(context.TODO(), time.Unix(0, 0), time.Unix(0, i), logproto.BACKWARD, p.ForStream(labels.FromStrings("app", "foo")))
				require.NoError(t, err)
				for it.Next() {
					total--
					require.Equal(t, total, it.At().Timestamp.UnixNano())
				}
			}

			assertOrder(t, i)
			// let's try again without the headblock.
			require.NoError(t, c.cut())
			assertOrder(t, i)
		})
	}
}
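
// TestMemChunk_Rebound slices a chunk to various sub-ranges and verifies the
// rebounded chunk returns exactly the entries of the original within that
// range.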
func TestMemChunk_Rebound(t *testing.T) {
	chkFrom := time.Unix(0, 0)
	chkThrough := chkFrom.Add(time.Hour)
	originalChunk := buildTestMemChunk(t, chkFrom, chkThrough)

	for _, tc := range []struct {
		name               string
		sliceFrom, sliceTo time.Time
		err                error
	}{
		{
			name:      "slice whole chunk",
			sliceFrom: chkFrom,
			sliceTo:   chkThrough,
		},
		{
			name:      "slice first half",
			sliceFrom: chkFrom,
			sliceTo:   chkFrom.Add(30 * time.Minute),
		},
		{
			name:      "slice second half",
			sliceFrom: chkFrom.Add(30 * time.Minute),
			sliceTo:   chkThrough,
		},
		{
			name:      "slice in the middle",
			sliceFrom: chkFrom.Add(15 * time.Minute),
			sliceTo:   chkFrom.Add(45 * time.Minute),
		},
		{
			name:      "slice interval not aligned with sample intervals",
			sliceFrom: chkFrom.Add(time.Second),
			sliceTo:   chkThrough.Add(-time.Second),
		},
		{
			name:      "slice out of bounds without overlap",
			err:       chunk.ErrSliceNoDataInRange,
			sliceFrom: chkThrough.Add(time.Minute),
			sliceTo:   chkThrough.Add(time.Hour),
		},
		{
			name:      "slice out of bounds with overlap",
			sliceFrom: chkFrom.Add(10 * time.Minute),
			sliceTo:   chkThrough.Add(10 * time.Minute),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			newChunk, err := originalChunk.Rebound(tc.sliceFrom, tc.sliceTo, nil)
			if tc.err != nil {
				require.Equal(t, tc.err, err)
				return
			}
			require.NoError(t, err)

			// iterate originalChunk from slice start to slice end + nanosecond. Adding a nanosecond here to be inclusive of sample at end time.
			originalChunkItr, err := originalChunk.Iterator(context.Background(), tc.sliceFrom, tc.sliceTo.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
			require.NoError(t, err)

			// iterate newChunk for whole chunk interval which should include all the samples in the chunk and hence align it with expected values.
			newChunkItr, err := newChunk.Iterator(context.Background(), chkFrom, chkThrough, logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
			require.NoError(t, err)

			for {
				originalChunksHasMoreSamples := originalChunkItr.Next()
				newChunkHasMoreSamples := newChunkItr.Next()

				// either both should have samples or none of them
				require.Equal(t, originalChunksHasMoreSamples, newChunkHasMoreSamples)
				if !originalChunksHasMoreSamples {
					break
				}

				require.Equal(t, originalChunkItr.At(), newChunkItr.At())
			}
		})
	}
}

func buildTestMemChunk(t *testing.T, from, through time.Time) *MemChunk {
	chk := NewMemChunk(ChunkFormatV3, compression.GZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0)
	for ; from.Before(through); from = from.Add(time.Second) {
		_, err := chk.Append(&logproto.Entry{
			Line:      from.String(),
			Timestamp: from,
		})
		require.NoError(t, err)
	}

	return chk
}

func TestMemChunk_ReboundAndFilter_with_filter(t *testing.T) {
	chkFrom := time.Unix(1, 0) // headBlock.Append treats Unix time 0 as not set so we have to use a later time
	chkFromPlus5 := chkFrom.Add(5 * time.Second)
	chkThrough := chkFrom.Add(10 * time.Second)
	chkThroughPlus1 := chkThrough.Add(1 * time.Second)

	for _, tc := range []struct {
		name          string
		testMemChunk  *MemChunk
		filterFunc    filter.Func
		err           error
		nrMatching    int
		nrNotMatching int
	}{
		{
			name:         "no matches",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, nil, nil, false),
			filterFunc: func(_ time.Time, in string, _ labels.Labels) bool {
				return strings.HasPrefix(in, "matching")
			},
			nrMatching:    0,
			nrNotMatching: 10,
		},
		{
			name:         "some lines removed",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkFromPlus5, false),
			filterFunc: func(_ time.Time, in string, _ labels.Labels) bool {
				return strings.HasPrefix(in, "matching")
			},
			nrMatching:    5,
			nrNotMatching: 5,
		},
		{
			name:         "all lines match",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkThroughPlus1, false),
			filterFunc: func(_ time.Time, in string, _ labels.Labels) bool {
				return strings.HasPrefix(in, "matching")
			},
			err: chunk.ErrSliceNoDataInRange,
		},

		// Test cases with structured metadata
		{
			name:         "no matches - chunk without structured metadata",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkThroughPlus1, false),
			filterFunc: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool {
				return structuredMetadata.Get(lblPing) == lblPong
			},
			nrMatching:    0,
			nrNotMatching: 10,
		},
		{
			name:         "structured metadata not matching",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkThroughPlus1, true),
			filterFunc: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool {
				return structuredMetadata.Get("ding") == "dong"
			},
			nrMatching:    0,
			nrNotMatching: 10,
		},
		{
			name:         "some lines removed - with structured metadata",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkFromPlus5, true),
			filterFunc: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool {
				return structuredMetadata.Get(lblPing) == lblPong
			},
			nrMatching:    5,
			nrNotMatching: 5,
		},
		{
			name:         "all lines match - with structured metadata",
			testMemChunk: buildFilterableTestMemChunk(t, chkFrom, chkThrough, &chkFrom, &chkThroughPlus1, true),
			filterFunc: func(_ time.Time, in string, structuredMetadata labels.Labels) bool {
				return structuredMetadata.Get(lblPing) == lblPong && strings.HasPrefix(in, "matching")
			},
			err: chunk.ErrSliceNoDataInRange,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			originalChunk := tc.testMemChunk
			newChunk, err := originalChunk.Rebound(chkFrom, chkThrough, tc.filterFunc)
			if tc.err != nil {
				require.Equal(t, tc.err, err)
				return
			}
			require.NoError(t, err)

			// iterate originalChunk from slice start to slice end + nanosecond. Adding a nanosecond here to be inclusive of sample at end time.
			originalChunkItr, err := originalChunk.Iterator(context.Background(), chkFrom, chkThrough.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
			require.NoError(t, err)
			originalChunkSamples := 0
			for originalChunkItr.Next() {
				originalChunkSamples++
			}
			require.Equal(t, tc.nrMatching+tc.nrNotMatching, originalChunkSamples)

			// iterate newChunk for whole chunk interval which should include all the samples in the chunk and hence align it with expected values.
			newChunkItr, err := newChunk.Iterator(context.Background(), chkFrom, chkThrough.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
			require.NoError(t, err)
			newChunkSamples := 0
			for newChunkItr.Next() {
				newChunkSamples++
			}
			require.Equal(t, tc.nrNotMatching, newChunkSamples)
		})
	}
}

func buildFilterableTestMemChunk(t *testing.T, from, through time.Time, matchingFrom, matchingTo *time.Time, withStructuredMetadata bool) *MemChunk {
	chk := NewMemChunk(ChunkFormatV4, compression.GZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0)
	t.Logf("from : %v", from.String())
	t.Logf("through: %v", through.String())
	var structuredMetadata push.LabelsAdapter
	if withStructuredMetadata {
		structuredMetadata = push.LabelsAdapter{{Name: lblPing, Value: lblPong}}
	}
	for from.Before(through) {
		// If a line is between matchingFrom and matchingTo add the prefix "matching"
		if matchingFrom != nil && matchingTo != nil &&
			(from.Equal(*matchingFrom) || (from.After(*matchingFrom) && (from.Before(*matchingTo)))) {
			t.Logf("%v matching line", from.String())
			_, err := chk.Append(&logproto.Entry{
				Line:               fmt.Sprintf("matching %v", from.String()),
				Timestamp:          from,
				StructuredMetadata: structuredMetadata,
			})
			require.NoError(t, err)
		} else {
			t.Logf("%v non-match line", from.String())
			var structuredMetadata push.LabelsAdapter
			if withStructuredMetadata {
				structuredMetadata = push.LabelsAdapter{{Name: "ding", Value: "dong"}}
			}
			_, err := chk.Append(&logproto.Entry{
				Line:               from.String(),
				Timestamp:          from,
				StructuredMetadata: structuredMetadata,
			})
			require.NoError(t, err)
		}
		from = from.Add(time.Second)
	}

	return chk
}
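
// TestMemChunk_SpaceFor exercises the block-count and target-size limits,
// including structured metadata accounting for chunk format v4.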
func TestMemChunk_SpaceFor(t *testing.T) {
	for _, tc := range []struct {
		desc string

		nBlocks      int
		targetSize   int
		headSize     int
		cutBlockSize int
		entry        logproto.Entry

		expect     bool
		expectFunc func(chunkFormat byte, headFmt HeadBlockFmt) bool
	}{
		{
			desc:    "targetSize not defined",
			nBlocks: blocksPerChunk - 1,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      "a",
			},
			expect: true,
		},
		{
			desc:    "targetSize not defined and too many blocks",
			nBlocks: blocksPerChunk + 1,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      "a",
			},
			expect: false,
		},
		{
			desc:         "head too big",
			targetSize:   10,
			headSize:     100,
			cutBlockSize: 0,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      "a",
			},
			expect: false,
		},
		{
			desc:         "cut blocks too big",
			targetSize:   10,
			headSize:     0,
			cutBlockSize: 100,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      "a",
			},
			expect: false,
		},
		{
			desc:         "entry fits",
			targetSize:   10,
			headSize:     0,
			cutBlockSize: 0,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      strings.Repeat("a", 9),
			},
			expect: true,
		},
		{
			desc:         "entry fits with structured metadata",
			targetSize:   10,
			headSize:     0,
			cutBlockSize: 0,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      strings.Repeat("a", 2),
				StructuredMetadata: []logproto.LabelAdapter{
					{Name: "foo", Value: strings.Repeat("a", 2)},
				},
			},
			expect: true,
		},
		{
			desc:         "entry too big",
			targetSize:   10,
			headSize:     0,
			cutBlockSize: 0,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      strings.Repeat("a", 100),
			},
			expect: false,
		},
		{
			desc:         "entry too big because structured metadata",
			targetSize:   10,
			headSize:     0,
			cutBlockSize: 0,
			entry: logproto.Entry{
				Timestamp: time.Unix(0, 0),
				Line:      strings.Repeat("a", 5),
				StructuredMetadata: []logproto.LabelAdapter{
					{Name: "foo", Value: strings.Repeat("a", 5)},
				},
			},

			expectFunc: func(chunkFormat byte, _ HeadBlockFmt) bool {
				// Succeed unless we're using chunk format v4, which should
				// take the structured metadata into account.
				return chunkFormat < ChunkFormatV4
			},
		},
	} {
		t.Run(tc.desc, func(t *testing.T) {
			for _, format := range allPossibleFormats {
				t.Run(fmt.Sprintf("chunk_v%d_head_%s", format.chunkFormat, format.headBlockFmt), func(t *testing.T) {
					chk := newMemChunkWithFormat(format.chunkFormat, compression.None, format.headBlockFmt, 1024, tc.targetSize)

					chk.blocks = make([]block, tc.nBlocks)
					chk.cutBlockSize = tc.cutBlockSize
					for i := 0; i < tc.headSize; i++ {
						dup, err := chk.head.Append(int64(i), "a", labels.EmptyLabels())
						require.False(t, dup)
						require.NoError(t, err)
					}

					expect := tc.expect
					if tc.expectFunc != nil {
						expect = tc.expectFunc(format.chunkFormat, format.headBlockFmt)
					}

					require.Equal(t, expect, chk.SpaceFor(&tc.entry))
				})
			}
		})
	}
}
|
|
|
|
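// TestMemChunk_IteratorWithStructuredMetadata appends entries carrying
// structured metadata to a v4 chunk (two in a cut block, two in the head
// block) and verifies that both the log and sample iterators surface the
// metadata as stream labels, honour label filters and keep/drop stages, and
// report the expected processed-bytes statistics.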
func TestMemChunk_IteratorWithStructuredMetadata(t *testing.T) {
	for _, enc := range testEncodings {
		t.Run(enc.String(), func(t *testing.T) {
			streamLabels := labels.FromStrings("job", "fake")
			chk := newMemChunkWithFormat(ChunkFormatV4, enc, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize)
			dup, err := chk.Append(logprotoEntryWithStructuredMetadata(1, "lineA", []logproto.LabelAdapter{
				{Name: "traceID", Value: "123"},
				{Name: "user", Value: "a"},
			}))
			require.False(t, dup)
			require.NoError(t, err)
			dup, err = chk.Append(logprotoEntryWithStructuredMetadata(2, "lineB", []logproto.LabelAdapter{
				{Name: "traceID", Value: "456"},
				{Name: "user", Value: "b"},
			}))
			require.False(t, dup)
			require.NoError(t, err)
			require.NoError(t, chk.cut())
			dup, err = chk.Append(logprotoEntryWithStructuredMetadata(3, "lineC", []logproto.LabelAdapter{
				{Name: "traceID", Value: "789"},
				{Name: "user", Value: "c"},
			}))
			require.False(t, dup)
			require.NoError(t, err)
			dup, err = chk.Append(logprotoEntryWithStructuredMetadata(4, "lineD", []logproto.LabelAdapter{
				{Name: "traceID", Value: "123"},
				{Name: "user", Value: "d"},
			}))
			require.False(t, dup)
			require.NoError(t, err)

			// The expected byte count is the sum of the bytes decompressed from cut blocks and the bytes read from the head chunk.
			// First we add the bytes read from the store (aka decompressed). That's
			// structuredMetadataBytes = n. lines * (n. labels <int> + (2 * n. structuredMetadataSymbols * symbol <int>))
			// lineBytes = n. lines * (ts <int> + line length <int> + line)
			expectedStructuredMetadataBytes := 2 * (binary.MaxVarintLen64 + (2 * 2 * binary.MaxVarintLen64))
			lineBytes := 2 * (2*binary.MaxVarintLen64 + len("lineA"))
			// Now we add the bytes read from the head chunk. That's
			// structuredMetadataBytes = n. lines * (2 * n. structuredMetadataSymbols * symbol <uint32>)
			// lineBytes = n. lines * (line)
			expectedStructuredMetadataBytes += 2 * (2 * 2 * 4)
			lineBytes += 2 * (len("lineC"))
			// Finally, the expected total bytes is the line bytes + structured metadata bytes
			expectedBytes := lineBytes + expectedStructuredMetadataBytes
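			// With binary.MaxVarintLen64 == 10 this works out to
			// expectedStructuredMetadataBytes = 2*(10+40) + 2*16 = 132 and
			// lineBytes = 2*(20+5) + 2*5 = 60, so expectedBytes = 192.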

			for _, tc := range []struct {
				name                       string
				query                      string
				expectedLines              []string
				expectedStreams            []string
				expectedStructuredMetadata [][]logproto.LabelAdapter
			}{
				{
					name:          "no-filter",
					query:         `{job="fake"}`,
					expectedLines: []string{"lineA", "lineB", "lineC", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "123", "user", "a").String(),
						labels.FromStrings("job", "fake", "traceID", "456", "user", "b").String(),
						labels.FromStrings("job", "fake", "traceID", "789", "user", "c").String(),
						labels.FromStrings("job", "fake", "traceID", "123", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "123", "user", "a")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "456", "user", "b")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "789", "user", "c")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "123", "user", "d")),
					},
				},
				{
					name:          "filter",
					query:         `{job="fake"} | traceID="789"`,
					expectedLines: []string{"lineC"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "789", "user", "c").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "789", "user", "c")),
					},
				},
				{
					name:          "filter-regex-or",
					query:         `{job="fake"} | traceID=~"456|789"`,
					expectedLines: []string{"lineB", "lineC"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "456", "user", "b").String(),
						labels.FromStrings("job", "fake", "traceID", "789", "user", "c").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "456", "user", "b")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "789", "user", "c")),
					},
				},
				{
					name:          "filter-regex-contains",
					query:         `{job="fake"} | traceID=~".*5.*"`,
					expectedLines: []string{"lineB"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "456", "user", "b").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "456", "user", "b")),
					},
				},
				{
					name:          "filter-regex-complex",
					query:         `{job="fake"} | traceID=~"^[0-9]2.*"`,
					expectedLines: []string{"lineA", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "123", "user", "a").String(),
						labels.FromStrings("job", "fake", "traceID", "123", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "123", "user", "a")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "123", "user", "d")),
					},
				},
				{
					name:          "multiple-filters",
					query:         `{job="fake"} | traceID="123" | user="d"`,
					expectedLines: []string{"lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "traceID", "123", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "123", "user", "d")),
					},
				},
				{
					name:          "keep",
					query:         `{job="fake"} | keep job, user`,
					expectedLines: []string{"lineA", "lineB", "lineC", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "user", "a").String(),
						labels.FromStrings("job", "fake", "user", "b").String(),
						labels.FromStrings("job", "fake", "user", "c").String(),
						labels.FromStrings("job", "fake", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "a")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "b")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "c")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "d")),
					},
				},
				{
					name:          "keep-filter",
					query:         `{job="fake"} | keep job, user="b"`,
					expectedLines: []string{"lineA", "lineB", "lineC", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake").String(),
						labels.FromStrings("job", "fake", "user", "b").String(),
						labels.FromStrings("job", "fake").String(),
						labels.FromStrings("job", "fake").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "b")),
					},
				},
				{
					name:          "drop",
					query:         `{job="fake"} | drop traceID`,
					expectedLines: []string{"lineA", "lineB", "lineC", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "user", "a").String(),
						labels.FromStrings("job", "fake", "user", "b").String(),
						labels.FromStrings("job", "fake", "user", "c").String(),
						labels.FromStrings("job", "fake", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "a")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "b")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "c")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "d")),
					},
				},
				{
					name:          "drop-filter",
					query:         `{job="fake"} | drop traceID="123"`,
					expectedLines: []string{"lineA", "lineB", "lineC", "lineD"},
					expectedStreams: []string{
						labels.FromStrings("job", "fake", "user", "a").String(),
						labels.FromStrings("job", "fake", "traceID", "456", "user", "b").String(),
						labels.FromStrings("job", "fake", "traceID", "789", "user", "c").String(),
						labels.FromStrings("job", "fake", "user", "d").String(),
					},
					expectedStructuredMetadata: [][]logproto.LabelAdapter{
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "a")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "456", "user", "b")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("traceID", "789", "user", "c")),
						logproto.FromLabelsToLabelAdapters(labels.FromStrings("user", "d")),
					},
				},
			} {
				t.Run(tc.name, func(t *testing.T) {
					t.Run("log", func(t *testing.T) {
						expr, err := syntax.ParseLogSelector(tc.query, true)
						require.NoError(t, err)

						pipeline, err := expr.Pipeline()
						require.NoError(t, err)

						// We will run the test twice so the iterator will be created twice.
						// This is to ensure that the iterator is correctly closed.
						for i := 0; i < 2; i++ {
							sts, ctx := stats.NewContext(context.Background())
							it, err := chk.Iterator(ctx, time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, pipeline.ForStream(streamLabels))
							require.NoError(t, err)

							var lines []string
							var streams []string
							var structuredMetadata [][]logproto.LabelAdapter
							for it.Next() {
								require.NoError(t, it.Err())
								e := it.At()
								lines = append(lines, e.Line)
								streams = append(streams, it.Labels())

								if len(e.StructuredMetadata) > 0 {
									structuredMetadata = append(structuredMetadata, e.StructuredMetadata)
								}
								require.Empty(t, e.Parsed)
							}
							assert.ElementsMatch(t, tc.expectedLines, lines)
							assert.ElementsMatch(t, tc.expectedStreams, streams)
							assert.ElementsMatch(t, tc.expectedStructuredMetadata, structuredMetadata)

							resultStats := sts.Result(0, 0, len(lines))
							require.Equal(t, int64(expectedBytes), resultStats.Summary.TotalBytesProcessed)
							require.Equal(t, int64(expectedStructuredMetadataBytes), resultStats.Summary.TotalStructuredMetadataBytesProcessed)
						}
					})

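					// The metric path feeds the same pipeline through count_over_time,
					// so the summed sample values must equal the number of expected log
					// lines and the byte statistics must match the log path above.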
t.Run("metric", func(t *testing.T) {
|
|
query := fmt.Sprintf(`count_over_time(%s [1d])`, tc.query)
|
|
expr, err := syntax.ParseSampleExpr(query)
|
|
require.NoError(t, err)
|
|
|
|
extractors, err := expr.Extractors()
|
|
require.NoError(t, err)
|
|
|
|
// We will run the test twice so the iterator will be created twice.
|
|
// This is to ensure that the iterator is correctly closed.
|
|
for i := 0; i < 2; i++ {
|
|
sts, ctx := stats.NewContext(context.Background())
|
|
|
|
streamExtractors := make(
|
|
[]log.StreamSampleExtractor,
|
|
0,
|
|
len(extractors),
|
|
)
|
|
for _, extractor := range extractors {
|
|
streamExtractors = append(
|
|
streamExtractors,
|
|
extractor.ForStream(streamLabels),
|
|
)
|
|
}
|
|
it := chk.SampleIterator(
|
|
ctx,
|
|
time.Unix(0, 0),
|
|
time.Unix(0, math.MaxInt64),
|
|
streamExtractors...)
|
|
|
|
var sumValues int
|
|
var streams []string
|
|
for it.Next() {
|
|
require.NoError(t, it.Err())
|
|
e := it.At()
|
|
sumValues += int(e.Value)
|
|
streams = append(streams, it.Labels())
|
|
}
|
|
require.Equal(t, len(tc.expectedLines), sumValues)
|
|
assert.ElementsMatch(t, tc.expectedStreams, streams)
|
|
|
|
resultStats := sts.Result(0, 0, 0)
|
|
require.Equal(t, int64(expectedBytes), resultStats.Summary.TotalBytesProcessed)
|
|
require.Equal(t, int64(expectedStructuredMetadataBytes), resultStats.Summary.TotalStructuredMetadataBytesProcessed)
|
|
}
|
|
})
|
|
})
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
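// TestDecodeChunkIncorrectBlockOffset corrupts the serialized offset of one
// block in the chunk's block-meta section and verifies that newByteChunk
// still decodes the chunk, and that iterating it yields exactly the same
// entries as the original, for each block whose offset is corrupted.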
func TestDecodeChunkIncorrectBlockOffset(t *testing.T) {
	// use small block size to build multiple blocks in the test chunk
	blockSize := 10

	for _, format := range allPossibleFormats {
		t.Run(fmt.Sprintf("chunkFormat:%v headBlockFmt:%v", format.chunkFormat, format.headBlockFmt), func(t *testing.T) {
			for incorrectOffsetBlockNum := 0; incorrectOffsetBlockNum < 3; incorrectOffsetBlockNum++ {
				t.Run(fmt.Sprintf("incorrect offset block: %d", incorrectOffsetBlockNum), func(t *testing.T) {
					chk := NewMemChunk(format.chunkFormat, compression.None, format.headBlockFmt, blockSize, testTargetSize)
					ts := time.Now().Unix()
					for i := 0; i < 3; i++ {
						dup, err := chk.Append(&logproto.Entry{
							Timestamp: time.Now(),
							Line:      fmt.Sprintf("%d-%d", ts, i),
							StructuredMetadata: []logproto.LabelAdapter{
								{Name: "foo", Value: fmt.Sprintf("%d-%d", ts, i)},
							},
						})
						require.NoError(t, err)
						require.False(t, dup)
					}

					require.Len(t, chk.blocks, 3)

					b, err := chk.Bytes()
					require.NoError(t, err)

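					// The chunk trailer ends with the big-endian metasOffset in the
					// last 8 bytes; v4 chunks additionally store the structured-metadata
					// section offset and length at len(b)-32 and len(b)-24, which are
					// copied through verbatim below.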
					metasOffset := binary.BigEndian.Uint64(b[len(b)-8:])

					w := bytes.NewBuffer(nil)
					eb := EncodeBufferPool.Get().(*encbuf)
					defer EncodeBufferPool.Put(eb)

					crc32Hash := crc32HashPool.Get().(hash.Hash32)
					defer crc32HashPool.Put(crc32Hash)

					crc32Hash.Reset()
					eb.reset()

					// BEGIN - code copied from writeTo func starting from encoding of block metas to change offset of a block
					eb.putUvarint(len(chk.blocks))

					for i, b := range chk.blocks {
						eb.putUvarint(b.numEntries)
						eb.putVarint64(b.mint)
						eb.putVarint64(b.maxt)
						// change offset of one block
						blockOffset := b.offset
						if i == incorrectOffsetBlockNum {
							blockOffset += 5
						}
						eb.putUvarint(blockOffset)
						if chk.format >= ChunkFormatV3 {
							eb.putUvarint(b.uncompressedSize)
						}
						eb.putUvarint(len(b.b))
					}
					metasLen := len(eb.get())
					eb.putHash(crc32Hash)

					_, err = w.Write(eb.get())
					require.NoError(t, err)

					if chk.format >= ChunkFormatV4 {
						// Write structured metadata offset and length
						eb.reset()

						eb.putBE64int(int(binary.BigEndian.Uint64(b[len(b)-32:])))
						eb.putBE64int(int(binary.BigEndian.Uint64(b[len(b)-24:])))
						_, err = w.Write(eb.get())
						require.NoError(t, err)
					}

					// Write the metasOffset.
					eb.reset()
					if chk.format >= ChunkFormatV4 {
						eb.putBE64int(metasLen)
					}
					eb.putBE64int(int(metasOffset))
					_, err = w.Write(eb.get())
					require.NoError(t, err)
					// END - code copied from writeTo func

					// build chunk using pre-block meta section + rewritten remainder of the chunk with incorrect offset for a block
					chkWithIncorrectOffset := make([]byte, int(metasOffset)+w.Len())
					copy(chkWithIncorrectOffset, b[:metasOffset])
					copy(chkWithIncorrectOffset[metasOffset:], w.Bytes())

					// decoding the problematic chunk should succeed
					decodedChkWithIncorrectOffset, err := newByteChunk(chkWithIncorrectOffset, blockSize, testTargetSize, false)
					require.NoError(t, err)

					require.Len(t, decodedChkWithIncorrectOffset.blocks, len(chk.blocks))

					// both chunks should have same log lines
					origChunkItr, err := chk.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
					require.NoError(t, err)

					corruptChunkItr, err := decodedChkWithIncorrectOffset.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
					require.NoError(t, err)

					numEntriesFound := 0
					for origChunkItr.Next() {
						numEntriesFound++
						require.True(t, corruptChunkItr.Next())
						require.Equal(t, origChunkItr.At(), corruptChunkItr.At())
					}

					require.False(t, corruptChunkItr.Next())
					require.Equal(t, 3, numEntriesFound)
				})
			}
		})
	}
}