mirror of https://github.com/grafana/loki
[bloom-compactor] downloading chunks in batches (#11649)
**What this PR does / why we need it**: Adds a chunk-batches iterator that downloads chunks in batches instead of downloading all of them at once. Without batching, a stream that contains a lot of chunks can lead to an OOM. **Special notes for your reviewer**: **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [x] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](pull/11670/head^2d10549e3ec) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](0d4416a4b0)
--------- Signed-off-by: Vladyslav Diachenko <vlad.diachenko@grafana.com>
parent
6ae46dc6ef
commit
a5aa8b315d
@ -0,0 +1,48 @@ |
||||
package bloomcompactor |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
|
||||
"github.com/grafana/loki/pkg/storage/chunk" |
||||
) |
||||
|
||||
type chunksBatchesIterator struct { |
||||
context context.Context |
||||
client chunkClient |
||||
chunksToDownload []chunk.Chunk |
||||
batchSize int |
||||
|
||||
currentBatch []chunk.Chunk |
||||
err error |
||||
} |
||||
|
||||
func newChunkBatchesIterator(context context.Context, client chunkClient, chunksToDownload []chunk.Chunk, batchSize int) (*chunksBatchesIterator, error) { |
||||
if batchSize <= 0 { |
||||
return nil, errors.New("batchSize must be greater than 0") |
||||
} |
||||
return &chunksBatchesIterator{context: context, client: client, chunksToDownload: chunksToDownload, batchSize: batchSize}, nil |
||||
} |
||||
|
||||
func (c *chunksBatchesIterator) Next() bool { |
||||
if len(c.chunksToDownload) == 0 { |
||||
return false |
||||
} |
||||
batchSize := c.batchSize |
||||
chunksToDownloadCount := len(c.chunksToDownload) |
||||
if chunksToDownloadCount < batchSize { |
||||
batchSize = chunksToDownloadCount |
||||
} |
||||
chunksToDownload := c.chunksToDownload[:batchSize] |
||||
c.chunksToDownload = c.chunksToDownload[batchSize:] |
||||
c.currentBatch, c.err = c.client.GetChunks(c.context, chunksToDownload) |
||||
return c.err == nil |
||||
} |
||||
|
||||
func (c *chunksBatchesIterator) Err() error { |
||||
return c.err |
||||
} |
||||
|
||||
func (c *chunksBatchesIterator) At() []chunk.Chunk { |
||||
return c.currentBatch |
||||
} |
@ -0,0 +1,96 @@ |
||||
package bloomcompactor |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/loki/pkg/storage/chunk" |
||||
tsdbindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" |
||||
) |
||||
|
||||
func Test_chunksBatchesIterator(t *testing.T) { |
||||
tests := map[string]struct { |
||||
batchSize int |
||||
chunksToDownload []chunk.Chunk |
||||
constructorError error |
||||
|
||||
hadNextCount int |
||||
}{ |
||||
"expected error if batch size is set to 0": { |
||||
batchSize: 0, |
||||
constructorError: errors.New("batchSize must be greater than 0"), |
||||
}, |
||||
"expected no error if there are no chunks": { |
||||
hadNextCount: 0, |
||||
batchSize: 10, |
||||
}, |
||||
"expected 1 call to the client": { |
||||
chunksToDownload: createFakeChunks(10), |
||||
hadNextCount: 1, |
||||
batchSize: 20, |
||||
}, |
||||
"expected 1 call to the client(2)": { |
||||
chunksToDownload: createFakeChunks(10), |
||||
hadNextCount: 1, |
||||
batchSize: 10, |
||||
}, |
||||
"expected 2 calls to the client": { |
||||
chunksToDownload: createFakeChunks(10), |
||||
hadNextCount: 2, |
||||
batchSize: 6, |
||||
}, |
||||
"expected 10 calls to the client": { |
||||
chunksToDownload: createFakeChunks(10), |
||||
hadNextCount: 10, |
||||
batchSize: 1, |
||||
}, |
||||
} |
||||
for name, data := range tests { |
||||
t.Run(name, func(t *testing.T) { |
||||
client := &fakeClient{} |
||||
iterator, err := newChunkBatchesIterator(context.Background(), client, data.chunksToDownload, data.batchSize) |
||||
if data.constructorError != nil { |
||||
require.Equal(t, err, data.constructorError) |
||||
return |
||||
} |
||||
hadNextCount := 0 |
||||
var downloadedChunks []chunk.Chunk |
||||
for iterator.Next() { |
||||
hadNextCount++ |
||||
downloaded := iterator.At() |
||||
downloadedChunks = append(downloadedChunks, downloaded...) |
||||
require.LessOrEqual(t, len(downloaded), data.batchSize) |
||||
} |
||||
require.NoError(t, iterator.Err()) |
||||
require.Equal(t, data.chunksToDownload, downloadedChunks) |
||||
require.Equal(t, data.hadNextCount, client.callsCount) |
||||
require.Equal(t, data.hadNextCount, hadNextCount) |
||||
}) |
||||
} |
||||
} |
||||
|
||||
func createFakeChunks(count int) []chunk.Chunk { |
||||
metas := make([]tsdbindex.ChunkMeta, 0, count) |
||||
for i := 0; i < count; i++ { |
||||
metas = append(metas, tsdbindex.ChunkMeta{ |
||||
Checksum: uint32(i), |
||||
MinTime: int64(i), |
||||
MaxTime: int64(i + 100), |
||||
KB: uint32(i * 100), |
||||
Entries: uint32(i * 10), |
||||
}) |
||||
} |
||||
return makeChunkRefs(metas, "fake", 0xFFFF) |
||||
} |
||||
|
||||
// fakeClient is a test double for the chunk client that counts how many
// times GetChunks has been called.
type fakeClient struct {
	// callsCount is incremented on every GetChunks call.
	callsCount int
}
||||
|
||||
func (f *fakeClient) GetChunks(_ context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) { |
||||
f.callsCount++ |
||||
return chunks, nil |
||||
} |
Loading…
Reference in new issue