@@ -4,17 +4,13 @@ import (
"context"
"fmt"
"io"
"math"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/grafana/loki/pkg/chunkenc"
"github.com/grafana/loki/pkg/logproto"
logql_log "github.com/grafana/loki/pkg/logql/log"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/grafana/loki/pkg/storage/chunk"
"github.com/grafana/loki/pkg/storage/chunk/fetcher"
@@ -48,7 +44,8 @@ type SimpleBloomGenerator struct {
	userID      string
	store       v1.Iterator[*v1.Series]
	chunkLoader ChunkLoader
	blocksIter  v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier]
	blocksIter  v1.ResettableIterator[*v1.SeriesWithBloom]
	skipped     []v1.BlockMetadata
	// options to build blocks with
	opts v1.BlockOptions
@@ -70,7 +67,7 @@ func NewSimpleBloomGenerator(
	opts v1.BlockOptions,
	store v1.Iterator[*v1.Series],
	chunkLoader ChunkLoader,
	blocksIter v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier],
	blocksIter v1.ResettableIterator[*v1.SeriesWithBloom],
	readWriterFn func() (v1.BlockWriter, v1.BlockReader),
	metrics *Metrics,
	logger log.Logger,
@@ -107,44 +104,41 @@ func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Se
}
func (s *SimpleBloomGenerator) Generate(ctx context.Context) ([]v1.BlockMetadata, []io.Closer, v1.Iterator[*v1.Block], error) {
	skippedBlocks := make([]v1.BlockMetadata, 0)
	toClose := make([]io.Closer, 0)
	blocksMatchingSchema := make([]*bloomshipper.CloseableBlockQuerier, 0)
func (s *SimpleBloomGenerator) Generate(ctx context.Context) v1.Iterator[*v1.Block] {
	level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "schema", fmt.Sprintf("%+v", s.opts.Schema))
	for s.blocksIter.Next() && s.blocksIter.Err() == nil {
		block := s.blocksIter.At()
		toClose = append(toClose, block)
		logger := log.With(s.logger, "block", block.BlockRef)
		md, err := block.Metadata()
		schema := md.Options.Schema
		if err != nil {
			level.Warn(logger).Log("msg", "failed to get schema for block", "err", err)
			skippedBlocks = append(skippedBlocks, md)
			continue
		}
		if !s.opts.Schema.Compatible(schema) {
			level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema))
			skippedBlocks = append(skippedBlocks, md)
			continue
		}
		level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs")
		blocksMatchingSchema = append(blocksMatchingSchema, block)
	}
	series := v1.NewPeekingIter(s.store)
	if s.blocksIter.Err() != nil {
		// should we ignore the error and continue with the blocks we got?
		return skippedBlocks, toClose, v1.NewSliceIter([]*v1.Block{}), s.blocksIter.Err()
	// TODO: Use interface
	impl, ok := s.blocksIter.(*blockLoadingIter)
	if ok {
		impl.Filter(
			func(bq *bloomshipper.CloseableBlockQuerier) bool {
				logger := log.With(s.logger, "block", bq.BlockRef)
				md, err := bq.Metadata()
				schema := md.Options.Schema
				if err != nil {
					level.Warn(logger).Log("msg", "failed to get schema for block", "err", err)
					s.skipped = append(s.skipped, md)
					bq.Close() // close unused querier
					return false
				}
				if !s.opts.Schema.Compatible(schema) {
					level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema))
					s.skipped = append(s.skipped, md)
					bq.Close() // close unused querier
					return false
				}
				level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs")
				return true
			},
		)
	}
	level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "num_blocks", len(blocksMatchingSchema), "skipped_blocks", len(skippedBlocks), "schema", fmt.Sprintf("%+v", s.opts.Schema))
	series := v1.NewPeekingIter(s.store)
	blockIter := NewLazyBlockBuilderIterator(ctx, s.opts, s.populator(ctx), s.readWriterFn, series, blocksMatchingSchema)
	return skippedBlocks, toClose, blockIter, nil
	return NewLazyBlockBuilderIterator(ctx, s.opts, s.populator(ctx), s.readWriterFn, series, s.blocksIter)
}
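// Illustrative sketch (not part of this diff): one way a caller might drain the
// iterator returned by the new Generate signature. The names `gen` and
// `handleBlock` are hypothetical; Next/At/Err follow the v1.Iterator contract
// used elsewhere in this file.
func consumeGeneratedBlocks(ctx context.Context, gen *SimpleBloomGenerator, handleBlock func(*v1.Block) error) error {
	blocks := gen.Generate(ctx)
	for blocks.Next() {
		if err := handleBlock(blocks.At()); err != nil {
			return err
		}
	}
	return blocks.Err()
}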
// LazyBlockBuilderIterator is a lazy iterator over blocks that builds
@@ -155,11 +149,10 @@ type LazyBlockBuilderIterator struct {
	populate     func(*v1.Series, *v1.Bloom) error
	readWriterFn func() (v1.BlockWriter, v1.BlockReader)
	series       v1.PeekingIterator[*v1.Series]
	blocks       []*bloomshipper.CloseableBlockQuerier
	blocks       v1.ResettableIterator[*v1.SeriesWithBloom]
	blocksAsPeekingIter []v1.PeekingIterator[*v1.SeriesWithBloom]
	curr                *v1.Block
	err                 error
	curr *v1.Block
	err  error
}
func NewLazyBlockBuilderIterator(
@@ -168,20 +161,16 @@ func NewLazyBlockBuilderIterator(
	populate func(*v1.Series, *v1.Bloom) error,
	readWriterFn func() (v1.BlockWriter, v1.BlockReader),
	series v1.PeekingIterator[*v1.Series],
	blocks []*bloomshipper.CloseableBlockQuerier,
	blocks v1.ResettableIterator[*v1.SeriesWithBloom],
) *LazyBlockBuilderIterator {
	it := &LazyBlockBuilderIterator{
	return &LazyBlockBuilderIterator{
		ctx:          ctx,
		opts:         opts,
		populate:     populate,
		readWriterFn: readWriterFn,
		series:       series,
		blocks:       blocks,
		blocksAsPeekingIter: make([]v1.PeekingIterator[*v1.SeriesWithBloom], len(blocks)),
	}
	return it
}
func (b *LazyBlockBuilderIterator) Next() bool {
@@ -190,21 +179,17 @@ func (b *LazyBlockBuilderIterator) Next() bool {
		return false
	}
	// reset all the blocks to the start
	for i, block := range b.blocks {
		if err := block.Reset(); err != nil {
			b.err = errors.Wrapf(err, "failed to reset block iterator %d", i)
			return false
		}
		b.blocksAsPeekingIter[i] = v1.NewPeekingIter[*v1.SeriesWithBloom](block)
	}
	if err := b.ctx.Err(); err != nil {
		b.err = errors.Wrap(err, "context canceled")
		return false
	}
	mergeBuilder := v1.NewMergeBuilder(b.blocksAsPeekingIter, b.series, b.populate)
	if err := b.blocks.Reset(); err != nil {
		b.err = errors.Wrap(err, "reset blocks iterator")
		return false
	}
	mergeBuilder := v1.NewMergeBuilder(b.blocks, b.series, b.populate)
	writer, reader := b.readWriterFn()
	blockBuilder, err := v1.NewBlockBuilder(b.opts, writer)
	if err != nil {
@@ -292,137 +277,3 @@ func (s *StoreChunkLoader) Load(ctx context.Context, userID string, series *v1.S
		itr: newBatchedChunkLoader(ctx, fetchers, inputs, s.metrics, batchedLoaderDefaultBatchSize),
	}, nil
}
type Fetcher[A, B any] interface {
	Fetch(ctx context.Context, inputs []A) ([]B, error)
}
type FetchFunc[A, B any] func(ctx context.Context, inputs []A) ([]B, error)
func (f FetchFunc[A, B]) Fetch(ctx context.Context, inputs []A) ([]B, error) {
	return f(ctx, inputs)
}
// batchedLoader implements `v1.Iterator[v1.ChunkRefWithIter]` in batches
// to ensure memory is bounded while loading chunks
// TODO(owen-d): testware
type batchedLoader[A, B, C any] struct {
	metrics   *Metrics
	batchSize int
	ctx       context.Context
	fetchers  []Fetcher[A, B]
	work      [][]A
	mapper func(B) (C, error)
	cur    C
	batch  []B
	err    error
}
const batchedLoaderDefaultBatchSize = 50
func newBatchedLoader[A, B, C any](
	ctx context.Context,
	fetchers []Fetcher[A, B],
	inputs [][]A,
	mapper func(B) (C, error),
	batchSize int,
) *batchedLoader[A, B, C] {
	return &batchedLoader[A, B, C]{
		batchSize: max(batchSize, 1),
		ctx:       ctx,
		fetchers:  fetchers,
		work:      inputs,
		mapper:    mapper,
	}
}
func (b *batchedLoader[A, B, C]) Next() bool {
	// iterate work until we have non-zero length batch
	for len(b.batch) == 0 {
		// empty batch + no work remaining = we're done
		if len(b.work) == 0 {
			return false
		}
		// setup next batch
		next := b.work[0]
		batchSize := min(b.batchSize, len(next))
		toFetch := next[:batchSize]
		fetcher := b.fetchers[0]
		// update work
		b.work[0] = b.work[0][batchSize:]
		if len(b.work[0]) == 0 {
			// if we've exhausted work from this set of inputs,
			// set pointer to next set of inputs
			// and their respective fetcher
			b.work = b.work[1:]
			b.fetchers = b.fetchers[1:]
		}
		// there was no work in this batch; continue (should not happen)
		if len(toFetch) == 0 {
			continue
		}
		b.batch, b.err = fetcher.Fetch(b.ctx, toFetch)
		// error fetching, short-circuit iteration
		if b.err != nil {
			return false
		}
	}
	return b.prepNext()
}
func (b *batchedLoader[_, B, C]) prepNext() bool {
	b.cur, b.err = b.mapper(b.batch[0])
	b.batch = b.batch[1:]
	return b.err == nil
}
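// Illustrative sketch (not part of this diff): driving a batchedLoader over two
// input sets, each paired with its own fetcher. At most batchSize items are
// fetched per call, which is what keeps memory bounded. `echo`, `identity`,
// and the literal inputs are hypothetical.
func exampleBatchedLoader(ctx context.Context) ([]string, error) {
	echo := FetchFunc[string, string](func(_ context.Context, inputs []string) ([]string, error) {
		return inputs, nil
	})
	identity := func(s string) (string, error) { return s, nil }
	loader := newBatchedLoader[string, string, string](
		ctx,
		[]Fetcher[string, string]{echo, echo},
		[][]string{{"a", "b", "c"}, {"d"}},
		identity,
		2, // fetch at most two items per Fetch call
	)
	out := make([]string, 0, 4)
	for loader.Next() {
		out = append(out, loader.At())
	}
	return out, loader.Err() // expected: ["a" "b" "c" "d"], nil
}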
func newBatchedChunkLoader(
	ctx context.Context,
	fetchers []Fetcher[chunk.Chunk, chunk.Chunk],
	inputs [][]chunk.Chunk,
	metrics *Metrics,
	batchSize int,
) *batchedLoader[chunk.Chunk, chunk.Chunk, v1.ChunkRefWithIter] {
	mapper := func(c chunk.Chunk) (v1.ChunkRefWithIter, error) {
		chk := c.Data.(*chunkenc.Facade).LokiChunk()
		metrics.chunkSize.Observe(float64(chk.UncompressedSize()))
		itr, err := chk.Iterator(
			ctx,
			time.Unix(0, 0),
			time.Unix(0, math.MaxInt64),
			logproto.FORWARD,
			logql_log.NewNoopPipeline().ForStream(c.Metric),
		)
		if err != nil {
			return v1.ChunkRefWithIter{}, err
		}
		return v1.ChunkRefWithIter{
			Ref: v1.ChunkRef{
				Start:    c.From,
				End:      c.Through,
				Checksum: c.Checksum,
			},
			Itr: itr,
		}, nil
	}
	return newBatchedLoader(ctx, fetchers, inputs, mapper, batchSize)
}
func (b *batchedLoader[_, _, C]) At() C {
	return b.cur
}
func (b *batchedLoader[_, _, _]) Err() error {
	return b.err
}