loki/pkg/bloomgateway/processor_test.go


package bloomgateway

import (
    "context"
    "math/rand"
    "sync"
    "testing"
    "time"

    "github.com/prometheus/common/model"
    "github.com/stretchr/testify/require"
    "go.uber.org/atomic"

    "github.com/grafana/loki/pkg/logql/syntax"
    v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
    "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper"
)
// Compile-time check that dummyStore satisfies the store interface.
var _ store = &dummyStore{}

// dummyStore is an in-memory test double, pre-populated with the metas,
// block refs, and block queriers the processor should operate on.
type dummyStore struct {
    metas     []bloomshipper.Meta
    blocks    []bloomshipper.BlockRef
    querieres []bloomshipper.BlockQuerierWithFingerprintRange
}
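// ResolveMetas returns the refs of all metas in the store. Filtering by
// search params is left as a TODO.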
func (s *dummyStore) ResolveMetas(_ context.Context, _ bloomshipper.MetaSearchParams) ([][]bloomshipper.MetaRef, []*bloomshipper.Fetcher, error) {
    //TODO(chaudum) Filter metas based on search params
    refs := make([]bloomshipper.MetaRef, 0, len(s.metas))
    for _, meta := range s.metas {
        refs = append(refs, meta.MetaRef)
    }
    return [][]bloomshipper.MetaRef{refs}, []*bloomshipper.Fetcher{nil}, nil
}
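// FetchMetas returns all metas in the store, again without filtering by
// search params.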
func (s *dummyStore) FetchMetas(_ context.Context, _ bloomshipper.MetaSearchParams) ([]bloomshipper.Meta, error) {
    //TODO(chaudum) Filter metas based on search params
    return s.metas, nil
}
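// FetchBlocks panics to surface any accidental use: these tests expect the
// processor to load blocks via LoadBlocks instead.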
func (s *dummyStore) FetchBlocks(_ context.Context, _ []bloomshipper.BlockRef) ([]bloomshipper.BlockDirectory, error) {
    panic("don't call me")
}

func (s *dummyStore) Fetcher(_ model.Time) (*bloomshipper.Fetcher, error) {
    return nil, nil
}

func (s *dummyStore) Client(_ model.Time) (bloomshipper.Client, error) {
    return nil, nil
}

func (s *dummyStore) Stop() {
}
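// LoadBlocks returns the queriers whose fingerprint bounds match the given
// block refs, shuffled so that callers cannot rely on block order.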
func (s *dummyStore) LoadBlocks(_ context.Context, refs []bloomshipper.BlockRef) (v1.Iterator[bloomshipper.BlockQuerierWithFingerprintRange], error) {
    result := make([]bloomshipper.BlockQuerierWithFingerprintRange, 0, len(s.querieres))
    for _, ref := range refs {
        for _, bq := range s.querieres {
            if ref.Bounds.Equal(bq.FingerprintBounds) {
                result = append(result, bq)
            }
        }
    }
    // Shuffle the result so the processor cannot depend on the order in
    // which blocks are returned.
    rand.Shuffle(len(result), func(i, j int) {
        result[i], result[j] = result[j], result[i]
    })
    return v1.NewSliceIter(result), nil
}
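// TestProcessor runs the processor against the dummy store and verifies
// that exactly one result is received per queried series.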
func TestProcessor(t *testing.T) {
    ctx := context.Background()
    tenant := "fake"
    now := mktime("2024-01-27 12:00")

    t.Run("dummy", func(t *testing.T) {
        blocks, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x1000)
        p := &processor{
            store: &dummyStore{
                querieres: queriers,
                metas:     metas,
                blocks:    blocks,
            },
        }

        chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10)
        swb := seriesWithBounds{
            series: groupRefs(t, chunkRefs),
            bounds: model.Interval{
                Start: now.Add(-1 * time.Hour),
                End:   now,
            },
            day: truncateDay(now),
        }
        filters := []syntax.LineFilter{
            {Ty: 0, Match: "no match"},
        }

        t.Log("series", len(swb.series))
        task, _ := NewTask(ctx, "fake", swb, filters)
        tasks := []Task{task}

        // Drain each task's result channel concurrently, counting results
        // until the processor closes the channel.
        results := atomic.NewInt64(0)
        var wg sync.WaitGroup
        for i := range tasks {
            wg.Add(1)
            go func(ta Task) {
                defer wg.Done()
                for range ta.resCh {
                    results.Inc()
                }
                t.Log("done", results.Load())
            }(tasks[i])
        }

        err := p.run(ctx, tasks)
        wg.Wait()
        require.NoError(t, err)
        require.Equal(t, int64(len(swb.series)), results.Load())
    })
}