mirror of https://github.com/grafana/loki
Bloom Compactor: Optimize check for fingerprint ownership (#11389)
Calling `c.sharding.OwnsFingerprint(tenant, uint64(fingerprint))` for each series of a TSDB index is very expensive, because it not only creates the tenant's sub-ring but also needs to check the fingerprint against it. Instead, we can pre-calculate the current instance's token ranges and check whether the (uint32-converted) fingerprint is contained within these ranges.

Signed-off-by: Christian Haudum <christian.haudum@gmail.com>

pull/11448/head
parent
c573defcbb
commit
c4f5a57bc8
@ -0,0 +1,37 @@ |
||||
package bloomutils |
||||
|
||||
import ( |
||||
"io" |
||||
|
||||
v1 "github.com/grafana/loki/pkg/storage/bloom/v1" |
||||
) |
||||
|
||||
// sortMergeIterator implements v1.Iterator
|
||||
type sortMergeIterator[T any, C comparable, R any] struct { |
||||
curr *R |
||||
heap *v1.HeapIterator[v1.IndexedValue[C]] |
||||
items []T |
||||
transform func(T, C, *R) *R |
||||
err error |
||||
} |
||||
|
||||
func (it *sortMergeIterator[T, C, R]) Next() bool { |
||||
ok := it.heap.Next() |
||||
if !ok { |
||||
it.err = io.EOF |
||||
return false |
||||
} |
||||
|
||||
group := it.heap.At() |
||||
it.curr = it.transform(it.items[group.Index()], group.Value(), it.curr) |
||||
|
||||
return true |
||||
} |
||||
|
||||
func (it *sortMergeIterator[T, C, R]) At() R { |
||||
return *it.curr |
||||
} |
||||
|
||||
func (it *sortMergeIterator[T, C, R]) Err() error { |
||||
return it.err |
||||
} |
||||
@ -0,0 +1,146 @@ |
||||
// This file contains a bunch of utility functions for bloom components.
|
||||
// TODO: Find a better location for this package
|
||||
|
||||
package bloomutils |
||||
|
||||
import ( |
||||
"math" |
||||
"sort" |
||||
|
||||
"github.com/grafana/dskit/ring" |
||||
"golang.org/x/exp/slices" |
||||
|
||||
v1 "github.com/grafana/loki/pkg/storage/bloom/v1" |
||||
) |
||||
|
||||
type InstanceWithTokenRange struct { |
||||
Instance ring.InstanceDesc |
||||
MinToken, MaxToken uint32 |
||||
} |
||||
|
||||
func (i InstanceWithTokenRange) Cmp(token uint32) v1.BoundsCheck { |
||||
if token < i.MinToken { |
||||
return v1.Before |
||||
} else if token > i.MaxToken { |
||||
return v1.After |
||||
} |
||||
return v1.Overlap |
||||
} |
||||
|
||||
type InstancesWithTokenRange []InstanceWithTokenRange |
||||
|
||||
func (i InstancesWithTokenRange) Contains(token uint32) bool { |
||||
for _, instance := range i { |
||||
if instance.Cmp(token) == v1.Overlap { |
||||
return true |
||||
} |
||||
} |
||||
return false |
||||
} |
||||
|
||||
// GetInstanceTokenRange calculates the token range for a specific instance
|
||||
// with given id based on the first token in the ring.
|
||||
// This assumes that each instance in the ring is configured with only a single
|
||||
// token.
|
||||
func GetInstanceWithTokenRange(id string, instances []ring.InstanceDesc) InstancesWithTokenRange { |
||||
|
||||
// Sorting the tokens of the instances would not be necessary if there is
|
||||
// only a single token per instances, however, since we only assume one
|
||||
// token, but don't enforce one token, we keep the sorting.
|
||||
for _, inst := range instances { |
||||
sort.Slice(inst.Tokens, func(i, j int) bool { |
||||
return inst.Tokens[i] < inst.Tokens[j] |
||||
}) |
||||
} |
||||
|
||||
// Sort instances
|
||||
sort.Slice(instances, func(i, j int) bool { |
||||
return instances[i].Tokens[0] < instances[j].Tokens[0] |
||||
}) |
||||
|
||||
idx := slices.IndexFunc(instances, func(inst ring.InstanceDesc) bool { |
||||
return inst.Id == id |
||||
}) |
||||
|
||||
// instance with Id == id not found
|
||||
if idx == -1 { |
||||
return InstancesWithTokenRange{} |
||||
} |
||||
|
||||
i := uint32(idx) |
||||
n := uint32(len(instances)) |
||||
step := math.MaxUint32 / n |
||||
|
||||
minToken := step * i |
||||
maxToken := step*i + step - 1 |
||||
if i == n-1 { |
||||
// extend the last token tange to MaxUint32
|
||||
maxToken = math.MaxUint32 |
||||
} |
||||
|
||||
return InstancesWithTokenRange{ |
||||
{MinToken: minToken, MaxToken: maxToken, Instance: instances[i]}, |
||||
} |
||||
} |
||||
|
||||
// GetInstancesWithTokenRanges calculates the token ranges for a specific
|
||||
// instance with given id based on all tokens in the ring.
|
||||
// If the instances in the ring are configured with a single token, such as the
|
||||
// bloom compactor, use GetInstanceWithTokenRange() instead.
|
||||
func GetInstancesWithTokenRanges(id string, instances []ring.InstanceDesc) InstancesWithTokenRange { |
||||
servers := make([]InstanceWithTokenRange, 0, len(instances)) |
||||
it := NewInstanceSortMergeIterator(instances) |
||||
var firstInst ring.InstanceDesc |
||||
var lastToken uint32 |
||||
for it.Next() { |
||||
if firstInst.Id == "" { |
||||
firstInst = it.At().Instance |
||||
} |
||||
if it.At().Instance.Id == id { |
||||
servers = append(servers, it.At()) |
||||
} |
||||
lastToken = it.At().MaxToken |
||||
} |
||||
// append token range from lastToken+1 to MaxUint32
|
||||
// only if the instance with the first token is the current one
|
||||
if len(servers) > 0 && firstInst.Id == id { |
||||
servers = append(servers, InstanceWithTokenRange{ |
||||
MinToken: lastToken + 1, |
||||
MaxToken: math.MaxUint32, |
||||
Instance: servers[0].Instance, |
||||
}) |
||||
} |
||||
return servers |
||||
} |
||||
|
||||
// NewInstanceSortMergeIterator creates an iterator that yields instanceWithToken elements
|
||||
// where the token of the elements are sorted in ascending order.
|
||||
func NewInstanceSortMergeIterator(instances []ring.InstanceDesc) v1.Iterator[InstanceWithTokenRange] { |
||||
it := &sortMergeIterator[ring.InstanceDesc, uint32, InstanceWithTokenRange]{ |
||||
items: instances, |
||||
transform: func(item ring.InstanceDesc, val uint32, prev *InstanceWithTokenRange) *InstanceWithTokenRange { |
||||
var prevToken uint32 |
||||
if prev != nil { |
||||
prevToken = prev.MaxToken + 1 |
||||
} |
||||
return &InstanceWithTokenRange{Instance: item, MinToken: prevToken, MaxToken: val} |
||||
}, |
||||
} |
||||
sequences := make([]v1.PeekingIterator[v1.IndexedValue[uint32]], 0, len(instances)) |
||||
for i := range instances { |
||||
sort.Slice(instances[i].Tokens, func(a, b int) bool { |
||||
return instances[i].Tokens[a] < instances[i].Tokens[b] |
||||
}) |
||||
iter := v1.NewIterWithIndex[uint32](v1.NewSliceIter(instances[i].Tokens), i) |
||||
sequences = append(sequences, v1.NewPeekingIter[v1.IndexedValue[uint32]](iter)) |
||||
} |
||||
it.heap = v1.NewHeapIterator( |
||||
func(i, j v1.IndexedValue[uint32]) bool { |
||||
return i.Value() < j.Value() |
||||
}, |
||||
sequences..., |
||||
) |
||||
it.err = nil |
||||
|
||||
return it |
||||
} |
||||
@ -0,0 +1,112 @@ |
||||
package bloomutils |
||||
|
||||
import ( |
||||
"math" |
||||
"testing" |
||||
|
||||
"github.com/grafana/dskit/ring" |
||||
"github.com/stretchr/testify/require" |
||||
) |
||||
|
||||
func TestBloomGatewayClient_SortInstancesByToken(t *testing.T) { |
||||
input := []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{5, 9}}, |
||||
{Id: "2", Tokens: []uint32{3, 7}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
} |
||||
expected := []InstanceWithTokenRange{ |
||||
{Instance: input[2], MinToken: 0, MaxToken: 1}, |
||||
{Instance: input[1], MinToken: 2, MaxToken: 3}, |
||||
{Instance: input[0], MinToken: 4, MaxToken: 5}, |
||||
{Instance: input[1], MinToken: 6, MaxToken: 7}, |
||||
{Instance: input[0], MinToken: 8, MaxToken: 9}, |
||||
} |
||||
|
||||
var i int |
||||
it := NewInstanceSortMergeIterator(input) |
||||
for it.Next() { |
||||
t.Log(expected[i], it.At()) |
||||
require.Equal(t, expected[i], it.At()) |
||||
i++ |
||||
} |
||||
} |
||||
|
||||
func TestBloomGatewayClient_GetInstancesWithTokenRanges(t *testing.T) { |
||||
t.Run("instance does not own first token in the ring", func(t *testing.T) { |
||||
input := []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{5, 9}}, |
||||
{Id: "2", Tokens: []uint32{3, 7}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
} |
||||
expected := InstancesWithTokenRange{ |
||||
{Instance: input[1], MinToken: 2, MaxToken: 3}, |
||||
{Instance: input[1], MinToken: 6, MaxToken: 7}, |
||||
} |
||||
|
||||
result := GetInstancesWithTokenRanges("2", input) |
||||
require.Equal(t, expected, result) |
||||
}) |
||||
|
||||
t.Run("instance owns first token in the ring", func(t *testing.T) { |
||||
input := []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{5, 9}}, |
||||
{Id: "2", Tokens: []uint32{3, 7}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
} |
||||
expected := InstancesWithTokenRange{ |
||||
{Instance: input[2], MinToken: 0, MaxToken: 1}, |
||||
{Instance: input[2], MinToken: 10, MaxToken: math.MaxUint32}, |
||||
} |
||||
|
||||
result := GetInstancesWithTokenRanges("3", input) |
||||
require.Equal(t, expected, result) |
||||
}) |
||||
} |
||||
|
||||
func TestBloomGatewayClient_GetInstanceWithTokenRange(t *testing.T) { |
||||
for name, tc := range map[string]struct { |
||||
id string |
||||
input []ring.InstanceDesc |
||||
expected InstancesWithTokenRange |
||||
}{ |
||||
"first instance includes 0 token": { |
||||
id: "3", |
||||
input: []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{3}}, |
||||
{Id: "2", Tokens: []uint32{5}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
}, |
||||
expected: InstancesWithTokenRange{ |
||||
{Instance: ring.InstanceDesc{Id: "3", Tokens: []uint32{1}}, MinToken: 0, MaxToken: math.MaxUint32/3 - 1}, |
||||
}, |
||||
}, |
||||
"middle instance": { |
||||
id: "1", |
||||
input: []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{3}}, |
||||
{Id: "2", Tokens: []uint32{5}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
}, |
||||
expected: InstancesWithTokenRange{ |
||||
{Instance: ring.InstanceDesc{Id: "1", Tokens: []uint32{3}}, MinToken: math.MaxUint32 / 3, MaxToken: math.MaxUint32/3*2 - 1}, |
||||
}, |
||||
}, |
||||
"last instance includes MaxUint32 token": { |
||||
id: "2", |
||||
input: []ring.InstanceDesc{ |
||||
{Id: "1", Tokens: []uint32{3}}, |
||||
{Id: "2", Tokens: []uint32{5}}, |
||||
{Id: "3", Tokens: []uint32{1}}, |
||||
}, |
||||
expected: InstancesWithTokenRange{ |
||||
{Instance: ring.InstanceDesc{Id: "2", Tokens: []uint32{5}}, MinToken: math.MaxUint32 / 3 * 2, MaxToken: math.MaxUint32}, |
||||
}, |
||||
}, |
||||
} { |
||||
tc := tc |
||||
t.Run(name, func(t *testing.T) { |
||||
result := GetInstanceWithTokenRange(tc.id, tc.input) |
||||
require.Equal(t, tc.expected, result) |
||||
}) |
||||
} |
||||
} |
||||
@ -0,0 +1,70 @@ |
||||
package v1 |
||||
|
||||
// IndexedValue pairs a value with an integer index.
type IndexedValue[T any] struct {
	// idx is the index associated with the value.
	idx int
	// val is the wrapped value.
	val T
}

// Value returns the wrapped value.
func (iv IndexedValue[T]) Value() T {
	value := iv.val
	return value
}

// Index returns the index associated with the value.
func (iv IndexedValue[T]) Index() int {
	index := iv.idx
	return index
}
||||
|
||||
type IterWithIndex[T any] struct { |
||||
Iterator[T] |
||||
zero T // zero value of T
|
||||
cache IndexedValue[T] |
||||
} |
||||
|
||||
func (it *IterWithIndex[T]) At() IndexedValue[T] { |
||||
it.cache.val = it.Iterator.At() |
||||
return it.cache |
||||
} |
||||
|
||||
func NewIterWithIndex[T any](iter Iterator[T], idx int) Iterator[IndexedValue[T]] { |
||||
return &IterWithIndex[T]{ |
||||
Iterator: iter, |
||||
cache: IndexedValue[T]{idx: idx}, |
||||
} |
||||
} |
||||
|
||||
type SliceIterWithIndex[T any] struct { |
||||
xs []T // source slice
|
||||
pos int // position within the slice
|
||||
zero T // zero value of T
|
||||
cache IndexedValue[T] |
||||
} |
||||
|
||||
func (it *SliceIterWithIndex[T]) Next() bool { |
||||
it.pos++ |
||||
return it.pos < len(it.xs) |
||||
} |
||||
|
||||
func (it *SliceIterWithIndex[T]) Err() error { |
||||
return nil |
||||
} |
||||
|
||||
func (it *SliceIterWithIndex[T]) At() IndexedValue[T] { |
||||
it.cache.val = it.xs[it.pos] |
||||
return it.cache |
||||
} |
||||
|
||||
func (it *SliceIterWithIndex[T]) Peek() (IndexedValue[T], bool) { |
||||
if it.pos+1 >= len(it.xs) { |
||||
it.cache.val = it.zero |
||||
return it.cache, false |
||||
} |
||||
it.cache.val = it.xs[it.pos+1] |
||||
return it.cache, true |
||||
} |
||||
|
||||
func NewSliceIterWithIndex[T any](xs []T, idx int) PeekingIterator[IndexedValue[T]] { |
||||
return &SliceIterWithIndex[T]{ |
||||
xs: xs, |
||||
pos: -1, |
||||
cache: IndexedValue[T]{idx: idx}, |
||||
} |
||||
} |
||||
@ -0,0 +1,35 @@ |
||||
package v1 |
||||
|
||||
import ( |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/require" |
||||
) |
||||
|
||||
func TestSliceIterWithIndex(t *testing.T) { |
||||
t.Run("SliceIterWithIndex implements PeekingIterator interface", func(t *testing.T) { |
||||
xs := []string{"a", "b", "c"} |
||||
it := NewSliceIterWithIndex(xs, 123) |
||||
|
||||
// peek at first item
|
||||
p, ok := it.Peek() |
||||
require.True(t, ok) |
||||
require.Equal(t, "a", p.val) |
||||
require.Equal(t, 123, p.idx) |
||||
|
||||
// proceed to first item
|
||||
require.True(t, it.Next()) |
||||
require.Equal(t, "a", it.At().val) |
||||
require.Equal(t, 123, it.At().idx) |
||||
|
||||
// proceed to second and third item
|
||||
require.True(t, it.Next()) |
||||
require.True(t, it.Next()) |
||||
|
||||
// peek at non-existing fourth item
|
||||
p, ok = it.Peek() |
||||
require.False(t, ok) |
||||
require.Equal(t, "", p.val) // "" is zero value for type string
|
||||
require.Equal(t, 123, p.idx) |
||||
}) |
||||
} |
||||
Loading…
Reference in new issue