Add basic structure of bloom compactor (#10748)

This pull request adds the basic structure for the new bloom compactor component.

- Adds a new `bloom-compactor` target that runs with multiple instances joined by a ring
- Adds boilerplate functions to index blooms and compact blocks

The main goal of this PR is to provide basic functionality that future, smaller PRs can build upon. Since the code path lives in a completely separate new component, it is not used anywhere at the moment.
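For orientation, here is a minimal sketch of how the component could be enabled once it is wired up; the `-target=bloom-compactor` invocation and the values are illustrative assumptions based on the target and flags added in this PR, not documented usage:

```yaml
# Hypothetical invocation: loki -config.file=loki.yaml -target=bloom-compactor
bloom_compactor:
  enabled: true                              # -bloom-compactor.enabled
  working_directory: /loki/bloom-compactor   # example path
  max_look_back_period: 24h                  # example value
  ring:
    kvstore:
      store: memberlist
```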
Poyzan authored 2 years ago, committed by GitHub
parent abc4ee29c0
commit 0832256d7b
Changed files (lines changed):
  1. docs/sources/configure/_index.md (106)
  2. pkg/bloomcompactor/bloomcompactor.go (246)
  3. pkg/bloomcompactor/config.go (36)
  4. pkg/bloomcompactor/ringmanager.go (208)
  5. pkg/loki/config_wrapper.go (27)
  6. pkg/loki/loki.go (10)
  7. pkg/loki/modules.go (46)
  8. pkg/storage/bloom/v1/block.go (4)
  9. pkg/storage/bloom/v1/bloom.go (8)
  10. pkg/storage/bloom/v1/builder_test.go (4)
  11. tools/doc-generator/parse/root_blocks.go (6)

@ -161,6 +161,11 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set
# object store.
[index_gateway: <index_gateway>]
# The bloom_compactor block configures the Loki bloom compactor server,
# responsible for compacting stream indexes into bloom filters and merging them
# as bloom blocks
[bloom_compactor: <bloom_compactor>]
# The bloom_gateway block configures the Loki bloom gateway server, responsible
# for serving queries for filtering chunks based on filter expressions.
[bloom_gateway: <bloom_gateway>]
@ -2457,6 +2462,105 @@ compactor_ring:
[deletion_mode: <string> | default = ""]
```
### bloom_compactor
The `bloom_compactor` block configures the Loki bloom compactor server, responsible for compacting stream indexes into bloom filters and merging them as bloom blocks
```yaml
# Defines the ring to be used by the bloom-compactor servers. In case this isn't
# configured, this block supports inheriting configuration from the common ring
# section.
ring:
  kvstore:
    # Backend storage to use for the ring. Supported values are: consul, etcd,
    # inmemory, memberlist, multi.
    # CLI flag: -bloom-compactor.ring.store
    [store: <string> | default = "consul"]

    # The prefix for the keys in the store. Should end with a /.
    # CLI flag: -bloom-compactor.ring.prefix
    [prefix: <string> | default = "collectors/"]

    # Configuration for a Consul client. Only applies if the selected kvstore is
    # consul.
    # The CLI flags prefix for this block configuration is: bloom-compactor.ring
    [consul: <consul>]

    # Configuration for an ETCD v3 client. Only applies if the selected kvstore
    # is etcd.
    # The CLI flags prefix for this block configuration is: bloom-compactor.ring
    [etcd: <etcd>]

    multi:
      # Primary backend storage used by multi-client.
      # CLI flag: -bloom-compactor.ring.multi.primary
      [primary: <string> | default = ""]

      # Secondary backend storage used by multi-client.
      # CLI flag: -bloom-compactor.ring.multi.secondary
      [secondary: <string> | default = ""]

      # Mirror writes to secondary store.
      # CLI flag: -bloom-compactor.ring.multi.mirror-enabled
      [mirror_enabled: <boolean> | default = false]

      # Timeout for storing value to secondary store.
      # CLI flag: -bloom-compactor.ring.multi.mirror-timeout
      [mirror_timeout: <duration> | default = 2s]

  # Period at which to heartbeat to the ring. 0 = disabled.
  # CLI flag: -bloom-compactor.ring.heartbeat-period
  [heartbeat_period: <duration> | default = 15s]

  # The heartbeat timeout after which compactors are considered unhealthy within
  # the ring. 0 = never (timeout disabled).
  # CLI flag: -bloom-compactor.ring.heartbeat-timeout
  [heartbeat_timeout: <duration> | default = 1m]

  # File path where tokens are stored. If empty, tokens are not stored at
  # shutdown and restored at startup.
  # CLI flag: -bloom-compactor.ring.tokens-file-path
  [tokens_file_path: <string> | default = ""]

  # True to enable zone-awareness and replicate blocks across different
  # availability zones.
  # CLI flag: -bloom-compactor.ring.zone-awareness-enabled
  [zone_awareness_enabled: <boolean> | default = false]

  # Instance ID to register in the ring.
  # CLI flag: -bloom-compactor.ring.instance-id
  [instance_id: <string> | default = "<hostname>"]

  # Name of network interface to read address from.
  # CLI flag: -bloom-compactor.ring.instance-interface-names
  [instance_interface_names: <list of strings> | default = [<private network interfaces>]]

  # Port to advertise in the ring (defaults to server.grpc-listen-port).
  # CLI flag: -bloom-compactor.ring.instance-port
  [instance_port: <int> | default = 0]

  # IP address to advertise in the ring.
  # CLI flag: -bloom-compactor.ring.instance-addr
  [instance_addr: <string> | default = ""]

  # The availability zone where this instance is running. Required if
  # zone-awareness is enabled.
  # CLI flag: -bloom-compactor.ring.instance-availability-zone
  [instance_availability_zone: <string> | default = ""]

  # Enable using a IPv6 instance address.
  # CLI flag: -bloom-compactor.ring.instance-enable-ipv6
  [instance_enable_ipv6: <boolean> | default = false]

# Flag to enable or disable the usage of the bloom-compactor component.
# CLI flag: -bloom-compactor.enabled
[enabled: <boolean> | default = false]

[working_directory: <string> | default = ""]

[max_look_back_period: <duration>]
```
### limits_config
The `limits_config` block configures global and per-tenant limits in Loki.
@ -3490,6 +3594,7 @@ ring:
Configuration for a Consul client. Only applies if the selected kvstore is `consul`. The supported CLI flags `<prefix>` used to reference this configuration block are:
- `bloom-compactor.ring`
- `bloom-gateway.ring`
- `common.storage.ring`
- `compactor.ring`
@ -3535,6 +3640,7 @@ Configuration for a Consul client. Only applies if the selected kvstore is `cons
Configuration for an ETCD v3 client. Only applies if the selected kvstore is `etcd`. The supported CLI flags `<prefix>` used to reference this configuration block are:
- `bloom-compactor.ring`
- `bloom-gateway.ring`
- `common.storage.ring`
- `compactor.ring`

@ -0,0 +1,246 @@
/*
Bloom-compactor
This is a standalone service responsible for compacting TSDB indexes into bloom filters.
It creates and merges bloom filters into an aggregated form called bloom blocks.
It maintains a list of references between bloom blocks and TSDB indexes in files called meta.jsons.
The bloom-compactor runs regularly to check the meta.jsons for changes and compacts only when the TSDBs have changed.
bloomCompactor.Compactor
| // Read/Write path
bloomshipper.Store**
|
bloomshipper.Shipper
|
bloomshipper.BloomClient
|
ObjectClient
|
.....................service boundary
|
object storage
*/
package bloomcompactor
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"github.com/go-kit/log"
"github.com/grafana/dskit/ring"
"github.com/grafana/dskit/services"
"github.com/prometheus/client_golang/prometheus"
"github.com/grafana/loki/pkg/storage"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/grafana/loki/pkg/storage/bloom/v1/filter"
"github.com/grafana/loki/pkg/storage/config"
"github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper"
)
type Compactor struct {
services.Service
cfg Config
logger log.Logger
bloomCompactorRing ring.ReadRing
periodConfigs []config.PeriodConfig
// temporary workaround until store has implemented read/write shipper interface
bloomShipperClient bloomshipper.Client
bloomStore bloomshipper.Store
}
func New(cfg Config,
readRing ring.ReadRing,
storageCfg storage.Config,
periodConfigs []config.PeriodConfig,
logger log.Logger,
clientMetrics storage.ClientMetrics,
_ prometheus.Registerer) (*Compactor, error) {
c := &Compactor{
cfg: cfg,
logger: logger,
bloomCompactorRing: readRing,
periodConfigs: periodConfigs,
}
client, err := bloomshipper.NewBloomClient(periodConfigs, storageCfg, clientMetrics)
if err != nil {
return nil, err
}
shipper, err := bloomshipper.NewShipper(
client,
storageCfg.BloomShipperConfig,
logger,
)
if err != nil {
return nil, err
}
store, err := bloomshipper.NewBloomStore(shipper)
if err != nil {
return nil, err
}
// temporary workaround until store has implemented read/write shipper interface
c.bloomShipperClient = client
c.bloomStore = store
// TODO use a new service with a loop
c.Service = services.NewIdleService(c.starting, c.stopping)
return c, nil
}
func (c *Compactor) starting(_ context.Context) error {
return nil
}
func (c *Compactor) stopping(_ error) error {
return nil
}
// TODO Get fpRange owned by the compactor instance
func NoopGetFingerprintRange() (uint64, uint64) { return 0, 0 }
// TODO List users from the TSDB and add logic to determine the users owned by this instance via the ring
func NoopGetUserID() string { return "" }
// TODO get series from objectClient (TSDB) instead of params
func NoopGetSeries() *v1.Series { return nil }
// TODO Then get chunk data from series
func NoopGetChunks() []byte { return nil }
// part 1: Create a compact method that assumes no block/meta files exist (e.g. first compaction)
// part 2: Write logic that first checks for existing block/meta files and then does the above.
func (c *Compactor) compactNewChunks(ctx context.Context, dst string) (err error) {
//part1
series := NoopGetSeries()
data := NoopGetChunks()
bloom := v1.Bloom{Sbf: *filter.NewDefaultScalableBloomFilter(0.01)}
// create bloom filters from that.
bloom.Sbf.Add([]byte(fmt.Sprint(data)))
// block and seriesList
seriesList := []v1.SeriesWithBloom{
{
Series: series,
Bloom: &bloom,
},
}
writer := v1.NewDirectoryBlockWriter(dst)
builder, err := v1.NewBlockBuilder(
v1.BlockOptions{
SeriesPageSize: 100,
BloomPageSize: 10 << 10,
}, writer)
if err != nil {
return err
}
// BuildFrom closes itself
err = builder.BuildFrom(v1.NewSliceIter[v1.SeriesWithBloom](seriesList))
if err != nil {
return err
}
// TODO Ask Owen, shall we expose a method to expose these paths on BlockWriter?
indexPath := filepath.Join(dst, "series")
bloomPath := filepath.Join(dst, "bloom")
blockRef := bloomshipper.BlockRef{
IndexPath: indexPath,
BlockPath: bloomPath,
}
blocks := []bloomshipper.Block{
{
BlockRef: blockRef,
// TODO point to the data to be read
Data: nil,
},
}
meta := bloomshipper.Meta{
// After successful compaction there should be no tombstones
Tombstones: make([]bloomshipper.BlockRef, 0),
Blocks: []bloomshipper.BlockRef{blockRef},
}
err = c.bloomShipperClient.PutMeta(ctx, meta)
if err != nil {
return err
}
_, err = c.bloomShipperClient.PutBlocks(ctx, blocks)
if err != nil {
return err
}
// TODO may need to change return value of this func
return nil
}
func (c *Compactor) runCompact(ctx context.Context) error {
// TODO set MaxLookBackPeriod to the maximum look-back period the ingester accepts
maxLookBackPeriod := c.cfg.MaxLookBackPeriod
stFp, endFp := NoopGetFingerprintRange()
tenantID := NoopGetUserID()
end := time.Now().UTC().UnixMilli()
start := end - maxLookBackPeriod.Milliseconds()
metaSearchParams := bloomshipper.MetaSearchParams{
TenantID: tenantID,
MinFingerprint: stFp,
MaxFingerprint: endFp,
StartTimestamp: start,
EndTimestamp: end,
}
metas, err := c.bloomShipperClient.GetMetas(ctx, metaSearchParams)
if err != nil {
return err
}
if len(metas) == 0 {
//run compaction from scratch
tempDst := os.TempDir()
err = c.compactNewChunks(ctx, tempDst)
if err != nil {
return err
}
} else {
// part 2
// When already-compacted metas exist
// Deduplicate index paths
uniqueIndexPaths := make(map[string]struct{})
for _, meta := range metas {
for _, blockRef := range meta.Blocks {
uniqueIndexPaths[blockRef.IndexPath] = struct{}{}
}
}
// TODO complete part 2 - discuss with Owen - add a step to compare chunks and blocks.
// 1. For each period at hand, get the TSDB table indexes for the given fp range.
// 2. Check the blocks for the given uniqueIndexPaths and TSDB indexes:
//    - if the bloom block refs are a superset (cover the TSDB indexes plus more outside the range),
//      create a new meta.json file and tombstone the unused index/block paths;
//    - else, if there are TSDB indexes that are not covered by the bloom blocks (a subset),
//      call compactNewChunks on them and create a new meta.json;
//    - else: all good, no compaction needed.
}
return nil
}
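
As noted in the `// TODO use a new service with a loop` comment above, the idle service is a placeholder. Below is a minimal sketch of a ticker-driven replacement built on dskit's `services.NewTimerService`; the 10-minute interval, the toy `loopCompactor` type and the `runOneCompaction` name are assumptions for illustration only:

```go
package main

import (
	"context"
	"log"
	"time"

	"github.com/grafana/dskit/services"
)

// loopCompactor is a toy stand-in for bloomcompactor.Compactor.
type loopCompactor struct {
	services.Service
}

func newLoopCompactor(interval time.Duration) *loopCompactor {
	c := &loopCompactor{}
	// NewTimerService calls runOneCompaction on every tick until the service
	// is stopped or the iteration function returns an error.
	c.Service = services.NewTimerService(interval, nil, c.runOneCompaction, nil)
	return c
}

func (c *loopCompactor) runOneCompaction(ctx context.Context) error {
	// The real compactor would call runCompact(ctx) here and probably only log
	// failures, so that a single bad iteration does not stop the whole service.
	log.Println("compaction tick")
	return nil
}

func main() {
	c := newLoopCompactor(10 * time.Minute)
	if err := services.StartAndAwaitRunning(context.Background(), c); err != nil {
		log.Fatal(err)
	}
	// Block until the service exits (iteration error or external stop).
	if err := c.AwaitTerminated(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```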

@ -0,0 +1,36 @@
package bloomcompactor
import (
"flag"
"time"
"github.com/grafana/loki/pkg/util"
)
// Config configures the bloom-compactor component.
type Config struct {
// Ring configures the ring store used to save and retrieve the different Bloom-Compactor instances.
// In case it isn't explicitly set, it follows the same behavior of the other rings (ex: using the common configuration
// section and the ingester configuration by default).
RingCfg RingCfg `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom-compactor servers. In case this isn't configured, this block supports inheriting configuration from the common ring section."`
// Enabled configures whether bloom-compactors should be used to compact index values into bloomfilters
Enabled bool `yaml:"enabled"`
WorkingDirectory string `yaml:"working_directory"`
MaxLookBackPeriod time.Duration `yaml:"max_look_back_period"`
}
// RegisterFlags registers flags for the Bloom-Compactor configuration.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.RingCfg.RegisterFlags("bloom-compactor.", "collectors/", f)
f.BoolVar(&cfg.Enabled, "bloom-compactor.enabled", false, "Flag to enable or disable the usage of the bloom-compactor component.")
}
// RingCfg is a wrapper for our internally used ring configuration plus the replication factor.
type RingCfg struct {
// RingConfig configures the Bloom-Compactor ring.
util.RingConfig `yaml:",inline"`
}
func (cfg *RingCfg) RegisterFlags(prefix, storePrefix string, f *flag.FlagSet) {
cfg.RingConfig.RegisterFlagsWithPrefix(prefix, storePrefix, f)
}
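
To see how this config maps onto the `-bloom-compactor.*` flags documented above, here is a small sketch that registers the config on a flag set and flips the enable flag; the standalone `main` wrapper is only for illustration:

```go
package main

import (
	"flag"
	"fmt"

	"github.com/grafana/loki/pkg/bloomcompactor"
)

func main() {
	fs := flag.NewFlagSet("example", flag.ExitOnError)

	var cfg bloomcompactor.Config
	cfg.RegisterFlags(fs) // registers -bloom-compactor.enabled and the -bloom-compactor.ring.* flags

	_ = fs.Parse([]string{"-bloom-compactor.enabled=true"})
	fmt.Println("bloom-compactor enabled:", cfg.Enabled) // true; ring settings keep their defaults
}
```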

@ -0,0 +1,208 @@
package bloomcompactor
import (
"context"
"net/http"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/kv"
"github.com/grafana/dskit/ring"
"github.com/grafana/dskit/services"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)
const (
// ringAutoForgetUnhealthyPeriods is the number of consecutive timeout periods after which an
// unhealthy instance in the ring is automatically removed.
ringAutoForgetUnhealthyPeriods = 10
// ringNameForServer is the name of the ring used by the bloom-compactor server.
ringNameForServer = "bloom-compactor"
// start with a single instance
ringNumTokens = 1
ringCheckPeriod = 3 * time.Second
// ringKey is the key under which we register different instances of bloom-compactor in the KVStore.
ringKey = "bloom-compactor"
replicationFactor = 1
)
type RingManager struct {
services.Service
cfg Config
logger log.Logger
subservices *services.Manager
subservicesWatcher *services.FailureWatcher
RingLifecycler *ring.BasicLifecycler
Ring *ring.Ring
}
func NewRingManager(cfg Config, logger log.Logger, registerer prometheus.Registerer) (*RingManager, error) {
rm := &RingManager{
cfg: cfg, logger: logger,
}
// instantiate kv store.
ringStore, err := kv.NewClient(
rm.cfg.RingCfg.KVStore,
ring.GetCodec(),
kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "bloom-compactor-ring-manager"),
rm.logger,
)
if err != nil {
return nil, errors.Wrap(err, "bloom-compactor ring manager failed to create KV store client")
}
lifecyclerCfg, err := rm.cfg.RingCfg.ToLifecyclerConfig(ringNumTokens, rm.logger)
if err != nil {
return nil, errors.Wrap(err, "invalid ring lifecycler config")
}
// Define lifecycler delegates in reverse order (last to be called defined first because they're
// chained via "next delegate").
delegate := ring.BasicLifecyclerDelegate(rm)
delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.logger)
delegate = ring.NewTokensPersistencyDelegate(rm.cfg.RingCfg.TokensFilePath, ring.JOINING, delegate, rm.logger)
delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.RingCfg.HeartbeatTimeout, delegate, rm.logger)
rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, ringKey, ringStore, delegate, rm.logger, registerer)
if err != nil {
return nil, errors.Wrap(err, "failed to create bloom-compactor ring manager lifecycler")
}
// instantiate ring.
ringCfg := rm.cfg.RingCfg.ToRingConfig(replicationFactor)
rm.Ring, err = ring.NewWithStoreClientAndStrategy(
ringCfg,
ringNameForServer,
ringKey,
ringStore,
ring.NewIgnoreUnhealthyInstancesReplicationStrategy(),
prometheus.WrapRegistererWithPrefix("loki_", registerer),
rm.logger,
)
if err != nil {
return nil, errors.Wrap(err, "bloom-compactor ring manager failed to create ring client")
}
svcs := []services.Service{rm.RingLifecycler, rm.Ring}
rm.subservices, err = services.NewManager(svcs...)
if err != nil {
return nil, errors.Wrap(err, "new bloom services manager in server mode")
}
rm.subservicesWatcher = services.NewFailureWatcher()
rm.subservicesWatcher.WatchManager(rm.subservices)
rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping)
return rm, nil
}
// starting implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) starting(ctx context.Context) (err error) {
// In case this function returns an error, we want to unregister the instance
// from the ring. We do it while making sure dependencies are gracefully stopped if they
// were already started.
defer func() {
if err == nil || rm.subservices == nil {
return
}
if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil {
level.Error(rm.logger).Log("msg", "failed to gracefully stop bloom-compactor ring manager dependencies", "err", stopErr)
}
}()
if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil {
return errors.Wrap(err, "unable to start bloom-compactor ring manager subservices")
}
// The BasicLifecycler does not automatically move state to ACTIVE such that any additional work that
// someone wants to do can be done before becoming ACTIVE. For the bloom-compactor we don't currently
// have any additional work so we can become ACTIVE right away.
// Wait until the ring client has detected this instance in the JOINING
// state, to make sure that when we run the initial sync we already
// know the tokens assigned to this instance.
level.Info(rm.logger).Log("msg", "waiting until bloom-compactor is JOINING in the ring")
if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil {
return err
}
level.Info(rm.logger).Log("msg", "bloom-compactor is JOINING in the ring")
if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
}
// Wait until the ring client has detected this instance in the ACTIVE state to
// make sure that when we run the loop it won't be detected as a ring
// topology change.
level.Info(rm.logger).Log("msg", "waiting until bloom-compactor is ACTIVE in the ring")
if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
return err
}
level.Info(rm.logger).Log("msg", "bloom-compactor is ACTIVE in the ring")
return nil
}
// running implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) running(ctx context.Context) error {
t := time.NewTicker(ringCheckPeriod)
defer t.Stop()
for {
select {
case <-ctx.Done():
return nil
case err := <-rm.subservicesWatcher.Chan():
return errors.Wrap(err, "running bloom-compactor ring manager subservice failed")
case <-t.C:
continue
}
}
}
// stopping implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) stopping(_ error) error {
level.Debug(rm.logger).Log("msg", "stopping bloom-compactor ring manager")
return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
}
func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) {
rm.Ring.ServeHTTP(w, req)
}
func (rm *RingManager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, _ string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
// When we initialize the bloom-compactor instance in the ring we want to start from
// a clean situation, so regardless of the current state we set it to JOINING, while we keep the
// existing tokens (if any) or the ones loaded from file.
var tokens []uint32
if instanceExists {
tokens = instanceDesc.GetTokens()
}
takenTokens := ringDesc.GetTokens()
gen := ring.NewRandomTokenGenerator()
newTokens := gen.GenerateTokens(ringNumTokens-len(tokens), takenTokens)
// Tokens sorting will be enforced by the parent caller.
tokens = append(tokens, newTokens...)
return ring.JOINING, tokens
}
func (rm *RingManager) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {
}
func (rm *RingManager) OnRingInstanceStopping(_ *ring.BasicLifecycler) {
}
func (rm *RingManager) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}
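
Tying this ring back to the `NoopGetFingerprintRange`/`NoopGetUserID` TODOs in bloomcompactor.go: ownership checks would go through this ring. Below is a rough sketch using dskit's `ReadRing.Get` and `ReplicationSet.Includes`; the `ownsTenant` helper and the FNV-based token are assumptions, not part of this PR:

```go
package main

import (
	"hash/fnv"

	"github.com/grafana/dskit/ring"
)

// ownsTenant hashes the tenant ID to a ring token and checks whether this
// instance is among the owners of that token. With replicationFactor = 1 the
// replication set contains exactly one instance.
func ownsTenant(r ring.ReadRing, instanceAddr, tenantID string) (bool, error) {
	h := fnv.New32a()
	_, _ = h.Write([]byte(tenantID))

	rs, err := r.Get(h.Sum32(), ring.Write, nil, nil, nil)
	if err != nil {
		return false, err
	}
	return rs.Includes(instanceAddr), nil
}

func main() {
	// Building a real *ring.Ring needs a KV store; see RingManager above.
	// The compaction loop would call ownsTenant for each tenant it discovers.
	_ = ownsTenant
}
```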

@ -303,6 +303,19 @@ func applyConfigToRings(r, defaults *ConfigWrapper, rc util.RingConfig, mergeWit
r.IndexGateway.Ring.KVStore = rc.KVStore
}
// BloomCompactor
if mergeWithExisting || reflect.DeepEqual(r.BloomCompactor.RingCfg, defaults.BloomCompactor.RingCfg) {
r.BloomCompactor.RingCfg.HeartbeatTimeout = rc.HeartbeatTimeout
r.BloomCompactor.RingCfg.HeartbeatPeriod = rc.HeartbeatPeriod
r.BloomCompactor.RingCfg.InstancePort = rc.InstancePort
r.BloomCompactor.RingCfg.InstanceAddr = rc.InstanceAddr
r.BloomCompactor.RingCfg.InstanceID = rc.InstanceID
r.BloomCompactor.RingCfg.InstanceInterfaceNames = rc.InstanceInterfaceNames
r.BloomCompactor.RingCfg.InstanceZone = rc.InstanceZone
r.BloomCompactor.RingCfg.ZoneAwarenessEnabled = rc.ZoneAwarenessEnabled
r.BloomCompactor.RingCfg.KVStore = rc.KVStore
}
// BloomGateway
if mergeWithExisting || reflect.DeepEqual(r.BloomGateway.Ring, defaults.BloomGateway.Ring) {
r.BloomGateway.Ring.HeartbeatTimeout = rc.HeartbeatTimeout
@ -339,12 +352,21 @@ func applyTokensFilePath(cfg *ConfigWrapper) error {
}
cfg.QueryScheduler.SchedulerRing.TokensFilePath = f
// Index Gateway
f, err = tokensFile(cfg, "indexgateway.tokens")
if err != nil {
return err
}
cfg.IndexGateway.Ring.TokensFilePath = f
// Bloom-Compactor
f, err = tokensFile(cfg, "bloom-compactor.tokens")
if err != nil {
return err
}
cfg.BloomCompactor.RingCfg.TokensFilePath = f
// Bloom-Gateway
f, err = tokensFile(cfg, "bloomgateway.tokens")
if err != nil {
return err
@ -432,6 +454,10 @@ func appendLoopbackInterface(cfg, defaults *ConfigWrapper) {
cfg.IndexGateway.Ring.InstanceInterfaceNames = append(cfg.IndexGateway.Ring.InstanceInterfaceNames, loopbackIface)
}
if reflect.DeepEqual(cfg.BloomCompactor.RingCfg.InstanceInterfaceNames, defaults.BloomCompactor.RingCfg.InstanceInterfaceNames) {
cfg.BloomCompactor.RingCfg.InstanceInterfaceNames = append(cfg.BloomCompactor.RingCfg.InstanceInterfaceNames, loopbackIface)
}
if reflect.DeepEqual(cfg.BloomGateway.Ring.InstanceInterfaceNames, defaults.BloomGateway.Ring.InstanceInterfaceNames) {
cfg.BloomGateway.Ring.InstanceInterfaceNames = append(cfg.BloomGateway.Ring.InstanceInterfaceNames, loopbackIface)
}
@ -448,6 +474,7 @@ func applyMemberlistConfig(r *ConfigWrapper) {
r.QueryScheduler.SchedulerRing.KVStore.Store = memberlistStr
r.CompactorConfig.CompactorRing.KVStore.Store = memberlistStr
r.IndexGateway.Ring.KVStore.Store = memberlistStr
r.BloomCompactor.RingCfg.KVStore.Store = memberlistStr
r.BloomGateway.Ring.KVStore.Store = memberlistStr
}
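
With the wrapper changes above, the new ring follows the same defaults as the other rings. As an illustration (the address is an example, not a default), a config that never sets `bloom_compactor.ring` yet ends up on memberlist because `join_members` is configured:

```yaml
# config_wrapper points every ring KV store - including the new
# bloom-compactor ring - at memberlist when join_members is set.
memberlist:
  join_members:
    - loki-memberlist.default.svc:7946   # example address

bloom_compactor:
  enabled: true
```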

@ -30,6 +30,7 @@ import (
"google.golang.org/grpc/health/grpc_health_v1"
"github.com/grafana/loki/pkg/analytics"
"github.com/grafana/loki/pkg/bloomcompactor"
"github.com/grafana/loki/pkg/bloomgateway"
"github.com/grafana/loki/pkg/compactor"
compactorclient "github.com/grafana/loki/pkg/compactor/client"
@ -85,6 +86,7 @@ type Config struct {
IngesterClient ingester_client.Config `yaml:"ingester_client,omitempty"`
Ingester ingester.Config `yaml:"ingester,omitempty"`
IndexGateway indexgateway.Config `yaml:"index_gateway"`
BloomCompactor bloomcompactor.Config `yaml:"bloom_compactor"`
BloomGateway bloomgateway.Config `yaml:"bloom_gateway"`
StorageConfig storage.Config `yaml:"storage_config,omitempty"`
ChunkStoreConfig config.ChunkStoreConfig `yaml:"chunk_store_config,omitempty"`
@ -165,6 +167,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) {
c.MemberlistKV.RegisterFlags(f)
c.Tracing.RegisterFlags(f)
c.CompactorConfig.RegisterFlags(f)
c.BloomCompactor.RegisterFlags(f)
c.QueryScheduler.RegisterFlags(f)
c.Analytics.RegisterFlags(f)
}
@ -313,6 +316,7 @@ type Loki struct {
querySchedulerRingManager *scheduler.RingManager
usageReport *analytics.Reporter
indexGatewayRingManager *indexgateway.RingManager
bloomCompactorRingManager *bloomcompactor.RingManager
bloomGatewayRingManager *bloomgateway.RingManager
clientMetrics storage.ClientMetrics
@ -590,6 +594,8 @@ func (t *Loki) setupModuleManager() error {
mm.RegisterModule(RuleEvaluator, t.initRuleEvaluator, modules.UserInvisibleModule)
mm.RegisterModule(TableManager, t.initTableManager)
mm.RegisterModule(Compactor, t.initCompactor)
mm.RegisterModule(BloomCompactor, t.initBloomCompactor)
mm.RegisterModule(BloomCompactorRing, t.initBloomCompactorRing, modules.UserInvisibleModule)
mm.RegisterModule(IndexGateway, t.initIndexGateway)
mm.RegisterModule(IndexGatewayRing, t.initIndexGatewayRing, modules.UserInvisibleModule)
mm.RegisterModule(IndexGatewayInterceptors, t.initIndexGatewayInterceptors, modules.UserInvisibleModule)
@ -625,6 +631,7 @@ func (t *Loki) setupModuleManager() error {
Compactor: {Server, Overrides, MemberlistKV, Analytics},
IndexGateway: {Server, Store, IndexGatewayRing, IndexGatewayInterceptors, Analytics},
BloomGateway: {Server, BloomGatewayRing, Analytics},
BloomCompactor: {Server, BloomCompactorRing, Analytics},
IngesterQuerier: {Ring},
QuerySchedulerRing: {Overrides, MemberlistKV},
IndexGatewayRing: {Overrides, MemberlistKV},
@ -688,8 +695,9 @@ func (t *Loki) setupModuleManager() error {
deps[QueryFrontend] = append(deps[QueryFrontend], QueryScheduler)
}
//TODO(poyzannur) not sure this is needed for BloomCompactor
if t.Cfg.LegacyReadTarget {
- deps[Read] = append(deps[Read], QueryScheduler, Ruler, Compactor, IndexGateway, BloomGateway)
+ deps[Read] = append(deps[Read], QueryScheduler, Ruler, Compactor, IndexGateway, BloomGateway, BloomCompactor)
}
if t.Cfg.InternalServer.Enable {

@ -33,6 +33,8 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
"github.com/grafana/loki/pkg/bloomcompactor"
"github.com/grafana/loki/pkg/analytics"
"github.com/grafana/loki/pkg/bloomgateway"
"github.com/grafana/loki/pkg/compactor"
@ -110,6 +112,8 @@ const (
IndexGatewayInterceptors string = "index-gateway-interceptors"
QueryScheduler string = "query-scheduler"
QuerySchedulerRing string = "query-scheduler-ring"
BloomCompactor string = "bloom-compactor"
BloomCompactorRing string = "bloom-compactor-ring"
All string = "all"
Read string = "read"
Write string = "write"
@ -247,10 +251,11 @@ func (t *Loki) initRuntimeConfig() (services.Service, error) {
// Update config fields using runtime config. Only if multiKV is used for given ring these returned functions will be
// called and register the listener.
//
// By doing the initialization here instead of per-module init function, we avoid the problem
// of projects based on Loki forgetting the wiring if they override module's init method (they also don't have access to private symbols).
t.Cfg.CompactorConfig.CompactorRing.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.runtimeConfig)
t.Cfg.BloomCompactor.RingCfg.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.runtimeConfig)
t.Cfg.Distributor.DistributorRing.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.runtimeConfig)
t.Cfg.IndexGateway.Ring.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.runtimeConfig)
t.Cfg.BloomGateway.Ring.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.runtimeConfig)
@ -1333,6 +1338,45 @@ func (t *Loki) initIndexGatewayInterceptors() (services.Service, error) {
return nil, nil
}
func (t *Loki) initBloomCompactor() (services.Service, error) {
logger := log.With(util_log.Logger, "component", "bloom-compactor")
compactor, err := bloomcompactor.New(t.Cfg.BloomCompactor,
t.ring,
t.Cfg.StorageConfig,
t.Cfg.SchemaConfig.Configs,
logger,
t.clientMetrics,
prometheus.DefaultRegisterer)
if err != nil {
return nil, err
}
return compactor, nil
}
func (t *Loki) initBloomCompactorRing() (services.Service, error) {
t.Cfg.BloomCompactor.RingCfg.ListenPort = t.Cfg.Server.GRPCListenPort
// is LegacyMode needed?
//legacyReadMode := t.Cfg.LegacyReadTarget && t.isModuleActive(Read)
rm, err := bloomcompactor.NewRingManager(t.Cfg.BloomCompactor, util_log.Logger, prometheus.DefaultRegisterer)
if err != nil {
return nil, gerrors.Wrap(err, "error initializing bloom-compactor ring manager")
}
t.bloomCompactorRingManager = rm
t.Server.HTTP.Path("/bloomcompactor/ring").Methods("GET", "POST").Handler(t.bloomCompactorRingManager)
if t.Cfg.InternalServer.Enable {
t.InternalServer.HTTP.Path("/bloomcompactor/ring").Methods("GET", "POST").Handler(t.bloomCompactorRingManager)
}
return t.bloomCompactorRingManager, nil
}
func (t *Loki) initQueryScheduler() (services.Service, error) {
s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, t.querySchedulerRingManager, prometheus.DefaultRegisterer)
if err != nil {

@ -141,7 +141,7 @@ func (bq *BlockQuerier) CheckChunksForSeries(fp model.Fingerprint, chks ChunkRef
// First, see if the search passes the series level bloom before checking for chunks individually
for _, search := range searches {
- if !bloom.sbf.Test(search) {
+ if !bloom.Sbf.Test(search) {
// the entire series bloom didn't pass one of the searches,
// so we can skip checking chunks individually.
// We still return all chunks that are not included in the bloom
@ -161,7 +161,7 @@ outer:
// TODO(owen-d): meld chunk + search into a single byte slice from the block schema
var combined = search
- if !bloom.sbf.Test(combined) {
+ if !bloom.Sbf.Test(combined) {
continue outer
}
}

@ -13,14 +13,14 @@ import (
)
type Bloom struct {
- sbf filter.ScalableBloomFilter
+ Sbf filter.ScalableBloomFilter
}
func (b *Bloom) Encode(enc *encoding.Encbuf) error {
// divide by 8 b/c bloom capacity is measured in bits, but we want bytes
- buf := bytes.NewBuffer(BlockPool.Get(int(b.sbf.Capacity() / 8)))
+ buf := bytes.NewBuffer(BlockPool.Get(int(b.Sbf.Capacity() / 8)))
- _, err := b.sbf.WriteTo(buf)
+ _, err := b.Sbf.WriteTo(buf)
if err != nil {
return errors.Wrap(err, "encoding bloom filter")
}
@ -36,7 +36,7 @@ func (b *Bloom) Decode(dec *encoding.Decbuf) error {
ln := dec.Uvarint()
data := dec.Bytes(ln)
- _, err := b.sbf.ReadFrom(bytes.NewReader(data))
+ _, err := b.Sbf.ReadFrom(bytes.NewReader(data))
if err != nil {
return errors.Wrap(err, "decoding bloom filter")
}

@ -27,8 +27,8 @@ func mkBasicSeriesWithBlooms(n int, fromFp, throughFp model.Fingerprint, fromTs,
}
var bloom Bloom
- bloom.sbf = *filter.NewScalableBloomFilter(1024, 0.01, 0.8)
- bloom.sbf.Add([]byte(fmt.Sprint(i)))
+ bloom.Sbf = *filter.NewScalableBloomFilter(1024, 0.01, 0.8)
+ bloom.Sbf.Add([]byte(fmt.Sprint(i)))
seriesList = append(seriesList, SeriesWithBloom{
Series: &series,

@ -14,6 +14,7 @@ import (
"github.com/grafana/dskit/server"
"github.com/grafana/loki/pkg/analytics"
"github.com/grafana/loki/pkg/bloomcompactor"
"github.com/grafana/loki/pkg/bloomgateway"
"github.com/grafana/loki/pkg/compactor"
"github.com/grafana/loki/pkg/distributor"
@ -123,6 +124,11 @@ var (
StructType: []reflect.Type{reflect.TypeOf(compactor.Config{})},
Desc: "The compactor block configures the compactor component, which compacts index shards for performance. `-boltdb.shipper.compactor.` prefix is deprecated, please use `-compactor.` instead.",
},
{
Name: "bloom_compactor",
StructType: []reflect.Type{reflect.TypeOf(bloomcompactor.Config{})},
Desc: "The bloom_compactor block configures the Loki bloom compactor server, responsible for compacting stream indexes into bloom filters and merging them as bloom blocks",
},
{
Name: "limits_config",
StructType: []reflect.Type{reflect.TypeOf(validation.Limits{})},
