mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
509 lines
14 KiB
509 lines
14 KiB
package bloomcompactor
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/go-kit/log"
|
|
"github.com/go-kit/log/level"
|
|
"github.com/grafana/dskit/backoff"
|
|
"github.com/grafana/dskit/concurrency"
|
|
"github.com/grafana/dskit/multierror"
|
|
"github.com/grafana/dskit/ring"
|
|
"github.com/grafana/dskit/services"
|
|
"github.com/pkg/errors"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/common/model"
|
|
|
|
"github.com/grafana/loki/v3/pkg/bloomutils"
|
|
"github.com/grafana/loki/v3/pkg/storage"
|
|
v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
|
|
"github.com/grafana/loki/v3/pkg/storage/config"
|
|
"github.com/grafana/loki/v3/pkg/storage/stores"
|
|
"github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper"
|
|
utillog "github.com/grafana/loki/v3/pkg/util/log"
|
|
util_ring "github.com/grafana/loki/v3/pkg/util/ring"
|
|
)
|
|
|
|
var (
|
|
RingOp = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE}, nil)
|
|
)
|
|
|
|
/*
|
|
Bloom-compactor
|
|
|
|
This is a standalone service that is responsible for compacting TSDB indexes into bloomfilters.
|
|
It creates and merges bloomfilters into an aggregated form, called bloom-blocks.
|
|
It maintains a list of references between bloom-blocks and TSDB indexes in files called meta.jsons.
|
|
|
|
Bloom-compactor regularly runs to check for changes in meta.jsons and runs compaction only upon changes in TSDBs.
|
|
*/
|
|
type Compactor struct {
|
|
services.Service
|
|
|
|
cfg Config
|
|
schemaCfg config.SchemaConfig
|
|
logger log.Logger
|
|
limits Limits
|
|
|
|
tsdbStore TSDBStore
|
|
// TODO(owen-d): ShardingStrategy
|
|
controller *SimpleBloomController
|
|
retentionManager *RetentionManager
|
|
|
|
// temporary workaround until bloomStore has implemented read/write shipper interface
|
|
bloomStore bloomshipper.Store
|
|
|
|
sharding util_ring.TenantSharding
|
|
|
|
metrics *Metrics
|
|
}
|
|
|
|
func New(
|
|
cfg Config,
|
|
schemaCfg config.SchemaConfig,
|
|
storeCfg storage.Config,
|
|
clientMetrics storage.ClientMetrics,
|
|
fetcherProvider stores.ChunkFetcherProvider,
|
|
ring ring.ReadRing,
|
|
ringLifeCycler *ring.BasicLifecycler,
|
|
limits Limits,
|
|
store bloomshipper.StoreWithMetrics,
|
|
logger log.Logger,
|
|
r prometheus.Registerer,
|
|
) (*Compactor, error) {
|
|
utillog.WarnExperimentalUse("Bloom Compactor", logger)
|
|
c := &Compactor{
|
|
cfg: cfg,
|
|
schemaCfg: schemaCfg,
|
|
logger: logger,
|
|
sharding: util_ring.NewTenantShuffleSharding(ring, ringLifeCycler, limits.BloomCompactorShardSize),
|
|
limits: limits,
|
|
bloomStore: store,
|
|
metrics: NewMetrics(r, store.BloomMetrics()),
|
|
}
|
|
|
|
tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics, logger)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to create TSDB store")
|
|
}
|
|
c.tsdbStore = tsdbStore
|
|
|
|
chunkLoader := NewStoreChunkLoader(
|
|
fetcherProvider,
|
|
c.metrics,
|
|
)
|
|
|
|
c.controller = NewSimpleBloomController(
|
|
c.tsdbStore,
|
|
c.bloomStore,
|
|
chunkLoader,
|
|
c.limits,
|
|
c.metrics,
|
|
c.logger,
|
|
)
|
|
|
|
c.retentionManager = NewRetentionManager(
|
|
c.cfg.RetentionConfig,
|
|
c.limits,
|
|
c.bloomStore,
|
|
newFirstTokenRetentionSharding(ring, ringLifeCycler),
|
|
c.metrics,
|
|
c.logger,
|
|
)
|
|
|
|
c.Service = services.NewBasicService(c.starting, c.running, c.stopping)
|
|
return c, nil
|
|
}
|
|
|
|
func (c *Compactor) starting(_ context.Context) (err error) {
|
|
c.metrics.compactorRunning.Set(1)
|
|
return err
|
|
}
|
|
|
|
func (c *Compactor) stopping(_ error) error {
|
|
c.metrics.compactorRunning.Set(0)
|
|
return nil
|
|
}
|
|
|
|
func (c *Compactor) running(ctx context.Context) error {
|
|
// run once at beginning
|
|
if err := c.runOne(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
ticker := time.NewTicker(c.cfg.CompactionInterval)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
err := ctx.Err()
|
|
level.Debug(c.logger).Log("msg", "compactor context done", "err", err)
|
|
return err
|
|
|
|
case <-ticker.C:
|
|
if err := c.runOne(ctx); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func runWithRetries(
|
|
ctx context.Context,
|
|
minBackoff, maxBackoff time.Duration,
|
|
maxRetries int,
|
|
f func(ctx context.Context) error,
|
|
) error {
|
|
var lastErr error
|
|
|
|
retries := backoff.New(ctx, backoff.Config{
|
|
MinBackoff: minBackoff,
|
|
MaxBackoff: maxBackoff,
|
|
MaxRetries: maxRetries,
|
|
})
|
|
|
|
for retries.Ongoing() {
|
|
lastErr = f(ctx)
|
|
if lastErr == nil {
|
|
return nil
|
|
}
|
|
|
|
retries.Wait()
|
|
}
|
|
|
|
return lastErr
|
|
}
|
|
|
|
type tenantTableRange struct {
|
|
tenant string
|
|
table config.DayTable
|
|
ownershipRange v1.FingerprintBounds
|
|
|
|
finished bool
|
|
queueTime, startTime, endTime time.Time
|
|
}
|
|
|
|
func (c *Compactor) tenants(ctx context.Context, table config.DayTable) (*v1.SliceIter[string], error) {
|
|
tenants, err := c.tsdbStore.UsersForPeriod(ctx, table)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "getting tenants")
|
|
}
|
|
|
|
return v1.NewSliceIter(tenants), nil
|
|
}
|
|
|
|
// ownsTenant returns the ownership range for the tenant, if the compactor owns the tenant, and an error.
|
|
func (c *Compactor) ownsTenant(tenant string) ([]v1.FingerprintBounds, bool, error) {
|
|
if !c.limits.BloomCompactorEnabled(tenant) {
|
|
return nil, false, nil
|
|
}
|
|
tenantRing, owned := c.sharding.OwnsTenant(tenant)
|
|
if !owned {
|
|
return nil, false, nil
|
|
}
|
|
|
|
// TODO(owen-d): use <ReadRing>.GetTokenRangesForInstance()
|
|
// when it's supported for non zone-aware rings
|
|
// instead of doing all this manually
|
|
|
|
rs, err := tenantRing.GetAllHealthy(RingOp)
|
|
if err != nil {
|
|
return nil, false, errors.Wrap(err, "getting ring healthy instances")
|
|
}
|
|
|
|
ranges, err := bloomutils.TokenRangesForInstance(c.cfg.Ring.InstanceID, rs.Instances)
|
|
if err != nil {
|
|
return nil, false, errors.Wrap(err, "getting token ranges for instance")
|
|
}
|
|
|
|
keyspaces := bloomutils.KeyspacesFromTokenRanges(ranges)
|
|
return keyspaces, true, nil
|
|
}
|
|
|
|
// runs a single round of compaction for all relevant tenants and tables
|
|
func (c *Compactor) runOne(ctx context.Context) error {
|
|
c.metrics.compactionsStarted.Inc()
|
|
start := time.Now()
|
|
level.Info(c.logger).Log("msg", "running bloom compaction", "workers", c.cfg.WorkerParallelism)
|
|
var workersErr, retentionErr error
|
|
var wg sync.WaitGroup
|
|
input := make(chan *tenantTableRange)
|
|
|
|
// Launch retention (will return instantly if retention is disabled or not owned by this compactor)
|
|
wg.Add(1)
|
|
go func() {
|
|
retentionErr = c.retentionManager.Apply(ctx)
|
|
wg.Done()
|
|
}()
|
|
|
|
tables := c.tables(time.Now())
|
|
level.Debug(c.logger).Log("msg", "loaded tables", "tables", tables.TotalDays())
|
|
|
|
tracker, err := newCompactionTracker(tables.TotalDays())
|
|
if err != nil {
|
|
return errors.Wrap(err, "creating compaction tracker")
|
|
}
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
workersErr = c.runWorkers(ctx, input, tracker)
|
|
wg.Done()
|
|
}()
|
|
|
|
err = c.loadWork(ctx, tables, input, tracker)
|
|
|
|
wg.Wait()
|
|
duration := time.Since(start)
|
|
err = multierror.New(retentionErr, workersErr, err, ctx.Err()).Err()
|
|
|
|
if err != nil {
|
|
level.Error(c.logger).Log("msg", "compaction iteration failed", "err", err, "duration", duration)
|
|
c.metrics.compactionCompleted.WithLabelValues(statusFailure).Inc()
|
|
c.metrics.compactionTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds())
|
|
return err
|
|
}
|
|
|
|
c.metrics.compactionCompleted.WithLabelValues(statusSuccess).Inc()
|
|
c.metrics.compactionTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds())
|
|
level.Info(c.logger).Log("msg", "compaction iteration completed", "duration", duration)
|
|
return nil
|
|
}
|
|
|
|
func (c *Compactor) tables(ts time.Time) *dayRangeIterator {
|
|
// adjust the minimum by one to make it inclusive, which is more intuitive
|
|
// for a configuration variable
|
|
adjustedMin := min(c.cfg.MinTableOffset - 1)
|
|
minCompactionDelta := time.Duration(adjustedMin) * config.ObjectStorageIndexRequiredPeriod
|
|
maxCompactionDelta := time.Duration(c.cfg.MaxTableOffset) * config.ObjectStorageIndexRequiredPeriod
|
|
|
|
from := ts.Add(-maxCompactionDelta).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod)
|
|
through := ts.Add(-minCompactionDelta).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod)
|
|
|
|
fromDay := config.NewDayTime(model.TimeFromUnixNano(from))
|
|
throughDay := config.NewDayTime(model.TimeFromUnixNano(through))
|
|
level.Debug(c.logger).Log("msg", "loaded tables for compaction", "from", fromDay, "through", throughDay)
|
|
return newDayRangeIterator(fromDay, throughDay, c.schemaCfg)
|
|
}
|
|
|
|
func (c *Compactor) loadWork(
|
|
ctx context.Context,
|
|
tables *dayRangeIterator,
|
|
ch chan<- *tenantTableRange,
|
|
tracker *compactionTracker,
|
|
) error {
|
|
|
|
for tables.Next() && tables.Err() == nil && ctx.Err() == nil {
|
|
table := tables.At()
|
|
|
|
level.Debug(c.logger).Log("msg", "loading work for table", "table", table)
|
|
|
|
tenants, err := c.tenants(ctx, table)
|
|
if err != nil {
|
|
return errors.Wrap(err, "getting tenants")
|
|
}
|
|
nTenants := tenants.Len()
|
|
|
|
type ownedTenant struct {
|
|
tenant string
|
|
ownershipRanges []v1.FingerprintBounds
|
|
}
|
|
|
|
// build owned tenants separately and load them all prior to compaction in order to
|
|
// accurately report progress
|
|
var ownedTenants []ownedTenant
|
|
|
|
for tenants.Next() && tenants.Err() == nil && ctx.Err() == nil {
|
|
c.metrics.tenantsDiscovered.Inc()
|
|
tenant := tenants.At()
|
|
ownershipRanges, owns, err := c.ownsTenant(tenant)
|
|
if err != nil {
|
|
return errors.Wrap(err, "checking tenant ownership")
|
|
}
|
|
if !owns {
|
|
level.Debug(c.logger).Log("msg", "skipping tenant", "tenant", tenant, "table", table)
|
|
c.metrics.tenantsSkipped.Inc()
|
|
continue
|
|
}
|
|
c.metrics.tenantsOwned.Inc()
|
|
ownedTenants = append(ownedTenants, ownedTenant{tenant, ownershipRanges})
|
|
}
|
|
if err := tenants.Err(); err != nil {
|
|
level.Error(c.logger).Log("msg", "error iterating tenants", "err", err)
|
|
return errors.Wrap(err, "iterating tenants")
|
|
}
|
|
|
|
level.Debug(c.logger).Log("msg", "loaded tenants", "table", table, "tenants", nTenants, "owned_tenants", len(ownedTenants))
|
|
tracker.registerTable(table.DayTime, len(ownedTenants))
|
|
|
|
for _, t := range ownedTenants {
|
|
// loop over ranges, registering them in the tracker;
|
|
// we add them to the tracker before queueing them
|
|
// so progress reporting is aware of all tenant/table
|
|
// pairs prior to execution. Otherwise, progress could
|
|
// decrease over time as more work is discovered.
|
|
var inputs []*tenantTableRange
|
|
for _, ownershipRange := range t.ownershipRanges {
|
|
tt := tenantTableRange{
|
|
tenant: t.tenant,
|
|
table: table,
|
|
ownershipRange: ownershipRange,
|
|
}
|
|
tracker.update(tt.tenant, tt.table.DayTime, tt.ownershipRange, tt.ownershipRange.Min)
|
|
inputs = append(inputs, &tt)
|
|
}
|
|
|
|
// iterate the inputs, queueing them
|
|
for _, tt := range inputs {
|
|
level.Debug(c.logger).Log("msg", "enqueueing work for tenant", "tenant", tt.tenant, "table", table, "ownership", tt.ownershipRange.String())
|
|
tt.queueTime = time.Now() // accurrately report queue time
|
|
select {
|
|
case ch <- tt:
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
}
|
|
|
|
if err := tenants.Err(); err != nil {
|
|
level.Error(c.logger).Log("msg", "error iterating tenants", "err", err)
|
|
return errors.Wrap(err, "iterating tenants")
|
|
}
|
|
|
|
}
|
|
|
|
if err := tables.Err(); err != nil {
|
|
level.Error(c.logger).Log("msg", "error iterating tables", "err", err)
|
|
return errors.Wrap(err, "iterating tables")
|
|
}
|
|
|
|
close(ch)
|
|
return ctx.Err()
|
|
}
|
|
|
|
func (c *Compactor) runWorkers(
|
|
ctx context.Context,
|
|
input <-chan *tenantTableRange,
|
|
tracker *compactionTracker,
|
|
) error {
|
|
|
|
// TODO(owen-d): refactor for cleanliness
|
|
reporterCtx, cancel := context.WithCancel(ctx)
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
ticker := time.NewTicker(30 * time.Second)
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
c.metrics.progress.Set(tracker.progress())
|
|
case <-reporterCtx.Done():
|
|
c.metrics.progress.Set(tracker.progress())
|
|
wg.Done()
|
|
ticker.Stop()
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
err := concurrency.ForEachJob(ctx, c.cfg.WorkerParallelism, c.cfg.WorkerParallelism, func(ctx context.Context, idx int) error {
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
|
|
case tt, ok := <-input:
|
|
if !ok {
|
|
return nil
|
|
}
|
|
c.metrics.tenantsStarted.Inc()
|
|
err := c.compactTenantTable(ctx, tt, tracker)
|
|
duration := tt.endTime.Sub(tt.startTime)
|
|
c.metrics.timePerTenant.WithLabelValues(tt.tenant).Add(duration.Seconds())
|
|
progress := tracker.progress()
|
|
|
|
if err != nil {
|
|
c.metrics.tenantTableRanges.WithLabelValues(statusFailure).Inc()
|
|
return errors.Wrapf(
|
|
err,
|
|
"compacting tenant table (%s) for tenant (%s) with ownership (%s)",
|
|
tt.table,
|
|
tt.tenant,
|
|
tt.ownershipRange,
|
|
)
|
|
}
|
|
level.Debug(c.logger).Log(
|
|
"msg", "finished compacting tenant table",
|
|
"tenant", tt.tenant,
|
|
"table", tt.table,
|
|
"ownership", tt.ownershipRange.String(),
|
|
"duration", duration,
|
|
"current_progress", progress,
|
|
)
|
|
c.metrics.tenantTableRanges.WithLabelValues(statusSuccess).Inc()
|
|
}
|
|
}
|
|
|
|
})
|
|
cancel()
|
|
wg.Wait()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
func (c *Compactor) compactTenantTable(ctx context.Context, tt *tenantTableRange, tracker *compactionTracker) error {
|
|
level.Info(c.logger).Log("msg", "compacting", "org_id", tt.tenant, "table", tt.table, "ownership", tt.ownershipRange.String())
|
|
tt.startTime = time.Now()
|
|
err := c.controller.compactTenant(ctx, tt.table, tt.tenant, tt.ownershipRange, tracker)
|
|
tt.finished = true
|
|
tt.endTime = time.Now()
|
|
tracker.update(tt.tenant, tt.table.DayTime, tt.ownershipRange, tt.ownershipRange.Max)
|
|
level.Info(c.logger).Log("msg", "finished compacting", "org_id", tt.tenant, "table", tt.table, "ownership", tt.ownershipRange.String(), "err", err)
|
|
return err
|
|
}
|
|
|
|
type dayRangeIterator struct {
|
|
min, max, cur config.DayTime
|
|
curPeriod config.PeriodConfig
|
|
schemaCfg config.SchemaConfig
|
|
err error
|
|
}
|
|
|
|
func newDayRangeIterator(min, max config.DayTime, schemaCfg config.SchemaConfig) *dayRangeIterator {
|
|
return &dayRangeIterator{min: min, max: max, cur: min.Dec(), schemaCfg: schemaCfg}
|
|
}
|
|
|
|
func (r *dayRangeIterator) TotalDays() int {
|
|
offset := r.cur
|
|
if r.cur.Before(r.min) {
|
|
offset = r.min
|
|
}
|
|
return int(r.max.Sub(offset.Time) / config.ObjectStorageIndexRequiredPeriod)
|
|
}
|
|
|
|
func (r *dayRangeIterator) Next() bool {
|
|
r.cur = r.cur.Inc()
|
|
if !r.cur.Before(r.max) {
|
|
return false
|
|
}
|
|
|
|
period, err := r.schemaCfg.SchemaForTime(r.cur.ModelTime())
|
|
if err != nil {
|
|
r.err = errors.Wrapf(err, "getting schema for time (%s)", r.cur)
|
|
return false
|
|
}
|
|
r.curPeriod = period
|
|
|
|
return true
|
|
}
|
|
|
|
func (r *dayRangeIterator) At() config.DayTable {
|
|
return config.NewDayTable(r.cur, r.curPeriod.IndexTables.Prefix)
|
|
}
|
|
|
|
func (r *dayRangeIterator) Err() error {
|
|
return nil
|
|
}
|
|
|