package builder

import (
	"context"
	"flag"
	"fmt"
	"math"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/grpcclient"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/twmb/franz-go/pkg/kgo"
	"golang.org/x/sync/errgroup"

	"github.com/grafana/loki/v3/pkg/blockbuilder/types"
	"github.com/grafana/loki/v3/pkg/compression"
	"github.com/grafana/loki/v3/pkg/kafka"
	"github.com/grafana/loki/v3/pkg/kafka/client"
	"github.com/grafana/loki/v3/pkg/logproto"
	"github.com/grafana/loki/v3/pkg/storage/chunk"
	"github.com/grafana/loki/v3/pkg/storage/config"
	"github.com/grafana/loki/v3/pkg/storage/stores"
	"github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb/index"
	storagetypes "github.com/grafana/loki/v3/pkg/storage/types"
	"github.com/grafana/loki/v3/pkg/util/flagext"
	util_log "github.com/grafana/loki/v3/pkg/util/log"
)

type Config struct {
	ConcurrentFlushes int `yaml:"concurrent_flushes"`
	ConcurrentWriters int `yaml:"concurrent_writers"`

	BlockSize       flagext.ByteSize  `yaml:"chunk_block_size"`
	TargetChunkSize flagext.ByteSize  `yaml:"chunk_target_size"`
	ChunkEncoding   string            `yaml:"chunk_encoding"`
	parsedEncoding  compression.Codec `yaml:"-"` // placeholder for validated encoding
	MaxChunkAge     time.Duration     `yaml:"max_chunk_age"`

	Backoff           backoff.Config `yaml:"backoff_config"`
	WorkerParallelism int            `yaml:"worker_parallelism"`
	SyncInterval      time.Duration  `yaml:"sync_interval"`
	PollInterval      time.Duration  `yaml:"poll_interval"`

	SchedulerAddress string `yaml:"scheduler_address"`
	// SchedulerGRPCClientConfig configures the gRPC connection between the block-builder and its scheduler.
	SchedulerGRPCClientConfig grpcclient.Config `yaml:"scheduler_grpc_client_config"`
}
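
// The YAML tags above map directly onto the builder's config section. A minimal
// sketch (values are illustrative only, and the enclosing section name depends on
// how the parent Loki config embeds this struct):
//
//	concurrent_flushes: 4
//	concurrent_writers: 4
//	chunk_encoding: snappy
//	max_chunk_age: 2h
//	sync_interval: 30s
//	poll_interval: 30s
//	scheduler_address: block-scheduler:9095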

func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
	f.IntVar(&cfg.ConcurrentFlushes, prefix+"concurrent-flushes", 1, "How many flushes can happen concurrently.")
	f.IntVar(&cfg.ConcurrentWriters, prefix+"concurrent-writers", 1, "How many workers to process writes.")
	_ = cfg.BlockSize.Set("256KB")
	f.Var(&cfg.BlockSize, prefix+"chunks-block-size", "The targeted _uncompressed_ size in bytes of a chunk block. When this threshold is exceeded, the head block will be cut and compressed inside the chunk.")
	_ = cfg.TargetChunkSize.Set(fmt.Sprint(onePointFiveMB))
	f.Var(&cfg.TargetChunkSize, prefix+"chunk-target-size", "A target _compressed_ size in bytes for chunks. This is a desired size, not an exact size: chunks may be slightly bigger or significantly smaller if they get flushed for other reasons (e.g. chunk_idle_period). A value of 0 creates chunks with a fixed 10 blocks; a non-zero value will create chunks with a variable number of blocks to meet the target size.")
	f.StringVar(&cfg.ChunkEncoding, prefix+"chunk-encoding", compression.Snappy.String(), fmt.Sprintf("The algorithm to use for compressing chunks. (%s)", compression.SupportedCodecs()))
	f.DurationVar(&cfg.MaxChunkAge, prefix+"max-chunk-age", 2*time.Hour, "The maximum duration of a timeseries chunk in memory. If a timeseries runs for longer than this, the current chunk will be flushed to the store and a new chunk created.")
	f.DurationVar(&cfg.SyncInterval, prefix+"sync-interval", 30*time.Second, "The interval at which to sync job status with the scheduler.")
	f.DurationVar(&cfg.PollInterval, prefix+"poll-interval", 30*time.Second, "The interval at which to poll for new jobs.")
	f.IntVar(&cfg.WorkerParallelism, prefix+"worker-parallelism", 1, "The number of workers to run in parallel to process jobs.")
	f.StringVar(&cfg.SchedulerAddress, prefix+"scheduler-address", "", "Address of the scheduler in the format described here: https://github.com/grpc/grpc/blob/master/doc/naming.md")

	cfg.SchedulerGRPCClientConfig.RegisterFlagsWithPrefix(prefix+"scheduler-grpc-client.", f)
	cfg.Backoff.RegisterFlagsWithPrefix(prefix+"backoff.", f)
}

// RegisterFlags registers flags.
func (cfg *Config) RegisterFlags(flags *flag.FlagSet) {
	cfg.RegisterFlagsWithPrefix("blockbuilder.", flags)
}
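
// With the "blockbuilder." prefix used by RegisterFlags, these options surface as
// CLI flags. A hypothetical invocation (the binary and target wiring are assumed;
// the flag names come from the registrations above):
//
//	loki -target=block-builder \
//	  -blockbuilder.scheduler-address=block-scheduler:9095 \
//	  -blockbuilder.chunk-encoding=snappy \
//	  -blockbuilder.worker-parallelism=2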

func (cfg *Config) Validate() error {
	enc, err := compression.ParseCodec(cfg.ChunkEncoding)
	if err != nil {
		return err
	}
	cfg.parsedEncoding = enc

	if cfg.SyncInterval <= 0 {
		return errors.New("sync interval must be greater than 0")
	}

	if cfg.PollInterval <= 0 {
		return errors.New("poll interval must be greater than 0")
	}

	if cfg.WorkerParallelism < 1 {
		return errors.New("worker parallelism must be greater than 0")
	}

	return nil
}

// BlockBuilder is a slimmed-down version of the ingester, intended to ingest logs
// without WALs. Broadly, it accumulates logs into per-tenant chunks in the same way
// the existing ingester does, but without a WAL. Index (TSDB) creation is not an
// out-of-band procedure and must be called directly. In essence, this allows us to
// buffer data, flushing chunks to storage as necessary, and then, when ready to
// commit, create the relevant TSDBs (one per period) and flush them to storage.
// An external caller can therefore prepare a batch of data, build the relevant
// chunks+indices, ensure they're flushed, and then return. As long as chunk+index
// creation is deterministic, this operation is also idempotent, which makes retries
// simple and prevents duplicate data from being introduced.
//
// It contains the following methods:
//   - `Append(context.Context, logproto.PushRequest) error`
//     Adds a push request to ingested data. May flush existing chunks when they're full/etc.
//   - `Commit(context.Context) error`
//     Serializes (cuts) any buffered data into chunks, flushes them to storage, then creates + flushes
//     TSDB indices containing all chunk references. Finally, clears internal state.
type BlockBuilder struct {
	services.Service
	types.BuilderTransport

	id              string
	cfg             Config
	kafkaCfg        kafka.Config
	periodicConfigs []config.PeriodConfig

	metrics    *builderMetrics
	logger     log.Logger
	registerer prometheus.Registerer

	decoder  *kafka.Decoder
	store    stores.ChunkWriter
	objStore *MultiStore

	jobsMtx      sync.RWMutex
	inflightJobs map[string]*types.Job
}

func NewBlockBuilder(
	id string,
	cfg Config,
	kafkaCfg kafka.Config,
	periodicConfigs []config.PeriodConfig,
	store stores.ChunkWriter,
	objStore *MultiStore,
	logger log.Logger,
	registerer prometheus.Registerer,
) (*BlockBuilder, error) {
	decoder, err := kafka.NewDecoder()
	if err != nil {
		return nil, err
	}

	t, err := types.NewGRPCTransportFromAddress(cfg.SchedulerAddress, cfg.SchedulerGRPCClientConfig, registerer)
	if err != nil {
		return nil, fmt.Errorf("create grpc transport: %w", err)
	}

	i := &BlockBuilder{
		id:               id,
		cfg:              cfg,
		kafkaCfg:         kafkaCfg,
		periodicConfigs:  periodicConfigs,
		metrics:          newBuilderMetrics(registerer),
		logger:           logger,
		registerer:       registerer,
		decoder:          decoder,
		store:            store,
		objStore:         objStore,
		inflightJobs:     make(map[string]*types.Job),
		BuilderTransport: t,
	}

	i.Service = services.NewBasicService(nil, i.running, nil)
	return i, nil
}
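
// A minimal wiring sketch (hypothetical variable names; assumes the caller has
// already built the kafka config, period configs, chunk writer and object store):
//
//	bb, err := NewBlockBuilder("builder-1", cfg, kafkaCfg, periodConfigs, chunkWriter, objStore, logger, prometheus.DefaultRegisterer)
//	if err != nil {
//		return err
//	}
//	// running() starts the worker and sync loops once the service is started.
//	if err := services.StartAndAwaitRunning(ctx, bb); err != nil {
//		return err
//	}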

func (i *BlockBuilder) running(ctx context.Context) error {
	errgrp, ctx := errgroup.WithContext(ctx)
	for j := 0; j < i.cfg.WorkerParallelism; j++ {
		workerID := fmt.Sprintf("block-builder-worker-%d", j)
		errgrp.Go(func() error {
			c, err := client.NewReaderClient(
				"blockbuilder",
				i.kafkaCfg,
				log.With(i.logger, "component", workerID),
				i.registerer,
			)
			if err != nil {
				return err
			}

			var waitFor time.Duration
			for {
				select {
				case <-ctx.Done():
					return nil
				case <-time.After(waitFor):
					gotJob, err := i.runOne(ctx, c, workerID)
					if err != nil {
						level.Error(i.logger).Log("msg", "block builder run failed", "err", err)
					}

					// poll only when there are no jobs
					if gotJob {
						waitFor = 0
					} else {
						waitFor = i.cfg.PollInterval
					}
				}
			}
		})
	}

	errgrp.Go(func() error {
		ticker := time.NewTicker(i.cfg.SyncInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				return nil
			case <-ticker.C:
				if err := i.syncJobs(ctx); err != nil {
					level.Error(i.logger).Log("msg", "failed to sync jobs", "err", err)
				}
			}
		}
	})

	return errgrp.Wait()
}

func (i *BlockBuilder) syncJobs(ctx context.Context) error {
	i.jobsMtx.RLock()
	defer i.jobsMtx.RUnlock()

	for _, job := range i.inflightJobs {
		if err := i.SendSyncJob(ctx, &types.SyncJobRequest{
			BuilderID: i.id,
			Job:       job,
		}); err != nil {
			level.Error(i.logger).Log("msg", "failed to sync job", "err", err)
		}
	}

	return nil
}

func (i *BlockBuilder) runOne(ctx context.Context, c *kgo.Client, workerID string) (bool, error) {
	// assuming GetJob blocks/polls until a job is available
	resp, err := i.SendGetJobRequest(ctx, &types.GetJobRequest{
		BuilderID: workerID,
	})
	if err != nil {
		return false, errors.Wrap(err, "requesting job")
	}

	if !resp.OK {
		level.Info(i.logger).Log("msg", "no available job to process")
		return false, nil
	}

	job := resp.Job
	logger := log.With(
		i.logger,
		"worker_id", workerID,
		"partition", job.Partition(),
		"job_min_offset", job.Offsets().Min,
		"job_max_offset", job.Offsets().Max,
	)

	i.jobsMtx.Lock()
	i.inflightJobs[job.ID()] = job
	i.metrics.inflightJobs.Set(float64(len(i.inflightJobs)))
	i.jobsMtx.Unlock()

	completion := &types.CompleteJobRequest{
		BuilderID: workerID,
		Job:       job,
		Success:   true,
	}
	if processErr := i.processJob(ctx, c, job, logger); processErr != nil {
		level.Error(i.logger).Log("msg", "failed to process job", "err", processErr)
		err = errors.Wrap(processErr, "processing job")
		completion.Success = false
	}

	// remove from inflight jobs to stop sending sync requests
	i.jobsMtx.Lock()
	delete(i.inflightJobs, job.ID())
	i.metrics.inflightJobs.Set(float64(len(i.inflightJobs)))
	i.jobsMtx.Unlock()

	if _, err := withBackoff(
		ctx,
		i.cfg.Backoff,
		func() (res struct{}, err error) {
			if err = i.SendCompleteJob(ctx, completion); err != nil {
				level.Error(i.logger).Log("msg", "failed to mark the job as complete", "err", err)
			}
			return
		},
	); err != nil {
		return true, err
	}

	return true, err
}

func (i *BlockBuilder) processJob(ctx context.Context, c *kgo.Client, job *types.Job, logger log.Logger) (err error) {
	level.Debug(logger).Log("msg", "beginning job")
	start := time.Now()

	indexer := newTsdbCreator()
	appender := newAppender(i.id,
		i.cfg,
		i.periodicConfigs,
		i.store,
		i.objStore,
		logger,
		i.metrics,
	)

	var lastOffset int64
	p := newPipeline(ctx)

	// Pipeline stage 1: Process the job offsets and write records to inputCh.
	// This stage reads from the partition and feeds records into the input channel.
	// When complete, it stores the last processed offset and closes the channel.
	inputCh := make(chan []AppendInput)
	p.AddStageWithCleanup(
		"load records",
		1,
		func(ctx context.Context) error {
			lastOffset, err = i.loadRecords(ctx, c, job.Partition(), job.Offsets(), inputCh)
			return errors.Wrap(err, "loading records")
		},
		func(ctx context.Context) error {
			level.Debug(logger).Log(
				"msg", "finished loading records",
				"ctx_error", ctx.Err(),
				"last_offset", lastOffset,
				"total_records", lastOffset-job.Offsets().Min,
			)
			close(inputCh)
			return nil
		},
	)

	// Stage 2: Process input records and generate chunks.
	// This stage receives AppendInput batches, appends them to appropriate instances,
	// and forwards any cut chunks to the chunks channel for flushing.
	// ConcurrentWriters workers process inputs in parallel to maximize throughput.
	flush := make(chan *chunk.Chunk)
	p.AddStageWithCleanup(
		"appender",
		i.cfg.ConcurrentWriters,
		func(ctx context.Context) error {
			for {
				select {
				case <-ctx.Done():
					return ctx.Err()
				case inputs, ok := <-inputCh:
					// inputs are finished; we're done
					if !ok {
						return nil
					}

					for _, input := range inputs {
						cut, err := appender.Append(ctx, input)
						if err != nil {
							level.Error(logger).Log("msg", "failed to append records", "err", err)
							return errors.Wrap(err, "appending records")
						}

						for _, chk := range cut {
							select {
							case <-ctx.Done():
								return ctx.Err()
							case flush <- chk:
							}
						}
					}
				}
			}
		},
		func(ctx context.Context) (err error) {
			defer func() {
				level.Debug(logger).Log(
					"msg", "finished appender",
					"err", err,
					"ctx_error", ctx.Err(),
				)
			}()
			defer close(flush)

			// once we're done appending, cut all remaining chunks.
			chks, err := appender.CutRemainingChunks(ctx)
			if err != nil {
				return errors.Wrap(err, "cutting remaining chunks")
			}

			for _, chk := range chks {
				select {
				case <-ctx.Done():
					return ctx.Err()
				case flush <- chk:
				}
			}
			return nil
		},
	)

	// Stage 3: Flush chunks to storage.
	// This stage receives chunks from the chunks channel and flushes them to storage
	// using ConcurrentFlushes workers for parallel processing.
	p.AddStage(
		"flusher",
		i.cfg.ConcurrentFlushes,
		func(ctx context.Context) error {
			for {
				select {
				case <-ctx.Done():
					return ctx.Err()
				case chk, ok := <-flush:
					if !ok {
						return nil
					}
					if _, err := withBackoff(
						ctx,
						i.cfg.Backoff, // retry forever
						func() (res struct{}, err error) {
							err = i.store.PutOne(ctx, chk.From, chk.Through, *chk)
							if err != nil {
								level.Error(logger).Log("msg", "failed to flush chunk", "err", err)
								i.metrics.chunksFlushFailures.Inc()
								return res, errors.Wrap(err, "flushing chunk")
							}
							appender.reportFlushedChunkStatistics(chk)

							// write flushed chunk to index
							approxKB := math.Round(float64(chk.Data.UncompressedSize()) / float64(1<<10))
							meta := index.ChunkMeta{
								Checksum: chk.ChunkRef.Checksum,
								MinTime:  int64(chk.ChunkRef.From),
								MaxTime:  int64(chk.ChunkRef.Through),
								KB:       uint32(approxKB),
								Entries:  uint32(chk.Data.Entries()),
							}
							err = indexer.Append(chk.UserID, chk.Metric, chk.ChunkRef.Fingerprint, index.ChunkMetas{meta})
							if err != nil {
								level.Error(logger).Log("msg", "failed to append chunk to index", "err", err)
								return res, errors.Wrap(err, "appending chunk to index")
							}

							return
						},
					); err != nil {
						return err
					}
				}
			}
		},
	)

	err = p.Run()
	level.Debug(logger).Log(
		"msg", "finished chunk creation",
		"err", err,
	)
	if err != nil {
		return errors.Wrap(err, "running pipeline")
	}

	var (
		nodeName    = i.id
		tableRanges = config.GetIndexStoreTableRanges(storagetypes.TSDBType, i.periodicConfigs)
	)

	built, err := indexer.create(ctx, nodeName, tableRanges)
	if err != nil {
		level.Error(logger).Log("msg", "failed to build index", "err", err)
		return errors.Wrap(err, "building index")
	}

	u := newUploader(i.objStore)
	for _, db := range built {
		if _, err := withBackoff(ctx, i.cfg.Backoff, func() (res struct{}, err error) {
			err = u.Put(ctx, db)
			if err != nil {
				level.Error(util_log.Logger).Log(
					"msg", "failed to upload tsdb",
					"path", db.id.Path(),
					"err", err,
				)
				return res, errors.Wrap(err, "uploading tsdb")
			}

			level.Debug(logger).Log(
				"msg", "uploaded tsdb",
				"name", db.id.Name(),
			)
			return
		}); err != nil {
			return errors.Wrap(err, "uploading tsdbs")
		}
	}

	// log success
	level.Info(logger).Log(
		"msg", "successfully processed job",
		"last_offset", lastOffset,
		"duration", time.Since(start),
		"records", lastOffset-job.Offsets().Min,
	)

	return nil
}

func (i *BlockBuilder) loadRecords(ctx context.Context, c *kgo.Client, partitionID int32, offsets types.Offsets, ch chan<- []AppendInput) (lastConsumedOffset int64, err error) {
	// Use NoResetOffset to avoid resetting the offset to the beginning of the partition when the requested offset is out of range.
	// This could happen if the requested records are already outside the retention period. We should fail the job in such cases, leaving the scheduler to make a decision.
	c.AddConsumePartitions(map[string]map[int32]kgo.Offset{
		i.kafkaCfg.Topic: {partitionID: kgo.NoResetOffset().At(offsets.Min)},
	})
	defer c.RemoveConsumePartitions(map[string][]int32{
		i.kafkaCfg.Topic: {partitionID},
	})

	lastConsumedOffset = offsets.Min - 1
	var (
		boff                = backoff.New(ctx, i.cfg.Backoff)
		consecutiveTimeouts = 0
		maxTimeouts         = 3
	)

	// Since offsets.Max is exclusive, it can point to an offset that doesn't exist,
	// so we only poll until we reach the end of the records we need to process (offsets.Max-1).
	// This prevents us from polling indefinitely for records that don't exist.
	for lastConsumedOffset < offsets.Max-1 && boff.Ongoing() {
		if consecutiveTimeouts >= maxTimeouts {
			return lastConsumedOffset, fmt.Errorf("exceeded maximum consecutive timeouts (%d) while polling records", maxTimeouts)
		}

		if err := context.Cause(ctx); err != nil {
			return lastConsumedOffset, err
		}

		// Add a timeout for each poll operation.
		pollCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
		fs := c.PollRecords(pollCtx, int(offsets.Max-lastConsumedOffset))
		cancel()

		if err := fs.Err(); err != nil {
			if errors.Is(err, context.DeadlineExceeded) {
				level.Warn(i.logger).Log(
					"msg", "timeout polling records",
					"partition", partitionID,
					"last_offset", lastConsumedOffset,
					"target_offset", offsets.Max,
				)
				boff.Wait()
				consecutiveTimeouts++
				continue
			}
			level.Error(i.logger).Log(
				"msg", "failed to poll records",
				"err", err,
				"partition", partitionID,
				"last_offset", lastConsumedOffset,
			)
			boff.Wait()
			continue
		}

		if fs.Empty() {
			// No more records available.
			break
		}

		// Reset backoff on successful poll.
		boff.Reset()
		consecutiveTimeouts = 0

		converted := make([]AppendInput, 0, fs.NumRecords())
		for iter := fs.RecordIter(); !iter.Done(); {
			record := iter.Next()
			if record.Offset >= offsets.Max {
				level.Debug(i.logger).Log("msg", "record offset exceeds job max offset. stop processing", "record offset", record.Offset, "max offset", offsets.Max)
				break
			}

			stream, labels, err := i.decoder.Decode(record.Value)
			if err != nil {
				return lastConsumedOffset, errors.Wrap(err, "failed to decode record")
			}

			lastConsumedOffset = record.Offset

			if len(stream.Entries) == 0 {
				continue
			}

			// The decoder reuses the entries slice, so we need to copy it.
			entries := make([]logproto.Entry, len(stream.Entries))
			copy(entries, stream.Entries)

			converted = append(converted, AppendInput{
				tenant:    string(record.Key),
				labels:    labels,
				labelsStr: stream.Labels,
				entries:   entries,
			})
		}

		if len(converted) > 0 {
			select {
			case ch <- converted:
			case <-ctx.Done():
				return lastConsumedOffset, ctx.Err()
			}
		}
	}

	return lastConsumedOffset, boff.Err()
}
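
// withBackoff retries fn under the given backoff config until it succeeds or the
// backoff budget is exhausted. A small usage sketch (fetchSomething is hypothetical):
//
//	val, err := withBackoff(ctx, cfg.Backoff, func() (string, error) {
//		return fetchSomething(ctx)
//	})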
func withBackoff[T any](
	ctx context.Context,
	config backoff.Config,
	fn func() (T, error),
) (T, error) {
	var zero T

	var boff = backoff.New(ctx, config)
	for boff.Ongoing() {
		res, err := fn()
		if err != nil {
			boff.Wait()
			continue
		}
		return res, nil
	}

	return zero, boff.ErrCause()
}