@@ -32,7 +32,7 @@ type buildRequest struct {

 // buildResult represents the result of an index build operation
 type buildResult struct {
-	indexPath string
+	indexPath string // Index path when ErrBuilderFull causes multiple flushes
 	records   []*kgo.Record // Records to commit after successful build
 	err       error
 }
@@ -65,6 +65,8 @@ type serialIndexer struct {
 	// Download pipeline
 	downloadQueue     chan metastore.ObjectWrittenEvent
 	downloadedObjects chan downloadedObject
+	skipDownloads     chan struct{} // Signal to skip downloading new objects
+	skipMode          bool          // Track whether we're in skip mode

 	// Worker management
 	buildRequestChan chan buildRequest
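The two new fields wire a cross-goroutine signal: the builder nudges the download worker over the unbuffered `skipDownloads` channel, and the worker flips `skipMode` so it drains its queue instead of downloading. A minimal standalone sketch of that signal-plus-flag pattern, assuming nothing from this patch (all names below are illustrative):

```go
package main

import "fmt"

// worker consumes jobs until the channel closes; a signal on skip makes it
// drain remaining jobs instead of processing them.
func worker(jobs <-chan int, skip <-chan struct{}, done chan<- struct{}) {
	skipMode := false
	for {
		select {
		case j, ok := <-jobs:
			if !ok {
				close(done)
				return
			}
			if skipMode {
				fmt.Println("draining job", j) // drop the work, keep the queue moving
				continue
			}
			fmt.Println("processing job", j)
		case <-skip:
			skipMode = true
		}
	}
}

func main() {
	jobs := make(chan int, 8)
	skip := make(chan struct{})
	done := make(chan struct{})
	go worker(jobs, skip, done)

	jobs <- 1          // likely processed (select order is not guaranteed)
	skip <- struct{}{} // worker enters skip mode
	jobs <- 2          // drained, not processed
	close(jobs)
	<-done
}
```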
@@ -96,6 +98,7 @@ func newSerialIndexer(
 		buildRequestChan:  make(chan buildRequest, cfg.QueueSize),
 		downloadQueue:     make(chan metastore.ObjectWrittenEvent, 32),
 		downloadedObjects: make(chan downloadedObject, 1),
+		skipDownloads:     make(chan struct{}),
 	}

 	// Initialize dskit Service
@@ -135,6 +138,9 @@ func (si *serialIndexer) stopping(_ error) error {
 	close(si.downloadQueue)
 	close(si.buildRequestChan)

+	// Close the skip downloads channel to signal any waiting download workers
+	close(si.skipDownloads)
+
 	// Wait for workers to finish
 	si.downloadWorkerWg.Wait()
 	si.buildWorkerWg.Wait()
@@ -196,6 +202,8 @@ func (si *serialIndexer) downloadWorker(ctx context.Context) {
 	level.Debug(si.logger).Log("msg", "download worker started")
 	defer level.Debug(si.logger).Log("msg", "download worker stopped")

+	si.skipMode = false // Start in normal (non-skip) mode
+
 	for {
 		select {
 		case event, ok := <-si.downloadQueue:
@@ -204,6 +212,12 @@ func (si *serialIndexer) downloadWorker(ctx context.Context) {
 				return
 			}

+			if si.skipMode {
+				// Skip downloading, just drain the queue
+				level.Debug(si.logger).Log("msg", "skipping download due to skip signal", "object_path", event.ObjectPath)
+				continue
+			}
+
 			objLogger := log.With(si.logger, "object_path", event.ObjectPath)
 			downloadStart := time.Now()
@@ -247,6 +261,11 @@ func (si *serialIndexer) downloadWorker(ctx context.Context) {
 				return
 			}

+		case <-si.skipDownloads:
+			level.Debug(si.logger).Log("msg", "download worker received skip signal, entering skip mode")
+			si.skipMode = true
+			// Continue in the loop to drain the downloadQueue
+
 		case <-ctx.Done():
 			return
 		}
@@ -302,7 +321,7 @@ func (si *serialIndexer) processBuildRequest(req buildRequest) buildResult {
 	}

 	// Build the index using internal method
-	indexPath, err := si.buildIndex(req.ctx, events, req.partition)
+	indexPath, processed, err := si.buildIndex(req.ctx, events, req.partition)

 	// Update metrics
 	buildTime := time.Since(start)
@@ -310,18 +329,18 @@ func (si *serialIndexer) processBuildRequest(req buildRequest) buildResult {

 	if err != nil {
 		level.Error(si.logger).Log("msg", "failed to build index",
-			"partition", req.partition, "err", err, "duration", buildTime)
+			"partition", req.partition, "err", err, "duration", buildTime, "processed", processed)
 		return buildResult{err: err}
 	}

 	level.Debug(si.logger).Log("msg", "successfully built index",
 		"partition", req.partition, "index_path", indexPath, "duration", buildTime,
-		"events", len(events))
+		"events", len(events), "processed", processed)

-	// Extract records for committing
-	records := make([]*kgo.Record, len(req.events))
-	for i, buffered := range req.events {
-		records[i] = buffered.record
+	// Extract records for committing - only for processed events
+	records := make([]*kgo.Record, processed)
+	for i := range processed {
+		records[i] = req.events[i].record
 	}

 	return buildResult{
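Committing only the first `processed` records leaves the offsets of unprocessed events unadvanced, so those events come back on the next trigger, matching the new `buildIndex` doc comment below. A self-contained sketch of the slicing, with a stand-in `Record` type rather than franz-go's `kgo.Record`:

```go
package main

import "fmt"

// Record stands in for kgo.Record; only the offset matters here.
type Record struct{ Offset int64 }

// bufferedEvent mimics the patch's buffered event wrapper.
type bufferedEvent struct{ record *Record }

// recordsToCommit keeps records for the first processed events only, so
// anything past them stays uncommitted and is redelivered later.
func recordsToCommit(events []bufferedEvent, processed int) []*Record {
	records := make([]*Record, processed)
	for i := range processed { // range-over-int needs Go 1.22+
		records[i] = events[i].record
	}
	return records
}

func main() {
	events := []bufferedEvent{
		{&Record{Offset: 10}}, {&Record{Offset: 11}}, {&Record{Offset: 12}},
	}
	for _, r := range recordsToCommit(events, 2) {
		fmt.Println("commit offset", r.Offset)
	}
	// Offset 12 is not committed; its event is handled by a later trigger.
}
```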
@@ -331,8 +350,11 @@ func (si *serialIndexer) processBuildRequest(req buildRequest) buildResult {
 	}
 }

-// buildIndex is the core index building logic (moved from builder)
-func (si *serialIndexer) buildIndex(ctx context.Context, events []metastore.ObjectWrittenEvent, partition int32) (string, error) {
+// buildIndex writes all metastore events to a single index object. It returns
+// the index path and the number of events processed, or an error if the index object is not created.
+// The number of processed events can be less than len(events) if the builder
+// becomes full while the trigger is triggerTypeAppend.
+func (si *serialIndexer) buildIndex(ctx context.Context, events []metastore.ObjectWrittenEvent, partition int32) (string, int, error) {
 	level.Debug(si.logger).Log("msg", "building index", "events", len(events), "partition", partition)
 	start := time.Now()
@@ -340,30 +362,32 @@ func (si *serialIndexer) buildIndex(ctx context.Context, events []metastore.Obje
 	writeTime, err := time.Parse(time.RFC3339, events[0].WriteTime)
 	if err != nil {
 		level.Error(si.logger).Log("msg", "failed to parse write time", "err", err)
-		return "", err
+		return "", 0, err
 	}
 	si.builderMetrics.setProcessingDelay(writeTime)

 	// Trigger the downloads
 	for _, event := range events {
 		select {
 		case si.downloadQueue <- event:
 			// Successfully sent event for download
 		case <-ctx.Done():
-			return "", ctx.Err()
+			return "", 0, ctx.Err()
 		}
 	}

-	// Process the results as they are downloaded
+	// Process downloaded objects, handling ErrBuilderFull
 	processingErrors := multierror.New()
-	for range len(events) {
+	processed := 0
+	for processed < len(events) {
 		var obj downloadedObject
 		select {
 		case obj = <-si.downloadedObjects:
 		case <-ctx.Done():
-			return "", ctx.Err()
+			return "", processed, ctx.Err()
 		}

+		processed++
 		objLogger := log.With(si.logger, "object_path", obj.event.ObjectPath)
 		level.Debug(objLogger).Log("msg", "processing object")
@@ -372,26 +396,78 @@ func (si *serialIndexer) buildIndex(ctx context.Context, events []metastore.Obje
 			continue
 		}

-		reader, err := dataobj.FromReaderAt(bytes.NewReader(*obj.objectBytes), int64(len(*obj.objectBytes)))
-		if err != nil {
-			processingErrors.Add(fmt.Errorf("failed to read object: %w", err))
+		// Process this object
+		if err := si.processObject(ctx, objLogger, obj); err != nil {
+			processingErrors.Add(fmt.Errorf("failed to process object: %w", err))
 			continue
 		}

-		if err := si.calculator.Calculate(ctx, objLogger, reader, obj.event.ObjectPath); err != nil {
-			processingErrors.Add(fmt.Errorf("failed to calculate index: %w", err))
-			continue
+		// Check whether the builder became full during processing
+		if si.calculator.IsFull() {
+			// Signal the download worker to skip downloading new objects
+			select {
+			case si.skipDownloads <- struct{}{}:
+				level.Debug(si.logger).Log("msg", "sent skip signal to download worker")
+			default:
+				// No receiver ready; continue anyway
+				level.Debug(si.logger).Log("msg", "failed to send skip signal to download worker, no receiver ready")
+			}
+
+			// Drain any remaining downloaded objects to prevent goroutine leaks
+		drainLoop:
+			for {
+				select {
+				case <-si.downloadedObjects:
+					// Drained one object; keep draining
+				case <-ctx.Done():
+					break drainLoop
+				default:
+					break drainLoop // No more objects to drain
+				}
+			}
+			break
 		}
 	}

 	if processingErrors.Err() != nil {
-		return "", processingErrors.Err()
+		return "", processed, processingErrors.Err()
 	}

+	// Flush the current index and start fresh. Per trigger type this means:
+	// - append: either the calculator became full and the remaining events will be processed by the next trigger,
+	//   or all events have been processed, so we flush the current index and start fresh
+	// - max-idle: all events have been processed, so we flush the current index and start fresh
+	indexPath, flushErr := si.flushIndex(ctx, partition)
+	if flushErr != nil {
+		return "", processed, fmt.Errorf("failed to flush index after processing full object: %w", flushErr)
+	}
+
+	level.Debug(si.logger).Log("msg", "finished building index files", "partition", partition,
+		"events", len(events), "processed", processed, "index_path", indexPath, "duration", time.Since(start))
+
+	return indexPath, processed, nil
+}
+
+// processObject handles processing a single downloaded object
+func (si *serialIndexer) processObject(ctx context.Context, objLogger log.Logger, obj downloadedObject) error {
+	reader, err := dataobj.FromReaderAt(bytes.NewReader(*obj.objectBytes), int64(len(*obj.objectBytes)))
+	if err != nil {
+		return fmt.Errorf("failed to read object: %w", err)
+	}
+
+	return si.calculator.Calculate(ctx, objLogger, reader, obj.event.ObjectPath)
+}
+
+// flushIndex flushes the current calculator state to an index object
+func (si *serialIndexer) flushIndex(ctx context.Context, partition int32) (string, error) {
+	tenantTimeRanges := si.calculator.TimeRanges()
+	if len(tenantTimeRanges) == 0 {
+		return "", nil // Nothing to flush
+	}
+
 	obj, closer, err := si.calculator.Flush()
 	if err != nil {
-		return "", fmt.Errorf("failed to flush builder: %w", err)
+		return "", fmt.Errorf("failed to flush calculator: %w", err)
 	}
 	defer closer.Close()
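The drain above needs a labeled break because a plain `break` inside `select` only exits the `select`, not the enclosing loop. An isolated, runnable sketch of the same pattern (channel and values are made up for illustration):

```go
package main

import (
	"context"
	"fmt"
)

// drain empties ch without blocking: it stops as soon as nothing is
// buffered, or when ctx is cancelled.
func drain(ctx context.Context, ch <-chan int) int {
	drained := 0
drainLoop:
	for {
		select {
		case <-ch:
			drained++ // got one, keep draining
		case <-ctx.Done():
			break drainLoop
		default:
			break drainLoop // nothing buffered right now
		}
	}
	return drained
}

func main() {
	ch := make(chan int, 4)
	ch <- 1
	ch <- 2
	fmt.Println("drained:", drain(context.Background(), ch)) // drained: 2
}
```

Note the `default` case makes the drain best-effort: a value arriving after it fires stays in the channel, which is why the patch also flips the download worker into skip mode rather than relying on the drain alone.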
@@ -415,9 +491,11 @@ func (si *serialIndexer) buildIndex(ctx context.Context, events []metastore.Obje
 		return "", fmt.Errorf("failed to update metastore ToC file: %w", err)
 	}

-	level.Debug(si.logger).Log("msg", "finished building new index file", "partition", partition,
-		"events", len(events), "size", obj.Size(), "duration", time.Since(start),
-		"tenants", len(tenantTimeRanges), "path", key)
 	si.calculator.Reset()
+	si.skipMode = false // Reset skip mode
+
+	level.Debug(si.logger).Log("msg", "flushed index object", "partition", partition,
+		"path", key, "size", obj.Size(), "tenants", len(tenantTimeRanges))

 	return key, nil
 }
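Taken together, `buildIndex` now follows a "process until full, flush what you have, report how far you got" shape. A condensed model of that control flow with toy stand-ins for the calculator and events (none of this is the patch's real API):

```go
package main

import "fmt"

// calc is a toy stand-in for the index calculator: it accepts items
// until it reaches capacity.
type calc struct{ used, capacity int }

func (c *calc) add()         { c.used++ }
func (c *calc) isFull() bool { return c.used >= c.capacity }

// flush emits a fake index path and resets the calculator.
func (c *calc) flush() string {
	path := fmt.Sprintf("index-with-%d-items", c.used)
	c.used = 0
	return path
}

// buildIndex processes events until it runs out or the calculator fills,
// then flushes once and reports how many events made it into this index.
func buildIndex(events []string, c *calc) (string, int) {
	processed := 0
	for processed < len(events) {
		c.add()
		processed++
		if c.isFull() {
			break // remaining events wait for the next trigger
		}
	}
	return c.flush(), processed
}

func main() {
	events := []string{"a", "b", "c", "d", "e"}
	path, processed := buildIndex(events, &calc{capacity: 3})
	fmt.Println(path, processed) // index-with-3-items 3
}
```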