@@ -30,36 +30,35 @@ import (
"math"
"os"
"path/filepath"
"strconv"
"strings"
"sort"
"time"
"github.com/go-kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/grafana/loki/pkg/logproto"
"github.com/grafana/loki/pkg/storage/chunk"
chunk_client "github.com/grafana/loki/pkg/storage/chunk/client"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/downloads"
shipperindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/index"
index_storage "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb"
tsdbindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index"
util_log "github.com/grafana/loki/pkg/util/log"
"github.com/go-kit/log"
"github.com/grafana/dskit/ring"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/backoff"
"github.com/grafana/dskit/concurrency"
"github.com/grafana/dskit/multierror"
"github.com/grafana/dskit/services"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/grafana/loki/pkg/compactor/retention"
"github.com/grafana/loki/pkg/logproto"
"github.com/grafana/loki/pkg/storage"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/grafana/loki/pkg/storage/bloom/v1/filter"
"github.com/grafana/loki/pkg/storage/chunk"
chunk_client "github.com/grafana/loki/pkg/storage/chunk/client"
"github.com/grafana/loki/pkg/storage/config"
"github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper"
shipperindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/index"
index_storage "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb"
tsdbindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index"
"github.com/grafana/loki/pkg/util"
)
const (
@@ -70,15 +69,20 @@ const (
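// Compactor is a service that periodically builds bloom blocks for the series and chunks
// referenced by TSDB indexes and ships them to object storage through the bloomshipper client.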
type Compactor struct {
services . Service
cfg Config
logger log . Logger
bloomCompactorRing ring . ReadRing
cfg Config
logger log . Logger
schemaCfg config . SchemaConfig
limits Limits
// temporary workaround until store has implemented read/write shipper interface
bloomShipperClient bloomshipper . Client
// Per-period store clients used to read series and chunk data from the configured object stores.
storeClients map [ config . DayTime ] storeClient
// temporary workaround until store has implemented read/write shipper interface
bloomShipperClient bloomshipper . Client
sharding ShardingStrategy
metrics * metrics
}
type storeClient struct {
@@ -88,21 +92,25 @@ type storeClient struct {
indexShipper indexshipper . IndexShipper
}
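// New wires up a Compactor: a bloom client for writing blocks and metas, plus one storeClient
// (object, index and chunk clients and an index shipper) per schema period with a supported index type.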
func New ( cfg Config ,
readRing ring . ReadRing ,
func New (
cfg Config ,
storageCfg storage . Config ,
schemaConfig config . SchemaConfig ,
limits downloads . Limits ,
limits Limits ,
logger log . Logger ,
sharding ShardingStrategy ,
clientMetrics storage . ClientMetrics ,
_ prometheus . Registerer ) ( * Compactor , error ) {
r prometheus . Registerer ,
) ( * Compactor , error ) {
c := & Compactor {
cfg : cfg ,
logger : logger ,
bloomCompactorRing : readRing ,
cfg : cfg ,
logger : logger ,
schemaCfg : schemaConfig ,
sharding : sharding ,
limits : limits ,
}
//Configure BloomClient for meta.json management
// Configure BloomClient for meta.json management
bloomClient , err := bloomshipper . NewBloomClient ( schemaConfig . Configs , storageCfg , clientMetrics )
if err != nil {
return nil , err
@@ -118,11 +126,11 @@ func New(cfg Config,
case config . BoltDBShipperType :
indexStorageCfg = storageCfg . BoltDBShipperConfig . Config
default :
level . Warn ( util_log . Logger ) . Log ( "msg" , "skipping period because index type is unsupported" )
level . Warn ( c . logger ) . Log ( "msg" , "skipping period because index type is unsupported" )
continue
}
//Configure ObjectClient and IndexShipper for series and chunk management
// Configure ObjectClient and IndexShipper for series and chunk management
objectClient , err := storage . NewObjectClient ( periodicConfig . ObjectType , storageCfg , clientMetrics )
if err != nil {
return nil , fmt . Errorf ( "error creating object client '%s': %w" , periodicConfig . ObjectType , err )
@@ -157,33 +165,274 @@ func New(cfg Config,
chunk : chunk_client . NewClient ( objectClient , nil , schemaConfig ) ,
indexShipper : indexShipper ,
}
}
// temporary workaround until store has implemented read/write shipper interface
c . bloomShipperClient = bloomClient
// TODO use a new service with a loop
c . Service = services . NewIdleService ( c . starting , c . stopping )
c . metrics = newMetrics ( r )
c . metrics . compactionRunInterval . Set ( cfg . CompactionInterval . Seconds ( ) )
c . Service = services . NewBasicService ( c . starting , c . running , c . stopping )
return c , nil
}
func ( c * Compactor ) starting ( _ context . Context ) error {
return nil
func ( c * Compactor ) starting ( _ context . Context ) ( err error ) {
c . metrics . compactorRunning . Set ( 1 )
return err
}
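// running performs an initial compaction, then runs one compaction per CompactionInterval tick
// (with jitter) until the service context is cancelled.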
func ( c * Compactor ) running ( ctx context . Context ) error {
// Run an initial compaction before starting the interval.
if err := c . runCompaction ( ctx ) ; err != nil {
level . Error ( c . logger ) . Log ( "msg" , "failed to run compaction" , "err" , err )
}
ticker := time . NewTicker ( util . DurationWithJitter ( c . cfg . CompactionInterval , 0.05 ) )
defer ticker . Stop ( )
for {
select {
case <- ticker . C :
c . metrics . compactionRunsStarted . Inc ( )
if err := c . runCompaction ( ctx ) ; err != nil {
c . metrics . compactionRunsErred . Inc ( )
level . Error ( c . logger ) . Log ( "msg" , "failed to run compaction" , "err" , err )
continue
}
c . metrics . compactionRunsCompleted . Inc ( )
case <- ctx . Done ( ) :
return nil
}
}
}
func ( c * Compactor ) stopping ( _ error ) error {
c . metrics . compactorRunning . Set ( 0 )
return nil
}
type Series struct { // TODO this can be replaced with Job struct based on Salva's ring work.
tableName , tenant string
labels labels . Labels
fingerPrint model . Fingerprint
chunks [ ] chunk . Chunk
from , through model . Time
indexPath string
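// runCompaction gathers the index tables from every store client, sorts them most-recent-first,
// and compacts them concurrently, bounded by MaxCompactionParallelism (0 means one worker per table).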
func ( c * Compactor ) runCompaction ( ctx context . Context ) error {
var tables [ ] string
for _ , sc := range c . storeClients {
// refresh index list cache since previous compaction would have changed the index files in the object store
sc . index . RefreshIndexTableNamesCache ( ctx )
tbls , err := sc . index . ListTables ( ctx )
if err != nil {
return fmt . Errorf ( "failed to list tables: %w" , err )
}
tables = append ( tables , tbls ... )
}
// process most recent tables first
tablesIntervals := getIntervalsForTables ( tables )
sortTablesByRange ( tables , tablesIntervals )
parallelism := c . cfg . MaxCompactionParallelism
if parallelism == 0 {
parallelism = len ( tables )
}
// TODO(salvacorts): We currently parallelize at the table level. We may want to parallelize at the tenant and job level as well.
// To do that, we should create a worker pool with c.cfg.MaxCompactionParallelism number of workers.
errs := multierror . New ( )
_ = concurrency . ForEachJob ( ctx , len ( tables ) , parallelism , func ( ctx context . Context , i int ) error {
tableName := tables [ i ]
logger := log . With ( c . logger , "table" , tableName )
level . Info ( logger ) . Log ( "msg" , "compacting table" )
err := c . compactTable ( ctx , logger , tableName , tablesIntervals [ tableName ] )
if err != nil {
errs . Add ( err )
return nil
}
level . Info ( logger ) . Log ( "msg" , "finished compacting table" )
return nil
} )
return errs . Err ( )
}
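// compactTable resolves the schema period for the table, picks the matching store client,
// lists the tenants present in the table, and delegates to compactUsers.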
func ( c * Compactor ) compactTable ( ctx context . Context , logger log . Logger , tableName string , tableInterval model . Interval ) error {
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return fmt . Errorf ( "interrupting compaction of table: %w" , err )
}
schemaCfg , ok := schemaPeriodForTable ( c . schemaCfg , tableName )
if ! ok {
level . Error ( logger ) . Log ( "msg" , "skipping compaction since we can't find schema for table" )
return nil
}
sc , ok := c . storeClients [ schemaCfg . From ]
if ! ok {
return fmt . Errorf ( "index store client not found for period starting at %s" , schemaCfg . From . String ( ) )
}
_ , tenants , err := sc . index . ListFiles ( ctx , tableName , false )
if err != nil {
return fmt . Errorf ( "failed to list files for table %s: %w" , tableName , err )
}
c . metrics . compactionRunDiscoveredTenants . Add ( float64 ( len ( tenants ) ) )
level . Info ( logger ) . Log ( "msg" , "discovered tenants from bucket" , "users" , len ( tenants ) )
return c . compactUsers ( ctx , logger , sc , tableName , tableInterval , tenants )
}
// See: https://github.com/grafana/mimir/blob/34852137c332d4050e53128481f4f6417daee91e/pkg/compactor/compactor.go#L566-L689
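// compactUsers compacts each tenant of the table in turn, skipping tenants whose limits put the table
// outside the allowed min/max age window or that are not owned by this shard, and retrying failed
// tenants via compactTenantWithRetries.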
func ( c * Compactor ) compactUsers ( ctx context . Context , logger log . Logger , sc storeClient , tableName string , tableInterval model . Interval , tenants [ ] string ) error {
// Keep track of tenants owned by this shard, so that we can delete the local files for all other users.
errs := multierror . New ( )
ownedTenants := make ( map [ string ] struct { } , len ( tenants ) )
for _ , tenant := range tenants {
tenantLogger := log . With ( logger , "tenant" , tenant )
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return fmt . Errorf ( "interrupting compaction of tenants: %w" , err )
}
// Skip this table if it is too new/old for the tenant limits.
now := model . Now ( )
tableMinAge := c . limits . BloomCompactorMinTableAge ( tenant )
tableMaxAge := c . limits . BloomCompactorMaxTableAge ( tenant )
if tableMinAge > 0 && tableInterval . End . After ( now . Add ( - tableMinAge ) ) {
level . Debug ( tenantLogger ) . Log ( "msg" , "skipping tenant because table is too new" , "table-min-age" , tableMinAge , "table-end" , tableInterval . End , "now" , now )
continue
}
if tableMaxAge > 0 && tableInterval . Start . Before ( now . Add ( - tableMaxAge ) ) {
level . Debug ( tenantLogger ) . Log ( "msg" , "skipping tenant because table is too old" , "table-max-age" , tableMaxAge , "table-start" , tableInterval . Start , "now" , now )
continue
}
// Ensure the tenant ID belongs to our shard.
if ! c . sharding . OwnsTenant ( tenant ) {
c . metrics . compactionRunSkippedTenants . Inc ( )
level . Debug ( tenantLogger ) . Log ( "msg" , "skipping tenant because it is not owned by this shard" )
continue
}
ownedTenants [ tenant ] = struct { } { }
if err := c . compactTenantWithRetries ( ctx , tenantLogger , sc , tableName , tenant ) ; err != nil {
switch {
case errors . Is ( err , context . Canceled ) :
// We don't want to count shutdowns as failed compactions because we will pick up with the rest of the compaction after the restart.
level . Info ( tenantLogger ) . Log ( "msg" , "compaction for tenant was interrupted by a shutdown" )
return nil
default :
c . metrics . compactionRunFailedTenants . Inc ( )
level . Error ( tenantLogger ) . Log ( "msg" , "failed to compact tenant" , "err" , err )
errs . Add ( err )
}
continue
}
c . metrics . compactionRunSucceededTenants . Inc ( )
level . Info ( tenantLogger ) . Log ( "msg" , "successfully compacted tenant" )
}
return errs . Err ( )
// TODO: Delete local files for unowned tenants, if there are any.
}
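// compactTenant walks the tenant's TSDB index files for the table and, for every series owned by
// this shard, builds a Job and compacts it with runCompact.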
func ( c * Compactor ) compactTenant ( ctx context . Context , logger log . Logger , sc storeClient , tableName string , tenant string ) error {
level . Info ( logger ) . Log ( "msg" , "starting compaction of tenant" )
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return err
}
// Tokenizer is not thread-safe so we need one per goroutine.
bt , _ := v1 . NewBloomTokenizer ( prometheus . DefaultRegisterer )
// TODO: Use ForEachConcurrent?
errs := multierror . New ( )
if err := sc . indexShipper . ForEach ( ctx , tableName , tenant , func ( isMultiTenantIndex bool , idx shipperindex . Index ) error {
if isMultiTenantIndex {
return fmt . Errorf ( "unexpected multi-tenant" )
}
// TODO: Make these casts safely
if err := idx . ( * tsdb . TSDBFile ) . Index . ( * tsdb . TSDBIndex ) . ForSeries (
ctx , nil ,
0 , math . MaxInt64 , // TODO: Replace with MaxLookBackPeriod
func ( labels labels . Labels , fingerprint model . Fingerprint , chksMetas [ ] tsdbindex . ChunkMeta ) {
job := NewJob ( tenant , tableName , idx . Path ( ) , fingerprint , labels , chksMetas )
jobLogger := log . With ( logger , "job" , job . String ( ) )
ownsJob , err := c . sharding . OwnsJob ( job )
if err != nil {
c . metrics . compactionRunUnownedJobs . Inc ( )
level . Error ( jobLogger ) . Log ( "msg" , "failed to check if compactor owns job" , "err" , err )
errs . Add ( err )
return
}
if ! ownsJob {
c . metrics . compactionRunUnownedJobs . Inc ( )
level . Debug ( jobLogger ) . Log ( "msg" , "skipping job because it is not owned by this shard" )
return
}
if err := c . runCompact ( ctx , jobLogger , job , c . bloomShipperClient , bt , sc ) ; err != nil {
c . metrics . compactionRunFailedJobs . Inc ( )
errs . Add ( errors . Wrap ( err , "runBloomCompact" ) )
return
}
c . metrics . compactionRunSucceededJobs . Inc ( )
} ,
) ; err != nil {
errs . Add ( err )
}
return nil
} ) ; err != nil {
errs . Add ( err )
}
return errs . Err ( )
}
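// runWithRetries keeps calling f until it succeeds, the backoff gives up (maxRetries reached),
// or the context is cancelled, and returns the last error observed.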
func runWithRetries (
ctx context . Context ,
minBackoff , maxBackoff time . Duration ,
maxRetries int ,
f func ( ctx context . Context ) error ,
) error {
var lastErr error
retries := backoff . New ( ctx , backoff . Config {
MinBackoff : minBackoff ,
MaxBackoff : maxBackoff ,
MaxRetries : maxRetries ,
} )
for retries . Ongoing ( ) {
lastErr = f ( ctx )
if lastErr == nil {
return nil
}
retries . Wait ( )
}
return lastErr
}
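// compactTenantWithRetries wraps compactTenant with the compactor's retry and backoff configuration.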
func ( c * Compactor ) compactTenantWithRetries ( ctx context . Context , logger log . Logger , sc storeClient , tableName string , tenant string ) error {
return runWithRetries (
ctx ,
c . cfg . RetryMinBackoff ,
c . cfg . RetryMaxBackoff ,
c . cfg . CompactionRetries ,
func ( ctx context . Context ) error {
return c . compactTenant ( ctx , logger , sc , tableName , tenant )
} ,
)
}
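// makeChunkRefs converts TSDB chunk metas into chunk refs for the given tenant and fingerprint
// so the chunk data can be fetched from the chunk store.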
func makeChunkRefs ( chksMetas [ ] tsdbindex . ChunkMeta , tenant string , fp model . Fingerprint ) [ ] chunk . Chunk {
@@ -204,39 +453,44 @@ func makeChunkRefs(chksMetas []tsdbindex.ChunkMeta, tenant string, fp model.Fing
}
// TODO Revisit this step once v1/bloom lib updated to combine blooms in the same series
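// buildBloomBlock writes the series bloom to a local block directory under workingDir and wraps the
// resulting file in a bloomshipper.Block whose ref fields are derived from the job.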
func buildBloomBlock ( bloomForChks v1 . SeriesWithBloom , series Series , workingDir string ) ( bloomshipper . Block , error ) {
localDst := createLocalDirName ( workingDir , series )
func buildBloomBlock ( ctx context . Context , logger log . Logger , bloomForChks v1 . SeriesWithBloom , job Job , workingDir string ) ( bloomshipper . Block , error ) {
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return bloomshipper . Block { } , err
}
//write bloom to a local dir
localDst := createLocalDirName ( workingDir , job )
// write bloom to a local dir
builder , err := v1 . NewBlockBuilder ( v1 . NewBlockOptions ( ) , v1 . NewDirectoryBlockWriter ( localDst ) )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "creating builder" , err )
level . Error ( logger ) . Log ( "creating builder" , err )
return bloomshipper . Block { } , err
}
checksum , err := builder . BuildFrom ( v1 . NewSliceIter ( [ ] v1 . SeriesWithBloom { bloomForChks } ) )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "writing bloom" , err )
level . Error ( logger ) . Log ( "writing bloom" , err )
return bloomshipper . Block { } , err
}
blockFile , err := os . Open ( filepath . Join ( localDst , bloomFileName ) )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "reading bloomBlock" , err )
level . Error ( logger ) . Log ( "reading bloomBlock" , err )
}
blocks := bloomshipper . Block {
BlockRef : bloomshipper . BlockRef {
Ref : bloomshipper . Ref {
TenantID : series . tenant ,
TableName : series . tableName ,
MinFingerprint : uint64 ( series . fingerPrint ) , //TODO will change once we compact multiple blooms into a block
MaxFingerprint : uint64 ( series . fingerPrint ) ,
StartTimestamp : series . from . Unix ( ) ,
EndTimestamp : series . through . Unix ( ) ,
TenantID : job . Tenant ( ) ,
TableName : job . TableName ( ) ,
MinFingerprint : uint64 ( job . Fingerprint ( ) ) , // TODO will change once we compact multiple blooms into a block
MaxFingerprint : uint64 ( job . Fingerprint ( ) ) ,
StartTimestamp : job . From ( ) . Unix ( ) ,
EndTimestamp : job . Through ( ) . Unix ( ) ,
Checksum : checksum ,
} ,
IndexPath : series . indexPath ,
IndexPath : job . IndexPath ( ) ,
} ,
Data : blockFile ,
}
@@ -244,50 +498,21 @@ func buildBloomBlock(bloomForChks v1.SeriesWithBloom, series Series, workingDir
return blocks , nil
}
// TODO Will be replaced with ring implementation in https://github.com/grafana/loki/pull/11154/
func listSeriesForBlooms ( ctx context . Context , objectClient storeClient ) ( [ ] Series , error ) {
// Returns all the TSDB files, including subdirectories
prefix := "index/"
indices , _ , err := objectClient . object . List ( ctx , prefix , "" )
if err != nil {
return nil , err
}
var result [ ] Series
for _ , index := range indices {
s := strings . Split ( index . Key , "/" )
if len ( s ) > 3 {
tableName := s [ 1 ]
if ! strings . HasPrefix ( tableName , "loki_" ) || strings . Contains ( tableName , "backup" ) {
continue
}
userID := s [ 2 ]
_ , err := strconv . Atoi ( userID )
if err != nil {
continue
}
result = append ( result , Series { tableName : tableName , tenant : userID , indexPath : index . Key } )
}
}
return result , nil
}
func createLocalDirName ( workingDir string , series Series ) string {
dir := fmt . Sprintf ( "bloomBlock-%s-%s-%s-%s-%s-%s" , series . tableName , series . tenant , series . fingerPrint , series . fingerPrint , series . from , series . through )
func createLocalDirName ( workingDir string , job Job ) string {
dir := fmt . Sprintf ( "bloomBlock-%s-%s-%s-%s-%s-%s" , job . TableName ( ) , job . Tenant ( ) , job . Fingerprint ( ) , job . Fingerprint ( ) , job . From ( ) , job . Through ( ) )
return filepath . Join ( workingDir , dir )
}
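// CompactNewChunks builds a bloom for the job's series from the given chunks, writes it as a block
// under dst, and uploads the block and its meta.json through the bloomshipper client.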
func CompactNewChunks ( ctx context . Context , series Series , bt * v1 . BloomTokenizer , bloomShipperClient bloomshipper . Client , dst string ) ( err error ) {
func CompactNewChunks ( ctx context . Context , logger log . Logger , job Job , chunks [ ] chunk . Chunk , bt * v1 . BloomTokenizer , bloomShipperClient bloomshipper . Client , dst string ) ( err error ) {
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return err
}
// Create a bloom for this series
bloomForChks := v1 . SeriesWithBloom {
Series : & v1 . Series {
Fingerprint : series . fingerPrint ,
Fingerprint : job . Fingerprint ( ) ,
} ,
Bloom : & v1 . Bloom {
ScalableBloomFilter : * filter . NewDefaultScalableBloomFilter ( fpRate ) ,
@@ -295,19 +520,18 @@ func CompactNewChunks(ctx context.Context, series Series, bt *v1.BloomTokenizer,
}
// Tokenize data into n-grams
bt . PopulateSeriesWithBloom ( & bloomForChks , series . chunks )
bt . PopulateSeriesWithBloom ( & bloomForChks , chunks )
// Build and upload bloomBlock to storage
blocks , err := buildBloomBlock ( bloomForChks , series , dst )
blocks , err := buildBloomBlock ( ctx , logger , bloomForChks , job , dst )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "building bloomBlocks" , err )
level . Error ( logger ) . Log ( "building bloomBlocks" , err )
return
}
storedBlocks , err := bloomShipperClient . PutBlocks ( ctx , [ ] bloomshipper . Block { blocks } )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "putting blocks to storage" , err )
level . Error ( logger ) . Log ( "putting blocks to storage" , err )
return
}
@@ -319,108 +543,80 @@ func CompactNewChunks(ctx context.Context, series Series, bt *v1.BloomTokenizer,
Blocks : storedBlockRefs ,
}
//TODO move this to an outer layer, otherwise creates a meta per block
// TODO move this to an outer layer, otherwise creates a meta per block
err = bloomShipperClient . PutMeta ( ctx , meta )
if err != nil {
level . Info ( util_log . Logger ) . Log ( "putting meta.json to storage" , err )
level . Error ( logger ) . Log ( "putting meta.json to storage" , err )
return
}
return nil
}
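// runCompact compacts a single job: with no existing metas found (fetching them is still a TODO),
// the job's chunks are read from the chunk store and compacted into a new bloom block via CompactNewChunks.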
func ( c * Compactor ) runCompact ( ctx context . Context , bloomShipperClient bloomshipper . Client , storeClient storeClient ) error {
series , err := listSeriesForBlooms ( ctx , storeClient )
// TODO tokenizer is not thread-safe
// consider moving to Job/worker level with https://github.com/grafana/loki/pull/11154/
// create a tokenizer
bt , _ := v1 . NewBloomTokenizer ( prometheus . DefaultRegisterer )
if err != nil {
func ( c * Compactor ) runCompact ( ctx context . Context , logger log . Logger , job Job , bloomShipperClient bloomshipper . Client , bt * v1 . BloomTokenizer , storeClient storeClient ) error {
// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
if err := ctx . Err ( ) ; err != nil {
return err
}
for _ , s := range series {
err := storeClient . indexShipper . ForEach ( ctx , s . tableName , s . tenant , func ( isMultiTenantIndex bool , idx shipperindex . Index ) error {
if isMultiTenantIndex {
return nil
}
// TODO call bloomShipperClient.GetMetas to get existing meta.json
var metas [ ] bloomshipper . Meta
// TODO make this casting safe
_ = idx . ( * tsdb . TSDBFile ) . Index . ( * tsdb . TSDBIndex ) . ForSeries (
ctx ,
nil , // Process all shards
0 , math . MaxInt64 , // Replace with MaxLookBackPeriod
// Get chunks for a series label and a fp
func ( ls labels . Labels , fp model . Fingerprint , chksMetas [ ] tsdbindex . ChunkMeta ) {
// TODO call bloomShipperClient.GetMetas to get existing meta.json
var metas [ ] bloomshipper . Meta
if len ( metas ) == 0 {
// Get chunks data from list of chunkRefs
chks , err := storeClient . chunk . GetChunks (
ctx ,
makeChunkRefs ( chksMetas , s . tenant , fp ) ,
)
if err != nil {
level . Info ( util_log . Logger ) . Log ( "getting chunks" , err )
return
}
// effectively get min and max of timestamps of the list of chunks in a series
// There must be a better way to get this, ordering chunkRefs by timestamp doesn't fully solve it
// chunk files name have this info in ObjectStore, but it's not really exposed
minFrom := model . Latest
maxThrough := model . Earliest
for _ , c := range chks {
if minFrom > c . From {
minFrom = c . From
}
if maxThrough < c . From {
maxThrough = c . Through
}
}
series := Series {
tableName : s . tableName ,
tenant : s . tenant ,
labels : ls ,
fingerPrint : fp ,
chunks : chks ,
from : minFrom ,
through : maxThrough ,
indexPath : s . indexPath ,
}
err = CompactNewChunks ( ctx , series , bt , bloomShipperClient , c . cfg . WorkingDirectory )
if err != nil {
return
}
} else {
// TODO complete part 2 - periodic compaction for delta from previous period
// When already compacted metas exists
// Deduplicate index paths
uniqueIndexPaths := make ( map [ string ] struct { } )
for _ , meta := range metas {
for _ , blockRef := range meta . Blocks {
uniqueIndexPaths [ blockRef . IndexPath ] = struct { } { }
//...
}
}
}
} )
return nil
} )
if len ( metas ) == 0 {
// Get chunks data from list of chunkRefs
chks , err := storeClient . chunk . GetChunks (
ctx ,
makeChunkRefs ( job . Chunks ( ) , job . Tenant ( ) , job . Fingerprint ( ) ) ,
)
if err != nil {
return err
}
err = CompactNewChunks ( ctx , logger , job , chks , bt , bloomShipperClient , c . cfg . WorkingDirectory )
if err != nil {
return errors . Wrap ( err , "getting each series" )
return err
}
} else {
// TODO complete part 2 - periodic compaction for delta from previous period
// When already compacted metas exists
// Deduplicate index paths
uniqueIndexPaths := make ( map [ string ] struct { } )
for _ , meta := range metas {
for _ , blockRef := range meta . Blocks {
uniqueIndexPaths [ blockRef . IndexPath ] = struct { } { }
// ...
}
}
}
return nil
}
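// getIntervalsForTables derives the time interval covered by each table from its name.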
func getIntervalsForTables ( tables [ ] string ) map [ string ] model . Interval {
tablesIntervals := make ( map [ string ] model . Interval , len ( tables ) )
for _ , table := range tables {
tablesIntervals [ table ] = retention . ExtractIntervalFromTableName ( table )
}
return tablesIntervals
}
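// sortTablesByRange sorts tables in place so the most recently started table comes first.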
func sortTablesByRange ( tables [ ] string , intervals map [ string ] model . Interval ) {
sort . Slice ( tables , func ( i , j int ) bool {
// a table sorts before another if its start time is later, which produces a most-recent-first order
return intervals [ tables [ i ] ] . Start . After ( intervals [ tables [ j ] ] . Start )
} )
}
// TODO: comes from pkg/compactor/compactor.go
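// schemaPeriodForTable returns the period config whose index tables include tableName,
// or false when no configured schema period matches.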
func schemaPeriodForTable ( cfg config . SchemaConfig , tableName string ) ( config . PeriodConfig , bool ) {
tableInterval := retention . ExtractIntervalFromTableName ( tableName )
schemaCfg , err := cfg . SchemaForTime ( tableInterval . Start )
if err != nil || schemaCfg . IndexTables . TableFor ( tableInterval . Start ) != tableName {
return config . PeriodConfig { } , false
}
return schemaCfg , true
}