compactor: track number of times table compactions were skipped consecutively (#11604)

**What this PR does / why we need it**:
Earlier we had a counter for the number of times tables were skipped by
compaction due to them being locked by retention. A counter makes it
hard to reliably write an alert without making it noisy. I am changing
the counter to a gauge, which tracks the number of times tables were
consecutively skipped by compaction. This makes it easier to write
alerts.

**Checklist**
- [x] Tests updated

---------

Co-authored-by: Ashwanth <iamashwanth@gmail.com>
pull/11606/head
Sandeep Sukhani 2 years ago committed by GitHub
parent cd3cf6291f
commit 3a7940af1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      pkg/compactor/compactor.go
  2. 7
      pkg/compactor/compactor_test.go
  3. 8
      pkg/compactor/metrics.go

@ -643,6 +643,10 @@ func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRet
level.Error(util_log.Logger).Log("msg", "failed to compact files", "table", tableName, "err", err)
return err
}
if !applyRetention {
c.metrics.skippedCompactingLockedTables.WithLabelValues(tableName).Set(0)
}
return nil
}

@ -420,7 +420,12 @@ func TestCompactor_TableLocking(t *testing.T) {
if tc.applyRetention {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
} else {
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
// we only lock table during first run so second run should reset the skip count metric to 0
skipCount := float64(0)
if n == 1 {
skipCount = 1
}
require.Equal(t, skipCount, testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
}
}

@ -18,7 +18,7 @@ type metrics struct {
applyRetentionOperationDurationSeconds prometheus.Gauge
applyRetentionLastSuccess prometheus.Gauge
compactorRunning prometheus.Gauge
skippedCompactingLockedTables *prometheus.CounterVec
skippedCompactingLockedTables *prometheus.GaugeVec
}
func newMetrics(r prometheus.Registerer) *metrics {
@ -58,10 +58,10 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "compactor_running",
Help: "Value will be 1 if compactor is currently running on this instance",
}),
skippedCompactingLockedTables: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
skippedCompactingLockedTables: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "loki_compactor",
Name: "skipped_compacting_locked_table_total",
Help: "Count of uncompacted tables being skipped due to them being locked by retention",
Name: "locked_table_successive_compaction_skips",
Help: "Number of times uncompacted tables were consecutively skipped due to them being locked by retention",
}, []string{"table_name"}),
}

Loading…
Cancel
Save