compaction: Separate metrics for tracking retention and compaction (#11263)

**What this PR does / why we need it**:
In PR #9884, we separated the retention loop from compaction to avoid
blocking compaction for too long due to some intensive delete requests.
Currently, we track retention and compaction using the same metrics.
This PR adds separate metrics for monitoring retention operations. I have
also updated the Retention dashboard to use the new metrics.
pull/11242/head^2
Sandeep Sukhani 2 years ago committed by GitHub
parent 5535267f2c
commit 6a62b8cf42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      integration/loki_micro_services_delete_test.go
  2. 28
      pkg/compactor/compactor.go
  3. 40
      pkg/compactor/compactor_test.go
  4. 46
      pkg/compactor/metrics.go
  5. 398
      production/loki-mixin-compiled-ssd/dashboards/loki-retention.json
  6. 398
      production/loki-mixin-compiled/dashboards/loki-retention.json
  7. 30
      production/loki-mixin/dashboards/loki-retention.libsonnet

@ -216,6 +216,7 @@ func TestMicroServicesDeleteRequest(t *testing.T) {
validateQueryResponse := func(expectedStreams []client.StreamValues, resp *client.Response) {
t.Helper()
assert.Equal(t, "success", resp.Status)
assert.Equal(t, "streams", resp.Data.ResultType)
require.Len(t, resp.Data.Stream, len(expectedStreams))

@ -128,6 +128,11 @@ func (cfg *Config) Validate() error {
cfg.ApplyRetentionInterval = cfg.CompactionInterval
}
if cfg.ApplyRetentionInterval == cfg.CompactionInterval {
// offset the retention interval by a fixed amount to avoid running retention and compaction at the same time
cfg.ApplyRetentionInterval += minDuration(10*time.Minute, cfg.ApplyRetentionInterval/2)
}
if err := config.ValidatePathPrefix(cfg.DeleteRequestStoreKeyPrefix); err != nil {
return fmt.Errorf("validate delete store path prefix: %w", err)
}
@ -604,7 +609,7 @@ func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRet
}
if hasUncompactedIndex {
c.metrics.skippedCompactingLockedTables.Inc()
c.metrics.skippedCompactingLockedTables.WithLabelValues(tableName).Inc()
level.Warn(util_log.Logger).Log("msg", "skipped compacting table which likely has uncompacted index since it is locked by retention", "table_name", tableName)
}
return nil
@ -657,14 +662,19 @@ func (c *Compactor) RunCompaction(ctx context.Context, applyRetention bool) (err
if err != nil {
status = statusFailure
}
withRetentionLabelValue := fmt.Sprintf("%v", applyRetention)
c.metrics.compactTablesOperationTotal.WithLabelValues(status, withRetentionLabelValue).Inc()
if applyRetention {
c.metrics.applyRetentionOperationTotal.WithLabelValues(status).Inc()
} else {
c.metrics.compactTablesOperationTotal.WithLabelValues(status).Inc()
}
runtime := time.Since(start)
if status == statusSuccess {
c.metrics.compactTablesOperationDurationSeconds.WithLabelValues(withRetentionLabelValue).Set(runtime.Seconds())
c.metrics.compactTablesOperationLastSuccess.WithLabelValues(withRetentionLabelValue).SetToCurrentTime()
if applyRetention {
c.metrics.applyRetentionOperationDurationSeconds.Set(runtime.Seconds())
c.metrics.applyRetentionLastSuccess.SetToCurrentTime()
} else {
c.metrics.compactTablesOperationDurationSeconds.Set(runtime.Seconds())
c.metrics.compactTablesOperationLastSuccess.SetToCurrentTime()
}
}
@ -874,3 +884,11 @@ func schemaPeriodForTable(cfg config.SchemaConfig, tableName string) (config.Per
return schemaCfg, true
}
// minDuration returns the smaller of the two durations x and y.
// When the two are equal, that shared value is returned.
func minDuration(x time.Duration, y time.Duration) time.Duration {
	smallest := y
	if x < smallest {
		smallest = x
	}
	return smallest
}

@ -348,7 +348,7 @@ func TestCompactor_TableLocking(t *testing.T) {
lockTable string
applyRetention bool
compactionShouldTimeout bool
retentionShouldTimeout bool
}{
{
name: "no table locked - not applying retention",
@ -362,10 +362,10 @@ func TestCompactor_TableLocking(t *testing.T) {
lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd),
},
{
name: "first table locked - applying retention",
lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd),
applyRetention: true,
compactionShouldTimeout: true,
name: "first table locked - applying retention",
lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd),
applyRetention: true,
retentionShouldTimeout: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
@ -389,30 +389,38 @@ func TestCompactor_TableLocking(t *testing.T) {
defer cancel()
err := compactor.RunCompaction(ctx, tc.applyRetention)
// compaction should not timeout after first run since we won't be locking the table
if n == 1 && tc.compactionShouldTimeout {
// retention should not timeout after first run since we won't be locking the table
if n == 1 && tc.retentionShouldTimeout {
require.ErrorIs(t, err, context.DeadlineExceeded)
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure, "true")))
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure, "false")))
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusFailure)))
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure)))
return
}
require.NoError(t, err)
if n > 1 && tc.compactionShouldTimeout {
// this should be the first successful run if compaction was expected to be timeout out during first run
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", tc.applyRetention))))
if n > 1 && tc.applyRetention && tc.retentionShouldTimeout {
// this should be the first successful run if retention was expected to time out during the first run
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
} else {
// else it should have succeeded during all the n runs
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", tc.applyRetention))))
if tc.applyRetention {
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
} else {
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess)))
}
}
if tc.applyRetention {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess)))
} else {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
}
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", !tc.applyRetention))))
// if the table was locked and compaction ran without retention then only locked table should have been skipped
if tc.lockTable != "" {
if tc.applyRetention {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables))
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
} else {
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables))
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
}
}

@ -8,17 +8,17 @@ import (
const (
statusFailure = "failure"
statusSuccess = "success"
lblWithRetention = "with_retention"
)
type metrics struct {
compactTablesOperationTotal *prometheus.CounterVec
compactTablesOperationDurationSeconds *prometheus.GaugeVec
compactTablesOperationLastSuccess *prometheus.GaugeVec
applyRetentionLastSuccess prometheus.Gauge
compactorRunning prometheus.Gauge
skippedCompactingLockedTables prometheus.Counter
compactTablesOperationTotal *prometheus.CounterVec
compactTablesOperationDurationSeconds prometheus.Gauge
compactTablesOperationLastSuccess prometheus.Gauge
applyRetentionOperationTotal *prometheus.CounterVec
applyRetentionOperationDurationSeconds prometheus.Gauge
applyRetentionLastSuccess prometheus.Gauge
compactorRunning prometheus.Gauge
skippedCompactingLockedTables *prometheus.CounterVec
}
func newMetrics(r prometheus.Registerer) *metrics {
@ -26,18 +26,28 @@ func newMetrics(r prometheus.Registerer) *metrics {
compactTablesOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_total",
Help: "Total number of tables compaction done by status and with/without retention",
}, []string{"status", lblWithRetention}),
compactTablesOperationDurationSeconds: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
Help: "Total number of tables compaction done by status",
}, []string{"status"}),
compactTablesOperationDurationSeconds: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_duration_seconds",
Help: "Time (in seconds) spent in compacting all the tables with/without retention",
}, []string{lblWithRetention}),
compactTablesOperationLastSuccess: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
Help: "Time (in seconds) spent in compacting all the tables",
}),
compactTablesOperationLastSuccess: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_last_successful_run_timestamp_seconds",
Help: "Unix timestamp of the last successful compaction run",
}, []string{lblWithRetention}),
}),
applyRetentionOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_compactor",
Name: "apply_retention_operation_total",
Help: "Total number of attempts done to apply retention with status",
}, []string{"status"}),
applyRetentionOperationDurationSeconds: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_compactor",
Name: "apply_retention_operation_duration_seconds",
Help: "Time (in seconds) spent in applying retention",
}),
applyRetentionLastSuccess: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper",
Name: "apply_retention_last_successful_run_timestamp_seconds",
@ -48,11 +58,11 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "compactor_running",
Help: "Value will be 1 if compactor is currently running on this instance",
}),
skippedCompactingLockedTables: promauto.With(r).NewCounter(prometheus.CounterOpts{
skippedCompactingLockedTables: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_compactor",
Name: "skipped_compacting_locked_tables_total",
Name: "skipped_compacting_locked_table_total",
Help: "Count of uncompacted tables being skipped due to them being locked by retention",
}),
}, []string{"table_name"}),
}
return &m

@ -375,7 +375,7 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -389,7 +389,7 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Compact and Mark Operation Success",
"title": "Last Compact Tables Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
@ -449,7 +449,7 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -465,7 +465,7 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compact and Mark Operations Duration",
"title": "Compact Tables Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
@ -497,7 +497,19 @@
"show": false
}
]
},
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compaction",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
@ -525,7 +537,83 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (table_name)(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{table_name}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Number of times Tables were skipped during Compaction",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -541,7 +629,279 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compact and Mark Operations Per Status",
"title": "Compact Tables Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"custom": { },
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "dateTimeFromNow"
}
},
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": { },
"textMode": "auto"
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_boltdb_shipper_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3",
"format": "time_series",
"instant": true,
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Mark Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "stat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "duration",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{status}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
@ -579,7 +939,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compact and Mark",
"title": "Retention",
"titleSize": "h6"
},
{
@ -593,7 +953,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 7,
"id": 11,
"legend": {
"avg": false,
"current": false,
@ -669,7 +1029,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 8,
"id": 12,
"legend": {
"avg": false,
"current": false,
@ -745,7 +1105,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 9,
"id": 13,
"legend": {
"avg": false,
"current": false,
@ -834,7 +1194,7 @@
"datasource": "$datasource",
"fill": 1,
"format": "short",
"id": 10,
"id": 14,
"legend": {
"avg": false,
"current": false,
@ -909,7 +1269,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 11,
"id": 15,
"legend": {
"avg": false,
"current": false,
@ -1014,7 +1374,7 @@
"datasource": "$datasource",
"fill": 1,
"format": "short",
"id": 12,
"id": 16,
"legend": {
"avg": false,
"current": false,
@ -1089,7 +1449,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 13,
"id": 17,
"legend": {
"avg": false,
"current": false,
@ -1193,7 +1553,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 14,
"id": 18,
"legend": {
"avg": false,
"current": false,
@ -1269,7 +1629,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 15,
"id": 19,
"legend": {
"avg": false,
"current": false,
@ -1345,7 +1705,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 16,
"id": 20,
"legend": {
"avg": false,
"current": false,
@ -1428,7 +1788,7 @@
"panels": [
{
"datasource": "$loki_datasource",
"id": 17,
"id": 21,
"span": 12,
"targets": [
{

@ -375,7 +375,7 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -389,7 +389,7 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Compact and Mark Operation Success",
"title": "Last Compact Tables Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
@ -449,7 +449,7 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -465,7 +465,7 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compact and Mark Operations Duration",
"title": "Compact Tables Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
@ -497,7 +497,19 @@
"show": false
}
]
},
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compaction",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
@ -525,7 +537,83 @@
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (table_name)(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{table_name}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Number of times Tables were skipped during Compaction",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
@ -541,7 +629,279 @@
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Compact and Mark Operations Per Status",
"title": "Compact Tables Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"custom": { },
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "dateTimeFromNow"
}
},
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": { },
"textMode": "auto"
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_boltdb_shipper_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3",
"format": "time_series",
"instant": true,
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Mark Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "stat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "duration",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{status}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
@ -579,7 +939,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compact and Mark",
"title": "Retention",
"titleSize": "h6"
},
{
@ -593,7 +953,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 7,
"id": 11,
"legend": {
"avg": false,
"current": false,
@ -669,7 +1029,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 8,
"id": 12,
"legend": {
"avg": false,
"current": false,
@ -745,7 +1105,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 9,
"id": 13,
"legend": {
"avg": false,
"current": false,
@ -834,7 +1194,7 @@
"datasource": "$datasource",
"fill": 1,
"format": "short",
"id": 10,
"id": 14,
"legend": {
"avg": false,
"current": false,
@ -909,7 +1269,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 11,
"id": 15,
"legend": {
"avg": false,
"current": false,
@ -1014,7 +1374,7 @@
"datasource": "$datasource",
"fill": 1,
"format": "short",
"id": 12,
"id": 16,
"legend": {
"avg": false,
"current": false,
@ -1089,7 +1449,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 13,
"id": 17,
"legend": {
"avg": false,
"current": false,
@ -1193,7 +1553,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 14,
"id": 18,
"legend": {
"avg": false,
"current": false,
@ -1269,7 +1629,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 15,
"id": 19,
"legend": {
"avg": false,
"current": false,
@ -1345,7 +1705,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 16,
"id": 20,
"legend": {
"avg": false,
"current": false,
@ -1428,7 +1788,7 @@
"panels": [
{
"datasource": "$loki_datasource",
"id": 17,
"id": 21,
"span": 12,
"targets": [
{

@ -25,20 +25,42 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addRow(
$.row('Compact and Mark')
$.row('Compaction')
.addPanel(
$.fromNowPanel('Last Compact and Mark Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds')
$.fromNowPanel('Last Compact Tables Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds')
)
.addPanel(
$.panel('Compact and Mark Operations Duration') +
$.panel('Compact Tables Operations Duration') +
$.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) +
{ yaxes: $.yaxes('s') },
)
)
.addRow(
$.row('')
.addPanel(
$.panel('Compact and Mark Operations Per Status') +
$.panel('Number of times Tables were skipped during Compaction') +
$.queryPanel(['sum(increase(loki_compactor_skipped_compacting_locked_table_total{%s}[$__range]))' % $.namespaceMatcher()], ['{{table_name}}']),
)
.addPanel(
$.panel('Compact Tables Operations Per Status') +
$.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']),
)
)
.addRow(
$.row('Retention')
.addPanel(
$.fromNowPanel('Last Mark Operation Success', 'loki_compactor_apply_retention_last_successful_run_timestamp_seconds')
)
.addPanel(
$.panel('Mark Operations Duration') +
$.queryPanel(['loki_compactor_apply_retention_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) +
{ yaxes: $.yaxes('s') },
)
.addPanel(
$.panel('Mark Operations Per Status') +
$.queryPanel(['sum by (status)(rate(loki_compactor_apply_retention_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']),
)
)
.addRow(
$.row('Per Table Marker')
.addPanel(

Loading…
Cancel
Save