compaction: Separate metrics for tracking retention and compaction (#11263)

**What this PR does / why we need it**:
In PR #9884, we separated the retention loop from compaction to avoid
blocking compaction for too long due to some intensive delete requests.
Currently, we track retention and compaction using the same metrics.
This PR adds separate metrics for monitoring the retention operation. I have
also updated the Retention dashboard to use the new metrics.
pull/11242/head^2
Sandeep Sukhani 2 years ago committed by GitHub
parent 5535267f2c
commit 6a62b8cf42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      integration/loki_micro_services_delete_test.go
  2. 28
      pkg/compactor/compactor.go
  3. 40
      pkg/compactor/compactor_test.go
  4. 46
      pkg/compactor/metrics.go
  5. 398
      production/loki-mixin-compiled-ssd/dashboards/loki-retention.json
  6. 398
      production/loki-mixin-compiled/dashboards/loki-retention.json
  7. 30
      production/loki-mixin/dashboards/loki-retention.libsonnet

@ -216,6 +216,7 @@ func TestMicroServicesDeleteRequest(t *testing.T) {
validateQueryResponse := func(expectedStreams []client.StreamValues, resp *client.Response) { validateQueryResponse := func(expectedStreams []client.StreamValues, resp *client.Response) {
t.Helper() t.Helper()
assert.Equal(t, "success", resp.Status)
assert.Equal(t, "streams", resp.Data.ResultType) assert.Equal(t, "streams", resp.Data.ResultType)
require.Len(t, resp.Data.Stream, len(expectedStreams)) require.Len(t, resp.Data.Stream, len(expectedStreams))

@ -128,6 +128,11 @@ func (cfg *Config) Validate() error {
cfg.ApplyRetentionInterval = cfg.CompactionInterval cfg.ApplyRetentionInterval = cfg.CompactionInterval
} }
if cfg.ApplyRetentionInterval == cfg.CompactionInterval {
// add some jitter to avoid running retention and compaction at the same time
cfg.ApplyRetentionInterval += minDuration(10*time.Minute, cfg.ApplyRetentionInterval/2)
}
if err := config.ValidatePathPrefix(cfg.DeleteRequestStoreKeyPrefix); err != nil { if err := config.ValidatePathPrefix(cfg.DeleteRequestStoreKeyPrefix); err != nil {
return fmt.Errorf("validate delete store path prefix: %w", err) return fmt.Errorf("validate delete store path prefix: %w", err)
} }
@ -604,7 +609,7 @@ func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRet
} }
if hasUncompactedIndex { if hasUncompactedIndex {
c.metrics.skippedCompactingLockedTables.Inc() c.metrics.skippedCompactingLockedTables.WithLabelValues(tableName).Inc()
level.Warn(util_log.Logger).Log("msg", "skipped compacting table which likely has uncompacted index since it is locked by retention", "table_name", tableName) level.Warn(util_log.Logger).Log("msg", "skipped compacting table which likely has uncompacted index since it is locked by retention", "table_name", tableName)
} }
return nil return nil
@ -657,14 +662,19 @@ func (c *Compactor) RunCompaction(ctx context.Context, applyRetention bool) (err
if err != nil { if err != nil {
status = statusFailure status = statusFailure
} }
withRetentionLabelValue := fmt.Sprintf("%v", applyRetention) if applyRetention {
c.metrics.compactTablesOperationTotal.WithLabelValues(status, withRetentionLabelValue).Inc() c.metrics.applyRetentionOperationTotal.WithLabelValues(status).Inc()
} else {
c.metrics.compactTablesOperationTotal.WithLabelValues(status).Inc()
}
runtime := time.Since(start) runtime := time.Since(start)
if status == statusSuccess { if status == statusSuccess {
c.metrics.compactTablesOperationDurationSeconds.WithLabelValues(withRetentionLabelValue).Set(runtime.Seconds())
c.metrics.compactTablesOperationLastSuccess.WithLabelValues(withRetentionLabelValue).SetToCurrentTime()
if applyRetention { if applyRetention {
c.metrics.applyRetentionOperationDurationSeconds.Set(runtime.Seconds())
c.metrics.applyRetentionLastSuccess.SetToCurrentTime() c.metrics.applyRetentionLastSuccess.SetToCurrentTime()
} else {
c.metrics.compactTablesOperationDurationSeconds.Set(runtime.Seconds())
c.metrics.compactTablesOperationLastSuccess.SetToCurrentTime()
} }
} }
@ -874,3 +884,11 @@ func schemaPeriodForTable(cfg config.SchemaConfig, tableName string) (config.Per
return schemaCfg, true return schemaCfg, true
} }
// minDuration returns the smaller of the two given durations. When both are
// equal, that shared value is returned.
func minDuration(x time.Duration, y time.Duration) time.Duration {
	smallest := y
	if x < y {
		smallest = x
	}
	return smallest
}

@ -348,7 +348,7 @@ func TestCompactor_TableLocking(t *testing.T) {
lockTable string lockTable string
applyRetention bool applyRetention bool
compactionShouldTimeout bool retentionShouldTimeout bool
}{ }{
{ {
name: "no table locked - not applying retention", name: "no table locked - not applying retention",
@ -362,10 +362,10 @@ func TestCompactor_TableLocking(t *testing.T) {
lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd), lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd),
}, },
{ {
name: "first table locked - applying retention", name: "first table locked - applying retention",
lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd), lockTable: fmt.Sprintf("%s%d", indexTablePrefix, tableNumEnd),
applyRetention: true, applyRetention: true,
compactionShouldTimeout: true, retentionShouldTimeout: true,
}, },
} { } {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
@ -389,30 +389,38 @@ func TestCompactor_TableLocking(t *testing.T) {
defer cancel() defer cancel()
err := compactor.RunCompaction(ctx, tc.applyRetention) err := compactor.RunCompaction(ctx, tc.applyRetention)
// compaction should not timeout after first run since we won't be locking the table // retention should not timeout after first run since we won't be locking the table
if n == 1 && tc.compactionShouldTimeout { if n == 1 && tc.retentionShouldTimeout {
require.ErrorIs(t, err, context.DeadlineExceeded) require.ErrorIs(t, err, context.DeadlineExceeded)
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure, "true"))) require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusFailure)))
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure, "false"))) require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusFailure)))
return return
} }
require.NoError(t, err) require.NoError(t, err)
if n > 1 && tc.compactionShouldTimeout { if n > 1 && tc.applyRetention && tc.retentionShouldTimeout {
// this should be the first successful run if compaction was expected to be timeout out during first run // this should be the first successful run if retention was expected to time out during the first run
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", tc.applyRetention)))) require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
} else { } else {
// else it should have succeeded during all the n runs // else it should have succeeded during all the n runs
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", tc.applyRetention)))) if tc.applyRetention {
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
} else {
require.Equal(t, float64(n), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess)))
}
}
if tc.applyRetention {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess)))
} else {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.applyRetentionOperationTotal.WithLabelValues(statusSuccess)))
} }
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.compactTablesOperationTotal.WithLabelValues(statusSuccess, fmt.Sprintf("%v", !tc.applyRetention))))
// if the table was locked and compaction ran without retention then only locked table should have been skipped // if the table was locked and compaction ran without retention then only locked table should have been skipped
if tc.lockTable != "" { if tc.lockTable != "" {
if tc.applyRetention { if tc.applyRetention {
require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables)) require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
} else { } else {
require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables)) require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable)))
} }
} }

@ -8,17 +8,17 @@ import (
const ( const (
statusFailure = "failure" statusFailure = "failure"
statusSuccess = "success" statusSuccess = "success"
lblWithRetention = "with_retention"
) )
type metrics struct { type metrics struct {
compactTablesOperationTotal *prometheus.CounterVec compactTablesOperationTotal *prometheus.CounterVec
compactTablesOperationDurationSeconds *prometheus.GaugeVec compactTablesOperationDurationSeconds prometheus.Gauge
compactTablesOperationLastSuccess *prometheus.GaugeVec compactTablesOperationLastSuccess prometheus.Gauge
applyRetentionLastSuccess prometheus.Gauge applyRetentionOperationTotal *prometheus.CounterVec
compactorRunning prometheus.Gauge applyRetentionOperationDurationSeconds prometheus.Gauge
skippedCompactingLockedTables prometheus.Counter applyRetentionLastSuccess prometheus.Gauge
compactorRunning prometheus.Gauge
skippedCompactingLockedTables *prometheus.CounterVec
} }
func newMetrics(r prometheus.Registerer) *metrics { func newMetrics(r prometheus.Registerer) *metrics {
@ -26,18 +26,28 @@ func newMetrics(r prometheus.Registerer) *metrics {
compactTablesOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ compactTablesOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_boltdb_shipper", Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_total", Name: "compact_tables_operation_total",
Help: "Total number of tables compaction done by status and with/without retention", Help: "Total number of tables compaction done by status",
}, []string{"status", lblWithRetention}), }, []string{"status"}),
compactTablesOperationDurationSeconds: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ compactTablesOperationDurationSeconds: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper", Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_duration_seconds", Name: "compact_tables_operation_duration_seconds",
Help: "Time (in seconds) spent in compacting all the tables with/without retention", Help: "Time (in seconds) spent in compacting all the tables",
}, []string{lblWithRetention}), }),
compactTablesOperationLastSuccess: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ compactTablesOperationLastSuccess: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper", Namespace: "loki_boltdb_shipper",
Name: "compact_tables_operation_last_successful_run_timestamp_seconds", Name: "compact_tables_operation_last_successful_run_timestamp_seconds",
Help: "Unix timestamp of the last successful compaction run", Help: "Unix timestamp of the last successful compaction run",
}, []string{lblWithRetention}), }),
applyRetentionOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_compactor",
Name: "apply_retention_operation_total",
Help: "Total number of attempts done to apply retention with status",
}, []string{"status"}),
applyRetentionOperationDurationSeconds: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_compactor",
Name: "apply_retention_operation_duration_seconds",
Help: "Time (in seconds) spent in applying retention",
}),
applyRetentionLastSuccess: promauto.With(r).NewGauge(prometheus.GaugeOpts{ applyRetentionLastSuccess: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper", Namespace: "loki_boltdb_shipper",
Name: "apply_retention_last_successful_run_timestamp_seconds", Name: "apply_retention_last_successful_run_timestamp_seconds",
@ -48,11 +58,11 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "compactor_running", Name: "compactor_running",
Help: "Value will be 1 if compactor is currently running on this instance", Help: "Value will be 1 if compactor is currently running on this instance",
}), }),
skippedCompactingLockedTables: promauto.With(r).NewCounter(prometheus.CounterOpts{ skippedCompactingLockedTables: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_compactor", Namespace: "loki_compactor",
Name: "skipped_compacting_locked_tables_total", Name: "skipped_compacting_locked_table_total",
Help: "Count of uncompacted tables being skipped due to them being locked by retention", Help: "Count of uncompacted tables being skipped due to them being locked by retention",
}), }, []string{"table_name"}),
} }
return &m return &m

@ -375,7 +375,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -389,7 +389,7 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Last Compact and Mark Operation Success", "title": "Last Compact Tables Operation Success",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -449,7 +449,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -465,7 +465,7 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Compact and Mark Operations Duration", "title": "Compact Tables Operations Duration",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -497,7 +497,19 @@
"show": false "show": false
} }
] ]
}, }
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compaction",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -525,7 +537,83 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (table_name)(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{table_name}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Number of times Tables were skipped during Compaction",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -541,7 +629,279 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Compact and Mark Operations Per Status", "title": "Compact Tables Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"custom": { },
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "dateTimeFromNow"
}
},
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": { },
"textMode": "auto"
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_boltdb_shipper_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3",
"format": "time_series",
"instant": true,
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Mark Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "stat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "duration",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{status}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Per Status",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -579,7 +939,7 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "Compact and Mark", "title": "Retention",
"titleSize": "h6" "titleSize": "h6"
}, },
{ {
@ -593,7 +953,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 11,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -669,7 +1029,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 8, "id": 12,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -745,7 +1105,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 13,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -834,7 +1194,7 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"format": "short", "format": "short",
"id": 10, "id": 14,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -909,7 +1269,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 11, "id": 15,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1014,7 +1374,7 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"format": "short", "format": "short",
"id": 12, "id": 16,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1089,7 +1449,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 13, "id": 17,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1193,7 +1553,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 14, "id": 18,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1269,7 +1629,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 15, "id": 19,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1345,7 +1705,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 16, "id": 20,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1428,7 +1788,7 @@
"panels": [ "panels": [
{ {
"datasource": "$loki_datasource", "datasource": "$loki_datasource",
"id": 17, "id": 21,
"span": 12, "span": 12,
"targets": [ "targets": [
{ {

@ -375,7 +375,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -389,7 +389,7 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Last Compact and Mark Operation Success", "title": "Last Compact Tables Operation Success",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -449,7 +449,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -465,7 +465,7 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Compact and Mark Operations Duration", "title": "Compact Tables Operations Duration",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -497,7 +497,19 @@
"show": false "show": false
} }
] ]
}, }
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Compaction",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -525,7 +537,83 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (table_name)(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{table_name}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Number of times Tables were skipped during Compaction",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -541,7 +629,279 @@
"thresholds": [ ], "thresholds": [ ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Compact and Mark Operations Per Status", "title": "Compact Tables Operations Per Status",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"custom": { },
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "dateTimeFromNow"
}
},
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": { },
"textMode": "auto"
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_boltdb_shipper_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3",
"format": "time_series",
"instant": true,
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Last Mark Operation Success",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "stat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "duration",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Duration",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{status}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Mark Operations Per Status",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 2, "sort": 2,
@ -579,7 +939,7 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "Compact and Mark", "title": "Retention",
"titleSize": "h6" "titleSize": "h6"
}, },
{ {
@ -593,7 +953,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 11,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -669,7 +1029,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 8, "id": 12,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -745,7 +1105,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 13,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -834,7 +1194,7 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"format": "short", "format": "short",
"id": 10, "id": 14,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -909,7 +1269,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 11, "id": 15,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1014,7 +1374,7 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"format": "short", "format": "short",
"id": 12, "id": 16,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1089,7 +1449,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 13, "id": 17,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1193,7 +1553,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 14, "id": 18,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1269,7 +1629,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 15, "id": 19,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1345,7 +1705,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 16, "id": 20,
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -1428,7 +1788,7 @@
"panels": [ "panels": [
{ {
"datasource": "$loki_datasource", "datasource": "$loki_datasource",
"id": 17, "id": 21,
"span": 12, "span": 12,
"targets": [ "targets": [
{ {

@ -25,20 +25,42 @@ local utils = import 'mixin-utils/utils.libsonnet';
) )
.addRow( .addRow(
$.row('Compact and Mark') $.row('Compaction')
.addPanel( .addPanel(
$.fromNowPanel('Last Compact and Mark Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds') $.fromNowPanel('Last Compact Tables Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds')
) )
.addPanel( .addPanel(
$.panel('Compact and Mark Operations Duration') + $.panel('Compact Tables Operations Duration') +
$.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) + $.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) +
{ yaxes: $.yaxes('s') }, { yaxes: $.yaxes('s') },
) )
)
.addRow(
$.row('')
.addPanel( .addPanel(
$.panel('Compact and Mark Operations Per Status') + $.panel('Number of times Tables were skipped during Compaction') +
$.queryPanel(['sum by (table_name)(increase(loki_compactor_skipped_compacting_locked_table_total{%s}[$__range]))' % $.namespaceMatcher()], ['{{table_name}}']),
)
.addPanel(
$.panel('Compact Tables Operations Per Status') +
$.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']), $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']),
) )
) )
.addRow(
$.row('Retention')
.addPanel(
$.fromNowPanel('Last Mark Operation Success', 'loki_compactor_apply_retention_last_successful_run_timestamp_seconds')
)
.addPanel(
$.panel('Mark Operations Duration') +
$.queryPanel(['loki_compactor_apply_retention_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) +
{ yaxes: $.yaxes('s') },
)
.addPanel(
$.panel('Mark Operations Per Status') +
$.queryPanel(['sum by (status)(rate(loki_compactor_apply_retention_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']),
)
)
.addRow( .addRow(
$.row('Per Table Marker') $.row('Per Table Marker')
.addPanel( .addPanel(

Loading…
Cancel
Save