diff --git a/production/loki-mixin/dashboards.libsonnet b/production/loki-mixin/dashboards.libsonnet
index e4d2b7c531..2f3e5fb9c5 100644
--- a/production/loki-mixin/dashboards.libsonnet
+++ b/production/loki-mixin/dashboards.libsonnet
@@ -1,4 +1,5 @@
 (import 'config.libsonnet') +
+(import 'dashboards/loki-retention.libsonnet') +
 (import 'dashboards/loki-chunks.libsonnet') +
 (import 'dashboards/loki-logs.libsonnet') +
 (import 'dashboards/loki-operational.libsonnet') +
@@ -6,4 +7,3 @@
 (import 'dashboards/loki-writes.libsonnet') +
 (import 'dashboards/loki-writes-resources.libsonnet') +
 (import 'dashboards/loki-reads-resources.libsonnet')
-
diff --git a/production/loki-mixin/dashboards/dashboard-utils.libsonnet b/production/loki-mixin/dashboards/dashboard-utils.libsonnet
index 0a04634a62..33434d73da 100644
--- a/production/loki-mixin/dashboards/dashboard-utils.libsonnet
+++ b/production/loki-mixin/dashboards/dashboard-utils.libsonnet
@@ -4,13 +4,30 @@ local utils = import 'mixin-utils/utils.libsonnet';
   // Override the dashboard constructor to add:
   // - default tags,
   // - some links that propagate the selectred cluster.
-  dashboard(title)::
-    super.dashboard(title) + {
+  dashboard(title, uid='')::
+    super.dashboard(title, uid) + {
       addRowIf(condition, row)::
         if condition
         then self.addRow(row)
         else self,
-
+      // Adds a `datasource`-type template variable called `name` whose query is
+      // 'loki'; logPanel's default datasource ('$logs') refers to it.
+      addLog(name='logs'):: self {
+        templating+: {
+          list+: [
+            {
+              hide: 0,
+              label: null,
+              name: name,
+              options: [],
+              query: 'loki',
+              refresh: 1,
+              regex: '',
+              type: 'datasource',
+            },
+          ],
+        },
+      },
       addClusterSelectorTemplates(multi=true)::
         local d = self {
           tags: $._config.tags,
@@ -30,11 +47,10 @@ local utils = import 'mixin-utils/utils.libsonnet';
 
         if multi then
           d.addMultiTemplate('cluster', 'loki_build_info', 'cluster')
-           .addMultiTemplate('namespace', 'loki_build_info', 'namespace')
+          .addMultiTemplate('namespace', 'loki_build_info', 'namespace')
         else
           d.addTemplate('cluster', 'loki_build_info', 'cluster')
-           .addTemplate('namespace', 'loki_build_info', 'namespace'),
-
+          .addTemplate('namespace', 'loki_build_info', 'namespace'),
     },
 
   jobMatcher(job)::
@@ -42,7 +58,70 @@ local utils = import 'mixin-utils/utils.libsonnet';
 
   namespaceMatcher()::
     'cluster=~"$cluster", namespace=~"$namespace"',
-
+  // Logs panel whose single target evaluates `selector` against `datasource`.
+  logPanel(title, selector, datasource='$logs'):: {
+    title: title,
+    type: 'logs',
+    datasource: datasource,
+    targets: [
+      {
+        refId: 'A',
+        expr: selector,
+      },
+    ],
+  },
+  // Stat panel showing `metric_name` (filtered by namespaceMatcher) with the
+  // 'dateTimeFromNow' unit. The `* 1e3` presumably converts a timestamp in
+  // seconds into the milliseconds that unit expects - confirm per metric.
+  fromNowPanel(title, metric_name)::
+    $.panel(title) +
+    {
+      type: 'stat',
+      title: title,
+      fieldConfig: {
+        defaults: {
+          custom: {},
+          thresholds: {
+            mode: 'absolute',
+            steps: [
+              {
+                color: 'green',
+                value: null,
+              },
+            ],
+          },
+          color: {
+            mode: 'fixed',
+            fixedColor: 'blue',
+          },
+          unit: 'dateTimeFromNow',
+        },
+      },
+      targets: [
+        {
+          expr: '%s{%s} * 1e3' % [metric_name, $.namespaceMatcher()],
+          refId: 'A',
+          instant: true,
+          format: 'time_series',
+        },
+      ],
+      options: {
+        reduceOptions: {
+          values: false,
+          calcs: [
+            'lastNotNull',
+          ],
+          fields: '',
+        },
+        orientation: 'auto',
+        text: {},
+        textMode: 'auto',
+        colorMode: 'value',
+        graphMode: 'area',
+        justifyMode: 'auto',
+      },
+      datasource: '$datasource',
+    },
   containerCPUUsagePanel(title, containerName)::
     $.panel(title) +
     $.queryPanel([
@@ -95,4 +174,4 @@ local utils = import 'mixin-utils/utils.libsonnet';
     |||
       ignoring(%s) group_right() (label_replace(count by(%s, %s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
     ||| % [$._config.per_instance_label, $._config.per_node_label, $._config.per_instance_label, $.namespaceMatcher(), containerName],
-}
\ No newline at end of file
+}
diff --git a/production/loki-mixin/dashboards/loki-retention.libsonnet b/production/loki-mixin/dashboards/loki-retention.libsonnet
new file mode 100644
index 0000000000..beaa6bd863
--- /dev/null
+++ b/production/loki-mixin/dashboards/loki-retention.libsonnet
@@ -0,0 +1,100 @@
+local g = import 'grafana-builder/grafana.libsonnet';
+local utils = import 'mixin-utils/utils.libsonnet';
+
+// 'Loki / Retention' dashboard for the `compactor` container: resource usage,
+// compact-and-mark operations, per-table marking, sweeping, and its logs.
+(import 'dashboard-utils.libsonnet') {
+  grafanaDashboards+::
+    {
+      'loki-retention.json':
+        ($.dashboard('Loki / Retention', uid='retention'))
+        .addClusterSelectorTemplates(false)
+        .addLog()
+        .addRow(
+          $.row('Resource Usage')
+          .addPanel(
+            $.containerCPUUsagePanel('CPU', 'compactor'),
+          )
+          .addPanel(
+            $.containerMemoryWorkingSetPanel('Memory (workingset)', 'compactor'),
+          )
+          .addPanel(
+            $.goHeapInUsePanel('Memory (go heap inuse)', 'compactor'),
+          )
+
+        )
+        .addRow(
+          $.row('Compact and Mark')
+          .addPanel(
+            $.fromNowPanel('Last Compact and Mark Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds')
+          )
+          .addPanel(
+            $.panel('Compact and Mark Operations Duration') +
+            $.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) +
+            { yaxes: $.yaxes('s') },
+          )
+          .addPanel(
+            $.panel('Compact and Mark Operations Per Status') +
+            // Legend uses `status`: the only label left after `sum by (status)`.
+            $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']),
+          )
+        )
+        .addRow(
+          $.row('Per Table Marker')
+          .addPanel(
+            $.panel('Processed Tables Per Action') +
+            $.queryPanel(['count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{%s})' % $.namespaceMatcher()], ['{{action}}']) + $.stack,
+          )
+          .addPanel(
+            $.panel('Modified Tables') +
+            $.queryPanel(['count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{%s , action=~"modified|deleted"})' % $.namespaceMatcher()], ['{{table}}-{{action}}']) + $.stack,
+          )
+          .addPanel(
+            $.panel('Marks Creation Rate Per Table') +
+            $.queryPanel(['sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{%s}[$__rate_interval])) >0' % $.namespaceMatcher()], ['{{table}}']) + $.stack,
+          )
+        )
+        .addRow(
+          $.row('')
+          .addPanel(
+            $.panel('Marked Chunks (24h)') +
+            $.statPanel('sum (increase(loki_boltdb_shipper_retention_marker_count_total{%s}[24h]))' % $.namespaceMatcher(), 'short')
+          )
+          .addPanel(
+            $.panel('Mark Table Latency') +
+            $.latencyPanel('loki_boltdb_shipper_retention_marker_table_processed_duration_seconds', '{%s}' % $.namespaceMatcher())
+          )
+        )
+        .addRow(
+          $.row('Sweeper')
+          .addPanel(
+            $.panel('Delete Chunks (24h)') +
+            $.statPanel('sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{%s}[24h]))' % $.namespaceMatcher(), 'short')
+          )
+          .addPanel(
+            $.panel('Delete Latency') +
+            $.latencyPanel('loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds', '{%s}' % $.namespaceMatcher())
+          )
+        )
+        .addRow(
+          $.row('')
+          .addPanel(
+            $.fromNowPanel('Sweep Lag', 'loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time')
+          )
+          .addPanel(
+            $.panel('Marks Files to Process') +
+            $.queryPanel(['loki_boltdb_shipper_retention_sweeper_marker_files_current{%s}' % $.namespaceMatcher()], ['count']),
+          )
+          .addPanel(
+            $.panel('Delete Rate Per Status') +
+            $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']),
+          )
+        )
+        .addRow(
+          $.row('Logs')
+          .addPanel(
+            $.logPanel('Compactor Logs', '{container="compactor", %s}' % $.namespaceMatcher()),
+          )
+        ),
+    },
+}