Import Canary dashboard into Loki mixin. (#7304)

Summary:
This imports our canary into the Loki mixin so it can be used by the
Helm chart. The dashboard is disabled by default. It can be enabled by
setting

```jsonnet
{
  _config+:: {
    canary+: {
      enabled: true,
    },
  },
}
```

Co-authored-by: Vladyslav Diachenko <82767850+vlad-diachenko@users.noreply.github.com>
pull/7376/head
Karsten Jeschkies 3 years ago committed by GitHub
parent a7c7f075e2
commit 6d495a393e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      production/loki-mixin/dashboards.libsonnet
  2. 151
      production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet
  3. 9
      production/loki-mixin/jsonnetfile.json
  4. 10
      production/loki-mixin/jsonnetfile.lock.json

@ -8,4 +8,5 @@
(import 'dashboards/loki-writes-resources.libsonnet') + (import 'dashboards/loki-writes-resources.libsonnet') +
(import 'dashboards/loki-reads-resources.libsonnet') + (import 'dashboards/loki-reads-resources.libsonnet') +
(import 'dashboards/loki-deletion.libsonnet') + (import 'dashboards/loki-deletion.libsonnet') +
(import 'dashboards/loki-canary-dashboard.libsonnet') +
(import 'dashboards/recording-rules.libsonnet') (import 'dashboards/recording-rules.libsonnet')

@ -0,0 +1,151 @@
local vendor_config = import 'github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet';
local vendor_utils = import 'github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet';
local g = import 'grafana-builder/grafana.libsonnet';
local grafana = import 'grafonnet/grafana.libsonnet';
{
_config+:: {
canary+: {
enabled: false,
},
},
grafanaDashboards+: if !$._config.canary.enabled then {} else {
local dashboard = (
vendor_utils {
_config:: vendor_config._config + $._config {
product: 'Loki',
dashboard_prefix: 'Loki / ',
tags: ['loki'],
},
}
),
'loki-canary.json':
// The dashboard() function automatically adds the "Loki / " prefix to the dashboard title.
// This logic is inherited from mimir-mixin.
dashboard.dashboard('Canary')
// We can't make use of simplified template selectors from the loki dashboard utils until we port the cortex dashboard utils panel/grid functionality.
.addTemplate('cluster', 'loki_build_info', 'cluster')
.addTemplate('namespace', 'loki_build_info{cluster=~"$cluster"}', 'namespace')
+ {
// This dashboard uses the new grid system in order to place panels (using gridPos).
// Because of this we can't use the mixin's addRow() and addPanel().
schemaVersion: 27,
rows: null,
// ugly hack, copy pasta the tag/link
// code from the loki-mixin
tags: ['loki'],
links: [
{
asDropdown: true,
icon: 'external link',
includeVars: true,
keepTime: true,
tags: $._config.tags,
targetBlank: false,
title: 'Loki Dashboards',
type: 'dashboards',
},
],
panels: [
// grid row 1
dashboard.panel('Canary Entries Total') +
dashboard.newStatPanel('sum(count(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 0 } },
dashboard.panel('Canary Logs Total') +
dashboard.newStatPanel('sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 0 } },
dashboard.panel('Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 0 } },
dashboard.panel('Spotcheck Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 9, y: 0 } },
// grid row 2
dashboard.panel('Spotcheck Total') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 4 } },
dashboard.panel('Metric Test Error %') +
dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"}) - sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))/(sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))) * 100') +
{ gridPos: { h: 4, w: 3, x: 3, y: 4 } },
dashboard.panel('Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 6, y: 4 } },
dashboard.panel('Spotcheck Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))) * 100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 4 } },
// grid row 3
dashboard.panel('Metric Test Expected') +
dashboard.newStatPanel('sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 8 } },
dashboard.panel('Metric Test Actual') +
dashboard.newStatPanel('sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 8 } },
dashboard.panel('Websocket Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 8 } },
dashboard.panel('Websocket Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 8 } },
// end of grid
dashboard.panel('Log Write to read Latency Percentiles') +
dashboard.queryPanel([
'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
], ['p95', 'p50']) +
{ gridPos: { h: 6, w: 12, x: 12, y: 0 } },
grafana.heatmapPanel.new(
'Log Write to Read Latency',
datasource='$datasource',
tooltip_showHistogram=true,
color_colorScheme='interpolateReds',
legend_show=false,
).addTargets(
[
grafana.prometheus.target(
'sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le)',
legendFormat='{{le}}',
format='heatmap',
),
],
) +
{ gridPos: { h: 6, w: 12, x: 12, y: 12 } },
dashboard.panel('Spot Check Query') +
dashboard.queryPanel([
'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
], ['p99', 'p95']) +
{ gridPos: { h: 6, w: 12, x: 0, y: 14 } },
dashboard.panel('Metric Test Query') +
dashboard.queryPanel([
'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
], ['p99', 'p95'],) +
{ gridPos: { h: 6, w: 12, x: 12, y: 14 } },
dashboard.panel('Spot Check Missing %') +
dashboard.queryPanel('topk(20, (sum by (cluster, pod) (increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod) (increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
{ gridPos: { h: 6, w: 12, x: 0, y: 20 } },
g.panel('Missing logs') +
g.queryPanel('topk(20,(sum by (cluster, pod)(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod)(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ cluster }} {{ pod }}') +
{ gridPos: { h: 6, w: 12, x: 12, y: 20 } },
],
},
},
}

@ -28,6 +28,15 @@
}, },
"version": "master" "version": "master"
}, },
{
"source": {
"git": {
"remote": "https://github.com/grafana/mimir.git",
"subdir": "operations/mimir-mixin"
}
},
"version": "main"
},
{ {
"source": { "source": {
"git": { "git": {

@ -31,6 +31,16 @@
"version": "3f71e00a64810075b5d5f969cc6d0e419cbdebc4", "version": "3f71e00a64810075b5d5f969cc6d0e419cbdebc4",
"sum": "v6fuqqQp9rHZbsxN9o79QzOpUlwYZEJ84DxTCZMCYeU=" "sum": "v6fuqqQp9rHZbsxN9o79QzOpUlwYZEJ84DxTCZMCYeU="
}, },
{
"source": {
"git": {
"remote": "https://github.com/grafana/mimir.git",
"subdir": "operations/mimir-mixin"
}
},
"version": "91986521f324c84a9cf869529bd901f077ddf8bc",
"sum": "eBp1Oo3j0YiI5hv9YrZb0lJQxEOC17rP3pZiKM/R3Zo="
},
{ {
"source": { "source": {
"git": { "git": {

Loading…
Cancel
Save