mirror of https://github.com/grafana/loki
Import Canary dashboard into Loki mixin. (#7304)
Summary: This imports our canary dashboard into the Loki mixin so it can be used by the Helm chart. The dashboard is disabled by default. It can be enabled by setting ```jsonnet { _config+:: { canary+: { enabled: true, }, }, } ``` Co-authored-by: Vladyslav Diachenko <82767850+vlad-diachenko@users.noreply.github.com> pull/7376/head
parent
a7c7f075e2
commit
6d495a393e
@ -0,0 +1,151 @@ |
|||||||
|
local vendor_config = import 'github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet'; |
||||||
|
local vendor_utils = import 'github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet'; |
||||||
|
local g = import 'grafana-builder/grafana.libsonnet'; |
||||||
|
local grafana = import 'grafonnet/grafana.libsonnet'; |
||||||
|
|
||||||
|
// Loki Canary dashboard for the Loki mixin.
//
// The dashboard is opt-in: it is only added to `grafanaDashboards` when
// `_config.canary.enabled` is true. Enable it with:
//
//   { _config+:: { canary+: { enabled: true } } }
{
  _config+:: {
    canary+: {
      // When false (the default) this file contributes no dashboards.
      enabled: false,
    },
  },

  grafanaDashboards+: if !$._config.canary.enabled then {} else {
    // Dashboard helpers vendored from mimir-mixin. Their `_config` is the
    // vendored default config overlaid with this mixin's `_config`, then
    // overridden with the Loki product name, title prefix and tags.
    local dashboard = (
      vendor_utils {
        _config:: vendor_config._config + $._config {
          product: 'Loki',
          dashboard_prefix: 'Loki / ',
          tags: ['loki'],
        },
      }
    ),

    'loki-canary.json':
      // The dashboard() function automatically adds the "Loki / " prefix to the dashboard title.
      // This logic is inherited from mimir-mixin.
      dashboard.dashboard('Canary')
      // We can't make use of simplified template selectors from the loki dashboard utils
      // until we port the cortex dashboard utils panel/grid functionality.
      .addTemplate('cluster', 'loki_build_info', 'cluster')
      .addTemplate('namespace', 'loki_build_info{cluster=~"$cluster"}', 'namespace')
      + {
        // This dashboard uses the new grid system in order to place panels (using gridPos).
        // Because of this we can't use the mixin's addRow() and addPanel(), so `rows` is
        // cleared and `panels` is populated directly.
        schemaVersion: 27,
        rows: null,

        // Workaround: the tag and link definitions below are copied from the loki-mixin
        // rather than shared with it.
        tags: ['loki'],
        links: [
          {
            asDropdown: true,
            icon: 'external link',
            includeVars: true,
            keepTime: true,
            // NOTE(review): reads `tags` from the enclosing mixin's `_config`, not from the
            // local `dashboard` helper's `_config` — confirm the mixin config defines it.
            tags: $._config.tags,
            targetBlank: false,
            title: 'Loki Dashboards',
            type: 'dashboards',
          },
        ],

        panels: [
          // ---- Stat grid, row 1 (y = 0): four 3-wide stat panels ----
          dashboard.panel('Canary Entries Total') +
          dashboard.newStatPanel('sum(count(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 0, y: 0 } },

          dashboard.panel('Canary Logs Total') +
          dashboard.newStatPanel('sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 3, y: 0 } },

          dashboard.panel('Missing') +
          dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 6, y: 0 } },

          dashboard.panel('Spotcheck Missing') +
          dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 9, y: 0 } },

          // ---- Stat grid, row 2 (y = 4) ----
          dashboard.panel('Spotcheck Total') +
          dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 0, y: 4 } },

          // (expected - actual) / actual, as a percentage.
          dashboard.panel('Metric Test Error %') +
          dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"}) - sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))/(sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))) * 100') +
          { gridPos: { h: 4, w: 3, x: 3, y: 4 } },

          dashboard.panel('Missing %') +
          dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
          { gridPos: { h: 4, w: 3, x: 6, y: 4 } },

          dashboard.panel('Spotcheck Missing %') +
          dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))) * 100') +
          { gridPos: { h: 4, w: 3, x: 9, y: 4 } },

          // ---- Stat grid, row 3 (y = 8) ----
          dashboard.panel('Metric Test Expected') +
          dashboard.newStatPanel('sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
          { gridPos: { h: 4, w: 3, x: 0, y: 8 } },

          dashboard.panel('Metric Test Actual') +
          dashboard.newStatPanel('sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
          { gridPos: { h: 4, w: 3, x: 3, y: 8 } },

          dashboard.panel('Websocket Missing') +
          dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
          { gridPos: { h: 4, w: 3, x: 6, y: 8 } },

          dashboard.panel('Websocket Missing %') +
          dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
          { gridPos: { h: 4, w: 3, x: 9, y: 8 } },
          // ---- end of stat grid ----

          // ---- Right-hand column (x = 12): write-to-read latency ----
          dashboard.panel('Log Write to read Latency Percentiles') +
          dashboard.queryPanel([
            'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
            'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
          ], ['p95', 'p50']) +
          { gridPos: { h: 6, w: 12, x: 12, y: 0 } },

          // Heatmap of the same latency histogram, one series per `le` bucket.
          grafana.heatmapPanel.new(
            'Log Write to Read Latency',
            datasource='$datasource',
            tooltip_showHistogram=true,
            color_colorScheme='interpolateReds',
            legend_show=false,
          ).addTargets(
            [
              grafana.prometheus.target(
                'sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le)',
                legendFormat='{{le}}',
                format='heatmap',
              ),
            ],
          ) +
          { gridPos: { h: 6, w: 12, x: 12, y: 12 } },

          // ---- Query-duration panels (y = 14) ----
          dashboard.panel('Spot Check Query') +
          dashboard.queryPanel([
            'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
            'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
            // Legends must line up with the quantiles above: the second query is the median.
          ], ['p99', 'p50']) +
          { gridPos: { h: 6, w: 12, x: 0, y: 14 } },

          // Uses a fixed 15m rate window rather than $__rate_interval.
          dashboard.panel('Metric Test Query') +
          dashboard.queryPanel([
            'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
            'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
            // Legends must line up with the quantiles above: the second query is the median.
          ], ['p99', 'p50'],) +
          { gridPos: { h: 6, w: 12, x: 12, y: 14 } },

          // ---- Per-pod missing-entry ratios (y = 20), top 20 non-zero series ----
          dashboard.panel('Spot Check Missing %') +
          dashboard.queryPanel('topk(20, (sum by (cluster, pod) (increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod) (increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
          { gridPos: { h: 6, w: 12, x: 0, y: 20 } },

          // This panel is built with the grafana-builder helpers (`g`) instead of the
          // vendored `dashboard` helpers used by every other panel.
          g.panel('Missing logs') +
          g.queryPanel('topk(20,(sum by (cluster, pod)(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod)(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ cluster }} {{ pod }}') +
          { gridPos: { h: 6, w: 12, x: 12, y: 20 } },
        ],
      },
  },
}
Loading…
Reference in new issue