[Mixin] Update clusterlabel usage (#8247)

Signed-off-by: Whyeasy <Whyeasy@users.noreply.github.com>
pull/8402/head
Charlie N 2 years ago committed by GitHub
parent 027c2ef957
commit 340f62ac7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      CHANGELOG.md
  2. 2
      production/loki-mixin-compiled-ssd/alerts.yaml
  3. 6
      production/loki-mixin-compiled-ssd/dashboards/loki-reads.json
  4. 6
      production/loki-mixin-compiled-ssd/dashboards/loki-writes.json
  5. 84
      production/loki-mixin-compiled-ssd/rules.yaml
  6. 2
      production/loki-mixin-compiled/alerts.yaml
  7. 24
      production/loki-mixin-compiled/dashboards/loki-reads.json
  8. 18
      production/loki-mixin-compiled/dashboards/loki-writes.json
  9. 84
      production/loki-mixin-compiled/rules.yaml
  10. 4
      production/loki-mixin/alerts.libsonnet
  11. 17
      production/loki-mixin/dashboards/loki-reads.libsonnet
  12. 14
      production/loki-mixin/dashboards/loki-writes.libsonnet
  13. 6
      production/loki-mixin/recording_rules.libsonnet

@ -76,6 +76,10 @@
#### Jsonnet
* [7923](https://github.com/grafana/loki/pull/7923) **manohar-koukuntla**: Add zone aware ingesters in jsonnet deployment
##### Fixes
* [8247](https://github.com/grafana/loki/pull/8247) **Whyeasy**: Fix usage of cluster label within Mixin.
#### Build
* [7938](https://github.com/grafana/loki/pull/7938) **ssncferreira**: Add DroneCI pipeline step to validate configuration flags documentation generation.

@ -26,7 +26,7 @@ groups:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
expr: |
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
labels:
severity: critical

@ -142,7 +142,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 99th Percentile",
@ -150,7 +150,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 50th Percentile",
@ -158,7 +158,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} Average",

@ -142,7 +142,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
@ -150,7 +150,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
@ -158,7 +158,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})",
"expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",

@ -2,48 +2,52 @@ groups:
- name: loki_rules
rules:
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:99quantile
by (le, cluster, job))
record: cluster_job:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job)
record: job:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)
record: job:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
record: job:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job)
record: job:loki_request_duration_seconds_count:sum_rate
by (le, cluster, job))
record: cluster_job:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (cluster, job)
record: cluster_job:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)
record: cluster_job:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job)
record: cluster_job:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job)
record: cluster_job:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:99quantile
by (le, cluster, job, route))
record: cluster_job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job, route)
record: job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)
record: job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_count:sum_rate
by (le, cluster, job, route))
record: cluster_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job,
route)
record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:99quantile
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job,
route)
record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_count:sum_rate
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster,
namespace, job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate

@ -26,7 +26,7 @@ groups:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
expr: |
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
labels:
severity: critical

@ -142,7 +142,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 99th Percentile",
@ -150,7 +150,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 50th Percentile",
@ -158,7 +158,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} Average",
@ -330,7 +330,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 99th Percentile",
@ -338,7 +338,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 50th Percentile",
@ -346,7 +346,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} Average",
@ -518,7 +518,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 99th Percentile",
@ -526,7 +526,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 50th Percentile",
@ -534,7 +534,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"}) by (route) ",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} Average",
@ -706,7 +706,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 99th Percentile",
@ -714,7 +714,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} 50th Percentile",
@ -722,7 +722,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"}) by (route) ",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ route }} Average",

@ -142,7 +142,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
@ -150,7 +150,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
@ -158,7 +158,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})",
"expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
@ -330,7 +330,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
@ -338,7 +338,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
@ -346,7 +346,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"}) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
@ -518,7 +518,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "99th Percentile",
@ -526,7 +526,7 @@
"step": 10
},
{
"expr": "histogram_quantile(0.50, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "50th Percentile",
@ -534,7 +534,7 @@
"step": 10
},
{
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"}) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})",
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",

@ -2,48 +2,52 @@ groups:
- name: loki_rules
rules:
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:99quantile
by (le, cluster, job))
record: cluster_job:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job)
record: job:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)
record: job:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
record: job:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job)
record: job:loki_request_duration_seconds_count:sum_rate
by (le, cluster, job))
record: cluster_job:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (cluster, job)
record: cluster_job:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)
record: cluster_job:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job)
record: cluster_job:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job)
record: cluster_job:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:99quantile
by (le, cluster, job, route))
record: cluster_job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job, route)
record: job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)
record: job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_count:sum_rate
by (le, cluster, job, route))
record: cluster_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job,
route)
record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:99quantile
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job,
route)
record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_count:sum_rate
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster,
namespace, job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate

@ -39,8 +39,8 @@
{
alert: 'LokiRequestLatency',
expr: |||
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
|||,
%s_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
||| % $._config.per_cluster_label,
'for': '15m',
labels: {
severity: 'critical',

@ -56,8 +56,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-reads.json'].matchers.cortexgateway + [utils.selector.re('route', http_routes)],
extra_selectors=dashboards['loki-reads.json'].clusterMatchers,
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.cortexgateway + [utils.selector.re('route', http_routes)],
sum_by=['route']
)
)
@ -72,8 +71,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-reads.json'].matchers.queryFrontend + [utils.selector.re('route', http_routes)],
extra_selectors=dashboards['loki-reads.json'].clusterMatchers,
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.queryFrontend + [utils.selector.re('route', http_routes)],
sum_by=['route']
)
)
@ -89,8 +87,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-reads.json'].matchers.querier + [utils.selector.re('route', http_routes)],
extra_selectors=dashboards['loki-reads.json'].clusterMatchers,
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.querier + [utils.selector.re('route', http_routes)],
sum_by=['route']
)
)
@ -106,8 +103,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-reads.json'].matchers.ingester + [utils.selector.re('route', grpc_routes)],
extra_selectors=dashboards['loki-reads.json'].clusterMatchers,
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.ingester + [utils.selector.re('route', grpc_routes)],
sum_by=['route']
)
)
@ -124,8 +120,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-reads.json'].matchers.ingesterZoneAware + [utils.selector.re('route', grpc_routes)],
extra_selectors=dashboards['loki-reads.json'].clusterMatchers,
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.ingesterZoneAware + [utils.selector.re('route', grpc_routes)],
sum_by=['route']
)
)
@ -153,7 +148,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_bigtable_request_duration_seconds',
dashboards['loki-reads.json'].matchers.querier + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')]
dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.querier + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')]
)
)
)

@ -49,8 +49,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-writes.json'].matchers.cortexgateway + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push')],
extra_selectors=dashboards['loki-writes.json'].clusterMatchers
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.cortexgateway + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push')],
)
)
)
@ -64,8 +63,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-writes.json'].matchers.distributor,
extra_selectors=dashboards['loki-writes.json'].clusterMatchers
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.distributor,
)
)
)
@ -80,8 +78,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-writes.json'].matchers.ingester_zone + [utils.selector.eq('route', '/logproto.Pusher/Push')],
extra_selectors=dashboards['loki-writes.json'].clusterMatchers
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester_zone + [utils.selector.eq('route', '/logproto.Pusher/Push')],
)
)
)
@ -97,8 +94,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_request_duration_seconds',
dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('route', '/logproto.Pusher/Push')],
extra_selectors=dashboards['loki-writes.json'].clusterMatchers
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('route', '/logproto.Pusher/Push')],
)
)
)
@ -125,7 +121,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Latency') +
utils.latencyRecordingRulePanel(
'loki_bigtable_request_duration_seconds',
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')]
dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')]
)
)
)

@ -5,9 +5,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
groups+: [{
name: 'loki_rules',
rules:
utils.histogramRules('loki_request_duration_seconds', ['job']) +
utils.histogramRules('loki_request_duration_seconds', ['job', 'route']) +
utils.histogramRules('loki_request_duration_seconds', ['namespace', 'job', 'route']),
utils.histogramRules('loki_request_duration_seconds', [$._config.per_cluster_label, 'job']) +
utils.histogramRules('loki_request_duration_seconds', [$._config.per_cluster_label, 'job', 'route']) +
utils.histogramRules('loki_request_duration_seconds', [$._config.per_cluster_label, 'namespace', 'job', 'route']),
}],
},
}

Loading…
Cancel
Save