Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/production/loki-mixin/alerts.libsonnet

73 lines
2.2 KiB

{
prometheusAlerts+:: {
groups+: [
{
name: 'loki_alerts',
rules: [
{
  // Fires when more than 10% of requests for a (namespace, job, route)
  // combination returned a 5xx status code, measured with 1m rate windows.
  alert: 'LokiRequestErrors',
  // NOTE: the content of a `|||` text block must be indented. Jsonnet strips
  // the first line's leading whitespace from every line and rejects a block
  // whose first line has no indentation.
  expr: |||
    100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
    /
    sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
    > 10
  |||,
  // `for` is a Jsonnet keyword, so the field name has to be quoted.
  // The condition must hold for 15 minutes before the alert fires.
  'for': '15m',
  labels: {
    severity: 'critical',
  },
  annotations: {
    // $value is the error percentage computed by the expression above.
    message: |||
      {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
    |||,
  },
},
{
  // Fires when loki_panic_total has increased over the last 10 minutes for
  // any (namespace, job). There is no `for` clause, so no hold period is
  // applied before firing.
  alert: 'LokiRequestPanics',
  // NOTE: the content of a `|||` text block must be indented. Jsonnet strips
  // the first line's leading whitespace from every line and rejects a block
  // whose first line has no indentation.
  expr: |||
    sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
  |||,
  labels: {
    severity: 'critical',
  },
  annotations: {
    // $value is the result of increase(), i.e. a number of panics over the
    // 10m window, not a percentage, so the message must not append '%'.
    // Two decimals are kept because increase() may return a fractional
    // value (Prometheus extrapolates over the range window).
    message: |||
      {{ $labels.job }} has had {{ printf "%.2f" $value }} panics in the last 10m.
    |||,
  },
},
{
  // Fires when the 99th-percentile request latency series exceeds 1 for a
  // (namespace, job, route). Routes whose name contains "tail"
  // (case-insensitive) are excluded by the label matcher.
  alert: 'LokiRequestLatency',
  // The metric name follows the recording-rule naming pattern; it is
  // presumably produced by a recording rule defined elsewhere in the
  // mixin. TODO confirm.
  // NOTE: the content of a `|||` text block must be indented. Jsonnet strips
  // the first line's leading whitespace from every line and rejects a block
  // whose first line has no indentation.
  expr: |||
    namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
  |||,
  // `for` is a Jsonnet keyword, so the field name has to be quoted.
  // The condition must hold for 15 minutes before the alert fires.
  'for': '15m',
  labels: {
    severity: 'critical',
  },
  annotations: {
    // The message reports $value as seconds.
    message: |||
      {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
    |||,
  },
},
{
  // Fires when the sum of loki_boltdb_shipper_compactor_running within a
  // namespace is greater than 1, i.e. more than one compactor reports as
  // running at the same time.
  alert: 'LokiTooManyCompactorsRunning',
  // NOTE: the content of a `|||` text block must be indented. Jsonnet strips
  // the first line's leading whitespace from every line and rejects a block
  // whose first line has no indentation.
  expr: |||
    sum(loki_boltdb_shipper_compactor_running) by (namespace) > 1
  |||,
  // `for` is a Jsonnet keyword, so the field name has to be quoted.
  // The 5m hold period matches the "more than 5m" wording in the message.
  'for': '5m',
  labels: {
    severity: 'warning',
  },
  annotations: {
    // $value is the summed gauge, printed without decimals.
    message: |||
      {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
    |||,
  },
},
],
},
],
},
}