Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/production/promtail-mixin/alerts.libsonnet

74 lines
2.3 KiB

{
prometheusAlerts+:: {
groups+: [
{
name: 'promtail_alerts',
rules: [
{
alert: 'PromtailRequestsErrors',
expr: |||
100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance)
/
sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance)
> 10
|||,
'for': '15m',
labels: {
severity: 'critical',
},
annotations: {
message: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
|||,
},
},
{
alert: 'PromtailRequestLatency',
expr: |||
job_status_code_namespace:promtail_request_duration_seconds:99quantile > 1
|||,
'for': '15m',
labels: {
severity: 'critical',
},
annotations: {
message: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
|||,
},
},
{
alert: 'PromtailFileLagging',
expr: |||
abs(promtail_file_bytes_total - promtail_read_bytes_total) > 1e6
|||,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
message: |||
{{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} has been lagging by more than 1MB for more than 15m.
|||,
},
},
{
alert: 'PromtailFileMissing',
expr: |||
promtail_file_bytes_total unless promtail_read_bytes_total
|||,
'for': '15m',
labels: {
severity: 'critical',
},
annotations: {
message: |||
{{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} matches the glob but is not being tailed.
|||,
},
},
],
},
],
},
}