Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
loki/pkg/validation/exporter.go

98 lines
2.4 KiB

package validation
import (
"reflect"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/grafana/loki/v3/pkg/util/flagext"
)
// ExportedLimits is the view of the limits configuration that
// OverridesExporter reads from when it collects metrics.
type ExportedLimits interface {
	// DefaultLimits returns the limits used as the baseline; the exporter
	// reports them under the defaults metric and compares tenant values
	// against them.
	DefaultLimits() *Limits

	// AllByUserID returns the per-tenant limits. The map key is emitted as
	// the "user" label of the tenant metric.
	AllByUserID() map[string]*Limits
}
// OverridesExporter is a Prometheus collector that publishes limit values
// obtained from an ExportedLimits source.
type OverridesExporter struct {
	// overrides is queried for default and per-tenant limits on every Collect.
	overrides ExportedLimits

	// tenantDesc describes the per-tenant metric and defaultsDesc the
	// defaults metric; both are sent by Describe.
	tenantDesc, defaultsDesc *prometheus.Desc
}
// NewOverridesExporter builds an OverridesExporter that reads limits from
// overrides. It creates two metric descriptors: "loki_overrides", labelled by
// limit name and user, and "loki_overrides_defaults", labelled by limit name.
//
// TODO(jordanrushing): break out overrides from defaults?
func NewOverridesExporter(overrides ExportedLimits) *OverridesExporter {
	perTenant := prometheus.NewDesc(
		"loki_overrides",
		"Resource limit overrides applied to tenants",
		[]string{"limit_name", "user"},
		nil,
	)
	defaults := prometheus.NewDesc(
		"loki_overrides_defaults",
		"Default values for resource limit overrides applied to tenants",
		[]string{"limit_name"},
		nil,
	)

	exporter := &OverridesExporter{
		overrides:    overrides,
		tenantDesc:   perTenant,
		defaultsDesc: defaults,
	}
	return exporter
}
// Describe sends the descriptors of both metrics produced by this exporter to
// ch: the per-tenant descriptor first, then the defaults descriptor.
func (oe *OverridesExporter) Describe(ch chan<- *prometheus.Desc) {
	descs := [...]*prometheus.Desc{oe.tenantDesc, oe.defaultsDesc}
	for _, d := range descs {
		ch <- d
	}
}
func (oe *OverridesExporter) Collect(ch chan<- prometheus.Metric) {
extract := func(val reflect.Value, i int) (float64, bool) {
switch val.Field(i).Interface().(type) {
case int, time.Duration:
return float64(val.Field(i).Int()), true
case model.Duration:
return float64(val.Field(i).Interface().(model.Duration)), true
config: adds `frontend.max-query-capacity` to tune per-tenant query capacity (#11284) **What this PR does / why we need it**: Adds a new config `frontend.max-query-capacity` that allows users to configure what portion of the the available querier replicas can be used by a tenant. `max_query_capacity` is the corresponding YAML option that can be configured in limits or runtime overrides. For example, setting this to 0.5 would allow a tenant to use half of the available queriers. This complements the existing `frontend.max-queriers-per-tenant`. When both are configured, the smaller value of the resulting querier replica count is considered: ``` min(frontend.max-queriers-per-tenant, ceil(querier_replicas * frontend.max-query-capacity)) ``` *All* queriers will handle requests for a tenant if neither limits are applied. **Which issue(s) this PR fixes**: Fixes #<issue number> **Special notes for your reviewer**: noticed that we don't pass down the shuffle sharding limits for frontend (only using it with schedulers) https://github.com/grafana/loki/blob/26f097162a856db48ecbd16bef2f0b750029855b/pkg/loki/modules.go#L895 but the [docs](https://github.com/grafana/loki/blob/26f097162a856db48ecbd16bef2f0b750029855b/pkg/validation/limits.go#L276) mention that`frontend.max-queriers-per-tenant` applies to frontend as well. ``` This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL. 
``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [x] Documentation added - [x] Tests updated - [x] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) --------- Co-authored-by: J Stickler <julie.stickler@grafana.com> Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
2 years ago
case uint, flagext.ByteSize:
return float64(val.Field(i).Uint()), true
case float64:
return val.Field(i).Float(), true
case bool:
v := 0.0
if val.Field(i).Bool() {
v = 1.0
}
return v, true
default:
return 0, false
}
}
defs := reflect.ValueOf(oe.overrides.DefaultLimits()).Elem()
for i := 0; i < defs.NumField(); i++ {
if v, ok := extract(defs, i); ok {
metricLabelValue := defs.Type().Field(i).Tag.Get("yaml")
ch <- prometheus.MustNewConstMetric(oe.defaultsDesc, prometheus.GaugeValue, v, metricLabelValue)
}
}
for tenant, limits := range oe.overrides.AllByUserID() {
rv := reflect.ValueOf(limits).Elem()
for i := 0; i < rv.NumField(); i++ {
v, ok := extract(rv, i)
// Only report fields which are explicitly overridden
if !ok || rv.Field(i).Interface() == defs.Field(i).Interface() {
continue
}
metricLabelValue := rv.Type().Field(i).Tag.Get("yaml")
ch <- prometheus.MustNewConstMetric(oe.tenantDesc, prometheus.GaugeValue, v, metricLabelValue, tenant)
}
}
}