Change default of metrics.namespace (#11110)

**What this PR does / why we need it**:

Change the default value of the metrics namespace from "cortex" to "loki".
After this change, all Loki metrics should start with "loki_" by default.
The following metrics are renamed (listed here under their previous names):

cortex_distributor_ingester_clients
cortex_dns_failures_total
cortex_dns_lookups_total
cortex_dns_provider_results
cortex_frontend_query_range_duration_seconds_bucket
cortex_frontend_query_range_duration_seconds_count
cortex_frontend_query_range_duration_seconds_sum
cortex_ingester_flush_queue_length
cortex_kv_request_duration_seconds_bucket
cortex_kv_request_duration_seconds_count
cortex_kv_request_duration_seconds_sum
cortex_member_consul_heartbeats_total
cortex_prometheus_last_evaluation_samples
cortex_prometheus_notifications_alertmanagers_discovered
cortex_prometheus_notifications_dropped_total
cortex_prometheus_notifications_errors_total
cortex_prometheus_notifications_latency_seconds
cortex_prometheus_notifications_latency_seconds_count
cortex_prometheus_notifications_latency_seconds_sum
cortex_prometheus_notifications_queue_capacity
cortex_prometheus_notifications_queue_length
cortex_prometheus_notifications_sent_total
cortex_prometheus_rule_evaluation_duration_seconds
cortex_prometheus_rule_evaluation_duration_seconds_count
cortex_prometheus_rule_evaluation_duration_seconds_sum
cortex_prometheus_rule_evaluation_failures_total
cortex_prometheus_rule_evaluations_total
cortex_prometheus_rule_group_duration_seconds
cortex_prometheus_rule_group_duration_seconds_count
cortex_prometheus_rule_group_duration_seconds_sum
cortex_prometheus_rule_group_interval_seconds
cortex_prometheus_rule_group_iterations_missed_total
cortex_prometheus_rule_group_iterations_total
cortex_prometheus_rule_group_last_duration_seconds
cortex_prometheus_rule_group_last_evaluation_timestamp_seconds
cortex_prometheus_rule_group_rules
cortex_query_frontend_connected_schedulers
cortex_query_frontend_queries_in_progress
cortex_query_frontend_retries_bucket
cortex_query_frontend_retries_count
cortex_query_frontend_retries_sum
cortex_query_scheduler_connected_frontend_clients
cortex_query_scheduler_connected_querier_clients
cortex_query_scheduler_inflight_requests
cortex_query_scheduler_inflight_requests_count
cortex_query_scheduler_inflight_requests_sum
cortex_query_scheduler_queue_duration_seconds_bucket
cortex_query_scheduler_queue_duration_seconds_count
cortex_query_scheduler_queue_duration_seconds_sum
cortex_query_scheduler_queue_length
cortex_query_scheduler_running
cortex_quota_cgroup_cpu_max
cortex_quota_cgroup_cpu_period
cortex_quota_cpu_count
cortex_quota_gomaxprocs
cortex_ring_member_heartbeats_total
cortex_ring_member_tokens_owned
cortex_ring_member_tokens_to_own
cortex_ring_members
cortex_ring_oldest_member_timestamp
cortex_ring_tokens_total
cortex_ruler_client_request_duration_seconds_bucket
cortex_ruler_client_request_duration_seconds_count
cortex_ruler_client_request_duration_seconds_sum
cortex_ruler_clients
cortex_ruler_config_last_reload_successful
cortex_ruler_config_last_reload_successful_seconds
cortex_ruler_config_updates_total
cortex_ruler_managers_total
cortex_ruler_ring_check_errors_total
cortex_ruler_sync_rules_total


**Checklist**
- [X] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [X] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add
`add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in
`production/helm/loki/Chart.yaml` and update
`production/helm/loki/CHANGELOG.md` and
`production/helm/loki/README.md`. [Example
PR](d10549e3ec)
- [ ] If the change is deprecating or removing a configuration option,
update the `deprecated-config.yaml` and `deleted-config.yaml` files
respectively in the `tools/deprecated-config-checker` directory.
[Example
PR](0d4416a4b0)

---------

Signed-off-by: Michel Hollands <michel.hollands@gmail.com>
Co-authored-by: Ashwanth <iamashwanth@gmail.com>
pull/11104/head
Michel Hollands 2 years ago committed by GitHub
parent e93f5bfd93
commit 380f902c30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGELOG.md
  2. 3
      cmd/migrate/main.go
  3. 3
      docs/sources/configure/_index.md
  4. 79
      docs/sources/setup/upgrade/_index.md
  5. 4
      integration/loki_micro_services_test.go
  6. 2
      pkg/loki/loki.go

@ -6,6 +6,7 @@
##### Enhancements
* [11110](https://github.com/grafana/loki/pull/11110) **MichelHollands**: Change the default of the `metrics-namespace` flag to 'loki'.
* [11086](https://github.com/grafana/loki/pull/11086) **kandrew5**: Helm: Allow topologySpreadConstraints
* [11003](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Add the `metrics-namespace` flag to change the namespace of metrics currently using cortex as namespace.
* [10096](https://github.com/grafana/loki/pull/10096) **aschleck**: Storage: Allow setting a constant prefix for all created keys

@ -24,6 +24,7 @@ import (
"github.com/grafana/loki/pkg/storage/config"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper"
"github.com/grafana/loki/pkg/util/cfg"
"github.com/grafana/loki/pkg/util/constants"
util_log "github.com/grafana/loki/pkg/util/log"
"github.com/grafana/loki/pkg/validation"
)
@ -48,7 +49,7 @@ func main() {
batch := flag.Int("batchLen", 500, "Specify how many chunks to read/write in one batch")
shardBy := flag.Duration("shardBy", 6*time.Hour, "Break down the total interval into shards of this size, making this too small can lead to syncing a lot of duplicate chunks")
parallel := flag.Int("parallel", 8, "How many parallel threads to process each shard")
metricsNamespace := flag.String("metrics.namespace", "cortex", "Namespace of the generated metrics")
metricsNamespace := flag.String("metrics.namespace", constants.Loki, "Namespace of the generated metrics")
flag.Parse()
go func() {

@ -226,8 +226,9 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set
[shutdown_delay: <duration> | default = 0s]
# Namespace of the metrics that in previous releases had cortex as namespace.
# This setting is deprecated and will be removed in the next minor release.
# CLI flag: -metrics-namespace
[metrics_namespace: <string> | default = "cortex"]
[metrics_namespace: <string> | default = "loki"]
```
### server

@ -182,6 +182,85 @@ If you are using a [legacy index type]({{< relref "../../storage#index-storage" >}})
- Already deprecated metric `querier_cache_stale_gets_total` is now removed.
#### Metrics namespace
Some Loki metrics used to start with the prefix `cortex_`. In this release they are renamed so that they start with `loki_`. To keep the `cortex_` prefix, change the `metrics_namespace` setting from its new default `loki` to `cortex`. The following metrics are affected:
- `cortex_distributor_ingester_clients`
- `cortex_dns_failures_total`
- `cortex_dns_lookups_total`
- `cortex_dns_provider_results`
- `cortex_frontend_query_range_duration_seconds_bucket`
- `cortex_frontend_query_range_duration_seconds_count`
- `cortex_frontend_query_range_duration_seconds_sum`
- `cortex_ingester_flush_queue_length`
- `cortex_kv_request_duration_seconds_bucket`
- `cortex_kv_request_duration_seconds_count`
- `cortex_kv_request_duration_seconds_sum`
- `cortex_member_consul_heartbeats_total`
- `cortex_prometheus_last_evaluation_samples`
- `cortex_prometheus_notifications_alertmanagers_discovered`
- `cortex_prometheus_notifications_dropped_total`
- `cortex_prometheus_notifications_errors_total`
- `cortex_prometheus_notifications_latency_seconds`
- `cortex_prometheus_notifications_latency_seconds_count`
- `cortex_prometheus_notifications_latency_seconds_sum`
- `cortex_prometheus_notifications_queue_capacity`
- `cortex_prometheus_notifications_queue_length`
- `cortex_prometheus_notifications_sent_total`
- `cortex_prometheus_rule_evaluation_duration_seconds`
- `cortex_prometheus_rule_evaluation_duration_seconds_count`
- `cortex_prometheus_rule_evaluation_duration_seconds_sum`
- `cortex_prometheus_rule_evaluation_failures_total`
- `cortex_prometheus_rule_evaluations_total`
- `cortex_prometheus_rule_group_duration_seconds`
- `cortex_prometheus_rule_group_duration_seconds_count`
- `cortex_prometheus_rule_group_duration_seconds_sum`
- `cortex_prometheus_rule_group_interval_seconds`
- `cortex_prometheus_rule_group_iterations_missed_total`
- `cortex_prometheus_rule_group_iterations_total`
- `cortex_prometheus_rule_group_last_duration_seconds`
- `cortex_prometheus_rule_group_last_evaluation_timestamp_seconds`
- `cortex_prometheus_rule_group_rules`
- `cortex_query_frontend_connected_schedulers`
- `cortex_query_frontend_queries_in_progress`
- `cortex_query_frontend_retries_bucket`
- `cortex_query_frontend_retries_count`
- `cortex_query_frontend_retries_sum`
- `cortex_query_scheduler_connected_frontend_clients`
- `cortex_query_scheduler_connected_querier_clients`
- `cortex_query_scheduler_inflight_requests`
- `cortex_query_scheduler_inflight_requests_count`
- `cortex_query_scheduler_inflight_requests_sum`
- `cortex_query_scheduler_queue_duration_seconds_bucket`
- `cortex_query_scheduler_queue_duration_seconds_count`
- `cortex_query_scheduler_queue_duration_seconds_sum`
- `cortex_query_scheduler_queue_length`
- `cortex_query_scheduler_running`
- `cortex_quota_cgroup_cpu_max`
- `cortex_quota_cgroup_cpu_period`
- `cortex_quota_cpu_count`
- `cortex_quota_gomaxprocs`
- `cortex_ring_member_heartbeats_total`
- `cortex_ring_member_tokens_owned`
- `cortex_ring_member_tokens_to_own`
- `cortex_ring_members`
- `cortex_ring_oldest_member_timestamp`
- `cortex_ring_tokens_total`
- `cortex_ruler_client_request_duration_seconds_bucket`
- `cortex_ruler_client_request_duration_seconds_count`
- `cortex_ruler_client_request_duration_seconds_sum`
- `cortex_ruler_clients`
- `cortex_ruler_config_last_reload_successful`
- `cortex_ruler_config_last_reload_successful_seconds`
- `cortex_ruler_config_updates_total`
- `cortex_ruler_managers_total`
- `cortex_ruler_ring_check_errors_total`
- `cortex_ruler_sync_rules_total`
The `metrics_namespace` setting is already deprecated and will be removed in the next minor release. Once it is removed, the prefix will be `loki`.
### LogCLI
#### Store for retrieving remote schema

@ -544,14 +544,14 @@ func TestSchedulerRing(t *testing.T) {
// Check metrics to see if query scheduler is connected with query-frontend
metrics, err := cliQueryScheduler.Metrics()
require.NoError(t, err)
return getMetricValue(t, "cortex_query_scheduler_connected_frontend_clients", metrics) == 5
return getMetricValue(t, "loki_query_scheduler_connected_frontend_clients", metrics) == 5
}, 5*time.Second, 500*time.Millisecond)
require.Eventually(t, func() bool {
// Check metrics to see if query scheduler is connected with querier
metrics, err := cliQueryScheduler.Metrics()
require.NoError(t, err)
return getMetricValue(t, "cortex_query_scheduler_connected_querier_clients", metrics) == 4
return getMetricValue(t, "loki_query_scheduler_connected_querier_clients", metrics) == 4
}, 5*time.Second, 500*time.Millisecond)
})

@ -149,7 +149,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&c.ShutdownDelay, "shutdown-delay", 0, "How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Loki will report 503 Service Unavailable status via /ready endpoint.")
f.StringVar(&c.MetricsNamespace, "metrics-namespace", "cortex", "Namespace of the metrics that in previous releases had cortex as namespace.")
f.StringVar(&c.MetricsNamespace, "metrics-namespace", constants.Loki, "Namespace of the metrics that in previous releases had cortex as namespace. This setting is deprecated and will be removed in the next minor release.")
c.registerServerFlagsWithChangedDefaultValues(f)
c.Common.RegisterFlags(f)

Loading…
Cancel
Save