Change default of metrics.namespace (#11110)

**What this PR does / why we need it**: Change the default for a subset of metrics. After this all Loki metrics should start with "loki_". This changes the following metrics: cortex_distributor_ingester_clients cortex_dns_failures_total cortex_dns_lookups_total cortex_dns_provider_results cortex_frontend_query_range_duration_seconds_bucket cortex_frontend_query_range_duration_seconds_count cortex_frontend_query_range_duration_seconds_sum cortex_ingester_flush_queue_length cortex_kv_request_duration_seconds_bucket cortex_kv_request_duration_seconds_count cortex_kv_request_duration_seconds_sum cortex_member_consul_heartbeats_total cortex_prometheus_last_evaluation_samples cortex_prometheus_notifications_alertmanagers_discovered cortex_prometheus_notifications_dropped_total cortex_prometheus_notifications_errors_total cortex_prometheus_notifications_latency_seconds cortex_prometheus_notifications_latency_seconds_count cortex_prometheus_notifications_latency_seconds_sum cortex_prometheus_notifications_queue_capacity cortex_prometheus_notifications_queue_length cortex_prometheus_notifications_sent_total cortex_prometheus_rule_evaluation_duration_seconds cortex_prometheus_rule_evaluation_duration_seconds_count cortex_prometheus_rule_evaluation_duration_seconds_sum cortex_prometheus_rule_evaluation_failures_total cortex_prometheus_rule_evaluations_total cortex_prometheus_rule_group_duration_seconds cortex_prometheus_rule_group_duration_seconds_count cortex_prometheus_rule_group_duration_seconds_sum cortex_prometheus_rule_group_interval_seconds cortex_prometheus_rule_group_iterations_missed_total cortex_prometheus_rule_group_iterations_total cortex_prometheus_rule_group_last_duration_seconds cortex_prometheus_rule_group_last_evaluation_timestamp_seconds cortex_prometheus_rule_group_rules cortex_query_frontend_connected_schedulers cortex_query_frontend_queries_in_progress cortex_query_frontend_retries_bucket cortex_query_frontend_retries_count 
cortex_query_frontend_retries_sum cortex_query_scheduler_connected_frontend_clients cortex_query_scheduler_connected_querier_clients cortex_query_scheduler_inflight_requests cortex_query_scheduler_inflight_requests_count cortex_query_scheduler_inflight_requests_sum cortex_query_scheduler_queue_duration_seconds_bucket cortex_query_scheduler_queue_duration_seconds_count cortex_query_scheduler_queue_duration_seconds_sum cortex_query_scheduler_queue_length cortex_query_scheduler_running cortex_quota_cgroup_cpu_max cortex_quota_cgroup_cpu_period cortex_quota_cpu_count cortex_quota_gomaxprocs cortex_ring_member_heartbeats_total cortex_ring_member_tokens_owned cortex_ring_member_tokens_to_own cortex_ring_members cortex_ring_oldest_member_timestamp cortex_ring_tokens_total cortex_ruler_client_request_duration_seconds_bucket cortex_ruler_client_request_duration_seconds_count cortex_ruler_client_request_duration_seconds_sum cortex_ruler_clients cortex_ruler_config_last_reload_successful cortex_ruler_config_last_reload_successful_seconds cortex_ruler_config_updates_total cortex_ruler_managers_total cortex_ruler_ring_check_errors_total cortex_ruler_sync_rules_total **Checklist** - [X] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [X] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. 
[Example PR](d10549e3ec) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](0d4416a4b0) --------- Signed-off-by: Michel Hollands <michel.hollands@gmail.com> Co-authored-by: Ashwanth <iamashwanth@gmail.com>
2 years ago · 380f902c30
parent e93f5bfd93
commit 380f902c30
6 changed files with 87 additions and 5 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,6 +6,7 @@

 ##### Enhancements

+* [11110](https://github.com/grafana/loki/pull/11110) **MichelHollands**: Change the default of the `metrics-namespace` flag to 'loki'.
 * [11086](https://github.com/grafana/loki/pull/11086) **kandrew5**: Helm: Allow topologySpreadConstraints
 * [11003](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Add the `metrics-namespace` flag to change the namespace of metrics currently using cortex as namespace.
 * [10096](https://github.com/grafana/loki/pull/10096) **aschleck**: Storage: Allow setting a constant prefix for all created keys
--- a/cmd/migrate/main.go
+++ b/cmd/migrate/main.go
@ -24,6 +24,7 @@ import (
 	"github.com/grafana/loki/pkg/storage/config"
 	"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper"
 	"github.com/grafana/loki/pkg/util/cfg"
+	"github.com/grafana/loki/pkg/util/constants"
 	util_log "github.com/grafana/loki/pkg/util/log"
 	"github.com/grafana/loki/pkg/validation"
 )
@ -48,7 +49,7 @@ func main() {
 	batch := flag.Int("batchLen", 500, "Specify how many chunks to read/write in one batch")
 	shardBy := flag.Duration("shardBy", 6*time.Hour, "Break down the total interval into shards of this size, making this too small can lead to syncing a lot of duplicate chunks")
 	parallel := flag.Int("parallel", 8, "How many parallel threads to process each shard")
-	metricsNamespace := flag.String("metrics.namespace", "cortex", "Namespace of the generated metrics")
+	metricsNamespace := flag.String("metrics.namespace", constants.Loki, "Namespace of the generated metrics")
 	flag.Parse()

 	go func() {
--- a/docs/sources/configure/_index.md
+++ b/docs/sources/configure/_index.md
@ -226,8 +226,9 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set
 [shutdown_delay: <duration> | default = 0s]

 # Namespace of the metrics that in previous releases had cortex as namespace.
+# This setting is deprecated and will be removed in the next minor release.
 # CLI flag: -metrics-namespace
-[metrics_namespace: <string> | default = "cortex"]
+[metrics_namespace: <string> | default = "loki"]
 ```

 ### server
--- a/docs/sources/setup/upgrade/_index.md
+++ b/docs/sources/setup/upgrade/_index.md
@ -182,6 +182,85 @@ If you using a [legacy index type]({{< relref "../../storage#index-storage" >}})

 - Already deprecated metric `querier_cache_stale_gets_total` is now removed.

+#### Metrics namespace
+
+Some Loki metrics previously started with the prefix `cortex_`. As of this release they start with `loki_` by default. To keep the `cortex_` prefix, set `metrics_namespace` to `cortex` (the default is now `loki`). The following metrics are affected:
+
+ - `cortex_distributor_ingester_clients`
+ - `cortex_dns_failures_total`
+ - `cortex_dns_lookups_total`
+ - `cortex_dns_provider_results`
+ - `cortex_frontend_query_range_duration_seconds_bucket`
+ - `cortex_frontend_query_range_duration_seconds_count`
+ - `cortex_frontend_query_range_duration_seconds_sum`
+ - `cortex_ingester_flush_queue_length`
+ - `cortex_kv_request_duration_seconds_bucket`
+ - `cortex_kv_request_duration_seconds_count`
+ - `cortex_kv_request_duration_seconds_sum`
+ - `cortex_member_consul_heartbeats_total`
+ - `cortex_prometheus_last_evaluation_samples`
+ - `cortex_prometheus_notifications_alertmanagers_discovered`
+ - `cortex_prometheus_notifications_dropped_total`
+ - `cortex_prometheus_notifications_errors_total`
+ - `cortex_prometheus_notifications_latency_seconds`
+ - `cortex_prometheus_notifications_latency_seconds_count`
+ - `cortex_prometheus_notifications_latency_seconds_sum`
+ - `cortex_prometheus_notifications_queue_capacity`
+ - `cortex_prometheus_notifications_queue_length`
+ - `cortex_prometheus_notifications_sent_total`
+ - `cortex_prometheus_rule_evaluation_duration_seconds`
+ - `cortex_prometheus_rule_evaluation_duration_seconds_count`
+ - `cortex_prometheus_rule_evaluation_duration_seconds_sum`
+ - `cortex_prometheus_rule_evaluation_failures_total`
+ - `cortex_prometheus_rule_evaluations_total`
+ - `cortex_prometheus_rule_group_duration_seconds`
+ - `cortex_prometheus_rule_group_duration_seconds_count`
+ - `cortex_prometheus_rule_group_duration_seconds_sum`
+ - `cortex_prometheus_rule_group_interval_seconds`
+ - `cortex_prometheus_rule_group_iterations_missed_total`
+ - `cortex_prometheus_rule_group_iterations_total`
+ - `cortex_prometheus_rule_group_last_duration_seconds`
+ - `cortex_prometheus_rule_group_last_evaluation_timestamp_seconds`
+ - `cortex_prometheus_rule_group_rules`
+ - `cortex_query_frontend_connected_schedulers`
+ - `cortex_query_frontend_queries_in_progress`
+ - `cortex_query_frontend_retries_bucket`
+ - `cortex_query_frontend_retries_count`
+ - `cortex_query_frontend_retries_sum`
+ - `cortex_query_scheduler_connected_frontend_clients`
+ - `cortex_query_scheduler_connected_querier_clients`
+ - `cortex_query_scheduler_inflight_requests`
+ - `cortex_query_scheduler_inflight_requests_count`
+ - `cortex_query_scheduler_inflight_requests_sum`
+ - `cortex_query_scheduler_queue_duration_seconds_bucket`
+ - `cortex_query_scheduler_queue_duration_seconds_count`
+ - `cortex_query_scheduler_queue_duration_seconds_sum`
+ - `cortex_query_scheduler_queue_length`
+ - `cortex_query_scheduler_running`
+ - `cortex_quota_cgroup_cpu_max`
+ - `cortex_quota_cgroup_cpu_period`
+ - `cortex_quota_cpu_count`
+ - `cortex_quota_gomaxprocs`
+ - `cortex_ring_member_heartbeats_total`
+ - `cortex_ring_member_tokens_owned`
+ - `cortex_ring_member_tokens_to_own`
+ - `cortex_ring_members`
+ - `cortex_ring_oldest_member_timestamp`
+ - `cortex_ring_tokens_total`
+ - `cortex_ruler_client_request_duration_seconds_bucket`
+ - `cortex_ruler_client_request_duration_seconds_count`
+ - `cortex_ruler_client_request_duration_seconds_sum`
+ - `cortex_ruler_clients`
+ - `cortex_ruler_config_last_reload_successful`
+ - `cortex_ruler_config_last_reload_successful_seconds`
+ - `cortex_ruler_config_updates_total`
+ - `cortex_ruler_managers_total`
+ - `cortex_ruler_ring_check_errors_total`
+ - `cortex_ruler_sync_rules_total`
+
+
+The `metrics_namespace` setting is already deprecated and will be removed in the next minor release. After that, the prefix will always be `loki`.
+
 ### LogCLI

 #### Store for retrieving remote schema
--- a/integration/loki_micro_services_test.go
+++ b/integration/loki_micro_services_test.go
@ -544,14 +544,14 @@ func TestSchedulerRing(t *testing.T) {
 			// Check metrics to see if query scheduler is connected with query-frontend
 			metrics, err := cliQueryScheduler.Metrics()
 			require.NoError(t, err)
-			return getMetricValue(t, "cortex_query_scheduler_connected_frontend_clients", metrics) == 5
+			return getMetricValue(t, "loki_query_scheduler_connected_frontend_clients", metrics) == 5
 		}, 5*time.Second, 500*time.Millisecond)

 		require.Eventually(t, func() bool {
 			// Check metrics to see if query scheduler is connected with querier
 			metrics, err := cliQueryScheduler.Metrics()
 			require.NoError(t, err)
-			return getMetricValue(t, "cortex_query_scheduler_connected_querier_clients", metrics) == 4
+			return getMetricValue(t, "loki_query_scheduler_connected_querier_clients", metrics) == 4
 		}, 5*time.Second, 500*time.Millisecond)
 	})

--- a/pkg/loki/loki.go
+++ b/pkg/loki/loki.go
@ -149,7 +149,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) {

 	f.DurationVar(&c.ShutdownDelay, "shutdown-delay", 0, "How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Loki will report 503 Service Unavailable status via /ready endpoint.")

-	f.StringVar(&c.MetricsNamespace, "metrics-namespace", "cortex", "Namespace of the metrics that in previous releases had cortex as namespace.")
+	f.StringVar(&c.MetricsNamespace, "metrics-namespace", constants.Loki, "Namespace of the metrics that in previous releases had cortex as namespace. This setting is deprecated and will be removed in the next minor release.")

 	c.registerServerFlagsWithChangedDefaultValues(f)
 	c.Common.RegisterFlags(f)