From 14b2c093c19e17103e564b0aa6af0cf16ff0e5bc Mon Sep 17 00:00:00 2001 From: Kaviraj Date: Wed, 17 Mar 2021 16:13:03 +0100 Subject: [PATCH] Add unique promtail_instance id to labels for gcptarget (#3501) * Add unique promtail_instance id to labels for gcptarget Rationale: To make labelset unique per promtail instance to avoid out-of-order errors * Update docs/sources/clients/promtail/scraping.md Co-authored-by: Danny Kopping Co-authored-by: Danny Kopping --- docs/sources/clients/promtail/scraping.md | 4 ++++ go.mod | 1 + pkg/promtail/targets/gcplog/formatter.go | 13 +++++++++++++ pkg/promtail/targets/gcplog/formatter_test.go | 11 +++++++---- vendor/modules.txt | 1 + 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/docs/sources/clients/promtail/scraping.md b/docs/sources/clients/promtail/scraping.md index 167efdd8a5..d938beb02a 100644 --- a/docs/sources/clients/promtail/scraping.md +++ b/docs/sources/clients/promtail/scraping.md @@ -206,6 +206,10 @@ Before using `gcplog` target, GCP should be [configured](../gcplog-cloud) with p It also support `relabeling` and `pipeline` stages just like other targets. +Log entries scraped by `gcplog` will have an additional label called `promtail_instance`. This label uniquely identifies each promtail instance trying to scrape gcplog (from a single `subscription_id`). +We need this unique identifier to avoid out-of-order errors from Loki servers. +Without it, two promtail instances that rewrite the timestamps of log entries (with the same labelset) at the same time may deliver them to Loki servers at different times, causing Loki to reject the later-arriving entries as out of order.
+ ## Syslog Receiver Promtail supports receiving [IETF Syslog (RFC5424)](https://tools.ietf.org/html/rfc5424) diff --git a/go.mod b/go.mod index e9a62b68a1..0b9f108d72 100644 --- a/go.mod +++ b/go.mod @@ -52,6 +52,7 @@ require ( github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.18.0 github.com/prometheus/prometheus v1.8.2-0.20210215121130-6f488061dfb4 + github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b github.com/segmentio/fasthash v1.0.2 github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 diff --git a/pkg/promtail/targets/gcplog/formatter.go b/pkg/promtail/targets/gcplog/formatter.go index 96382bad00..c5a9a87627 100644 --- a/pkg/promtail/targets/gcplog/formatter.go +++ b/pkg/promtail/targets/gcplog/formatter.go @@ -10,12 +10,17 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/relabel" + uuid "github.com/satori/go.uuid" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/promtail/api" "github.com/grafana/loki/pkg/util" ) +var ( + instanceID = uuid.NewV4() +) + // LogEntry that will be written to the pubsub topic. // According to the following spec. // https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry @@ -54,6 +59,14 @@ func format( lbs := labels.NewBuilder(nil) lbs.Set("resource_type", ge.Resource.Type) + // `promtail_instance` uniquely identifies each promtail instance trying + // to scrape gcplog (from a single subscription_id). + // + // We need this unique identifier to avoid out-of-order errors from Loki servers. + // Without it, two promtail instances that rewrite the timestamps of log entries (with the same labelset) + // at the same time may deliver them to Loki servers at different times, causing Loki to reject the later ones. + lbs.Set("promtail_instance", instanceID.String()) + // labels from gcp log entry. 
Add it as internal labels for k, v := range ge.Resource.Labels { lbs.Set("__"+util.SnakeCase(k), v) diff --git a/pkg/promtail/targets/gcplog/formatter_test.go b/pkg/promtail/targets/gcplog/formatter_test.go index 00464b0fe8..0853dbb57e 100644 --- a/pkg/promtail/targets/gcplog/formatter_test.go +++ b/pkg/promtail/targets/gcplog/formatter_test.go @@ -56,6 +56,7 @@ func TestFormat(t *testing.T) { "resource_type": "gcs", "backend_service_name": "http-loki", "bucket_name": "loki-bucket", + "promtail_instance": model.LabelValue(instanceID.String()), }, Entry: logproto.Entry{ Timestamp: mustTime(t, "2020-12-22T15:01:23.045123456Z"), @@ -74,8 +75,9 @@ func TestFormat(t *testing.T) { useIncomingTimestamp: true, expected: api.Entry{ Labels: model.LabelSet{ - "jobname": "pubsub-test", - "resource_type": "gcs", + "jobname": "pubsub-test", + "resource_type": "gcs", + "promtail_instance": model.LabelValue(instanceID.String()), }, Entry: logproto.Entry{ Timestamp: mustTime(t, "2020-12-22T15:01:23.045123456Z"), @@ -93,8 +95,9 @@ func TestFormat(t *testing.T) { }, expected: api.Entry{ Labels: model.LabelSet{ - "jobname": "pubsub-test", - "resource_type": "gcs", + "jobname": "pubsub-test", + "resource_type": "gcs", + "promtail_instance": model.LabelValue(instanceID.String()), }, Entry: logproto.Entry{ Timestamp: time.Now(), diff --git a/vendor/modules.txt b/vendor/modules.txt index 6def38fbed..cacbd15e5a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -846,6 +846,7 @@ github.com/rs/xid # github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 github.com/samuel/go-zookeeper/zk # github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b => github.com/satori/go.uuid v1.2.0 +## explicit github.com/satori/go.uuid # github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 github.com/sean-/seed