promtail: add multi-tenant support (#1135)

* promtail: added tenant_id support to promtail client

* promtail: added tenant stage to dynamically override the tenant ID

* promtail: documented client's batch struct and improved tenant stage config pointer usage

* Added static value support to tenant stage
pull/1239/head
Marco Pracucci 6 years ago committed by Cyril Tovena
parent 3d2c643bba
commit 04f58c880f
  1. docs/clients/promtail/configuration.md (26)
  2. docs/clients/promtail/stages/README.md (1)
  3. docs/clients/promtail/stages/match.md (3)
  4. docs/clients/promtail/stages/tenant.md (80)
  5. pkg/logentry/stages/stage.go (6)
  6. pkg/logentry/stages/tenant.go (106)
  7. pkg/logentry/stages/tenant_test.go (156)
  8. pkg/promtail/client/batch.go (90)
  9. pkg/promtail/client/batch_test.go (105)
  10. pkg/promtail/client/client.go (132)
  11. pkg/promtail/client/client_test.go (157)
  12. pkg/promtail/client/config.go (6)
  13. pkg/promtail/constants/labels.go (7)

@ -21,6 +21,7 @@ and how to scrape logs from files.
* [metric_counter](#metric_counter)
* [metric_gauge](#metric_gauge)
* [metric_histogram](#metric_histogram)
* [tenant_stage](#tenant_stage)
* [journal_config](#journal_config)
* [relabel_config](#relabel_config)
* [static_config](#static_config)
@ -135,6 +136,11 @@ Loki:
# URL for the Distributor.
url: <string>
# The tenant ID used by default to push logs to Loki. If omitted or empty,
# it is assumed that Loki is running in single-tenant mode and no
# X-Scope-OrgID header is sent.
[tenant_id: <string>]
# Maximum amount of time to wait before sending a batch, even if that
# batch isn't full.
[batchwait: <duration> | default = 1s]
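
For illustration, a minimal client section enabling multi-tenant pushes might look like the sketch below, assuming the standard `clients` list form of the promtail config; the URL and tenant name are placeholders, not part of this change:
```yaml
clients:
  - url: http://loki:3100/api/prom/push
    tenant_id: team-a
    batchwait: 1s
```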
@ -275,7 +281,8 @@ set of key-value pairs that is passed around from stage to stage.
<timestamp_stage> |
<output_stage> |
<labels_stage> |
<metrics_stage>
<metrics_stage> |
<tenant_stage>
]
```
@ -536,6 +543,23 @@ config:
- <int>
```
#### tenant_stage
The tenant stage is an action stage that sets the tenant ID for the log entry,
picking it either from a field in the extracted data map or from a configured
static value.
```yaml
tenant:
# Name from extracted data whose value should be set as the tenant ID.
# Either source or value config option is required, but not both (they
# are mutually exclusive).
[ source: <string> ]
# Value to use to set the tenant ID when this stage is executed. Useful
# when this stage is included within a conditional pipeline with "match".
[ value: <string> ]
```
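
For example, a minimal pipeline that derives the tenant ID from a JSON field (mirroring the example in the tenant stage documentation) could look like:
```yaml
pipeline_stages:
  - json:
      expressions:
        customer_id: customer_id
  - tenant:
      source: customer_id
```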
### journal_config
The `journal_config` block configures reading from the systemd journal from

@ -20,6 +20,7 @@ Action stages:
* [output](./output.md): Set the log line text.
* [labels](./labels.md): Update the label set for the log entry.
* [metrics](./metrics.md): Calculate metrics based on extracted data.
* [tenant](./tenant.md): Set the tenant ID value to use for the log entry.
Filtering stages:

@ -32,7 +32,8 @@ match:
<timestamp_stage> |
<output_stage> |
<labels_stage> |
<metrics_stage>
<metrics_stage> |
<tenant_stage>
]
```

@ -0,0 +1,80 @@
# `tenant` stage
The tenant stage is an action stage that sets the tenant ID for the log entry,
picking it either from a field in the extracted data map or from a configured
static value. If the field is missing, the default promtail client
[`tenant_id`](../configuration.md#client_config) will be used.
## Schema
```yaml
tenant:
# Name from extracted data whose value should be set as the tenant ID.
# Either source or value config option is required, but not both (they
# are mutually exclusive).
[ source: <string> ]
# Value to use to set the tenant ID when this stage is executed. Useful
# when this stage is included within a conditional pipeline with "match".
[ value: <string> ]
```
### Example: extract the tenant ID from a structured log
For the given pipeline:
```yaml
pipeline_stages:
- json:
expressions:
customer_id: customer_id
- tenant:
source: customer_id
```
Given the following log line:
```json
{"customer_id":"1","log":"log message\n","stream":"stderr","time":"2019-04-30T02:12:41.8443515Z"}
```
The first stage would extract `customer_id` into the extracted map with a value of
`1`. The tenant stage would set the `X-Scope-OrgID` request header (used by Loki to
identify the tenant) to the value of the `customer_id` extracted data, which is `1`.
### Example: override the tenant ID with the configured value
For the given pipeline:
```yaml
pipeline_stages:
- json:
expressions:
app:
message:
- labels:
app:
- match:
selector: '{app="api"}'
stages:
- tenant:
value: "team-api"
- output:
source: message
```
Given the following log line:
```json
{"app":"api","log":"log message\n","stream":"stderr","time":"2019-04-30T02:12:41.8443515Z"}
```
The pipeline would:
1. Decode the JSON log
2. Set the label `app="api"`
3. Process the `match` stage, checking whether the `{app="api"}` selector matches
and, whenever it matches, running the sub-stages. The `tenant` sub-stage would
override the tenant ID with the value `"team-api"`.

@ -21,6 +21,7 @@ const (
StageTypeMatch = "match"
StageTypeTemplate = "template"
StageTypePipeline = "pipeline"
StageTypeTenant = "tenant"
)
// Stage takes an existing set of labels, timestamp and log entry and returns either a possibly mutated
@ -94,6 +95,11 @@ func New(logger log.Logger, jobName *string, stageType string,
if err != nil {
return nil, err
}
case StageTypeTenant:
s, err = newTenantStage(logger, cfg)
if err != nil {
return nil, err
}
default:
return nil, errors.Errorf("Unknown stage type: %s", stageType)
}

@ -0,0 +1,106 @@
package stages
import (
"reflect"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/grafana/loki/pkg/promtail/constants"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
)
const (
ErrTenantStageEmptySourceOrValue = "source or value config are required"
ErrTenantStageConflictingSourceAndValue = "source and value are mutually exclusive: you should set source or value but not both"
)
type tenantStage struct {
cfg TenantConfig
logger log.Logger
}
type TenantConfig struct {
Source string `mapstructure:"source"`
Value string `mapstructure:"value"`
}
// validateTenantConfig validates the tenant stage configuration
func validateTenantConfig(c TenantConfig) error {
if c.Source == "" && c.Value == "" {
return errors.New(ErrTenantStageEmptySourceOrValue)
}
if c.Source != "" && c.Value != "" {
return errors.New(ErrTenantStageConflictingSourceAndValue)
}
return nil
}
// newTenantStage creates a new tenant stage to override the tenant ID from extracted data
func newTenantStage(logger log.Logger, configs interface{}) (*tenantStage, error) {
cfg := TenantConfig{}
err := mapstructure.Decode(configs, &cfg)
if err != nil {
return nil, err
}
err = validateTenantConfig(cfg)
if err != nil {
return nil, err
}
return &tenantStage{
cfg: cfg,
logger: logger,
}, nil
}
// Process implements Stage
func (s *tenantStage) Process(labels model.LabelSet, extracted map[string]interface{}, t *time.Time, entry *string) {
var tenantID string
// Get tenant ID from source or configured value
if s.cfg.Source != "" {
tenantID = s.getTenantFromSourceField(extracted)
} else {
tenantID = s.cfg.Value
}
// Skip an empty tenant ID (i.e. failed to get the tenant from the source)
if tenantID == "" {
return
}
labels[constants.ReservedLabelTenantID] = model.LabelValue(tenantID)
}
// Name implements Stage
func (s *tenantStage) Name() string {
return StageTypeTenant
}
func (s *tenantStage) getTenantFromSourceField(extracted map[string]interface{}) string {
// Get the tenant ID from the source data
value, ok := extracted[s.cfg.Source]
if !ok {
if Debug {
level.Debug(s.logger).Log("msg", "the tenant source does not exist in the extracted data", "source", s.cfg.Source)
}
return ""
}
// Convert the value to string
tenantID, err := getString(value)
if err != nil {
if Debug {
level.Debug(s.logger).Log("msg", "failed to convert value to string", "err", err, "type", reflect.TypeOf(value).String())
}
return ""
}
return tenantID
}

@ -0,0 +1,156 @@
package stages
import (
"testing"
"time"
"github.com/cortexproject/cortex/pkg/util"
"github.com/grafana/loki/pkg/promtail/constants"
lokiutil "github.com/grafana/loki/pkg/util"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestTenantStage_Validation(t *testing.T) {
t.Parallel()
tests := map[string]struct {
config *TenantConfig
expectedErr *string
}{
"should pass on source config option set": {
config: &TenantConfig{
Source: "tenant",
},
expectedErr: nil,
},
"should pass on value config option set": {
config: &TenantConfig{
Value: "team-a",
},
expectedErr: nil,
},
"should fail on missing source and value": {
config: &TenantConfig{},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on empty source": {
config: &TenantConfig{
Source: "",
},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on empty value": {
config: &TenantConfig{
Value: "",
},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on both source and value set": {
config: &TenantConfig{
Source: "tenant",
Value: "team-a",
},
expectedErr: lokiutil.StringRef(ErrTenantStageConflictingSourceAndValue),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
stage, err := newTenantStage(util.Logger, testData.config)
if testData.expectedErr != nil {
assert.EqualError(t, err, *testData.expectedErr)
assert.Nil(t, stage)
} else {
assert.NoError(t, err)
assert.NotNil(t, stage)
}
})
}
}
func TestTenantStage_Process(t *testing.T) {
t.Parallel()
tests := map[string]struct {
config *TenantConfig
inputLabels model.LabelSet
inputExtracted map[string]interface{}
expectedTenant *string
}{
"should not set the tenant if the source field is not defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{},
expectedTenant: nil,
},
"should not override the tenant if the source field is not defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("foo"),
},
"should set the tenant if the source field is defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{"tenant_id": "bar"},
expectedTenant: lokiutil.StringRef("bar"),
},
"should override the tenant if the source field is defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{"tenant_id": "bar"},
expectedTenant: lokiutil.StringRef("bar"),
},
"should not set the tenant if the source field data type can't be converted to string": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{"tenant_id": []string{"bar"}},
expectedTenant: nil,
},
"should set the tenant with the configured static value": {
config: &TenantConfig{Value: "bar"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("bar"),
},
"should override the tenant with the configured static value": {
config: &TenantConfig{Value: "bar"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("bar"),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
stage, err := newTenantStage(util.Logger, testData.config)
require.NoError(t, err)
// Process a dummy line and ensure nothing has changed except
// the tenant reserved label
timestamp := time.Unix(1, 1)
entry := "hello world"
labels := testData.inputLabels.Clone()
extracted := testData.inputExtracted
stage.Process(labels, extracted, &timestamp, &entry)
assert.Equal(t, time.Unix(1, 1), timestamp)
assert.Equal(t, "hello world", entry)
actualTenant, ok := labels[constants.ReservedLabelTenantID]
if testData.expectedTenant == nil {
assert.False(t, ok)
} else {
assert.Equal(t, *testData.expectedTenant, string(actualTenant))
}
})
}
}

@ -0,0 +1,90 @@
package client
import (
"time"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/grafana/loki/pkg/logproto"
"github.com/prometheus/common/model"
)
// batch holds pending log streams waiting to be sent to Loki. It is used to
// reduce the number of push requests to Loki by aggregating multiple log
// streams and entries in a single batch request. When promtail runs in
// multi-tenant mode, log streams for each tenant are stored in a dedicated batch.
type batch struct {
streams map[model.Fingerprint]*logproto.Stream
bytes int
createdAt time.Time
}
func newBatch(entries ...entry) *batch {
b := &batch{
streams: map[model.Fingerprint]*logproto.Stream{},
bytes: 0,
createdAt: time.Now(),
}
// Add entries to the batch
for _, entry := range entries {
b.add(entry)
}
return b
}
// add an entry to the batch
func (b *batch) add(entry entry) {
b.bytes += len(entry.Line)
// Append the entry to an already existing stream (if any)
fp := entry.labels.FastFingerprint()
if stream, ok := b.streams[fp]; ok {
stream.Entries = append(stream.Entries, entry.Entry)
return
}
// Add the entry as a new stream
b.streams[fp] = &logproto.Stream{
Labels: entry.labels.String(),
Entries: []logproto.Entry{entry.Entry},
}
}
// sizeBytes returns the current batch size in bytes
func (b *batch) sizeBytes() int {
return b.bytes
}
// sizeBytesAfter returns the size the batch would have once the input entry
// is added to it
func (b *batch) sizeBytesAfter(entry entry) int {
return b.bytes + len(entry.Line)
}
// age of the batch since its creation
func (b *batch) age() time.Duration {
return time.Since(b.createdAt)
}
// encode the batch as a snappy-compressed push request, and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encode() ([]byte, int, error) {
req := logproto.PushRequest{
Streams: make([]*logproto.Stream, 0, len(b.streams)),
}
entriesCount := 0
for _, stream := range b.streams {
req.Streams = append(req.Streams, stream)
entriesCount += len(stream.Entries)
}
buf, err := proto.Marshal(&req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
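
To show how these pieces are meant to fit together, here is a hedged sketch (not part of this commit) of driving one batch per tenant with the API above; `flush` and `maxBatchSize` are hypothetical placeholders, while `entry`, `newBatch`, `add`, `sizeBytesAfter` and `encode` are the types and methods shown in this file and in client.go:
```go
package client

import (
	"time"

	"github.com/grafana/loki/pkg/logproto"
	"github.com/prometheus/common/model"
)

// sketchBatching is a hypothetical helper showing the intended per-tenant
// batching flow: entries are grouped in a dedicated batch per tenant, and a
// batch is flushed once adding another entry would exceed the size limit.
func sketchBatching(batches map[string]*batch, maxBatchSize int, flush func(tenantID string, buf []byte) error) error {
	e := entry{
		tenantID: "team-a",
		labels:   model.LabelSet{"app": "api"},
		Entry:    logproto.Entry{Timestamp: time.Now(), Line: "hello world"},
	}

	b, ok := batches[e.tenantID]
	if !ok {
		// First entry for this tenant: start a dedicated batch.
		batches[e.tenantID] = newBatch(e)
		return nil
	}
	if b.sizeBytesAfter(e) > maxBatchSize {
		// Adding the entry would exceed the max batch size: encode and flush
		// the current batch, then start a new one holding the entry.
		buf, _, err := b.encode()
		if err != nil {
			return err
		}
		if err := flush(e.tenantID, buf); err != nil {
			return err
		}
		batches[e.tenantID] = newBatch(e)
		return nil
	}
	// Otherwise just append the entry to the existing per-tenant batch.
	b.add(e)
	return nil
}
```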

@ -0,0 +1,105 @@
package client
import (
"testing"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestBatch_add(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputEntries []entry
expectedSizeBytes int
}{
"empty batch": {
inputEntries: []entry{},
expectedSizeBytes: 0,
},
"single stream with single log entry": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line),
},
"single stream with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
{"tenant", model.LabelSet{}, logEntries[1].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line),
},
"multiple streams with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line) + len(logEntries[2].Entry.Line),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
b := newBatch()
for _, entry := range testData.inputEntries {
b.add(entry)
}
assert.Equal(t, testData.expectedSizeBytes, b.sizeBytes())
})
}
}
func TestBatch_encode(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch *batch
expectedEntriesCount int
}{
"empty batch": {
inputBatch: newBatch(),
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
),
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{}, logEntries[1].Entry},
),
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
entry{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
),
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := testData.inputBatch.encode()
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}

@ -12,12 +12,11 @@ import (
"time"
"github.com/grafana/loki/pkg/promtail/api"
"github.com/grafana/loki/pkg/promtail/constants"
"github.com/cortexproject/cortex/pkg/util"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/grafana/loki/pkg/helpers"
"github.com/grafana/loki/pkg/logproto"
@ -92,7 +91,8 @@ type client struct {
}
type entry struct {
labels model.LabelSet
tenantID string
labels model.LabelSet
logproto.Entry
}
@ -125,13 +125,26 @@ func New(cfg Config, logger log.Logger) (Client, error) {
}
func (c *client) run() {
batch := map[model.Fingerprint]*logproto.Stream{}
batchSize := 0
maxWait := time.NewTicker(c.cfg.BatchWait)
batches := map[string]*batch{}
// Given the client handles multiple batches (1 per tenant) and each batch
// can be created at a different point in time, we look for batches whose
// max wait time has been reached 10 times per BatchWait period, so that the
// maximum extra delay in sending a batch is 10% of the max waiting time.
// The check interval is never lower than 10ms, to avoid checking too
// frequently when BatchWait is very low.
minWaitCheckFrequency := 10 * time.Millisecond
maxWaitCheckFrequency := c.cfg.BatchWait / 10
if maxWaitCheckFrequency < minWaitCheckFrequency {
maxWaitCheckFrequency = minWaitCheckFrequency
}
maxWaitCheck := time.NewTicker(maxWaitCheckFrequency)
defer func() {
if len(batch) > 0 {
c.sendBatch(batch)
// Send all pending batches
for tenantID, batch := range batches {
c.sendBatch(tenantID, batch)
}
c.wg.Done()
@ -143,35 +156,42 @@ func (c *client) run() {
return
case e := <-c.entries:
if batchSize+len(e.Line) > c.cfg.BatchSize {
c.sendBatch(batch)
batchSize = 0
batch = map[model.Fingerprint]*logproto.Stream{}
}
batch, ok := batches[e.tenantID]
batchSize += len(e.Line)
fp := e.labels.FastFingerprint()
stream, ok := batch[fp]
// If the batch doesn't exist yet, we create a new one with the entry
if !ok {
stream = &logproto.Stream{
Labels: e.labels.String(),
}
batch[fp] = stream
batches[e.tenantID] = newBatch(e)
break
}
// If adding the entry to the batch will increase the size over the max
// size allowed, we do send the current batch and then create a new one
if batch.sizeBytesAfter(e) > c.cfg.BatchSize {
c.sendBatch(e.tenantID, batch)
batches[e.tenantID] = newBatch(e)
break
}
stream.Entries = append(stream.Entries, e.Entry)
case <-maxWait.C:
if len(batch) > 0 {
c.sendBatch(batch)
batchSize = 0
batch = map[model.Fingerprint]*logproto.Stream{}
// The max size of the batch isn't reached, so we can add the entry
batch.add(e)
case <-maxWaitCheck.C:
// Send all batches whose max wait time has been reached
for tenantID, batch := range batches {
if batch.age() < c.cfg.BatchWait {
continue
}
c.sendBatch(tenantID, batch)
delete(batches, tenantID)
}
}
}
}
func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
buf, entriesCount, err := encodeBatch(batch)
func (c *client) sendBatch(tenantID string, batch *batch) {
buf, entriesCount, err := batch.encode()
if err != nil {
level.Error(c.logger).Log("msg", "error encoding batch", "error", err)
return
@ -184,7 +204,7 @@ func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
var status int
for backoff.Ongoing() {
start := time.Now()
status, err = c.send(ctx, buf)
status, err = c.send(ctx, tenantID, buf)
requestDuration.WithLabelValues(strconv.Itoa(status), c.cfg.URL.Host).Observe(time.Since(start).Seconds())
if err == nil {
@ -209,26 +229,7 @@ func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
}
}
func encodeBatch(batch map[model.Fingerprint]*logproto.Stream) ([]byte, int, error) {
req := logproto.PushRequest{
Streams: make([]*logproto.Stream, 0, len(batch)),
}
entriesCount := 0
for _, stream := range batch {
req.Streams = append(req.Streams, stream)
entriesCount += len(stream.Entries)
}
buf, err := proto.Marshal(&req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
func (c *client) send(ctx context.Context, buf []byte) (int, error) {
func (c *client) send(ctx context.Context, tenantID string, buf []byte) (int, error) {
ctx, cancel := context.WithTimeout(ctx, c.cfg.Timeout)
defer cancel()
req, err := http.NewRequest("POST", c.cfg.URL.String(), bytes.NewReader(buf))
@ -238,6 +239,12 @@ func (c *client) send(ctx context.Context, buf []byte) (int, error) {
req = req.WithContext(ctx)
req.Header.Set("Content-Type", contentType)
// If the tenant ID is not empty, promtail is running in multi-tenant mode, so
// we should send it to Loki
if tenantID != "" {
req.Header.Set("X-Scope-OrgID", tenantID)
}
resp, err := c.client.Do(req)
if err != nil {
return -1, err
@ -255,6 +262,22 @@ func (c *client) send(ctx context.Context, buf []byte) (int, error) {
return resp.StatusCode, err
}
func (c *client) getTenantID(labels model.LabelSet) string {
// Check if it has been overridden while processing the pipeline stages
if value, ok := labels[constants.ReservedLabelTenantID]; ok {
return string(value)
}
// Check if it has been specified in the config
if c.cfg.TenantID != "" {
return c.cfg.TenantID
}
// Defaults to an empty string, which means the X-Scope-OrgID header
// will not be sent
return ""
}
// Stop the client.
func (c *client) Stop() {
c.once.Do(func() { close(c.quit) })
@ -267,7 +290,16 @@ func (c *client) Handle(ls model.LabelSet, t time.Time, s string) error {
ls = c.externalLabels.Merge(ls)
}
c.entries <- entry{ls, logproto.Entry{
// Get the tenant ID in case it has been overridden while processing
// the pipeline stages, then remove the special label
tenantID := c.getTenantID(ls)
if _, ok := ls[constants.ReservedLabelTenantID]; ok {
// Clone the label set to not manipulate the input one
ls = ls.Clone()
delete(ls, constants.ReservedLabelTenantID)
}
c.entries <- entry{tenantID, ls, logproto.Entry{
Timestamp: t,
Line: s,
}}

@ -27,18 +27,27 @@ var (
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(1, 0).UTC(), Line: "line1"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(2, 0).UTC(), Line: "line2"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(3, 0).UTC(), Line: "line3"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(4, 0).UTC(), Line: "line4"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(5, 0).UTC(), Line: "line5"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-2"}, Entry: logproto.Entry{Timestamp: time.Unix(6, 0).UTC(), Line: "line6"}},
}
)
type receivedReq struct {
tenantID string
pushReq logproto.PushRequest
}
func TestClient_Handle(t *testing.T) {
tests := map[string]struct {
clientBatchSize int
clientBatchWait time.Duration
clientMaxRetries int
clientTenantID string
serverResponseStatus int
inputEntries []entry
inputDelay time.Duration
expectedBatches [][]*logproto.Stream
expectedReqs []receivedReq
expectedMetrics string
}{
"batch log entries together until the batch size is reached": {
@ -47,12 +56,14 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1], logEntries[2]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}}}},
},
},
expectedMetrics: `
@ -68,12 +79,14 @@ func TestClient_Handle(t *testing.T) {
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
inputDelay: 110 * time.Millisecond,
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}}}},
},
},
expectedMetrics: `
@ -88,15 +101,18 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 500,
inputEntries: []entry{logEntries[0]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
@ -111,9 +127,10 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 400,
inputEntries: []entry{logEntries[0]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
@ -122,6 +139,52 @@ func TestClient_Handle(t *testing.T) {
promtail_dropped_entries_total{host="__HOST__"} 1.0
`,
},
"batch log entries together honoring the client tenant ID": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
`,
},
"batch log entries together honoring the tenant ID overridden while processing the pipeline stages": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[3], logEntries[4], logEntries[5]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "tenant-1",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[3].Entry, logEntries[4].Entry}}}},
},
{
tenantID: "tenant-2",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[5].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 4.0
`,
},
}
for testName, testData := range tests {
@ -131,7 +194,7 @@ func TestClient_Handle(t *testing.T) {
droppedEntries.Reset()
// Create a buffer channel where we do enqueue received requests
receivedReqsChan := make(chan logproto.PushRequest, 10)
receivedReqsChan := make(chan receivedReq, 10)
// Start a local HTTP server
server := httptest.NewServer(createServerHandler(receivedReqsChan, testData.serverResponseStatus))
@ -152,6 +215,7 @@ func TestClient_Handle(t *testing.T) {
BackoffConfig: util.BackoffConfig{MinBackoff: 1 * time.Millisecond, MaxBackoff: 2 * time.Millisecond, MaxRetries: testData.clientMaxRetries},
ExternalLabels: lokiflag.LabelSet{},
Timeout: 1 * time.Second,
TenantID: testData.clientTenantID,
}
c, err := New(cfg, log.NewNopLogger())
@ -169,7 +233,7 @@ func TestClient_Handle(t *testing.T) {
// Wait until the expected push requests are received (with a timeout)
deadline := time.Now().Add(1 * time.Second)
for len(receivedReqsChan) < len(testData.expectedBatches) && time.Now().Before(deadline) {
for len(receivedReqsChan) < len(testData.expectedReqs) && time.Now().Before(deadline) {
time.Sleep(5 * time.Millisecond)
}
@ -178,15 +242,15 @@ func TestClient_Handle(t *testing.T) {
close(receivedReqsChan)
// Get all push requests received on the server side
receivedReqs := make([]logproto.PushRequest, 0)
receivedReqs := make([]receivedReq, 0)
for req := range receivedReqsChan {
receivedReqs = append(receivedReqs, req)
}
require.Equal(t, len(testData.expectedBatches), len(receivedReqs))
for i, batch := range receivedReqs {
assert.Equal(t, testData.expectedBatches[i], batch.Streams)
}
// Due to implementation details (map iteration order is random) we just check
// that the expected requests match the received requests, without checking
// the exact order, which is not guaranteed in the multi-tenant case
require.ElementsMatch(t, testData.expectedReqs, receivedReqs)
expectedMetrics := strings.Replace(testData.expectedMetrics, "__HOST__", serverURL.Host, -1)
err = testutil.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(expectedMetrics), "promtail_sent_entries_total", "promtail_dropped_entries_total")
@ -195,52 +259,7 @@ func TestClient_Handle(t *testing.T) {
}
}
func TestClient_encodeBatch(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch map[model.Fingerprint]*logproto.Stream
expectedEntriesCount int
}{
"empty batch": {
inputBatch: map[model.Fingerprint]*logproto.Stream{},
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
},
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
},
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
model.Fingerprint(2): {Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}},
},
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := encodeBatch(testData.inputBatch)
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}
func createServerHandler(receivedReqsChan chan logproto.PushRequest, status int) http.HandlerFunc {
func createServerHandler(receivedReqsChan chan receivedReq, status int) http.HandlerFunc {
return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
// Parse the request
var pushReq logproto.PushRequest
@ -249,7 +268,11 @@ func createServerHandler(receivedReqsChan chan logproto.PushRequest, status int)
return
}
receivedReqsChan <- pushReq
receivedReqsChan <- receivedReq{
tenantID: req.Header.Get("X-Scope-OrgID"),
pushReq: pushReq,
}
rw.WriteHeader(status)
})
}

@ -22,6 +22,10 @@ type Config struct {
// The labels to add to any time series or alerts when communicating with loki
ExternalLabels lokiflag.LabelSet `yaml:"external_labels,omitempty"`
Timeout time.Duration `yaml:"timeout"`
// The tenant ID to use when pushing logs to Loki (empty string means
// single tenant mode)
TenantID string `yaml:"tenant_id"`
}
// RegisterFlags registers flags.
@ -35,6 +39,8 @@ func (c *Config) RegisterFlags(flags *flag.FlagSet) {
flag.DurationVar(&c.BackoffConfig.MaxBackoff, "client.max-backoff", 5*time.Second, "Maximum backoff time between retries.")
flag.DurationVar(&c.Timeout, "client.timeout", 10*time.Second, "Maximum time to wait for server to respond to a request")
flags.Var(&c.ExternalLabels, "client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")
flags.StringVar(&c.TenantID, "client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
}
// UnmarshalYAML implement Yaml Unmarshaler

@ -0,0 +1,7 @@
package constants
const (
// Label reserved to override the tenant ID while processing
// pipeline stages
ReservedLabelTenantID = "__tenant_id__"
)