promtail: add multi-tenant support (#1135)

* promtail: added tenant_id support to promtail client

* promtail: added tenant stage to dynamically override the tenant ID

* promtail: documented client's batch struct and improved tenant stage config pointer usage

* Added static value support to tenant stage
pull/1239/head
Marco Pracucci 6 years ago committed by Cyril Tovena
parent 3d2c643bba
commit 04f58c880f
  1. docs/clients/promtail/configuration.md (26)
  2. docs/clients/promtail/stages/README.md (1)
  3. docs/clients/promtail/stages/match.md (3)
  4. docs/clients/promtail/stages/tenant.md (80)
  5. pkg/logentry/stages/stage.go (6)
  6. pkg/logentry/stages/tenant.go (106)
  7. pkg/logentry/stages/tenant_test.go (156)
  8. pkg/promtail/client/batch.go (90)
  9. pkg/promtail/client/batch_test.go (105)
  10. pkg/promtail/client/client.go (132)
  11. pkg/promtail/client/client_test.go (157)
  12. pkg/promtail/client/config.go (6)
  13. pkg/promtail/constants/labels.go (7)

@ -21,6 +21,7 @@ and how to scrape logs from files.
* [metric_counter](#metric_counter)
* [metric_gauge](#metric_gauge)
* [metric_histogram](#metric_histogram)
* [tenant_stage](#tenant_stage)
* [journal_config](#journal_config)
* [relabel_config](#relabel_config)
* [static_config](#static_config)
@ -135,6 +136,11 @@ Loki:
# URL for the Distributor.
url: <string>
# The tenant ID used by default to push logs to Loki. If omitted or empty,
# it is assumed that Loki is running in single-tenant mode and no
# X-Scope-OrgID header is sent.
[tenant_id: <string>]
# Maximum amount of time to wait before sending a batch, even if that
# batch isn't full.
[batchwait: <duration> | default = 1s]
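
For illustration, a minimal client section enabling multi-tenant pushes might look like the sketch below, assuming the standard `clients` list form of the promtail config; the URL and tenant name are placeholders, not part of this change:
```yaml
clients:
  - url: http://loki:3100/api/prom/push
    tenant_id: team-a
    batchwait: 1s
```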
@ -275,7 +281,8 @@ set of key-value pairs that is passed around from stage to stage.
<timestamp_stage> |
<output_stage> |
<labels_stage> |
<metrics_stage>
<metrics_stage> |
<tenant_stage>
]
```
@ -536,6 +543,23 @@ config:
- <int>
```
#### tenant_stage
The tenant stage is an action stage that sets the tenant ID for the log entry,
picking it either from a field in the extracted data map or from a configured
static value.
```yaml
tenant:
# Name from extracted data whose value should be set as the tenant ID.
# Either source or value config option is required, but not both (they
# are mutually exclusive).
[ source: <string> ]
# Value to use to set the tenant ID when this stage is executed. Useful
# when this stage is included within a conditional pipeline with "match".
[ value: <string> ]
```
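
For example, a minimal pipeline that derives the tenant ID from a JSON field (mirroring the example in the tenant stage documentation) could look like:
```yaml
pipeline_stages:
  - json:
      expressions:
        customer_id: customer_id
  - tenant:
      source: customer_id
```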
### journal_config
The `journal_config` block configures reading from the systemd journal from

@ -20,6 +20,7 @@ Action stages:
* [output](./output.md): Set the log line text.
* [labels](./labels.md): Update the label set for the log entry.
* [metrics](./metrics.md): Calculate metrics based on extracted data.
* [tenant](./tenant.md): Set the tenant ID value to use for the log entry.
Filtering stages:

@ -32,7 +32,8 @@ match:
<timestamp_stage> |
<output_stage> |
<labels_stage> |
<metrics_stage>
<metrics_stage> |
<tenant_stage>
]
```

@ -0,0 +1,80 @@
# `tenant` stage
The tenant stage is an action stage that sets the tenant ID for the log entry,
picking it either from a field in the extracted data map or from a configured
static value. If the field is missing, the default promtail client
[`tenant_id`](../configuration.md#client_config) will be used.
## Schema
```yaml
tenant:
# Name from extracted data whose value should be set as the tenant ID.
# Either source or value config option is required, but not both (they
# are mutually exclusive).
[ source: <string> ]
# Value to use to set the tenant ID when this stage is executed. Useful
# when this stage is included within a conditional pipeline with "match".
[ value: <string> ]
```
### Example: extract the tenant ID from a structured log
For the given pipeline:
```yaml
pipeline_stages:
- json:
expressions:
customer_id: customer_id
- tenant:
source: customer_id
```
Given the following log line:
```json
{"customer_id":"1","log":"log message\n","stream":"stderr","time":"2019-04-30T02:12:41.8443515Z"}
```
The first stage would extract `customer_id` into the extracted map with a value of
`1`. The tenant stage would set the `X-Scope-OrgID` request header (used by Loki to
identify the tenant) to the value of the `customer_id` extracted data, which is `1`.
### Example: override the tenant ID with the configured value
For the given pipeline:
```yaml
pipeline_stages:
- json:
expressions:
app:
message:
- labels:
app:
- match:
selector: '{app="api"}'
stages:
- tenant:
value: "team-api"
- output:
source: message
```
Given the following log line:
```json
{"app":"api","log":"log message\n","stream":"stderr","time":"2019-04-30T02:12:41.8443515Z"}
```
The pipeline would:
1. Decode the JSON log
2. Set the label `app="api"`
3. Process the `match` stage, checking whether the `{app="api"}` selector matches
and, whenever it matches, running the sub-stages. The `tenant` sub-stage would
override the tenant ID with the value `"team-api"`.

@ -21,6 +21,7 @@ const (
StageTypeMatch = "match"
StageTypeTemplate = "template"
StageTypePipeline = "pipeline"
StageTypeTenant = "tenant"
)
// Stage takes an existing set of labels, timestamp and log entry and returns either a possibly mutated
@ -94,6 +95,11 @@ func New(logger log.Logger, jobName *string, stageType string,
if err != nil {
return nil, err
}
case StageTypeTenant:
s, err = newTenantStage(logger, cfg)
if err != nil {
return nil, err
}
default:
return nil, errors.Errorf("Unknown stage type: %s", stageType)
}

@ -0,0 +1,106 @@
package stages
import (
"reflect"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/grafana/loki/pkg/promtail/constants"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
)
const (
ErrTenantStageEmptySourceOrValue = "source or value config are required"
ErrTenantStageConflictingSourceAndValue = "source and value are mutually exclusive: you should set source or value but not both"
)
type tenantStage struct {
cfg TenantConfig
logger log.Logger
}
type TenantConfig struct {
Source string `mapstructure:"source"`
Value string `mapstructure:"value"`
}
// validateTenantConfig validates the tenant stage configuration
func validateTenantConfig(c TenantConfig) error {
if c.Source == "" && c.Value == "" {
return errors.New(ErrTenantStageEmptySourceOrValue)
}
if c.Source != "" && c.Value != "" {
return errors.New(ErrTenantStageConflictingSourceAndValue)
}
return nil
}
// newTenantStage creates a new tenant stage to override the tenant ID from extracted data
func newTenantStage(logger log.Logger, configs interface{}) (*tenantStage, error) {
cfg := TenantConfig{}
err := mapstructure.Decode(configs, &cfg)
if err != nil {
return nil, err
}
err = validateTenantConfig(cfg)
if err != nil {
return nil, err
}
return &tenantStage{
cfg: cfg,
logger: logger,
}, nil
}
// Process implements Stage
func (s *tenantStage) Process(labels model.LabelSet, extracted map[string]interface{}, t *time.Time, entry *string) {
var tenantID string
// Get tenant ID from source or configured value
if s.cfg.Source != "" {
tenantID = s.getTenantFromSourceField(extracted)
} else {
tenantID = s.cfg.Value
}
// Skip an empty tenant ID (i.e. failed to get the tenant from the source)
if tenantID == "" {
return
}
labels[constants.ReservedLabelTenantID] = model.LabelValue(tenantID)
}
// Name implements Stage
func (s *tenantStage) Name() string {
return StageTypeTenant
}
func (s *tenantStage) getTenantFromSourceField(extracted map[string]interface{}) string {
// Get the tenant ID from the source data
value, ok := extracted[s.cfg.Source]
if !ok {
if Debug {
level.Debug(s.logger).Log("msg", "the tenant source does not exist in the extracted data", "source", s.cfg.Source)
}
return ""
}
// Convert the value to string
tenantID, err := getString(value)
if err != nil {
if Debug {
level.Debug(s.logger).Log("msg", "failed to convert value to string", "err", err, "type", reflect.TypeOf(value).String())
}
return ""
}
return tenantID
}

@ -0,0 +1,156 @@
package stages
import (
"testing"
"time"
"github.com/cortexproject/cortex/pkg/util"
"github.com/grafana/loki/pkg/promtail/constants"
lokiutil "github.com/grafana/loki/pkg/util"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestTenantStage_Validation(t *testing.T) {
t.Parallel()
tests := map[string]struct {
config *TenantConfig
expectedErr *string
}{
"should pass on source config option set": {
config: &TenantConfig{
Source: "tenant",
},
expectedErr: nil,
},
"should pass on value config option set": {
config: &TenantConfig{
Value: "team-a",
},
expectedErr: nil,
},
"should fail on missing source and value": {
config: &TenantConfig{},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on empty source": {
config: &TenantConfig{
Source: "",
},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on empty value": {
config: &TenantConfig{
Value: "",
},
expectedErr: lokiutil.StringRef(ErrTenantStageEmptySourceOrValue),
},
"should fail on both source and value set": {
config: &TenantConfig{
Source: "tenant",
Value: "team-a",
},
expectedErr: lokiutil.StringRef(ErrTenantStageConflictingSourceAndValue),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
stage, err := newTenantStage(util.Logger, testData.config)
if testData.expectedErr != nil {
assert.EqualError(t, err, *testData.expectedErr)
assert.Nil(t, stage)
} else {
assert.NoError(t, err)
assert.NotNil(t, stage)
}
})
}
}
func TestTenantStage_Process(t *testing.T) {
t.Parallel()
tests := map[string]struct {
config *TenantConfig
inputLabels model.LabelSet
inputExtracted map[string]interface{}
expectedTenant *string
}{
"should not set the tenant if the source field is not defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{},
expectedTenant: nil,
},
"should not override the tenant if the source field is not defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("foo"),
},
"should set the tenant if the source field is defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{"tenant_id": "bar"},
expectedTenant: lokiutil.StringRef("bar"),
},
"should override the tenant if the source field is defined in the extracted map": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{"tenant_id": "bar"},
expectedTenant: lokiutil.StringRef("bar"),
},
"should not set the tenant if the source field data type can't be converted to string": {
config: &TenantConfig{Source: "tenant_id"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{"tenant_id": []string{"bar"}},
expectedTenant: nil,
},
"should set the tenant with the configured static value": {
config: &TenantConfig{Value: "bar"},
inputLabels: model.LabelSet{},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("bar"),
},
"should override the tenant with the configured static value": {
config: &TenantConfig{Value: "bar"},
inputLabels: model.LabelSet{constants.ReservedLabelTenantID: "foo"},
inputExtracted: map[string]interface{}{},
expectedTenant: lokiutil.StringRef("bar"),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
stage, err := newTenantStage(util.Logger, testData.config)
require.NoError(t, err)
// Process a dummy line and ensure nothing has changed except
// the tenant reserved label
timestamp := time.Unix(1, 1)
entry := "hello world"
labels := testData.inputLabels.Clone()
extracted := testData.inputExtracted
stage.Process(labels, extracted, &timestamp, &entry)
assert.Equal(t, time.Unix(1, 1), timestamp)
assert.Equal(t, "hello world", entry)
actualTenant, ok := labels[constants.ReservedLabelTenantID]
if testData.expectedTenant == nil {
assert.False(t, ok)
} else {
assert.Equal(t, *testData.expectedTenant, string(actualTenant))
}
})
}
}

@ -0,0 +1,90 @@
package client
import (
"time"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/grafana/loki/pkg/logproto"
"github.com/prometheus/common/model"
)
// batch holds pending log streams waiting to be sent to Loki. It is used to
// reduce the number of push requests to Loki by aggregating multiple log
// streams and entries in a single batch request. When promtail runs in
// multi-tenant mode, log streams for each tenant are stored in a dedicated batch.
type batch struct {
streams map[model.Fingerprint]*logproto.Stream
bytes int
createdAt time.Time
}
func newBatch(entries ...entry) *batch {
b := &batch{
streams: map[model.Fingerprint]*logproto.Stream{},
bytes: 0,
createdAt: time.Now(),
}
// Add entries to the batch
for _, entry := range entries {
b.add(entry)
}
return b
}
// add an entry to the batch
func (b *batch) add(entry entry) {
b.bytes += len(entry.Line)
// Append the entry to an already existing stream (if any)
fp := entry.labels.FastFingerprint()
if stream, ok := b.streams[fp]; ok {
stream.Entries = append(stream.Entries, entry.Entry)
return
}
// Add the entry as a new stream
b.streams[fp] = &logproto.Stream{
Labels: entry.labels.String(),
Entries: []logproto.Entry{entry.Entry},
}
}
// sizeBytes returns the current batch size in bytes
func (b *batch) sizeBytes() int {
return b.bytes
}
// sizeBytesAfter returns the size the batch would have once the input entry
// is added to it
func (b *batch) sizeBytesAfter(entry entry) int {
return b.bytes + len(entry.Line)
}
// age of the batch since its creation
func (b *batch) age() time.Duration {
return time.Since(b.createdAt)
}
// encode the batch as a snappy-compressed push request, and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encode() ([]byte, int, error) {
req := logproto.PushRequest{
Streams: make([]*logproto.Stream, 0, len(b.streams)),
}
entriesCount := 0
for _, stream := range b.streams {
req.Streams = append(req.Streams, stream)
entriesCount += len(stream.Entries)
}
buf, err := proto.Marshal(&req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
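
To show how these pieces are meant to fit together, here is a hedged sketch (not part of this commit) of driving one batch per tenant with the API above; `flush` and `maxBatchSize` are hypothetical placeholders, while `entry`, `newBatch`, `add`, `sizeBytesAfter` and `encode` are the types and methods shown in this file and in client.go:
```go
package client

import (
	"time"

	"github.com/grafana/loki/pkg/logproto"
	"github.com/prometheus/common/model"
)

// sketchBatching is a hypothetical helper showing the intended per-tenant
// batching flow: entries are grouped in a dedicated batch per tenant, and a
// batch is flushed once adding another entry would exceed the size limit.
func sketchBatching(batches map[string]*batch, maxBatchSize int, flush func(tenantID string, buf []byte) error) error {
	e := entry{
		tenantID: "team-a",
		labels:   model.LabelSet{"app": "api"},
		Entry:    logproto.Entry{Timestamp: time.Now(), Line: "hello world"},
	}

	b, ok := batches[e.tenantID]
	if !ok {
		// First entry for this tenant: start a dedicated batch.
		batches[e.tenantID] = newBatch(e)
		return nil
	}
	if b.sizeBytesAfter(e) > maxBatchSize {
		// Adding the entry would exceed the max batch size: encode and flush
		// the current batch, then start a new one holding the entry.
		buf, _, err := b.encode()
		if err != nil {
			return err
		}
		if err := flush(e.tenantID, buf); err != nil {
			return err
		}
		batches[e.tenantID] = newBatch(e)
		return nil
	}
	// Otherwise just append the entry to the existing per-tenant batch.
	b.add(e)
	return nil
}
```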

@ -0,0 +1,105 @@
package client
import (
"testing"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestBatch_add(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputEntries []entry
expectedSizeBytes int
}{
"empty batch": {
inputEntries: []entry{},
expectedSizeBytes: 0,
},
"single stream with single log entry": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line),
},
"single stream with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
{"tenant", model.LabelSet{}, logEntries[1].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line),
},
"multiple streams with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line) + len(logEntries[2].Entry.Line),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
b := newBatch()
for _, entry := range testData.inputEntries {
b.add(entry)
}
assert.Equal(t, testData.expectedSizeBytes, b.sizeBytes())
})
}
}
func TestBatch_encode(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch *batch
expectedEntriesCount int
}{
"empty batch": {
inputBatch: newBatch(),
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
),
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{}, logEntries[1].Entry},
),
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
entry{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
),
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := testData.inputBatch.encode()
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}

@ -12,12 +12,11 @@ import (
"time"
"github.com/grafana/loki/pkg/promtail/api"
"github.com/grafana/loki/pkg/promtail/constants"
"github.com/cortexproject/cortex/pkg/util"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/grafana/loki/pkg/helpers"
"github.com/grafana/loki/pkg/logproto"
@ -92,7 +91,8 @@ type client struct {
}
type entry struct {
labels model.LabelSet
tenantID string
labels model.LabelSet
logproto.Entry
}
@ -125,13 +125,26 @@ func New(cfg Config, logger log.Logger) (Client, error) {
}
func (c *client) run() {
batch := map[model.Fingerprint]*logproto.Stream{}
batchSize := 0
maxWait := time.NewTicker(c.cfg.BatchWait)
batches := map[string]*batch{}
// Given the client handles multiple batches (1 per tenant) and each batch
// can be created at a different point in time, we look for batches whose
// max wait time has been reached 10 times per BatchWait period, so that the
// maximum extra delay in sending a batch is 10% of the max waiting time.
// The check interval is never lower than 10ms, to avoid checking too
// frequently when BatchWait is very low.
minWaitCheckFrequency := 10 * time.Millisecond
maxWaitCheckFrequency := c.cfg.BatchWait / 10
if maxWaitCheckFrequency < minWaitCheckFrequency {
maxWaitCheckFrequency = minWaitCheckFrequency
}
maxWaitCheck := time.NewTicker(maxWaitCheckFrequency)
defer func() {
if len(batch) > 0 {
c.sendBatch(batch)
// Send all pending batches
for tenantID, batch := range batches {
c.sendBatch(tenantID, batch)
}
c.wg.Done()
@ -143,35 +156,42 @@ func (c *client) run() {
return
case e := <-c.entries:
if batchSize+len(e.Line) > c.cfg.BatchSize {
c.sendBatch(batch)
batchSize = 0
batch = map[model.Fingerprint]*logproto.Stream{}
}
batch, ok := batches[e.tenantID]
batchSize += len(e.Line)
fp := e.labels.FastFingerprint()
stream, ok := batch[fp]
// If the batch doesn't exist yet, we create a new one with the entry
if !ok {
stream = &logproto.Stream{
Labels: e.labels.String(),
}
batch[fp] = stream
batches[e.tenantID] = newBatch(e)
break
}
// If adding the entry to the batch will increase the size over the max
// size allowed, we do send the current batch and then create a new one
if batch.sizeBytesAfter(e) > c.cfg.BatchSize {
c.sendBatch(e.tenantID, batch)
batches[e.tenantID] = newBatch(e)
break
}
stream.Entries = append(stream.Entries, e.Entry)
case <-maxWait.C:
if len(batch) > 0 {
c.sendBatch(batch)
batchSize = 0
batch = map[model.Fingerprint]*logproto.Stream{}
// The max size of the batch isn't reached, so we can add the entry
batch.add(e)
case <-maxWaitCheck.C:
// Send all batches whose max wait time has been reached
for tenantID, batch := range batches {
if batch.age() < c.cfg.BatchWait {
continue
}
c.sendBatch(tenantID, batch)
delete(batches, tenantID)
}
}
}
}
func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
buf, entriesCount, err := encodeBatch(batch)
func (c *client) sendBatch(tenantID string, batch *batch) {
buf, entriesCount, err := batch.encode()
if err != nil {
level.Error(c.logger).Log("msg", "error encoding batch", "error", err)
return
@ -184,7 +204,7 @@ func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
var status int
for backoff.Ongoing() {
start := time.Now()
status, err = c.send(ctx, buf)
status, err = c.send(ctx, tenantID, buf)
requestDuration.WithLabelValues(strconv.Itoa(status), c.cfg.URL.Host).Observe(time.Since(start).Seconds())
if err == nil {
@ -209,26 +229,7 @@ func (c *client) sendBatch(batch map[model.Fingerprint]*logproto.Stream) {
}
}
func encodeBatch(batch map[model.Fingerprint]*logproto.Stream) ([]byte, int, error) {
req := logproto.PushRequest{
Streams: make([]*logproto.Stream, 0, len(batch)),
}
entriesCount := 0
for _, stream := range batch {
req.Streams = append(req.Streams, stream)
entriesCount += len(stream.Entries)
}
buf, err := proto.Marshal(&req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
func (c *client) send(ctx context.Context, buf []byte) (int, error) {
func (c *client) send(ctx context.Context, tenantID string, buf []byte) (int, error) {
ctx, cancel := context.WithTimeout(ctx, c.cfg.Timeout)
defer cancel()
req, err := http.NewRequest("POST", c.cfg.URL.String(), bytes.NewReader(buf))
@ -238,6 +239,12 @@ func (c *client) send(ctx context.Context, buf []byte) (int, error) {
req = req.WithContext(ctx)
req.Header.Set("Content-Type", contentType)
// If the tenant ID is not empty, promtail is running in multi-tenant mode, so
// we should send it to Loki
if tenantID != "" {
req.Header.Set("X-Scope-OrgID", tenantID)
}
resp, err := c.client.Do(req)
if err != nil {
return -1, err
@ -255,6 +262,22 @@ func (c *client) send(ctx context.Context, buf []byte) (int, error) {
return resp.StatusCode, err
}
func (c *client) getTenantID(labels model.LabelSet) string {
// Check if it has been overridden while processing the pipeline stages
if value, ok := labels[constants.ReservedLabelTenantID]; ok {
return string(value)
}
// Check if it has been specified in the config
if c.cfg.TenantID != "" {
return c.cfg.TenantID
}
// Defaults to an empty string, which means the X-Scope-OrgID header
// will not be sent
return ""
}
// Stop the client.
func (c *client) Stop() {
c.once.Do(func() { close(c.quit) })
@ -267,7 +290,16 @@ func (c *client) Handle(ls model.LabelSet, t time.Time, s string) error {
ls = c.externalLabels.Merge(ls)
}
c.entries <- entry{ls, logproto.Entry{
// Get the tenant ID in case it has been overridden while processing
// the pipeline stages, then remove the special label
tenantID := c.getTenantID(ls)
if _, ok := ls[constants.ReservedLabelTenantID]; ok {
// Clone the label set to not manipulate the input one
ls = ls.Clone()
delete(ls, constants.ReservedLabelTenantID)
}
c.entries <- entry{tenantID, ls, logproto.Entry{
Timestamp: t,
Line: s,
}}

@ -27,18 +27,27 @@ var (
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(1, 0).UTC(), Line: "line1"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(2, 0).UTC(), Line: "line2"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(3, 0).UTC(), Line: "line3"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(4, 0).UTC(), Line: "line4"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(5, 0).UTC(), Line: "line5"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-2"}, Entry: logproto.Entry{Timestamp: time.Unix(6, 0).UTC(), Line: "line6"}},
}
)
type receivedReq struct {
tenantID string
pushReq logproto.PushRequest
}
func TestClient_Handle(t *testing.T) {
tests := map[string]struct {
clientBatchSize int
clientBatchWait time.Duration
clientMaxRetries int
clientTenantID string
serverResponseStatus int
inputEntries []entry
inputDelay time.Duration
expectedBatches [][]*logproto.Stream
expectedReqs []receivedReq
expectedMetrics string
}{
"batch log entries together until the batch size is reached": {
@ -47,12 +56,14 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1], logEntries[2]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}}}},
},
},
expectedMetrics: `
@ -68,12 +79,14 @@ func TestClient_Handle(t *testing.T) {
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
inputDelay: 110 * time.Millisecond,
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}}}},
},
},
expectedMetrics: `
@ -88,15 +101,18 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 500,
inputEntries: []entry{logEntries[0]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
@ -111,9 +127,10 @@ func TestClient_Handle(t *testing.T) {
clientMaxRetries: 3,
serverResponseStatus: 400,
inputEntries: []entry{logEntries[0]},
expectedBatches: [][]*logproto.Stream{
expectedReqs: []receivedReq{
{
{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
tenantID: "",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
@ -122,6 +139,52 @@ func TestClient_Handle(t *testing.T) {
promtail_dropped_entries_total{host="__HOST__"} 1.0
`,
},
"batch log entries together honoring the client tenant ID": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
`,
},
"batch log entries together honoring the tenant ID overridden while processing the pipeline stages": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[3], logEntries[4], logEntries[5]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "tenant-1",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[3].Entry, logEntries[4].Entry}}}},
},
{
tenantID: "tenant-2",
pushReq: logproto.PushRequest{Streams: []*logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[5].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 4.0
`,
},
}
for testName, testData := range tests {
@ -131,7 +194,7 @@ func TestClient_Handle(t *testing.T) {
droppedEntries.Reset()
// Create a buffer channel where we do enqueue received requests
receivedReqsChan := make(chan logproto.PushRequest, 10)
receivedReqsChan := make(chan receivedReq, 10)
// Start a local HTTP server
server := httptest.NewServer(createServerHandler(receivedReqsChan, testData.serverResponseStatus))
@ -152,6 +215,7 @@ func TestClient_Handle(t *testing.T) {
BackoffConfig: util.BackoffConfig{MinBackoff: 1 * time.Millisecond, MaxBackoff: 2 * time.Millisecond, MaxRetries: testData.clientMaxRetries},
ExternalLabels: lokiflag.LabelSet{},
Timeout: 1 * time.Second,
TenantID: testData.clientTenantID,
}
c, err := New(cfg, log.NewNopLogger())
@ -169,7 +233,7 @@ func TestClient_Handle(t *testing.T) {
// Wait until the expected push requests are received (with a timeout)
deadline := time.Now().Add(1 * time.Second)
for len(receivedReqsChan) < len(testData.expectedBatches) && time.Now().Before(deadline) {
for len(receivedReqsChan) < len(testData.expectedReqs) && time.Now().Before(deadline) {
time.Sleep(5 * time.Millisecond)
}
@ -178,15 +242,15 @@ func TestClient_Handle(t *testing.T) {
close(receivedReqsChan)
// Get all push requests received on the server side
receivedReqs := make([]logproto.PushRequest, 0)
receivedReqs := make([]receivedReq, 0)
for req := range receivedReqsChan {
receivedReqs = append(receivedReqs, req)
}
require.Equal(t, len(testData.expectedBatches), len(receivedReqs))
for i, batch := range receivedReqs {
assert.Equal(t, testData.expectedBatches[i], batch.Streams)
}
// Due to implementation details (map iteration order is random) we just check
// that the expected requests match the received requests, without checking
// the exact order, which is not guaranteed in the multi-tenant case
require.ElementsMatch(t, testData.expectedReqs, receivedReqs)
expectedMetrics := strings.Replace(testData.expectedMetrics, "__HOST__", serverURL.Host, -1)
err = testutil.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(expectedMetrics), "promtail_sent_entries_total", "promtail_dropped_entries_total")
@ -195,52 +259,7 @@ func TestClient_Handle(t *testing.T) {
}
}
func TestClient_encodeBatch(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch map[model.Fingerprint]*logproto.Stream
expectedEntriesCount int
}{
"empty batch": {
inputBatch: map[model.Fingerprint]*logproto.Stream{},
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}},
},
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
},
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: map[model.Fingerprint]*logproto.Stream{
model.Fingerprint(1): {Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}},
model.Fingerprint(2): {Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}},
},
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := encodeBatch(testData.inputBatch)
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}
func createServerHandler(receivedReqsChan chan logproto.PushRequest, status int) http.HandlerFunc {
func createServerHandler(receivedReqsChan chan receivedReq, status int) http.HandlerFunc {
return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
// Parse the request
var pushReq logproto.PushRequest
@ -249,7 +268,11 @@ func createServerHandler(receivedReqsChan chan logproto.PushRequest, status int)
return
}
receivedReqsChan <- pushReq
receivedReqsChan <- receivedReq{
tenantID: req.Header.Get("X-Scope-OrgID"),
pushReq: pushReq,
}
rw.WriteHeader(status)
})
}

@ -22,6 +22,10 @@ type Config struct {
// The labels to add to any time series or alerts when communicating with loki
ExternalLabels lokiflag.LabelSet `yaml:"external_labels,omitempty"`
Timeout time.Duration `yaml:"timeout"`
// The tenant ID to use when pushing logs to Loki (empty string means
// single tenant mode)
TenantID string `yaml:"tenant_id"`
}
// RegisterFlags registers flags.
@ -35,6 +39,8 @@ func (c *Config) RegisterFlags(flags *flag.FlagSet) {
flag.DurationVar(&c.BackoffConfig.MaxBackoff, "client.max-backoff", 5*time.Second, "Maximum backoff time between retries.")
flag.DurationVar(&c.Timeout, "client.timeout", 10*time.Second, "Maximum time to wait for server to respond to a request")
flags.Var(&c.ExternalLabels, "client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")
flags.StringVar(&c.TenantID, "client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
}
// UnmarshalYAML implement Yaml Unmarshaler

@ -0,0 +1,7 @@
package constants
const (
// Label reserved to override the tenant ID while processing
// pipeline stages
ReservedLabelTenantID = "__tenant_id__"
)