Promtail: (and also fluent-bit) change the max batch size to 1MB (#2710)

* Change the max batch size to 1MB in all the defaults, including the Helm charts and fluent-bit, and centralize this config a little where possible.

* fix test
pull/2716/head
Ed Welch 5 years ago committed by GitHub
parent 9e6afea5f6
commit d3bf21e774
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 16
      cmd/docker-driver/config.go
  2. 34
      pkg/promtail/client/config.go
  3. 12
      pkg/promtail/client/config_test.go
  4. 2
      production/helm/fluent-bit/Chart.yaml
  5. 2
      production/helm/fluent-bit/values.yaml
  6. 2
      production/helm/loki-stack/Chart.yaml
  7. 2
      production/helm/promtail/Chart.yaml
  8. 8
      production/helm/promtail/values.yaml

@ -67,14 +67,14 @@ const (
var (
defaultClientConfig = client.Config{
BatchWait: 1 * time.Second,
BatchSize: 100 * 1024,
BatchWait: client.BatchWait,
BatchSize: client.BatchSize,
BackoffConfig: cortex_util.BackoffConfig{
MinBackoff: 100 * time.Millisecond,
MaxBackoff: 10 * time.Second,
MaxRetries: 10,
MinBackoff: client.MinBackoff,
MaxBackoff: client.MaxBackoff,
MaxRetries: client.MaxRetries,
},
Timeout: 10 * time.Second,
Timeout: client.Timeout,
}
)
@ -242,8 +242,8 @@ func parseConfig(logCtx logger.Info) (*config, error) {
// other labels coming from docker labels or env selected by user labels, labels-regex, env, env-regex config.
attrs, err := logCtx.ExtraAttributes(func(label string) string {
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
if err != nil {
return nil, err
}

@ -11,6 +11,16 @@ import (
lokiflag "github.com/grafana/loki/pkg/util/flagext"
)
// Default settings for the push client. They are used both as flag defaults
// and as the fallback values applied when unmarshalling YAML.
//
// NOTE: the Helm charts for promtail and fluent-bit also carry defaults for
// these values; please update them to match if you make changes here.
const (
	// BatchWait is the maximum wait period before sending a batch.
	BatchWait = time.Second
	// BatchSize is the maximum batch size, in bytes, to accrue before sending (1 MiB).
	BatchSize int = 1 << 20
	// MinBackoff is the initial backoff time between retries.
	MinBackoff = time.Second / 2
	// MaxBackoff is the maximum backoff time between retries.
	MaxBackoff = 5 * time.Minute
	// MaxRetries is the maximum number of retries when sending batches.
	MaxRetries int = 10
	// Timeout is the maximum time to wait for the server to respond to a request.
	Timeout = 10 * time.Second
)
// Config describes configuration for a HTTP pusher client.
type Config struct {
URL flagext.URLValue
@ -33,13 +43,13 @@ type Config struct {
// prefix. If prefix is a non-empty string, prefix should end with a period.
func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.Var(&c.URL, prefix+"client.url", "URL of log server")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", 1*time.Second, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", 1024*1024, "Maximum batch size to accrue before sending. ")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", BatchWait, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", BatchSize, "Maximum batch size to accrue before sending. ")
// Default backoff schedule: 0.5s, 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(4.267m) For a total time of 511.5s(8.5m) before logs are lost
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", 10, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", 500*time.Millisecond, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", 5*time.Minute, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", 10*time.Second, "Maximum time to wait for server to respond to a request")
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", MaxRetries, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", MinBackoff, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", MaxBackoff, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", Timeout, "Maximum time to wait for server to respond to a request")
f.Var(&c.ExternalLabels, prefix+"client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")
f.StringVar(&c.TenantID, prefix+"client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
@ -61,13 +71,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
// force sane defaults.
cfg = raw{
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
}
}

@ -44,13 +44,13 @@ func Test_Config(t *testing.T) {
URL: u,
},
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
},
},
{

@ -1,6 +1,6 @@
apiVersion: "v1"
name: fluent-bit
version: 0.3.0
version: 0.3.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Uses fluent-bit Loki go plugin for gathering logs and sending them to Loki"

@ -10,7 +10,7 @@ config:
port: 2020
tenantID: '""'
batchWait: 1
batchSize: 10240
batchSize: 1048576
loglevel: warn
lineFormat: json
k8sLoggingParser: "Off"

@ -1,6 +1,6 @@
apiVersion: "v1"
name: loki-stack
version: 0.41.0
version: 0.41.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Loki: like Prometheus, but for logs."

@ -1,6 +1,6 @@
apiVersion: "v1"
name: promtail
version: 0.25.0
version: 0.25.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Responsible for gathering logs and sending them to Loki"

@ -159,18 +159,18 @@ config:
# Maximum wait period before sending batch
batchwait: 1s
# Maximum batch size to accrue before sending, unit is byte
batchsize: 102400
batchsize: 1048576
# Maximum time to wait for server to respond to a request
timeout: 10s
backoff_config:
# Initial backoff time between retries
min_period: 100ms
min_period: 500ms
# Maximum backoff time between retries
max_period: 5s
max_period: 5m
# Maximum number of retries when sending batches, 0 means infinite retries
max_retries: 20
max_retries: 10
# The labels to add to any time series or alerts when communicating with loki
external_labels: {}

Loading…
Cancel
Save