From 1765d7fe4b294b4cadc2fdc36ea83d9002f1a111 Mon Sep 17 00:00:00 2001
From: Sandeep Sukhani
Date: Thu, 17 Sep 2020 16:50:06 +0530
Subject: [PATCH] jsonnet for running loki using boltdb-shipper (#2547)

Adds optional boltdb-shipper support to the Loki ksonnet library:

* `_config.using_boltdb_shipper` switches ingesters and queriers to
  statefulsets backed by PVCs, so the index is not kept on node disk.
* A single-replica compactor statefulset is deployed with its own PVC.
* The index-writes memcached is not deployed with boltdb-shipper, since index
  writes are not deduped. The override defers to `super`, so
  boltdb_shipper.libsonnet must be imported after memcached.libsonnet.
* The ingester PVC size is read from `_config.ingester_pvc_size`, mirroring
  `_config.querier_pvc_size`.

---
 .../ksonnet/loki/boltdb_shipper.libsonnet     | 84 +++++++++++++++++++
 production/ksonnet/loki/config.libsonnet      |  9 ++
 production/ksonnet/loki/images.libsonnet      |  1 +
 production/ksonnet/loki/ingester.libsonnet    | 46 ++++++++--
 production/ksonnet/loki/loki.libsonnet        |  5 +-
 production/ksonnet/loki/querier.libsonnet     | 38 ++++++++-
 6 files changed, 173 insertions(+), 10 deletions(-)
 create mode 100644 production/ksonnet/loki/boltdb_shipper.libsonnet

diff --git a/production/ksonnet/loki/boltdb_shipper.libsonnet b/production/ksonnet/loki/boltdb_shipper.libsonnet
new file mode 100644
index 0000000000..3fb21c3a98
--- /dev/null
+++ b/production/ksonnet/loki/boltdb_shipper.libsonnet
@@ -0,0 +1,84 @@
+{
+  local pvc = $.core.v1.persistentVolumeClaim,
+  local volumeMount = $.core.v1.volumeMount,
+  local container = $.core.v1.container,
+  local statefulSet = $.apps.v1.statefulSet,
+  local service = $.core.v1.service,
+  local containerPort = $.core.v1.containerPort,
+
+  _config+:: {
+    // run ingesters and queriers as statefulsets when using boltdb-shipper to avoid using node disk for storing the index.
+    stateful_ingesters: if self.using_boltdb_shipper then true else super.stateful_ingesters,
+    stateful_queriers: if self.using_boltdb_shipper then true else super.stateful_queriers,
+
+    boltdb_shipper_shared_store: error 'must define boltdb_shipper_shared_store',
+    compactor_pvc_size: '10Gi',
+    index_period_hours: if self.using_boltdb_shipper then 24 else super.index_period_hours,
+    loki+: if self.using_boltdb_shipper then {
+      chunk_store_config+: {
+        write_dedupe_cache_config:: {},
+      },
+      storage_config+: {
+        boltdb_shipper: {
+          shared_store: $._config.boltdb_shipper_shared_store,
+        },
+      },
+    } else {},
+  },
+
+  ingester_args+:: if $._config.using_boltdb_shipper then {
+    // Persist index in pvc
+    'boltdb.shipper.active-index-directory': '/data/index',
+
+    // Use PVC for caching
+    'boltdb.shipper.cache-location': '/data/boltdb-cache',
+  } else {},
+
+  querier_args+:: if $._config.using_boltdb_shipper then {
+    // Use PVC for caching
+    'boltdb.shipper.cache-location': '/data/boltdb-cache',
+  } else {},
+
+  // we don't dedupe index writes when using boltdb-shipper so don't deploy a cache for it (needs memcached.libsonnet imported first so super resolves).
+  memcached_index_writes: if $._config.using_boltdb_shipper then {} else super.memcached_index_writes,
+
+  // Use PVC for compactor instead of node disk.
+  compactor_data_pvc:: if $._config.using_boltdb_shipper then
+    pvc.new('compactor-data') +
+    pvc.mixin.spec.resources.withRequests({ storage: $._config.compactor_pvc_size }) +
+    pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
+    pvc.mixin.spec.withStorageClassName('fast')
+  else {},
+
+  compactor_args:: if $._config.using_boltdb_shipper then {
+    'config.file': '/etc/loki/config/config.yaml',
+    'boltdb.shipper.compactor.working-directory': '/data/compactor',
+    'boltdb.shipper.compactor.shared-store': $._config.boltdb_shipper_shared_store,
+    target: 'compactor',
+  } else {},
+
+  local compactor_ports =
+    [
+      containerPort.new(name='http-metrics', port=$._config.http_listen_port),
+    ],
+
+  compactor_container:: if $._config.using_boltdb_shipper then
+    container.new('compactor', $._images.compactor) +
+    container.withPorts(compactor_ports) +
+    container.withArgsMixin($.util.mapToFlags($.compactor_args)) +
+    container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) +
+    container.mixin.readinessProbe.httpGet.withPath('/ready') +
+    container.mixin.readinessProbe.httpGet.withPort($._config.http_listen_port) +
+    container.mixin.readinessProbe.withTimeoutSeconds(1) +
+    $.util.resourcesRequests('4', '2Gi')
+  else {},
+
+  compactor_statefulset: if $._config.using_boltdb_shipper then
+    statefulSet.new('compactor', 1, [$.compactor_container], $.compactor_data_pvc) +
+    statefulSet.mixin.spec.withServiceName('compactor') +
+    $.config_hash_mixin +
+    $.util.configVolumeMount('loki', '/etc/loki/config') +
+    statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
+    statefulSet.mixin.spec.template.spec.securityContext.withFsGroup(10001)  // 10001 is the group ID assigned to Loki in the Dockerfile
+  else {}
+}
diff --git a/production/ksonnet/loki/config.libsonnet b/production/ksonnet/loki/config.libsonnet
index b7b2a03326..ee6efafa66 100644
--- a/production/ksonnet/loki/config.libsonnet
+++ b/production/ksonnet/loki/config.libsonnet
@@ -9,6 +9,15 @@
 
     grpc_server_max_msg_size: 100 << 20,  // 100MB
 
+    // flag for tuning things when boltdb-shipper is current or upcoming index type.
+    using_boltdb_shipper: false,
+
+    // flags for running ingesters/queriers as a statefulset instead of deployment type.
+    stateful_ingesters: false,
+    ingester_pvc_size: '10Gi',
+
+    stateful_queriers: false,
+    querier_pvc_size: '10Gi',
 
     querier: {
       // This value should be set equal to (or less than) the CPU cores of the system the querier runs.
diff --git a/production/ksonnet/loki/images.libsonnet b/production/ksonnet/loki/images.libsonnet
index fa7188c492..26328d4d26 100644
--- a/production/ksonnet/loki/images.libsonnet
+++ b/production/ksonnet/loki/images.libsonnet
@@ -12,5 +12,6 @@
     tableManager: self.loki,
     query_frontend: self.loki,
     ruler: self.loki,
+    compactor: self.loki,
   },
 }
diff --git a/production/ksonnet/loki/ingester.libsonnet b/production/ksonnet/loki/ingester.libsonnet
index 38fb865058..e02a95a290 100644
--- a/production/ksonnet/loki/ingester.libsonnet
+++ b/production/ksonnet/loki/ingester.libsonnet
@@ -1,10 +1,18 @@
 {
   local container = $.core.v1.container,
+  local pvc = $.core.v1.persistentVolumeClaim,
+  local volumeMount = $.core.v1.volumeMount,
+  local statefulSet = $.apps.v1.statefulSet,
 
   ingester_args::
     $._config.commonArgs {
       target: 'ingester',
-    },
+    } + if $._config.stateful_ingesters then
+      {
+        // Disable chunk transfer when using statefulset since ingester which is going down won't find another
+        // ingester which is joining the ring for transferring chunks.
+        'ingester.max-transfer-retries': 0,
+      } else {},
 
   ingester_container::
     container.new('ingester', $._images.ingester) +
@@ -15,13 +23,17 @@
     container.mixin.readinessProbe.withInitialDelaySeconds(15) +
     container.mixin.readinessProbe.withTimeoutSeconds(1) +
     $.util.resourcesRequests('1', '5Gi') +
-    $.util.resourcesLimits('2', '10Gi'),
+    $.util.resourcesLimits('2', '10Gi') +
+    if $._config.stateful_ingesters then
+      container.withVolumeMountsMixin([
+        volumeMount.new('ingester-data', '/data'),
+      ]) else {},
 
   local deployment = $.apps.v1.deployment,
 
   local name = 'ingester',
 
-  ingester_deployment:
+  ingester_deployment: if !$._config.stateful_ingesters then
     deployment.new(name, 3, [$.ingester_container]) +
     $.config_hash_mixin +
     $.util.configVolumeMount('loki', '/etc/loki/config') +
@@ -30,10 +42,34 @@
     deployment.mixin.spec.withMinReadySeconds(60) +
     deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) +
     deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) +
-    deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(4800),
+    deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(4800)
+  else {},
+
+  // PVC for ingesters when running as statefulsets
+  ingester_data_pvc:: if $._config.stateful_ingesters then
+    pvc.new('ingester-data') +
+    pvc.mixin.spec.resources.withRequests({ storage: $._config.ingester_pvc_size }) +
+    pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
+    pvc.mixin.spec.withStorageClassName('fast')
+  else {},
+
+  ingester_statefulset: if $._config.stateful_ingesters then
+    statefulSet.new('ingester', 3, [$.ingester_container], $.ingester_data_pvc) +
+    statefulSet.mixin.spec.withServiceName('ingester') +
+    $.config_hash_mixin +
+    $.util.configVolumeMount('loki', '/etc/loki/config') +
+    $.util.configVolumeMount('overrides', '/etc/loki/overrides') +
+    $.util.antiAffinity +
+    statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
+    statefulSet.mixin.spec.template.spec.securityContext.withFsGroup(10001) +  // 10001 is the group ID assigned to Loki in the Dockerfile
+    statefulSet.mixin.spec.template.spec.withTerminationGracePeriodSeconds(4800)
+  else {},
 
   ingester_service:
-    $.util.serviceFor($.ingester_deployment),
+    if !$._config.stateful_ingesters then
+      $.util.serviceFor($.ingester_deployment)
+    else
+      $.util.serviceFor($.ingester_statefulset),
 
   local podDisruptionBudget = $.policy.v1beta1.podDisruptionBudget,
 
diff --git a/production/ksonnet/loki/loki.libsonnet b/production/ksonnet/loki/loki.libsonnet
index 141a2f8e07..2abd6dc7ce 100644
--- a/production/ksonnet/loki/loki.libsonnet
+++ b/production/ksonnet/loki/loki.libsonnet
@@ -14,5 +14,8 @@
 (import 'query-frontend.libsonnet') +
 (import 'ruler.libsonnet') +
 
 // Supporting services
-(import 'memcached.libsonnet')
+(import 'memcached.libsonnet') +
+
+// BoltDB Shipper support. Must be imported after memcached.libsonnet so that it can override memcached_index_writes.
+(import 'boltdb_shipper.libsonnet')
diff --git a/production/ksonnet/loki/querier.libsonnet b/production/ksonnet/loki/querier.libsonnet
index 6641945a34..625be9e5d9 100644
--- a/production/ksonnet/loki/querier.libsonnet
+++ b/production/ksonnet/loki/querier.libsonnet
@@ -1,5 +1,8 @@
 {
   local container = $.core.v1.container,
+  local pvc = $.core.v1.persistentVolumeClaim,
+  local volumeMount = $.core.v1.volumeMount,
+  local statefulSet = $.apps.v1.statefulSet,
 
   querier_args::
     $._config.commonArgs {
@@ -14,17 +17,44 @@
     container.mixin.readinessProbe.httpGet.withPort($._config.http_listen_port) +
     container.mixin.readinessProbe.withInitialDelaySeconds(15) +
     container.mixin.readinessProbe.withTimeoutSeconds(1) +
-    $.util.resourcesRequests('4', '2Gi'),
+    $.util.resourcesRequests('4', '2Gi') +
+    if $._config.stateful_queriers then
+      container.withVolumeMountsMixin([
+        volumeMount.new('querier-data', '/data'),
+      ]) else {},
 
   local deployment = $.apps.v1.deployment,
 
-  querier_deployment:
+  querier_deployment: if !$._config.stateful_queriers then
     deployment.new('querier', 3, [$.querier_container]) +
     $.config_hash_mixin +
     $.util.configVolumeMount('loki', '/etc/loki/config') +
     $.util.configVolumeMount('overrides', '/etc/loki/overrides') +
-    $.util.antiAffinity,
+    $.util.antiAffinity
+  else {},
+
+  // PVC for queriers when running as statefulsets
+  querier_data_pvc:: if $._config.stateful_queriers then
+    pvc.new('querier-data') +
+    pvc.mixin.spec.resources.withRequests({ storage: $._config.querier_pvc_size }) +
+    pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
+    pvc.mixin.spec.withStorageClassName('fast')
+  else {},
+
+  querier_statefulset: if $._config.stateful_queriers then
+    statefulSet.new('querier', 3, [$.querier_container], $.querier_data_pvc) +
+    statefulSet.mixin.spec.withServiceName('querier') +
+    $.config_hash_mixin +
+    $.util.configVolumeMount('loki', '/etc/loki/config') +
+    $.util.configVolumeMount('overrides', '/etc/loki/overrides') +
+    $.util.antiAffinity +
+    statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
+    statefulSet.mixin.spec.template.spec.securityContext.withFsGroup(10001)  // 10001 is the group ID assigned to Loki in the Dockerfile
+  else {},
 
   querier_service:
-    $.util.serviceFor($.querier_deployment),
+    if !$._config.stateful_queriers then
+      $.util.serviceFor($.querier_deployment)
+    else
+      $.util.serviceFor($.querier_statefulset),
 }