|
|
|
|
@ -27,7 +27,6 @@ import ( |
|
|
|
|
"github.com/golang/snappy" |
|
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus" |
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promauto" |
|
|
|
|
"github.com/prometheus/prometheus/config" |
|
|
|
|
"github.com/prometheus/prometheus/pkg/labels" |
|
|
|
|
"github.com/prometheus/prometheus/pkg/relabel" |
|
|
|
|
@ -46,8 +45,27 @@ const ( |
|
|
|
|
shardToleranceFraction = 0.3 |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
var ( |
|
|
|
|
succeededSamplesTotal = promauto.NewCounterVec( |
|
|
|
|
type queueManagerMetrics struct { |
|
|
|
|
succeededSamplesTotal *prometheus.CounterVec |
|
|
|
|
failedSamplesTotal *prometheus.CounterVec |
|
|
|
|
retriedSamplesTotal *prometheus.CounterVec |
|
|
|
|
droppedSamplesTotal *prometheus.CounterVec |
|
|
|
|
enqueueRetriesTotal *prometheus.CounterVec |
|
|
|
|
sentBatchDuration *prometheus.HistogramVec |
|
|
|
|
queueHighestSentTimestamp *prometheus.GaugeVec |
|
|
|
|
queuePendingSamples *prometheus.GaugeVec |
|
|
|
|
shardCapacity *prometheus.GaugeVec |
|
|
|
|
numShards *prometheus.GaugeVec |
|
|
|
|
maxNumShards *prometheus.GaugeVec |
|
|
|
|
minNumShards *prometheus.GaugeVec |
|
|
|
|
desiredNumShards *prometheus.GaugeVec |
|
|
|
|
bytesSent *prometheus.CounterVec |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func newQueueManagerMetrics(r prometheus.Registerer) *queueManagerMetrics { |
|
|
|
|
m := &queueManagerMetrics{} |
|
|
|
|
|
|
|
|
|
m.succeededSamplesTotal = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -56,7 +74,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
failedSamplesTotal = promauto.NewCounterVec( |
|
|
|
|
m.failedSamplesTotal = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -65,7 +83,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
retriedSamplesTotal = promauto.NewCounterVec( |
|
|
|
|
m.retriedSamplesTotal = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -74,7 +92,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
droppedSamplesTotal = promauto.NewCounterVec( |
|
|
|
|
m.droppedSamplesTotal = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -83,7 +101,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
enqueueRetriesTotal = promauto.NewCounterVec( |
|
|
|
|
m.enqueueRetriesTotal = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -92,7 +110,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
sentBatchDuration = promauto.NewHistogramVec( |
|
|
|
|
m.sentBatchDuration = prometheus.NewHistogramVec( |
|
|
|
|
prometheus.HistogramOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -102,7 +120,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
queueHighestSentTimestamp = promauto.NewGaugeVec( |
|
|
|
|
m.queueHighestSentTimestamp = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -111,7 +129,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
queuePendingSamples = promauto.NewGaugeVec( |
|
|
|
|
m.queuePendingSamples = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -120,7 +138,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
shardCapacity = promauto.NewGaugeVec( |
|
|
|
|
m.shardCapacity = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -129,7 +147,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
numShards = promauto.NewGaugeVec( |
|
|
|
|
m.numShards = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -138,7 +156,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
maxNumShards = promauto.NewGaugeVec( |
|
|
|
|
m.maxNumShards = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -147,7 +165,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
minNumShards = promauto.NewGaugeVec( |
|
|
|
|
m.minNumShards = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -156,7 +174,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
desiredNumShards = promauto.NewGaugeVec( |
|
|
|
|
m.desiredNumShards = prometheus.NewGaugeVec( |
|
|
|
|
prometheus.GaugeOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -165,7 +183,7 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
bytesSent = promauto.NewCounterVec( |
|
|
|
|
m.bytesSent = prometheus.NewCounterVec( |
|
|
|
|
prometheus.CounterOpts{ |
|
|
|
|
Namespace: namespace, |
|
|
|
|
Subsystem: subsystem, |
|
|
|
|
@ -174,7 +192,27 @@ var ( |
|
|
|
|
}, |
|
|
|
|
[]string{remoteName, endpoint}, |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
if r != nil { |
|
|
|
|
r.MustRegister( |
|
|
|
|
m.succeededSamplesTotal, |
|
|
|
|
m.failedSamplesTotal, |
|
|
|
|
m.retriedSamplesTotal, |
|
|
|
|
m.droppedSamplesTotal, |
|
|
|
|
m.enqueueRetriesTotal, |
|
|
|
|
m.sentBatchDuration, |
|
|
|
|
m.queueHighestSentTimestamp, |
|
|
|
|
m.queuePendingSamples, |
|
|
|
|
m.shardCapacity, |
|
|
|
|
m.numShards, |
|
|
|
|
m.maxNumShards, |
|
|
|
|
m.minNumShards, |
|
|
|
|
m.desiredNumShards, |
|
|
|
|
m.bytesSent, |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
return m |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// StorageClient defines an interface for sending a batch of samples to an
|
|
|
|
|
// external timeseries database.
|
|
|
|
|
@ -215,6 +253,7 @@ type QueueManager struct { |
|
|
|
|
|
|
|
|
|
samplesIn, samplesDropped, samplesOut, samplesOutDuration *ewmaRate |
|
|
|
|
|
|
|
|
|
metrics *queueManagerMetrics |
|
|
|
|
highestSentTimestampMetric *maxGauge |
|
|
|
|
pendingSamplesMetric prometheus.Gauge |
|
|
|
|
enqueueRetriesMetric prometheus.Counter |
|
|
|
|
@ -232,7 +271,7 @@ type QueueManager struct { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// NewQueueManager builds a new QueueManager.
|
|
|
|
|
func NewQueueManager(reg prometheus.Registerer, logger log.Logger, walDir string, samplesIn *ewmaRate, cfg config.QueueConfig, externalLabels labels.Labels, relabelConfigs []*relabel.Config, client StorageClient, flushDeadline time.Duration) *QueueManager { |
|
|
|
|
func NewQueueManager(reg prometheus.Registerer, metrics *queueManagerMetrics, logger log.Logger, walDir string, samplesIn *ewmaRate, cfg config.QueueConfig, externalLabels labels.Labels, relabelConfigs []*relabel.Config, client StorageClient, flushDeadline time.Duration) *QueueManager { |
|
|
|
|
if logger == nil { |
|
|
|
|
logger = log.NewNopLogger() |
|
|
|
|
} |
|
|
|
|
@ -258,6 +297,8 @@ func NewQueueManager(reg prometheus.Registerer, logger log.Logger, walDir string |
|
|
|
|
samplesDropped: newEWMARate(ewmaWeight, shardUpdateDuration), |
|
|
|
|
samplesOut: newEWMARate(ewmaWeight, shardUpdateDuration), |
|
|
|
|
samplesOutDuration: newEWMARate(ewmaWeight, shardUpdateDuration), |
|
|
|
|
|
|
|
|
|
metrics: metrics, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
t.watcher = wal.NewWatcher(reg, wal.NewWatcherMetrics(reg), logger, client.Name(), t, walDir) |
|
|
|
|
@ -320,21 +361,21 @@ func (t *QueueManager) Start() { |
|
|
|
|
name := t.client.Name() |
|
|
|
|
ep := t.client.Endpoint() |
|
|
|
|
t.highestSentTimestampMetric = &maxGauge{ |
|
|
|
|
Gauge: queueHighestSentTimestamp.WithLabelValues(name, ep), |
|
|
|
|
} |
|
|
|
|
t.pendingSamplesMetric = queuePendingSamples.WithLabelValues(name, ep) |
|
|
|
|
t.enqueueRetriesMetric = enqueueRetriesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.droppedSamplesTotal = droppedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.numShardsMetric = numShards.WithLabelValues(name, ep) |
|
|
|
|
t.failedSamplesTotal = failedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.sentBatchDuration = sentBatchDuration.WithLabelValues(name, ep) |
|
|
|
|
t.succeededSamplesTotal = succeededSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.retriedSamplesTotal = retriedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.shardCapacity = shardCapacity.WithLabelValues(name, ep) |
|
|
|
|
t.maxNumShards = maxNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.minNumShards = minNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.desiredNumShards = desiredNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.bytesSent = bytesSent.WithLabelValues(name, ep) |
|
|
|
|
Gauge: t.metrics.queueHighestSentTimestamp.WithLabelValues(name, ep), |
|
|
|
|
} |
|
|
|
|
t.pendingSamplesMetric = t.metrics.queuePendingSamples.WithLabelValues(name, ep) |
|
|
|
|
t.enqueueRetriesMetric = t.metrics.enqueueRetriesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.droppedSamplesTotal = t.metrics.droppedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.numShardsMetric = t.metrics.numShards.WithLabelValues(name, ep) |
|
|
|
|
t.failedSamplesTotal = t.metrics.failedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.sentBatchDuration = t.metrics.sentBatchDuration.WithLabelValues(name, ep) |
|
|
|
|
t.succeededSamplesTotal = t.metrics.succeededSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.retriedSamplesTotal = t.metrics.retriedSamplesTotal.WithLabelValues(name, ep) |
|
|
|
|
t.shardCapacity = t.metrics.shardCapacity.WithLabelValues(name, ep) |
|
|
|
|
t.maxNumShards = t.metrics.maxNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.minNumShards = t.metrics.minNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.desiredNumShards = t.metrics.desiredNumShards.WithLabelValues(name, ep) |
|
|
|
|
t.bytesSent = t.metrics.bytesSent.WithLabelValues(name, ep) |
|
|
|
|
|
|
|
|
|
// Initialise some metrics.
|
|
|
|
|
t.shardCapacity.Set(float64(t.cfg.Capacity)) |
|
|
|
|
@ -374,19 +415,19 @@ func (t *QueueManager) Stop() { |
|
|
|
|
// Delete metrics so we don't have alerts for queues that are gone.
|
|
|
|
|
name := t.client.Name() |
|
|
|
|
ep := t.client.Endpoint() |
|
|
|
|
queueHighestSentTimestamp.DeleteLabelValues(name, ep) |
|
|
|
|
queuePendingSamples.DeleteLabelValues(name, ep) |
|
|
|
|
enqueueRetriesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
droppedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
numShards.DeleteLabelValues(name, ep) |
|
|
|
|
failedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
sentBatchDuration.DeleteLabelValues(name, ep) |
|
|
|
|
succeededSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
retriedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
shardCapacity.DeleteLabelValues(name, ep) |
|
|
|
|
maxNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
minNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
desiredNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.queueHighestSentTimestamp.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.queuePendingSamples.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.enqueueRetriesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.droppedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.numShards.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.failedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.sentBatchDuration.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.succeededSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.retriedSamplesTotal.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.shardCapacity.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.maxNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.minNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
t.metrics.desiredNumShards.DeleteLabelValues(name, ep) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// StoreSeries keeps track of which series we know about for lookups when sending samples to remote.
|
|
|
|
|
|