|
|
|
@ -76,20 +76,25 @@ type Head struct { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
type headMetrics struct { |
|
|
|
|
activeAppenders prometheus.Gauge |
|
|
|
|
series prometheus.Gauge |
|
|
|
|
seriesCreated prometheus.Counter |
|
|
|
|
seriesRemoved prometheus.Counter |
|
|
|
|
seriesNotFound prometheus.Counter |
|
|
|
|
chunks prometheus.Gauge |
|
|
|
|
chunksCreated prometheus.Counter |
|
|
|
|
chunksRemoved prometheus.Counter |
|
|
|
|
gcDuration prometheus.Summary |
|
|
|
|
minTime prometheus.GaugeFunc |
|
|
|
|
maxTime prometheus.GaugeFunc |
|
|
|
|
samplesAppended prometheus.Counter |
|
|
|
|
walTruncateDuration prometheus.Summary |
|
|
|
|
checkpointDeleteFail prometheus.Counter |
|
|
|
|
activeAppenders prometheus.Gauge |
|
|
|
|
series prometheus.Gauge |
|
|
|
|
seriesCreated prometheus.Counter |
|
|
|
|
seriesRemoved prometheus.Counter |
|
|
|
|
seriesNotFound prometheus.Counter |
|
|
|
|
chunks prometheus.Gauge |
|
|
|
|
chunksCreated prometheus.Counter |
|
|
|
|
chunksRemoved prometheus.Counter |
|
|
|
|
gcDuration prometheus.Summary |
|
|
|
|
minTime prometheus.GaugeFunc |
|
|
|
|
maxTime prometheus.GaugeFunc |
|
|
|
|
samplesAppended prometheus.Counter |
|
|
|
|
walTruncateDuration prometheus.Summary |
|
|
|
|
headTruncateFail prometheus.Counter |
|
|
|
|
headTruncateTotal prometheus.Counter |
|
|
|
|
checkpointDeleteFail prometheus.Counter |
|
|
|
|
checkpointDeleteTotal prometheus.Counter |
|
|
|
|
checkpointCreationFail prometheus.Counter |
|
|
|
|
checkpointCreationTotal prometheus.Counter |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { |
|
|
|
@ -151,9 +156,29 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { |
|
|
|
|
Name: "prometheus_tsdb_head_samples_appended_total", |
|
|
|
|
Help: "Total number of appended samples.", |
|
|
|
|
}) |
|
|
|
|
m.headTruncateFail = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_head_truncations_failed_total", |
|
|
|
|
Help: "Total number of head truncations that failed.", |
|
|
|
|
}) |
|
|
|
|
m.headTruncateTotal = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_head_truncations_total", |
|
|
|
|
Help: "Total number of head truncations attempted.", |
|
|
|
|
}) |
|
|
|
|
m.checkpointDeleteFail = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_checkpoint_delete_fail", |
|
|
|
|
Help: "Number of times deletion of old checkpoint failed.", |
|
|
|
|
Name: "prometheus_tsdb_checkpoint_deletions_failed_total", |
|
|
|
|
Help: "Total number of checkpoint deletions that failed.", |
|
|
|
|
}) |
|
|
|
|
m.checkpointDeleteTotal = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_checkpoint_deletions_total", |
|
|
|
|
Help: "Total number of checkpoint deletions attempted.", |
|
|
|
|
}) |
|
|
|
|
m.checkpointCreationFail = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_checkpoint_creations_failed_total", |
|
|
|
|
Help: "Total number of checkpoint creations that failed.", |
|
|
|
|
}) |
|
|
|
|
m.checkpointCreationTotal = prometheus.NewCounter(prometheus.CounterOpts{ |
|
|
|
|
Name: "prometheus_tsdb_checkpoint_creations_total", |
|
|
|
|
Help: "Total number of checkpoint creations attempted.", |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
if r != nil { |
|
|
|
@ -171,7 +196,12 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { |
|
|
|
|
m.gcDuration, |
|
|
|
|
m.walTruncateDuration, |
|
|
|
|
m.samplesAppended, |
|
|
|
|
m.headTruncateFail, |
|
|
|
|
m.headTruncateTotal, |
|
|
|
|
m.checkpointDeleteFail, |
|
|
|
|
m.checkpointDeleteTotal, |
|
|
|
|
m.checkpointCreationFail, |
|
|
|
|
m.checkpointCreationTotal, |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
return m |
|
|
|
@ -427,7 +457,12 @@ func (h *Head) Init() error { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Truncate removes old data before mint from the head.
|
|
|
|
|
func (h *Head) Truncate(mint int64) error { |
|
|
|
|
func (h *Head) Truncate(mint int64) (err error) { |
|
|
|
|
defer func() { |
|
|
|
|
if err != nil { |
|
|
|
|
h.metrics.headTruncateFail.Inc() |
|
|
|
|
} |
|
|
|
|
}() |
|
|
|
|
initialize := h.MinTime() == math.MaxInt64 |
|
|
|
|
|
|
|
|
|
if h.MinTime() >= mint && !initialize { |
|
|
|
@ -446,6 +481,7 @@ func (h *Head) Truncate(mint int64) error { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
h.metrics.headTruncateTotal.Inc() |
|
|
|
|
start := time.Now() |
|
|
|
|
|
|
|
|
|
h.gc() |
|
|
|
@ -475,9 +511,25 @@ func (h *Head) Truncate(mint int64) error { |
|
|
|
|
keep := func(id uint64) bool { |
|
|
|
|
return h.series.getByID(id) != nil |
|
|
|
|
} |
|
|
|
|
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint, h.metrics.checkpointDeleteFail); err != nil { |
|
|
|
|
h.metrics.checkpointCreationTotal.Inc() |
|
|
|
|
if _, err = Checkpoint(h.wal, m, n, keep, mint); err != nil { |
|
|
|
|
h.metrics.checkpointCreationFail.Inc() |
|
|
|
|
return errors.Wrap(err, "create checkpoint") |
|
|
|
|
} |
|
|
|
|
if err := h.wal.Truncate(n + 1); err != nil { |
|
|
|
|
// If truncating fails, we'll just try again at the next checkpoint.
|
|
|
|
|
// Leftover segments will just be ignored in the future if there's a checkpoint
|
|
|
|
|
// that supersedes them.
|
|
|
|
|
level.Error(h.logger).Log("msg", "truncating segments failed", "err", err) |
|
|
|
|
} |
|
|
|
|
h.metrics.checkpointDeleteTotal.Inc() |
|
|
|
|
if err := DeleteCheckpoints(h.wal.Dir(), n); err != nil { |
|
|
|
|
// Leftover old checkpoints do not cause problems down the line beyond
|
|
|
|
|
// occupying disk space.
|
|
|
|
|
// They will just be ignored since a higher checkpoint exists.
|
|
|
|
|
level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err) |
|
|
|
|
h.metrics.checkpointDeleteFail.Inc() |
|
|
|
|
} |
|
|
|
|
h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) |
|
|
|
|
|
|
|
|
|
level.Info(h.logger).Log("msg", "WAL checkpoint complete", |
|
|
|
|