|
|
|
|
@@ -225,7 +225,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
 		values:     map[string]stringset{},
 		symbols:    map[string]struct{}{},
 		postings:   index.NewUnorderedMemPostings(),
-		tombstones: NewMemTombstones(),
+		tombstones: newMemTombstones(),
 	}
 	h.metrics = newHeadMetrics(h, r)
 
|
|
|
|
|
|
|
|
|
@@ -237,22 +237,28 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
 // Samples before the mint timestamp are discarded.
 func (h *Head) processWALSamples(
 	minValidTime int64,
-	partition, total uint64,
 	input <-chan []RefSample, output chan<- []RefSample,
 ) (unknownRefs uint64) {
 	defer close(output)
 
+	// Mitigate lock contention in getByID.
+	refSeries := map[uint64]*memSeries{}
+
 	mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
 
 	for samples := range input {
 		for _, s := range samples {
-			if s.T < minValidTime || s.Ref%total != partition {
+			if s.T < minValidTime {
 				continue
 			}
-			ms := h.series.getByID(s.Ref)
+			ms := refSeries[s.Ref]
 			if ms == nil {
-				unknownRefs++
-				continue
+				ms = h.series.getByID(s.Ref)
+				if ms == nil {
+					unknownRefs++
+					continue
+				}
+				refSeries[s.Ref] = ms
 			}
 			_, chunkCreated := ms.append(s.T, s.V)
 			if chunkCreated {
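
Note on the hunk above: previously every worker received every decoded batch and kept only its modulo partition, paying one locked h.series.getByID lookup per retained sample. Batches now arrive pre-sharded by series reference (see the loadWAL hunks below), and each worker memoizes resolved series in its private refSeries map, so the locked lookup happens once per series rather than once per sample. A minimal sketch of the memoization pattern, with a hypothetical lookup function standing in for h.series.getByID:

	// resolve returns the cached series for ref, falling back to the
	// locked lookup only the first time a reference is seen.
	func resolve(cache map[uint64]*memSeries, lookup func(uint64) *memSeries, ref uint64) *memSeries {
		ms := cache[ref]
		if ms == nil {
			if ms = lookup(ref); ms != nil {
				cache[ref] = ms
			}
		}
		return ms
	}

The cache needs no lock of its own because each worker owns its shard of the reference space exclusively.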
|
|
|
|
@@ -310,25 +316,22 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 	// They are connected through a ring of channels which ensures that all sample batches
 	// read from the WAL are processed in order.
 	var (
-		wg         sync.WaitGroup
-		n          = runtime.GOMAXPROCS(0)
-		firstInput = make(chan []RefSample, 300)
-		input      = firstInput
+		wg      sync.WaitGroup
+		n       = runtime.GOMAXPROCS(0)
+		inputs  = make([]chan []RefSample, n)
+		outputs = make([]chan []RefSample, n)
 	)
 	wg.Add(n)
 
 	for i := 0; i < n; i++ {
-		output := make(chan []RefSample, 300)
+		outputs[i] = make(chan []RefSample, 300)
+		inputs[i] = make(chan []RefSample, 300)
 
-		go func(i int, input <-chan []RefSample, output chan<- []RefSample) {
-			unknown := h.processWALSamples(minValidTime, uint64(i), uint64(n), input, output)
+		go func(input <-chan []RefSample, output chan<- []RefSample) {
+			unknown := h.processWALSamples(minValidTime, input, output)
 			atomic.AddUint64(&unknownRefs, unknown)
 			wg.Done()
-		}(i, input, output)
-
-		// The output feeds the next worker goroutine. For the last worker,
-		// it feeds the initial input again to reuse the RefSample slices.
-		input = output
+		}(inputs[i], outputs[i])
 	}
 
 	var (
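
Note: the old pipeline daisy-chained the workers into a ring, so every batch was handed through all n goroutines and each worker filtered out everything outside its partition. The new layout gives each worker a private input/output channel pair and routes samples to one worker up front. Per-series ordering still holds because the shard function is deterministic and channels are FIFO; a sketch of that invariant:

	// shardFor picks the worker for a sample. Every sample of a given
	// series carries the same ref, so it always lands on the same
	// worker and is appended in WAL order.
	func shardFor(ref uint64, n int) int {
		return int(ref % uint64(n))
	}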
|
|
|
|
@@ -336,6 +339,7 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 		series  []RefSeries
 		samples []RefSample
 		tstones []Stone
+		err     error
 	)
 	for r.Next() {
 		series, samples, tstones = series[:0], samples[:0], tstones[:0]
|
|
|
|
@@ -343,7 +347,7 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 
 		switch dec.Type(rec) {
 		case RecordSeries:
-			series, err := dec.Series(rec, series)
+			series, err = dec.Series(rec, series)
 			if err != nil {
 				return errors.Wrap(err, "decode series")
 			}
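
Note: err is hoisted into the outer var block (previous hunk) so these decode calls can use = rather than :=. With :=, the case body declared fresh series/samples/tstones variables that shadowed the outer ones, and any slice growth done by the decoder was discarded after each record. A sketch of the pitfall, with a hypothetical decode that appends into the buffer it is handed:

	var (
		buf []int
		err error
	)
	for i := 0; i < 3; i++ {
		// With "buf, err := decode(buf[:0])" the inner buf would shadow
		// the outer one, so the outer slice never grows and every
		// iteration reallocates. "=" keeps reusing the backing array.
		buf, err = decode(buf[:0])
		if err != nil {
			break
		}
	}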
|
|
|
|
@@ -355,7 +359,8 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 				}
 			}
 		case RecordSamples:
-			samples, err := dec.Samples(rec, samples)
+			samples, err = dec.Samples(rec, samples)
+			s := samples
 			if err != nil {
 				return errors.Wrap(err, "decode samples")
 			}
|
|
|
|
@@ -364,20 +369,31 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 			// cause thousands of very large in flight buffers occupying large amounts
 			// of unused memory.
 			for len(samples) > 0 {
-				n := 5000
-				if len(samples) < n {
-					n = len(samples)
+				m := 5000
+				if len(samples) < m {
+					m = len(samples)
 				}
-				var buf []RefSample
-				select {
-				case buf = <-input:
-				default:
-				}
-				firstInput <- append(buf[:0], samples[:n]...)
-				samples = samples[n:]
+				shards := make([][]RefSample, n)
+				for i := 0; i < n; i++ {
+					var buf []RefSample
+					select {
+					case buf = <-outputs[i]:
+					default:
+					}
+					shards[i] = buf[:0]
+				}
+				for _, sam := range samples[:m] {
+					mod := sam.Ref % uint64(n)
+					shards[mod] = append(shards[mod], sam)
+				}
+				for i := 0; i < n; i++ {
+					inputs[i] <- shards[i]
+				}
+				samples = samples[m:]
 			}
+			samples = s // Keep whole slice for reuse.
 		case RecordTombstones:
-			tstones, err := dec.Tombstones(rec, tstones)
+			tstones, err = dec.Tombstones(rec, tstones)
 			if err != nil {
 				return errors.Wrap(err, "decode tombstones")
 			}
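
Note: the 5000-sample cap bounds the size of in-flight shard buffers, per the comment above the loop, and the s := samples / samples = s pair exists because the loop consumes the decoded slice by re-slicing. Restoring the original slice header afterwards returns the full backing array to dec.Samples for the next record. In outline:

	s := samples // remember the full slice header
	for len(samples) > 0 {
		m := 5000
		if len(samples) < m {
			m = len(samples)
		}
		// ...shard samples[:m] across the worker inputs...
		samples = samples[m:]
	}
	samples = s // next dec.Samples call reuses the whole backing array

The non-blocking receive from outputs[i] serves the same goal on the shard side: reuse a buffer a worker has handed back, or fall through with a nil slice and let append allocate a fresh one.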
|
|
|
|
@@ -397,9 +413,11 @@ func (h *Head) loadWAL(r *wal.Reader) error {
 		return errors.Wrap(r.Err(), "read records")
 	}
 
-	// Signal termination to first worker and wait for last one to close its output channel.
-	close(firstInput)
-	for range input {
+	// Signal termination to each worker and wait for it to close its output channel.
+	for i := 0; i < n; i++ {
+		close(inputs[i])
+		for range outputs[i] {
+		}
 	}
 	wg.Wait()
 
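
Note: the shutdown order matters here. Closing inputs[i] lets worker i leave its range loop, after which its deferred close(output) runs; draining outputs[i] before wg.Wait() is what prevents a deadlock, since a worker may still be blocked sending a recycled buffer into its buffered output channel. Annotated:

	for i := 0; i < n; i++ {
		close(inputs[i]) // no more batches for worker i
		for range outputs[i] {
			// discard leftover recycled buffers; this loop ends when the
			// worker's deferred close(output) fires
		}
	}
	wg.Wait() // every worker has now returned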
|
|
|
|
|
|
|
|
|
@@ -418,12 +436,12 @@ func (h *Head) Init() error {
 	}
 
 	// Backfill the checkpoint first if it exists.
-	cp, n, err := LastCheckpoint(h.wal.Dir())
+	dir, startFrom, err := LastCheckpoint(h.wal.Dir())
 	if err != nil && err != ErrNotFound {
 		return errors.Wrap(err, "find last checkpoint")
 	}
 	if err == nil {
-		sr, err := wal.NewSegmentsReader(filepath.Join(h.wal.Dir(), cp))
+		sr, err := wal.NewSegmentsReader(filepath.Join(h.wal.Dir(), dir))
 		if err != nil {
 			return errors.Wrap(err, "open checkpoint")
 		}
|
|
|
|
@@ -434,11 +452,11 @@ func (h *Head) Init() error {
 		if err := h.loadWAL(wal.NewReader(sr)); err != nil {
 			return errors.Wrap(err, "backfill checkpoint")
 		}
-		n++
+		startFrom++
 	}
 
 	// Backfill segments from the last checkpoint onwards
-	sr, err := wal.NewSegmentsRangeReader(h.wal.Dir(), n, -1)
+	sr, err := wal.NewSegmentsRangeReader(h.wal.Dir(), startFrom, -1)
 	if err != nil {
 		return errors.Wrap(err, "open WAL segments")
 	}
|
|
|
|
@@ -493,18 +511,18 @@ func (h *Head) Truncate(mint int64) (err error) {
 	}
 	start = time.Now()
 
-	m, n, err := h.wal.Segments()
+	first, last, err := h.wal.Segments()
 	if err != nil {
 		return errors.Wrap(err, "get segment range")
 	}
-	n-- // Never consider last segment for checkpoint.
-	if n < 0 {
+	last-- // Never consider last segment for checkpoint.
+	if last < 0 {
 		return nil // no segments yet.
 	}
 	// The lower third of segments should contain mostly obsolete samples.
 	// If we have less than three segments, it's not worth checkpointing yet.
-	n = m + (n-m)/3
-	if n <= m {
+	last = first + (last-first)/3
+	if last <= first {
 		return nil
 	}
 
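
Note: a worked example of the renamed checkpoint arithmetic. If Segments() reports first=4, last=10, the decrement excludes the still-active segment (last=9), and last = first + (last-first)/3 = 4 + 5/3 = 5, so segments 4-5 are checkpointed, the WAL is truncated from segment 6, and checkpoints up to 5 are deleted. With only segments 4-6 on disk, last becomes 5 after the decrement and 4 + (5-4)/3 = 4 <= first, so Truncate returns early: fewer than three completed segments are not worth checkpointing.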
|
|
|
|
|
|
|
|
|
@@ -512,18 +530,18 @@ func (h *Head) Truncate(mint int64) (err error) {
 		return h.series.getByID(id) != nil
 	}
 	h.metrics.checkpointCreationTotal.Inc()
-	if _, err = Checkpoint(h.wal, m, n, keep, mint); err != nil {
+	if _, err = Checkpoint(h.wal, first, last, keep, mint); err != nil {
 		h.metrics.checkpointCreationFail.Inc()
 		return errors.Wrap(err, "create checkpoint")
 	}
-	if err := h.wal.Truncate(n + 1); err != nil {
+	if err := h.wal.Truncate(last + 1); err != nil {
 		// If truncating fails, we'll just try again at the next checkpoint.
 		// Leftover segments will just be ignored in the future if there's a checkpoint
 		// that supersedes them.
 		level.Error(h.logger).Log("msg", "truncating segments failed", "err", err)
 	}
 	h.metrics.checkpointDeleteTotal.Inc()
-	if err := DeleteCheckpoints(h.wal.Dir(), n); err != nil {
+	if err := DeleteCheckpoints(h.wal.Dir(), last); err != nil {
 		// Leftover old checkpoints do not cause problems down the line beyond
 		// occupying disk space.
 		// They will just be ignored since a higher checkpoint exists.
|
|
|
|
|
@@ -533,7 +551,7 @@ func (h *Head) Truncate(mint int64) (err error) {
 	h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
 
 	level.Info(h.logger).Log("msg", "WAL checkpoint complete",
-		"low", m, "high", n, "duration", time.Since(start))
+		"first", first, "last", last, "duration", time.Since(start))
 
 	return nil
 }
|
|
|
|
@@ -1014,19 +1032,33 @@ func (h *headIndexReader) LabelValues(names ...string) (index.StringTuples, erro
 	if len(names) != 1 {
 		return nil, errInvalidSize
 	}
-	var sl []string
-
 	h.head.symMtx.RLock()
+	defer h.head.symMtx.RUnlock()
+
+	sl := make([]string, 0, len(h.head.values[names[0]]))
 	for s := range h.head.values[names[0]] {
 		sl = append(sl, s)
 	}
-	h.head.symMtx.RUnlock()
-
 	sort.Strings(sl)
 
 	return index.NewStringTuples(sl, len(names))
 }
 
+// LabelNames returns all the unique label names present in the head.
+func (h *headIndexReader) LabelNames() ([]string, error) {
+	h.head.symMtx.RLock()
+	defer h.head.symMtx.RUnlock()
+	labelNames := make([]string, 0, len(h.head.values))
+	for name := range h.head.values {
+		if name == "" {
+			continue
+		}
+		labelNames = append(labelNames, name)
+	}
+	sort.Strings(labelNames)
+	return labelNames, nil
+}
+
 // Postings returns the postings list iterator for the label pair.
 func (h *headIndexReader) Postings(name, value string) (index.Postings, error) {
 	return h.head.postings.Get(name, value), nil
|
|
|
|
@@ -1088,9 +1120,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks
 func (h *headIndexReader) LabelIndices() ([][]string, error) {
 	h.head.symMtx.RLock()
 	defer h.head.symMtx.RUnlock()
-
 	res := [][]string{}
-
 	for s := range h.head.values {
 		res = append(res, []string{s})
 	}
|
|
|
|
@@ -1313,6 +1343,14 @@ type sample struct {
 	v float64
 }
 
+func (s sample) T() int64 {
+	return s.t
+}
+
+func (s sample) V() float64 {
+	return s.v
+}
+
 // memSeries is the in-memory representation of a series. None of its methods
 // are goroutine safe and it is the caller's responsibility to lock it.
 type memSeries struct {
|
|
|
|
@@ -1321,11 +1359,11 @@ type memSeries struct {
 	ref          uint64
 	lset         labels.Labels
 	chunks       []*memChunk
+	headChunk    *memChunk
 	chunkRange   int64
 	firstChunkID int
 
 	nextAt        int64 // Timestamp at which to cut the next chunk.
-	lastValue     float64
 	sampleBuf     [4]sample
 	pendingCommit bool // Whether there are samples waiting to be committed to this series.
 
|
|
|
|
|
|
|
|
|
|
@@ -1354,6 +1392,7 @@ func (s *memSeries) cut(mint int64) *memChunk {
 		maxTime: math.MinInt64,
 	}
 	s.chunks = append(s.chunks, c)
+	s.headChunk = c
 
 	// Set upper bound on when the next chunk must be started. An earlier timestamp
 	// may be chosen dynamically at a later point.
|
|
|
|
|
@@ -1392,7 +1431,7 @@ func (s *memSeries) appendable(t int64, v float64) error {
 	}
 	// We are allowing exact duplicates as we can encounter them in valid cases
 	// like federation and erroring out at that time would be extremely noisy.
-	if math.Float64bits(s.lastValue) != math.Float64bits(v) {
+	if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) {
 		return ErrAmendSample
 	}
 	return nil
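
Note: sampleBuf holds the last four appended samples (the head chunk's iterator needs them), sliding on every append, so sampleBuf[3] is always the most recent sample and the separate lastValue field it replaces here was redundant. The shift, as it appears around the removal in the append hunk further down:

	s.sampleBuf[0] = s.sampleBuf[1]
	s.sampleBuf[1] = s.sampleBuf[2]
	s.sampleBuf[2] = s.sampleBuf[3]
	s.sampleBuf[3] = sample{t: t, v: v} // newest sample lands in slot 3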
|
|
|
|
@@ -1422,12 +1461,20 @@ func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
 	}
 	s.chunks = append(s.chunks[:0], s.chunks[k:]...)
 	s.firstChunkID += k
+	if len(s.chunks) == 0 {
+		s.headChunk = nil
+	} else {
+		s.headChunk = s.chunks[len(s.chunks)-1]
+	}
 
 	return k
 }
 
 // append adds the sample (t, v) to the series.
 func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
 	// Based on Gorilla white papers this offers near-optimal compression ratio
 	// so anything bigger that this has diminishing returns and increases
 	// the time range within which we have to decompress all samples.
 	const samplesPerChunk = 120
 
 	c := s.head()
|
|
|
|
@@ -1456,8 +1503,6 @@ func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
 
 	c.maxTime = t
 
-	s.lastValue = v
-
 	s.sampleBuf[0] = s.sampleBuf[1]
 	s.sampleBuf[1] = s.sampleBuf[2]
 	s.sampleBuf[2] = s.sampleBuf[3]
|
|
|
|
@@ -1501,10 +1546,7 @@ func (s *memSeries) iterator(id int) chunkenc.Iterator {
 	}
 
 func (s *memSeries) head() *memChunk {
-	if len(s.chunks) == 0 {
-		return nil
-	}
-	return s.chunks[len(s.chunks)-1]
+	return s.headChunk
 }
 
 type memChunk struct {
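
Note: head() is now a plain field read. The invariant that s.headChunk points at the last element of s.chunks, or is nil when the series has no chunks, is maintained at the two mutation sites patched above: cut() sets it when appending a new chunk, and truncateChunksBefore() re-derives it after dropping old chunks, which is why the nil check inside head() could go away.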
|
|
|
|
|