mirror of https://github.com/grafana/loki
Adds WAL support (experimental) (#2981)
* marshalable chunks * wal record types custom serialization * proto types for wal checkpoints * byteswith output unaffected by buffer * wal & record pool ifcs * wal record can hold entries from multiple series * entry pool * ingester uses noopWal * removes duplicate argument passing in ingester code. adds ingester config validation & derives chunk encoding. * segment writing * [WIP] wal recovery from segments * replay uses sync.Maps & preserves WAL fingerprints * in memory wal recovery * wal segment recovery * ingester metrics struct * wal replay locks streamsMtx in instances, adds checkpoint codec * ingester metrics * checkpointer * WAL checkpoint writer * checkpointwriter can write multiple checkpoints * reorgs checkpointing * wires up checkpointwriter to wal * ingester SeriesIter impl * wires up ingesterRecoverer to consume checkpoints * generic recovery fn * generic recovery fn * recover from both wal types * cleans up old tmp checkpoints & allows aborting in flight checkpoints * wires up wal checkpointing * more granular wal logging * fixes off by 1 wal truncation & removes double logging * adds userID to wal records correctly * wire chunk encoding tests * more granular wal metrics * checkpoint encoding test * ignores debug bins * segment replay ignores out of orders * fixes bug between WAL reading []byte validity and proto unmarshalling refs * conf validations, removes comments * flush on shutdown config * POST /ingester/shutdown * renames flush on shutdown * wal & checkpoint use same segment size * writes entries to wal regardless of tailers * makes wal checkpoing duration default to 5m * recovery metrics * encodes headchunks separately for wal purposes * merge upstream * linting * addresses pr feedback uses entry pool in stream push/tailer removes unnecessary pool interaction checkpointbytes comment fillchunk helper, record resetting in tests via pool redundant comment defers wg done in recovery s/num/count/ checkpoint wal uses a logger encodeWithTypeHeader now creates its own []byte removes pool from decodeEntries wal stop can error * prevent shared access bug with tailers and entry pool * removes stream push entry pool optimizationpull/3002/head
parent
ae9c4b82ec
commit
4d9865acd4
@ -0,0 +1,87 @@
package ingester

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

type ingesterMetrics struct {
	checkpointDeleteFail       prometheus.Counter
	checkpointDeleteTotal      prometheus.Counter
	checkpointCreationFail     prometheus.Counter
	checkpointCreationTotal    prometheus.Counter
	checkpointDuration         prometheus.Summary
	checkpointLoggedBytesTotal prometheus.Counter

	walReplayDuration   prometheus.Gauge
	walCorruptionsTotal *prometheus.CounterVec
	walLoggedBytesTotal prometheus.Counter
	walRecordsLogged    prometheus.Counter

	recoveredStreamsTotal prometheus.Counter
	recoveredChunksTotal  prometheus.Counter
	recoveredEntriesTotal prometheus.Counter
}

const (
	walTypeCheckpoint = "checkpoint"
	walTypeSegment    = "segment"
)

func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics {
	return &ingesterMetrics{
		walReplayDuration: promauto.With(r).NewGauge(prometheus.GaugeOpts{
			Name: "loki_ingester_wal_replay_duration_seconds",
			Help: "Time taken to replay the checkpoint and the WAL.",
		}),
		walCorruptionsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "loki_ingester_wal_corruptions_total",
			Help: "Total number of WAL corruptions encountered.",
		}, []string{"type"}),
		checkpointDeleteFail: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_checkpoint_deletions_failed_total",
			Help: "Total number of checkpoint deletions that failed.",
		}),
		checkpointDeleteTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_checkpoint_deletions_total",
			Help: "Total number of checkpoint deletions attempted.",
		}),
		checkpointCreationFail: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_checkpoint_creations_failed_total",
			Help: "Total number of checkpoint creations that failed.",
		}),
		checkpointCreationTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_checkpoint_creations_total",
			Help: "Total number of checkpoint creations attempted.",
		}),
		checkpointDuration: promauto.With(r).NewSummary(prometheus.SummaryOpts{
			Name:       "loki_ingester_checkpoint_duration_seconds",
			Help:       "Time taken to create a checkpoint.",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		}),
		walRecordsLogged: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_wal_records_logged_total",
			Help: "Total number of WAL records logged.",
		}),
		checkpointLoggedBytesTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_checkpoint_logged_bytes_total",
			Help: "Total number of bytes written to disk for checkpointing.",
		}),
		walLoggedBytesTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_wal_logged_bytes_total",
			Help: "Total number of bytes written to disk for WAL records.",
		}),
		recoveredStreamsTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_wal_recovered_streams_total",
			Help: "Total number of streams recovered from the WAL.",
		}),
		recoveredChunksTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_wal_recovered_chunks_total",
			Help: "Total number of chunks recovered from the WAL checkpoints.",
		}),
		recoveredEntriesTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_ingester_wal_recovered_entries_total",
			Help: "Total number of entries recovered from the WAL.",
		}),
	}
}

@ -0,0 +1,395 @@
package ingester

import (
	"context"
	io "io"
	"runtime"
	"sync"

	"github.com/cortexproject/cortex/pkg/ingester/client"
	"github.com/cortexproject/cortex/pkg/util"
	"github.com/go-kit/kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/tsdb/record"
	"github.com/prometheus/prometheus/tsdb/wal"

	"github.com/grafana/loki/pkg/logproto"
)

type WALReader interface {
	Next() bool
	Err() error
	// Record should not be used across multiple calls to Next()
	Record() []byte
}
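// Illustrative usage (hypothetical caller, not part of this change): a
// WALReader is consumed as an iterator, and each Record() should be decoded
// or copied before the next call to Next(), e.g.
//
//	for reader.Next() {
//		rec := reader.Record()
//		// decode or copy rec here; it may be invalidated by the next Next()
//	}
//	if err := reader.Err(); err != nil {
//		// the segment was truncated or corrupt
//	}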

type NoopWALReader struct{}

func (NoopWALReader) Next() bool     { return false }
func (NoopWALReader) Err() error     { return nil }
func (NoopWALReader) Record() []byte { return nil }
func (NoopWALReader) Close() error   { return nil }

// If startSegment is <0, it means all the segments.
func newWalReader(dir string, startSegment int) (*wal.Reader, io.Closer, error) {
	var (
		segmentReader io.ReadCloser
		err           error
	)
	if startSegment < 0 {
		segmentReader, err = wal.NewSegmentsReader(dir)
		if err != nil {
			return nil, nil, err
		}
	} else {
		first, last, err := wal.Segments(dir)
		if err != nil {
			return nil, nil, err
		}
		if startSegment > last {
			return nil, nil, errors.New("start segment is beyond the last WAL segment")
		}
		if first > startSegment {
			startSegment = first
		}
		segmentReader, err = wal.NewSegmentsRangeReader(wal.SegmentRange{
			Dir:   dir,
			First: startSegment,
			Last:  -1, // Till the end.
		})
		if err != nil {
			return nil, nil, err
		}
	}
	return wal.NewReader(segmentReader), segmentReader, nil
}

func newCheckpointReader(dir string) (WALReader, io.Closer, error) {
	lastCheckpointDir, idx, err := lastCheckpoint(dir)
	if err != nil {
		return nil, nil, err
	}
	if idx < 0 {
		level.Info(util.Logger).Log("msg", "no checkpoint found, treating as no-op")
		var reader NoopWALReader
		return reader, reader, nil
	}

	r, err := wal.NewSegmentsReader(lastCheckpointDir)
	if err != nil {
		return nil, nil, err
	}
	return wal.NewReader(r), r, nil
}

type Recoverer interface {
	NumWorkers() int
	Series(series *Series) error
	SetStream(userID string, series record.RefSeries) error
	Push(userID string, entries RefEntries) error
	Close()
	Done() <-chan struct{}
}

type ingesterRecoverer struct {
	// basically map[userID]map[fingerprint]*stream
	users sync.Map
	ing   *Ingester
	done  chan struct{}
}

func newIngesterRecoverer(i *Ingester) *ingesterRecoverer {
	return &ingesterRecoverer{
		ing:  i,
		done: make(chan struct{}),
	}
}

// Use all available cores
func (r *ingesterRecoverer) NumWorkers() int { return runtime.GOMAXPROCS(0) }

func (r *ingesterRecoverer) Series(series *Series) error {
	inst := r.ing.getOrCreateInstance(series.UserID)

	// TODO(owen-d): create another fn to avoid unnecessary label type conversions.
	stream, err := inst.getOrCreateStream(logproto.Stream{
		Labels: client.FromLabelAdaptersToLabels(series.Labels).String(),
	}, true, nil)

	if err != nil {
		return err
	}

	added, err := stream.setChunks(series.Chunks)
	if err != nil {
		return err
	}
	r.ing.metrics.recoveredChunksTotal.Add(float64(len(series.Chunks)))
	r.ing.metrics.recoveredEntriesTotal.Add(float64(added))

	// now store the stream in the recovery map under the fingerprint originally recorded
	// as it's possible the newly mapped fingerprint is different. This is because the WAL records
	// will use this original reference.
	got, _ := r.users.LoadOrStore(series.UserID, &sync.Map{})
	streamsMap := got.(*sync.Map)
	streamsMap.Store(series.Fingerprint, stream)
	return nil
}

// SetStream is responsible for setting the key path for userIDs -> fingerprints -> streams.
// Internally, this uses nested sync.Maps due to their performance benefits for sets that only grow.
// Using these also allows us to bypass the ingester -> instance -> stream hierarchy internally, which
// may yield some performance gains, but is essential for the following:
// Due to the use of the instance's fingerprint mapper, stream fingerprints are NOT necessarily
// deterministic. The WAL uses the post-mapped fingerprint on the ingester that originally
// created the stream and we ensure that said fingerprint maps correctly to the newly
// created stream during WAL replay, even if the new in-memory stream was assigned a different
// fingerprint by the mapper. This is paramount because subsequent WAL records will use
// the fingerprint reported in the WAL record, not the potentially differing one assigned during
// stream creation.
func (r *ingesterRecoverer) SetStream(userID string, series record.RefSeries) error {
	inst := r.ing.getOrCreateInstance(userID)

	stream, err := inst.getOrCreateStream(
		logproto.Stream{
			Labels: series.Labels.String(),
		},
		true,
		nil,
	)
	if err != nil {
		return err
	}

	// Now that we have the stream, ensure that the userID -> fingerprint -> stream
	// path is set properly.
	got, _ := r.users.LoadOrStore(userID, &sync.Map{})
	streamsMap := got.(*sync.Map)
	streamsMap.Store(series.Ref, stream)
	return nil
}
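// Illustrative example (hypothetical refs, not from the original change): if
// the original ingester logged record.RefSeries{Ref: 42} for a stream and, on
// replay, this ingester's fingerprint mapper hands the recreated stream
// fingerprint 97, SetStream still keys the recovery map by 42, because every
// RefEntries record that follows in the WAL refers to 42, not 97.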

func (r *ingesterRecoverer) Push(userID string, entries RefEntries) error {
	out, ok := r.users.Load(userID)
	if !ok {
		return errors.Errorf("user (%s) not set during WAL replay", userID)
	}

	s, ok := out.(*sync.Map).Load(entries.Ref)
	if !ok {
		return errors.Errorf("stream (%d) not set during WAL replay for user (%s)", entries.Ref, userID)
	}

	// ignore out of order errors here (it's possible for a checkpoint to already have data from the wal segments)
	_ = s.(*stream).Push(context.Background(), entries.Entries, nil)
	return nil
}

func (r *ingesterRecoverer) Close() {
	close(r.done)
}

func (r *ingesterRecoverer) Done() <-chan struct{} {
	return r.done
}

func RecoverWAL(reader WALReader, recoverer Recoverer) error {
	dispatch := func(recoverer Recoverer, b []byte, inputs []chan recoveryInput, errCh <-chan error) error {
		rec := recordPool.GetRecord()
		if err := decodeWALRecord(b, rec); err != nil {
			return err
		}

		// First process all series to ensure we don't write entries to nonexistent series.
		for _, s := range rec.Series {
			if err := recoverer.SetStream(rec.UserID, s); err != nil {
				return err
			}
		}

		for _, entries := range rec.RefEntries {
			worker := int(entries.Ref % uint64(len(inputs)))
			select {
			case err := <-errCh:
				return err

			case inputs[worker] <- recoveryInput{
				userID: rec.UserID,
				data:   entries,
			}:
			}
		}

		return nil
	}

	process := func(recoverer Recoverer, input <-chan recoveryInput, errCh chan<- error) {
		for {
			select {
			case <-recoverer.Done():

			case next, ok := <-input:
				if !ok {
					return
				}
				entries, ok := next.data.(RefEntries)
				var err error
				if !ok {
					err = errors.Errorf("unexpected type (%T) when recovering WAL, expecting (%T)", next.data, entries)
				}
				if err == nil {
					err = recoverer.Push(next.userID, entries)
				}

				// Pass the error back, but respect the quit signal.
				if err != nil {
					select {
					case errCh <- err:
					case <-recoverer.Done():
					}
					return
				}
			}
		}
	}

	return recoverGeneric(
		reader,
		recoverer,
		dispatch,
		process,
	)
}

func RecoverCheckpoint(reader WALReader, recoverer Recoverer) error {
	dispatch := func(recoverer Recoverer, b []byte, inputs []chan recoveryInput, errCh <-chan error) error {
		s := &Series{}
		if err := decodeCheckpointRecord(b, s); err != nil {
			return err
		}

		worker := int(s.Fingerprint % uint64(len(inputs)))
		select {
		case err := <-errCh:
			return err

		case inputs[worker] <- recoveryInput{
			userID: s.UserID,
			data:   s,
		}:
		}

		return nil
	}

	process := func(recoverer Recoverer, input <-chan recoveryInput, errCh chan<- error) {
		for {
			select {
			case <-recoverer.Done():

			case next, ok := <-input:
				if !ok {
					return
				}
				series, ok := next.data.(*Series)
				var err error
				if !ok {
					err = errors.Errorf("unexpected type (%T) when recovering WAL, expecting (%T)", next.data, series)
				}
				if err == nil {
					err = recoverer.Series(series)
				}

				// Pass the error back, but respect the quit signal.
				if err != nil {
					select {
					case errCh <- err:
					case <-recoverer.Done():
					}
					return
				}
			}
		}
	}

	return recoverGeneric(
		reader,
		recoverer,
		dispatch,
		process,
	)
}

type recoveryInput struct {
	userID string
	data   interface{}
}

// recoverGeneric enables reusing the ability to recover from WALs of different types
// by exposing the dispatch and process functions.
// Note: it explicitly does not call the Recoverer.Close function as it's possible to layer
// multiple recoveries on top of each other, as in the case of recovering from Checkpoints
// then the WAL.
func recoverGeneric(
	reader WALReader,
	recoverer Recoverer,
	dispatch func(Recoverer, []byte, []chan recoveryInput, <-chan error) error,
	process func(Recoverer, <-chan recoveryInput, chan<- error),
) error {
	var wg sync.WaitGroup
	var lastErr error
	nWorkers := recoverer.NumWorkers()

	if nWorkers < 1 {
		return errors.New("cannot recover with no workers")
	}

	errCh := make(chan error)
	inputs := make([]chan recoveryInput, 0, nWorkers)
	wg.Add(nWorkers)
	for i := 0; i < nWorkers; i++ {
		inputs = append(inputs, make(chan recoveryInput))

		go func(input <-chan recoveryInput) {
			defer wg.Done()
			process(recoverer, input, errCh)
		}(inputs[i])
	}

outer:
	for reader.Next() {
		b := reader.Record()
		if lastErr = reader.Err(); lastErr != nil {
			break outer
		}

		if lastErr = dispatch(recoverer, b, inputs, errCh); lastErr != nil {
			break outer
		}
	}

	for _, w := range inputs {
		close(w)
	}

	// may have broken loop early
	if lastErr != nil {
		return lastErr
	}

	finished := make(chan struct{})
	go func(finished chan<- struct{}) {
		wg.Wait()
		finished <- struct{}{}
	}(finished)

	select {
	case <-finished:
	case lastErr = <-errCh:
	}

	return lastErr
}
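A full replay layers the two recovery paths: the most recent checkpoint is applied first, then the WAL segments, and only then is the recoverer closed (recoverGeneric deliberately leaves Close to the caller). Below is a minimal sketch of that ordering, assuming it runs inside the ingester package with an *Ingester (ing) and a WAL directory (walDir) in hand, with error handling elided. The real wiring may start segment replay after the checkpoint's segment rather than re-reading everything; re-reading is tolerated here because Push ignores out-of-order entries.

	recoverer := newIngesterRecoverer(ing)

	// 1. Replay the last checkpoint, if one exists (a NoopWALReader otherwise).
	checkpointReader, checkpointCloser, err := newCheckpointReader(walDir)
	if err == nil {
		err = RecoverCheckpoint(checkpointReader, recoverer)
		checkpointCloser.Close()
	}

	// 2. Replay the WAL segments on top of the checkpointed state.
	segmentReader, segmentCloser, err := newWalReader(walDir, -1)
	if err == nil {
		err = RecoverWAL(segmentReader, recoverer)
		segmentCloser.Close()
	}

	// 3. recoverGeneric never calls Close; the caller does it once all replays are done.
	recoverer.Close()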

@ -0,0 +1,192 @@
package ingester

import (
	fmt "fmt"
	"runtime"
	"sync"
	"testing"
	"time"

	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/tsdb/record"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/pkg/logproto"
)

type MemoryWALReader struct {
	xs [][]byte

	initialized bool
}

func (m *MemoryWALReader) Next() bool {
	if len(m.xs) < 1 {
		return false
	}

	// don't advance on the first call
	if !m.initialized {
		m.initialized = true
		return true
	}

	m.xs = m.xs[1:]
	return len(m.xs) > 0
}

func (m *MemoryWALReader) Err() error { return nil }

func (m *MemoryWALReader) Record() []byte { return m.xs[0] }

func buildMemoryReader(users, totalStreams, entriesPerStream int) (*MemoryWALReader, []*WALRecord) {
	var recs []*WALRecord
	reader := &MemoryWALReader{}
	for i := 0; i < totalStreams; i++ {
		user := fmt.Sprintf("%d", i%users)
		recs = append(recs, &WALRecord{
			UserID: user,
			Series: []record.RefSeries{
				{
					Ref: uint64(i),
					Labels: labels.FromMap(
						map[string]string{
							"stream": fmt.Sprint(i),
							"user":   user,
						},
					),
				},
			},
		})

		var entries []logproto.Entry
		for j := 0; j < entriesPerStream; j++ {
			entries = append(entries, logproto.Entry{
				Timestamp: time.Unix(int64(j), 0),
				Line:      fmt.Sprintf("%d", j),
			})
		}
		recs = append(recs, &WALRecord{
			UserID: user,
			RefEntries: []RefEntries{
				{
					Ref:     uint64(i),
					Entries: entries,
				},
			},
		})
	}

	for _, rec := range recs {
		if len(rec.Series) > 0 {
			reader.xs = append(reader.xs, rec.encodeSeries(nil))
		}

		if len(rec.RefEntries) > 0 {
			reader.xs = append(reader.xs, rec.encodeEntries(nil))
		}
	}

	return reader, recs
}

type MemRecoverer struct {
	users map[string]map[uint64][]logproto.Entry
	done  chan struct{}

	sync.Mutex
	usersCt, streamsCt, seriesCt int
}

func NewMemRecoverer() *MemRecoverer {
	return &MemRecoverer{
		users: make(map[string]map[uint64][]logproto.Entry),
		done:  make(chan struct{}),
	}
}

func (r *MemRecoverer) NumWorkers() int { return runtime.GOMAXPROCS(0) }

func (r *MemRecoverer) Series(_ *Series) error { return nil }

func (r *MemRecoverer) SetStream(userID string, series record.RefSeries) error {
	r.Lock()
	defer r.Unlock()
	user, ok := r.users[userID]
	if !ok {
		user = make(map[uint64][]logproto.Entry)
		r.users[userID] = user
		r.usersCt++
	}

	if _, exists := user[series.Ref]; exists {
		return errors.Errorf("stream (%d) already exists for user (%s)", series.Ref, userID)
	}

	user[series.Ref] = make([]logproto.Entry, 0)
	r.streamsCt++
	return nil
}

func (r *MemRecoverer) Push(userID string, entries RefEntries) error {
	r.Lock()
	defer r.Unlock()

	user, ok := r.users[userID]
	if !ok {
		return errors.Errorf("unexpected user access (%s)", userID)
	}

	stream, ok := user[entries.Ref]
	if !ok {
		return errors.Errorf("unexpected stream access")
	}

	r.seriesCt += len(entries.Entries)
	user[entries.Ref] = append(stream, entries.Entries...)
	return nil
}

func (r *MemRecoverer) Close() { close(r.done) }

func (r *MemRecoverer) Done() <-chan struct{} { return r.done }

func Test_InMemorySegmentRecover(t *testing.T) {
	var (
		users            = 10
		streamsCt        = 1000
		entriesPerStream = 50
	)
	reader, recs := buildMemoryReader(users, streamsCt, entriesPerStream)

	recoverer := NewMemRecoverer()

	require.Nil(t, RecoverWAL(reader, recoverer))
	recoverer.Close()

	require.Equal(t, users, recoverer.usersCt)
	require.Equal(t, streamsCt, recoverer.streamsCt)
	require.Equal(t, streamsCt*entriesPerStream, recoverer.seriesCt)

	for _, rec := range recs {
		user, ok := recoverer.users[rec.UserID]
		require.Equal(t, true, ok)

		for _, s := range rec.Series {
			_, ok := user[s.Ref]
			require.Equal(t, true, ok)
		}

		for _, entries := range rec.RefEntries {
			stream, ok := user[entries.Ref]
			require.Equal(t, true, ok)

			for i, entry := range entries.Entries {
				require.Equal(t, entry, stream[i])
			}
		}
	}
}

@ -0,0 +1,210 @@
package ingester

import (
	"flag"
	"sync"
	"time"

	"github.com/cortexproject/cortex/pkg/util"
	"github.com/go-kit/kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/prometheus/tsdb/wal"

	"github.com/grafana/loki/pkg/logproto"
)

var (
	// shared pool for WALRecords and []logproto.Entries
	recordPool = newRecordPool()
)

const walSegmentSize = wal.DefaultSegmentSize * 4

type WALConfig struct {
	Enabled            bool          `yaml:"enabled"`
	Dir                string        `yaml:"dir"`
	Recover            bool          `yaml:"recover"`
	CheckpointDuration time.Duration `yaml:"checkpoint_duration"`
	FlushOnShutdown    bool          `yaml:"flush_on_shutdown"`
}

func (cfg *WALConfig) Validate() error {
	if cfg.Enabled && cfg.CheckpointDuration < 1 {
		return errors.Errorf("invalid checkpoint duration: %v", cfg.CheckpointDuration)
	}
	return nil
}

// RegisterFlags adds the flags required to config this to the given FlagSet
func (cfg *WALConfig) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.Dir, "ingester.wal-dir", "wal", "Directory to store the WAL and/or recover from WAL.")
	f.BoolVar(&cfg.Enabled, "ingester.wal-enabled", false, "Enable writing of ingested data into WAL.")
	f.BoolVar(&cfg.Recover, "ingester.recover-from-wal", false, "Recover data from existing WAL irrespective of WAL enabled/disabled.")
	f.DurationVar(&cfg.CheckpointDuration, "ingester.checkpoint-duration", 5*time.Minute, "Interval at which checkpoints should be created.")
	f.BoolVar(&cfg.FlushOnShutdown, "ingester.flush-on-shutdown", false, "When WAL is enabled, should chunks be flushed to long-term storage on shutdown.")
}
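// Illustrative flag usage (values are examples only; the flag names are the
// ones registered above):
//
//	-ingester.wal-enabled=true
//	-ingester.wal-dir=/loki/wal
//	-ingester.recover-from-wal=true
//	-ingester.checkpoint-duration=5m
//	-ingester.flush-on-shutdown=true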

// WAL interface allows us to have a no-op WAL when the WAL is disabled.
type WAL interface {
	// Log marshals the record and writes it into the WAL.
	Log(*WALRecord) error
	// Stop stops all the WAL operations.
	Stop() error
}

type noopWAL struct{}

func (noopWAL) Log(*WALRecord) error { return nil }
func (noopWAL) Stop() error          { return nil }

type walWrapper struct {
	cfg        WALConfig
	wal        *wal.WAL
	metrics    *ingesterMetrics
	seriesIter SeriesIter

	wait sync.WaitGroup
	quit chan struct{}
}

// newWAL creates a WAL object. If the WAL is disabled, then the returned WAL is a no-op WAL.
func newWAL(cfg WALConfig, registerer prometheus.Registerer, metrics *ingesterMetrics, seriesIter SeriesIter) (WAL, error) {
	if !cfg.Enabled {
		return noopWAL{}, nil
	}

	tsdbWAL, err := wal.NewSize(util.Logger, registerer, cfg.Dir, walSegmentSize, false)
	if err != nil {
		return nil, err
	}

	w := &walWrapper{
		cfg:        cfg,
		quit:       make(chan struct{}),
		wal:        tsdbWAL,
		metrics:    metrics,
		seriesIter: seriesIter,
	}

	w.wait.Add(1)
	go w.run()
	return w, nil
}

func (w *walWrapper) Log(record *WALRecord) error {
	if record == nil || (len(record.Series) == 0 && len(record.RefEntries) == 0) {
		return nil
	}
	select {
	case <-w.quit:
		return nil
	default:
		buf := recordPool.GetBytes()[:0]
		defer func() {
			recordPool.PutBytes(buf)
		}()

		// Always write series then entries.
		if len(record.Series) > 0 {
			buf = record.encodeSeries(buf)
			if err := w.wal.Log(buf); err != nil {
				return err
			}
			w.metrics.walRecordsLogged.Inc()
			w.metrics.walLoggedBytesTotal.Add(float64(len(buf)))
			buf = buf[:0]
		}
		if len(record.RefEntries) > 0 {
			buf = record.encodeEntries(buf)
			if err := w.wal.Log(buf); err != nil {
				return err
			}
			w.metrics.walRecordsLogged.Inc()
			w.metrics.walLoggedBytesTotal.Add(float64(len(buf)))
		}
		return nil
	}
}

func (w *walWrapper) Stop() error {
	close(w.quit)
	w.wait.Wait()
	err := w.wal.Close()
	level.Info(util.Logger).Log("msg", "stopped", "component", "wal")
	return err
}

func (w *walWrapper) checkpointWriter() *WALCheckpointWriter {
	return &WALCheckpointWriter{
		metrics:    w.metrics,
		segmentWAL: w.wal,
	}
}

func (w *walWrapper) run() {
	level.Info(util.Logger).Log("msg", "started", "component", "wal")
	defer w.wait.Done()

	checkpointer := NewCheckpointer(
		w.cfg.CheckpointDuration,
		w.seriesIter,
		w.checkpointWriter(),
		w.metrics,
		w.quit,
	)
	checkpointer.Run()
}

type resettingPool struct {
	rPool *sync.Pool // records
	ePool *sync.Pool // entries
	bPool *sync.Pool // bytes
}

func (p *resettingPool) GetRecord() *WALRecord {
	rec := p.rPool.Get().(*WALRecord)
	rec.Reset()
	return rec
}

func (p *resettingPool) PutRecord(r *WALRecord) {
	p.rPool.Put(r)
}

func (p *resettingPool) GetEntries() []logproto.Entry {
	return p.ePool.Get().([]logproto.Entry)
}

func (p *resettingPool) PutEntries(es []logproto.Entry) {
	p.ePool.Put(es[:0]) // nolint:staticcheck
}

func (p *resettingPool) GetBytes() []byte {
	return p.bPool.Get().([]byte)
}

func (p *resettingPool) PutBytes(b []byte) {
	p.bPool.Put(b[:0]) // nolint:staticcheck
}

func newRecordPool() *resettingPool {
	return &resettingPool{
		rPool: &sync.Pool{
			New: func() interface{} {
				return &WALRecord{}
			},
		},
		ePool: &sync.Pool{
			New: func() interface{} {
				return make([]logproto.Entry, 0, 512)
			},
		},
		bPool: &sync.Pool{
			New: func() interface{} {
				return make([]byte, 0, 1<<10) // 1kb
			},
		},
	}
}