Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/canary/comparator/comparator.go

539 lines
17 KiB

package comparator
import (
"fmt"
"io"
"math/rand"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/weaveworks/common/instrument"
"github.com/grafana/loki/pkg/canary/reader"
)
// Format strings for error and debug reporting. All of them are written to
// the Comparator's io.Writer via fmt.Fprintf by the methods below.
const (
	// ErrOutOfOrderEntry reports an entry received ahead of older pending entries.
	ErrOutOfOrderEntry = "out of order entry %s was received before entries: %v\n"
	// ErrEntryNotReceivedWs reports an entry not seen on the websocket within the wait duration.
	ErrEntryNotReceivedWs = "websocket failed to receive entry %v within %f seconds\n"
	// ErrEntryNotReceived reports an entry never found, even by direct query, within maxWait.
	ErrEntryNotReceived = "failed to receive entry %v within %f seconds\n"
	// ErrSpotCheckEntryNotReceived reports a sampled entry missing from a direct spot-check query.
	ErrSpotCheckEntryNotReceived = "failed to find entry %v in Loki when spot check querying %v after it was written\n"
	// ErrDuplicateEntry reports an entry received more than once.
	ErrDuplicateEntry = "received a duplicate entry for ts %v\n"
	// ErrUnexpectedEntry reports an entry that was neither pending nor acknowledged.
	ErrUnexpectedEntry = "received an unexpected entry with ts %v\n"
	// DebugWebsocketMissingEntry lists an entry currently missing from the websocket stream.
	DebugWebsocketMissingEntry = "websocket missing entry: %v\n"
	// DebugQueryResult echoes a timestamp returned by a confirmation query.
	DebugQueryResult = "confirmation query result: %v\n"
	// DebugEntryFound reports a previously missing entry later located by query.
	DebugEntryFound = "missing websocket entry %v was found %v seconds after it was originally sent\n"
)
// Package-level Prometheus metrics exported by the canary comparator.
// All counters/gauges are registered at init time via promauto;
// responseLatency is created lazily in NewComparator because its bucket
// count is configurable.
var (
	totalEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "entries_total",
		Help:      "counts log entries written to the file",
	})
	outOfOrderEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "out_of_order_entries_total",
		Help:      "counts log entries received with a timestamp more recent than the others in the queue",
	})
	wsMissingEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "websocket_missing_entries_total",
		Help:      "counts log entries not received within the wait duration via the websocket connection",
	})
	missingEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "missing_entries_total",
		Help:      "counts log entries not received within the maxWait duration via both websocket and direct query",
	})
	spotCheckMissing = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "spot_check_missing_entries_total",
		Help:      "counts log entries not received when directly queried as part of spot checking",
	})
	spotCheckEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "spot_check_entries_total",
		// Fixed typo: "pot checked" -> "spot checked".
		Help: "total count of entries spot checked",
	})
	unexpectedEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "unexpected_entries_total",
		Help:      "counts a log entry received which was not expected (e.g. received after reported missing)",
	})
	duplicateEntries = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki_canary",
		Name:      "duplicate_entries_total",
		Help:      "counts a log entry received more than one time",
	})
	metricTestExpected = promauto.NewGauge(prometheus.GaugeOpts{
		Namespace: "loki_canary",
		Name:      "metric_test_expected",
		Help:      "How many counts were expected by the metric test query",
	})
	metricTestActual = promauto.NewGauge(prometheus.GaugeOpts{
		Namespace: "loki_canary",
		Name:      "metric_test_actual",
		Help:      "How many counts were actually received by the metric test query",
	})
	// responseLatency is initialized in NewComparator (configurable buckets).
	responseLatency prometheus.Histogram
	metricTestLatency = promauto.NewHistogram(prometheus.HistogramOpts{
		Namespace: "loki_canary",
		Name:      "metric_test_request_duration_seconds",
		Help:      "how long the metric test query execution took in seconds.",
		Buckets:   instrument.DefBuckets,
	})
	spotTestLatency = promauto.NewHistogram(prometheus.HistogramOpts{
		Namespace: "loki_canary",
		Name:      "spot_check_request_duration_seconds",
		Help:      "how long the spot check test query execution took in seconds.",
		Buckets:   instrument.DefBuckets,
	})
)
// Comparator reconciles the log entries the canary writes with the entries it
// receives back from Loki (via the websocket tail and direct queries), and
// reports missing, duplicate, out of order and unexpected entries both to its
// io.Writer and as Prometheus metrics.
type Comparator struct {
	entMtx     sync.Mutex // Locks access to []entries and []ackdEntries
	missingMtx sync.Mutex // Locks access to []missingEntries
	spotEntMtx sync.Mutex // Locks access to []spotCheck
	spotMtx    sync.Mutex // Locks spotcheckRunning for single threaded but async spotCheck()
	metTestMtx sync.Mutex // Locks metricTestRunning for single threaded but async metricTest()
	pruneMtx   sync.Mutex // Locks pruneEntriesRunning for single threaded but async pruneEntries()
	w          io.Writer  // destination for all error/debug report lines

	entries        []*time.Time // sent entries awaiting websocket confirmation
	missingEntries []*time.Time // entries not confirmed in time, pending direct-query confirmation
	spotCheck      []*time.Time // sampled entries re-queried periodically to verify persisted storage
	ackdEntries    []*time.Time // recently confirmed entries, kept to detect duplicates

	wait                time.Duration // how long to wait for websocket delivery before reporting missing
	maxWait             time.Duration // how long to keep querying for an entry before declaring it lost
	pruneInterval       time.Duration // how often pruneEntries runs
	pruneEntriesRunning bool          // guarded by pruneMtx
	spotCheckInterval   time.Duration // minimum spacing between sampled spot-check entries
	spotCheckMax        time.Duration // maximum age of an entry kept in the spot-check list
	spotCheckQueryRate  time.Duration // how often spotCheckEntries runs
	spotCheckWait       time.Duration // minimum age before a sampled entry is queried
	spotCheckRunning    bool          // guarded by spotMtx
	metricTestInterval  time.Duration // how often metricTest runs
	metricTestRange     time.Duration // lookback window for the count-over-time metric test
	metricTestRunning   bool          // guarded by metTestMtx
	writeInterval       time.Duration // interval at which the canary writes entries (used to compute expected counts)
	confirmAsync        bool          // if true, confirmMissing runs in its own goroutine
	startTime           time.Time     // when this comparator was created; caps the metric test range
	sent                chan time.Time // timestamps of entries written by the canary
	recv                chan time.Time // timestamps of entries received back from Loki
	rdr                 reader.LokiReader
	quit                chan struct{} // closed by Stop to terminate run()
	done                chan struct{} // closed by run() when it exits
}
// NewComparator constructs a Comparator and immediately starts its background
// run loop; call Stop to shut it down. buckets controls the number of
// exponential buckets for the response latency histogram.
//
// NOTE(review): the lazy initialization of the package-level responseLatency
// histogram below is not synchronized — constructing multiple Comparators
// concurrently could race on it. In practice the canary appears to create a
// single Comparator; confirm before reusing this constructor elsewhere.
func NewComparator(writer io.Writer,
	wait time.Duration,
	maxWait time.Duration,
	pruneInterval time.Duration,
	spotCheckInterval, spotCheckMax, spotCheckQueryRate, spotCheckWait time.Duration,
	metricTestInterval time.Duration,
	metricTestRange time.Duration,
	writeInterval time.Duration,
	buckets int,
	sentChan chan time.Time,
	receivedChan chan time.Time,
	reader reader.LokiReader,
	confirmAsync bool) *Comparator {
	c := &Comparator{
		w:                   writer,
		entries:             []*time.Time{},
		spotCheck:           []*time.Time{},
		wait:                wait,
		maxWait:             maxWait,
		pruneInterval:       pruneInterval,
		pruneEntriesRunning: false,
		spotCheckInterval:   spotCheckInterval,
		spotCheckMax:        spotCheckMax,
		spotCheckQueryRate:  spotCheckQueryRate,
		spotCheckWait:       spotCheckWait,
		spotCheckRunning:    false,
		metricTestInterval:  metricTestInterval,
		metricTestRange:     metricTestRange,
		metricTestRunning:   false,
		writeInterval:       writeInterval,
		confirmAsync:        confirmAsync,
		startTime:           time.Now(),
		sent:                sentChan,
		recv:                receivedChan,
		rdr:                 reader,
		quit:                make(chan struct{}),
		done:                make(chan struct{}),
	}
	// Lazily create the histogram once: its bucket count comes from config,
	// so it cannot be registered in the package-level var block.
	if responseLatency == nil {
		responseLatency = promauto.NewHistogram(prometheus.HistogramOpts{
			Namespace: "loki_canary",
			Name:      "response_latency_seconds",
			Help:      "is how long it takes for log lines to be returned from Loki in seconds.",
			Buckets:   prometheus.ExponentialBuckets(0.5, 2, buckets),
		})
	}
	go c.run()
	return c
}
// Stop signals the background run loop to exit and blocks until it has
// fully drained. Subsequent calls are no-ops.
func (c *Comparator) Stop() {
	if c.quit == nil {
		return
	}
	close(c.quit)
	<-c.done
	c.quit = nil
}
// entrySent records a freshly written log entry: it is queued for websocket
// confirmation and, at most once per spotCheckInterval, sampled into the
// spot-check list for later direct-query verification.
func (c *Comparator) entrySent(ts time.Time) {
	c.entMtx.Lock()
	c.entries = append(c.entries, &ts)
	totalEntries.Inc()
	c.entMtx.Unlock()

	// Sample into the spot-check list when it is empty or when enough time
	// has elapsed since the most recently sampled entry.
	c.spotEntMtx.Lock()
	defer c.spotEntMtx.Unlock()
	if n := len(c.spotCheck); n == 0 || ts.Sub(*c.spotCheck[n-1]) >= c.spotCheckInterval {
		c.spotCheck = append(c.spotCheck, &ts)
	}
}
// entryReceived removes the received entry from the buffer if it exists, reports on out of order entries received,
// and classifies anything not found in the pending buffer as either a duplicate
// (already acknowledged) or an entirely unexpected entry.
func (c *Comparator) entryReceived(ts time.Time) {
	c.entMtx.Lock()
	defer c.entMtx.Unlock()

	matched := false
	// Remove the matching timestamp from the pending list; the removal
	// callback also observes latency and archives the entry for duplicate detection.
	c.entries = pruneList(c.entries,
		func(_ int, t *time.Time) bool {
			return ts.Equal(*t)
		},
		func(i int, t *time.Time) {
			matched = true
			// If this isn't the first item in the list we received it out of order
			if i != 0 {
				outOfOrderEntries.Inc()
				fmt.Fprintf(c.w, ErrOutOfOrderEntry, t, c.entries[:i])
			}
			responseLatency.Observe(time.Since(ts).Seconds())
			// Put this element in the acknowledged entries list so we can use it to check for duplicates
			c.ackdEntries = append(c.ackdEntries, c.entries[i])
		})
	if !matched {
		// Not pending: either a re-delivery of an already acknowledged
		// entry (duplicate) or something we never sent / already gave up on.
		duplicate := false
		for _, e := range c.ackdEntries {
			if ts.Equal(*e) {
				duplicate = true
				duplicateEntries.Inc()
				fmt.Fprintf(c.w, ErrDuplicateEntry, ts.UnixNano())
				break
			}
		}
		if !duplicate {
			fmt.Fprintf(c.w, ErrUnexpectedEntry, ts.UnixNano())
			unexpectedEntries.Inc()
		}
	}
}
// Size reports how many entries are currently awaiting confirmation.
func (c *Comparator) Size() int {
	c.entMtx.Lock()
	n := len(c.entries)
	c.entMtx.Unlock()
	return n
}
// run is the comparator's event loop: it consumes sent/received timestamps
// and drives the periodic prune, spot-check, and metric-test tasks on their
// respective tickers. Each periodic task runs on its own goroutine, with a
// running flag (guarded by its mutex) ensuring only one instance at a time.
// The loop exits when c.quit is closed, and closes c.done on the way out.
func (c *Comparator) run() {
	t := time.NewTicker(c.pruneInterval)
	// Use a random tick up to the interval for the first tick
	firstMt := true
	rand.Seed(time.Now().UnixNano())
	mt := time.NewTicker(time.Duration(rand.Int63n(c.metricTestInterval.Nanoseconds())))
	sc := time.NewTicker(c.spotCheckQueryRate)
	defer func() {
		t.Stop()
		mt.Stop()
		sc.Stop()
		close(c.done)
	}()
	for {
		select {
		case e := <-c.recv:
			c.entryReceived(e)
		case e := <-c.sent:
			c.entrySent(e)
		case <-t.C:
			// Only run one instance of prune entries at a time.
			c.pruneMtx.Lock()
			if !c.pruneEntriesRunning {
				c.pruneEntriesRunning = true
				go c.pruneEntries(time.Now())
			}
			c.pruneMtx.Unlock()
		case <-sc.C:
			// Only run one instance of spot check at a time.
			c.spotMtx.Lock()
			if !c.spotCheckRunning {
				c.spotCheckRunning = true
				go c.spotCheckEntries(time.Now())
			}
			c.spotMtx.Unlock()
		case <-mt.C:
			// Only run one instance of metric tests at a time.
			c.metTestMtx.Lock()
			if !c.metricTestRunning {
				c.metricTestRunning = true
				go c.metricTest(time.Now())
			}
			c.metTestMtx.Unlock()
			if firstMt {
				// After the first tick which is at a random time, resume at the normal periodic interval
				firstMt = false
				mt.Reset(c.metricTestInterval)
			}
		case <-c.quit:
			return
		}
	}
}
// metricTest checks that the expected # of log lines have been written to
// Loki by running a count-over-time query for the configured range and
// publishing both the expected count (derived from the write interval) and
// the actual count as gauges.
func (c *Comparator) metricTest(currTime time.Time) {
	// Always make sure to set the running state back to false
	defer func() {
		c.metTestMtx.Lock()
		c.metricTestRunning = false
		c.metTestMtx.Unlock()
	}()
	adjustedRange := c.metricTestRange
	// Adjust the query range to not be longer than the canary has been running.
	// We can't query for 24 hours of counts if it's only been running for 10m,
	// so we adjusted the range to the run time until we reach the desired lookback time.
	if currTime.Add(-c.metricTestRange).Before(c.startTime) {
		adjustedRange = currTime.Sub(c.startTime)
	}
	begin := time.Now()
	actualCount, err := c.rdr.QueryCountOverTime(fmt.Sprintf("%.0fs", adjustedRange.Seconds()))
	metricTestLatency.Observe(time.Since(begin).Seconds())
	if err != nil {
		fmt.Fprintf(c.w, "error running metric query test: %s\n", err.Error())
		return
	}
	// One entry is written every writeInterval, so over the adjusted range we
	// expect range/interval entries.
	expectedCount := float64(adjustedRange.Milliseconds()) / float64(c.writeInterval.Milliseconds())
	metricTestExpected.Set(expectedCount)
	metricTestActual.Set(actualCount)
}
// spotCheck is used to ensure that log data is actually available after being flushed from the
// ingesters in memory storage and persisted to disk/blob storage, which the tail check cannot confirm.
// It keeps a sampled slice of log lines written by the canary and checks for them periodically until
// it decides they are too old to continue checking for.
func (c *Comparator) spotCheckEntries(currTime time.Time) {
	// Always make sure to set the running state back to false
	defer func() {
		c.spotMtx.Lock()
		c.spotCheckRunning = false
		c.spotMtx.Unlock()
	}()

	c.spotEntMtx.Lock()
	// Remove any entries from the spotcheck list which are too old
	c.spotCheck = pruneList(c.spotCheck,
		func(_ int, t *time.Time) bool {
			return t.Before(currTime.Add(-c.spotCheckMax))
		},
		func(_ int, t *time.Time) {
		})
	// Make a copy so we don't have to hold the lock to verify entries
	cpy := make([]*time.Time, len(c.spotCheck))
	copy(cpy, c.spotCheck)
	c.spotEntMtx.Unlock()

	for _, sce := range cpy {
		// Make sure enough time has passed to start checking for this entry
		if currTime.Sub(*sce) < c.spotCheckWait {
			continue
		}
		spotCheckEntries.Inc()
		// Because we are querying loki timestamps vs the timestamp in the log,
		// make the range +/- 10 seconds to allow for clock inaccuracies
		start := *sce
		adjustedStart := start.Add(-10 * time.Second)
		adjustedEnd := start.Add(10 * time.Second)
		begin := time.Now()
		recvd, err := c.rdr.Query(adjustedStart, adjustedEnd)
		spotTestLatency.Observe(time.Since(begin).Seconds())
		if err != nil {
			// Abort the whole pass on a query error; remaining entries will be
			// retried on the next spot-check tick.
			fmt.Fprintf(c.w, "error querying loki: %s\n", err)
			return
		}
		found := false
		for _, r := range recvd {
			if (*sce).Equal(r) {
				found = true
				break
			}
		}
		if !found {
			// Entry was not in the query result: report it along with everything
			// Loki did return, to aid debugging.
			fmt.Fprintf(c.w, ErrSpotCheckEntryNotReceived, sce.UnixNano(), currTime.Sub(*sce))
			for _, r := range recvd {
				fmt.Fprintf(c.w, DebugQueryResult, r.UnixNano())
			}
			spotCheckMissing.Inc()
		}
	}
}
// pruneEntries moves every entry older than c.wait from the pending list to
// the missing list (reporting it as missed on the websocket), triggers a
// direct-query confirmation of the missing list, and expires old entries from
// the acknowledged (duplicate-detection) list.
func (c *Comparator) pruneEntries(currentTime time.Time) {
	// Always make sure to set the running state back to false
	defer func() {
		c.pruneMtx.Lock()
		c.pruneEntriesRunning = false
		c.pruneMtx.Unlock()
	}()

	c.entMtx.Lock()
	defer c.entMtx.Unlock()

	missing := []*time.Time{}
	// Prune entry list of anything older than c.wait and add it to missing list
	c.entries = pruneList(c.entries,
		func(_ int, t *time.Time) bool {
			cutoff := currentTime.Add(-c.wait)
			return t.Before(cutoff) || t.Equal(cutoff)
		},
		func(_ int, t *time.Time) {
			missing = append(missing, t)
			wsMissingEntries.Inc()
			fmt.Fprintf(c.w, ErrEntryNotReceivedWs, t.UnixNano(), c.wait.Seconds())
		})

	// Add the list of missing entries to the list for which we will attempt to query Loki for.
	// Capture the length while still holding missingMtx: reading c.missingEntries
	// after Unlock races with async confirmMissing goroutines mutating the slice.
	c.missingMtx.Lock()
	c.missingEntries = append(c.missingEntries, missing...)
	missingCount := len(c.missingEntries)
	c.missingMtx.Unlock()

	if missingCount > 0 {
		if c.confirmAsync {
			go c.confirmMissing(currentTime)
		} else {
			c.confirmMissing(currentTime)
		}
	}

	// Prune c.ackdEntries list of old acknowledged entries which we were using to find duplicates
	c.ackdEntries = pruneList(c.ackdEntries,
		func(_ int, t *time.Time) bool {
			return t.Before(currentTime.Add(-c.wait))
		},
		func(_ int, t *time.Time) {
		})
}
// confirmMissing queries Loki directly for the entries currently on the
// missing list, compacts the list in place to remove any that were found,
// and permanently gives up (reporting them as lost) on entries older than
// c.maxWait. The Loki query itself runs without holding missingMtx so new
// entries may be appended concurrently; that is safe (see comment below).
func (c *Comparator) confirmMissing(currentTime time.Time) {
	// Because we are querying loki timestamps vs the timestamp in the log,
	// make the range +/- 10 seconds to allow for clock inaccuracies
	c.missingMtx.Lock()
	if len(c.missingEntries) == 0 {
		c.missingMtx.Unlock()
		return
	}
	// The missing list is in send order, so the first/last elements bound the query range.
	start := *c.missingEntries[0]
	start = start.Add(-10 * time.Second)
	end := *c.missingEntries[len(c.missingEntries)-1]
	end = end.Add(10 * time.Second)
	c.missingMtx.Unlock()

	recvd, err := c.rdr.Query(start, end)
	if err != nil {
		fmt.Fprintf(c.w, "error querying loki: %s\n", err)
		return
	}
	// Now that query has returned, take out the lock on the missingEntries list so we can modify it
	// It's possible more entries were added to this list but that's ok, if they match something in the
	// query result we will remove them, if they don't they won't be old enough yet to remove.
	c.missingMtx.Lock()
	defer c.missingMtx.Unlock()

	// This is to help debug some missing log entries when queried,
	// let's print exactly what we are missing and what Loki sent back
	for _, r := range c.missingEntries {
		fmt.Fprintf(c.w, DebugWebsocketMissingEntry, r.UnixNano())
	}
	for _, r := range recvd {
		fmt.Fprintf(c.w, DebugQueryResult, r.UnixNano())
	}

	// In-place compaction: k is the output index; entries found in the query
	// result are dropped by not copying them forward.
	k := 0
	for i, m := range c.missingEntries {
		found := false
		for _, r := range recvd {
			if (*m).Equal(r) {
				// Entry was found in loki, this can be dropped from the list of missing
				// which is done by NOT incrementing the output index k
				fmt.Fprintf(c.w, DebugEntryFound, (*m).UnixNano(), currentTime.Sub(*m).Seconds())
				found = true
			}
		}
		if !found {
			// Item is still missing
			if i != k {
				c.missingEntries[k] = c.missingEntries[i]
			}
			k++
		}
	}
	// Nil out the pointers to any trailing elements which were removed from the slice
	for i := k; i < len(c.missingEntries); i++ {
		c.missingEntries[i] = nil // or the zero value of T
	}
	c.missingEntries = c.missingEntries[:k]

	// Remove entries from missing list which are older than maxWait
	removed := []*time.Time{}
	c.missingEntries = pruneList(c.missingEntries,
		func(_ int, t *time.Time) bool {
			return t.Before(currentTime.Add(-c.maxWait))
		},
		func(_ int, t *time.Time) {
			removed = append(removed, t)
		})
	// Record the entries which were removed and never received
	for _, e := range removed {
		missingEntries.Inc()
		fmt.Fprintf(c.w, ErrEntryNotReceived, e.UnixNano(), c.maxWait.Seconds())
	}
}
// pruneList filters list in place, dropping every element for which
// shouldRemove reports true; handleRemoved is invoked with each dropped
// element and its original index. Retained elements keep their relative
// order, and vacated tail slots are nilled so the GC can reclaim them.
// The compacted slice (sharing the original backing array) is returned.
func pruneList(list []*time.Time, shouldRemove func(int, *time.Time) bool, handleRemoved func(int, *time.Time)) []*time.Time {
	out := 0
	for idx, entry := range list {
		if shouldRemove(idx, entry) {
			handleRemoved(idx, entry)
			// Dropped: do not advance the output index.
			continue
		}
		list[out] = entry
		out++
	}
	// Release references held by the now-unused tail.
	for idx := out; idx < len(list); idx++ {
		list[idx] = nil
	}
	return list[:out]
}