loki/pkg/storage/chunk/encoding/chunk.go

// This file was taken from Prometheus (https://github.com/prometheus/prometheus).
// The original license header is included below:
//
// Copyright 2014 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package encoding

import (
	"errors"
	"io"
	"sort"

	"github.com/prometheus/common/model"
	errs "github.com/weaveworks/common/errors"

	"github.com/grafana/loki/pkg/prom1/storage/metric"
)

const (
	// ChunkLen is the length of a chunk in bytes.
	ChunkLen = 1024

	ErrSliceNoDataInRange = errs.Error("chunk has no data for given range to slice")
	ErrSliceChunkOverflow = errs.Error("slicing should not overflow a chunk")
)

var errChunkBoundsExceeded = errors.New("attempted access outside of chunk boundaries")

// Chunk is the interface for all chunks. Chunks are generally not
// goroutine-safe.
type Chunk interface {
	// Add adds a SamplePair to the chunk, performs any necessary
	// re-encoding, and creates any necessary overflow chunk.
	// The returned Chunk is the overflow chunk if it was created.
	// The returned Chunk is nil if the sample got appended to the same chunk.
	Add(sample model.SamplePair) (Chunk, error)
	// NewIterator returns an iterator for the chunk.
	// The iterator passed as argument is for re-use. Depending on implementation,
	// the iterator can be re-used or a new iterator can be allocated.
	NewIterator(Iterator) Iterator
	Marshal(io.Writer) error
	UnmarshalFromBuf([]byte) error
	Encoding() Encoding
	Utilization() float64
	// Slice returns a smaller chunk that includes all samples between start and end
	// (inclusive). It may overestimate; for some encodings it is a no-op.
	Slice(start, end model.Time) Chunk
	// Rebound returns a smaller chunk that includes all samples between start and end (inclusive).
	// We do not want to change the existing Slice implementations because Slice
	// is built specifically for query optimization and is a no-op for some encodings.
	Rebound(start, end model.Time) (Chunk, error)
	// Len returns the number of samples in the chunk. Implementations may be
	// expensive.
	Len() int
	// Size returns the approximate length of the chunk in bytes.
	Size() int
}

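// appendSampleSketch is an illustrative sketch, not part of the original
// Prometheus/Cortex code, and the function name is assumed here purely for
// illustration. It shows the intended calling pattern for Add: append to the
// newest chunk of a series (assumed non-empty here), and when Add returns a
// non-nil overflow chunk (which already holds the sample), grow the series by
// that chunk.
func appendSampleSketch(chunks []Chunk, s model.SamplePair) ([]Chunk, error) {
	head := chunks[len(chunks)-1]
	overflow, err := head.Add(s)
	if err != nil {
		return nil, err
	}
	if overflow != nil {
		// The sample did not fit into head; Add created an overflow chunk
		// containing it, so the series gains one chunk.
		chunks = append(chunks, overflow)
	}
	return chunks, nil
}
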
// Iterator enables efficient access to the content of a chunk. It is
// generally not safe to use an Iterator concurrently with or after chunk
// mutation.
type Iterator interface {
	// Scans the next value in the chunk. Directly after the iterator has
	// been created, the next value is the first value in the
	// chunk. Otherwise, it is the value following the last value scanned or
	// found (by one of the Find... methods). Returns false if either the
	// end of the chunk is reached or an error has occurred.
	Scan() bool
	// Finds the oldest value at or after the provided time. Returns false
	// if either the chunk contains no value at or after the provided time,
	// or an error has occurred.
	FindAtOrAfter(model.Time) bool
	// Returns the last value scanned (by the Scan method) or found (by one
	// of the Find... methods). It returns model.ZeroSamplePair before any of
	// those methods were called.
	Value() model.SamplePair
	// Returns a batch of the provided size; NB not idempotent! Should only be
	// called once per Scan.
	Batch(size int) Batch
	// Returns the last error encountered. In general, an error signals data
	// corruption in the chunk and requires quarantining.
	Err() error
}

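// iterateChunkSketch is an illustrative sketch, not part of the original
// file; the function name is assumed for illustration. It shows the expected
// consumption pattern for an Iterator: call Scan until it returns false, read
// each sample via Value, and consult Err once afterwards.
func iterateChunkSketch(c Chunk) ([]model.SamplePair, error) {
	it := c.NewIterator(nil)
	var samples []model.SamplePair
	for it.Scan() {
		samples = append(samples, it.Value())
	}
	// Scan returns false both at the end of the chunk and on error, so Err
	// must be checked to distinguish the two.
	return samples, it.Err()
}
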
// BatchSize is the number of samples per batch; this was chosen by
// benchmarking all sizes from 1 to 128.
const BatchSize = 12

// Batch is a sorted set of (timestamp, value) pairs. They are intended to be
// small, and passed by value.
type Batch struct {
	Timestamps [BatchSize]int64
	Values     [BatchSize]float64
	Index      int
	Length     int
}

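// sumBatchSketch is an illustrative sketch, not part of the original file;
// the function name is assumed for illustration. Only the first Length
// entries of a Batch are valid; Index is the read cursor that consumers
// advance, so this sums the entries not yet consumed.
func sumBatchSketch(b Batch) float64 {
	var sum float64
	for i := b.Index; i < b.Length; i++ {
		sum += b.Values[i]
	}
	return sum
}
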
// RangeValues is a utility function that retrieves all values within the given
// range from an Iterator.
func RangeValues(it Iterator, in metric.Interval) ([]model.SamplePair, error) {
	result := []model.SamplePair{}
	if !it.FindAtOrAfter(in.OldestInclusive) {
		return result, it.Err()
	}
	for !it.Value().Timestamp.After(in.NewestInclusive) {
		result = append(result, it.Value())
		if !it.Scan() {
			break
		}
	}
	return result, it.Err()
}

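// rangeValuesSketch is an illustrative sketch, not part of the original file;
// the function name is assumed for illustration. It shows RangeValues being
// used with an inclusive interval to pull all samples of a chunk between two
// timestamps.
func rangeValuesSketch(c Chunk, from, through model.Time) ([]model.SamplePair, error) {
	return RangeValues(c.NewIterator(nil), metric.Interval{
		OldestInclusive: from,
		NewestInclusive: through,
	})
}
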
// addToOverflowChunk is a utility function that creates a new overflow chunk,
// adds the provided sample to it, and returns the new chunk.
func addToOverflowChunk(s model.SamplePair) (Chunk, error) {
	overflowChunk := New()
	_, err := overflowChunk.Add(s)
	if err != nil {
		return nil, err
	}
	return overflowChunk, nil
}

// transcodeAndAdd is a utility function that transcodes the src chunk into the
// provided dst chunk (plus the necessary overflow chunks) and then adds the
// provided sample. It returns the new chunks (transcoded plus overflow) with
// the new sample at the end.
func transcodeAndAdd(dst Chunk, src Chunk, s model.SamplePair) ([]Chunk, error) {
	var (
		head     = dst
		newChunk Chunk
		body     = []Chunk{head}
		err      error
	)
	it := src.NewIterator(nil)
	for it.Scan() {
		if newChunk, err = head.Add(it.Value()); err != nil {
			return nil, err
		}
		if newChunk != nil {
			body = append(body, newChunk)
			head = newChunk
		}
	}
	if it.Err() != nil {
		return nil, it.Err()
	}
	if newChunk, err = head.Add(s); err != nil {
		return nil, err
	}
	if newChunk != nil {
		body = append(body, newChunk)
	}
	return body, nil
}

// indexAccessor allows accesses to samples by index.
type indexAccessor interface {
	timestampAtIndex(int) model.Time
	sampleValueAtIndex(int) model.SampleValue
	err() error
}

// indexAccessingChunkIterator is a chunk iterator for chunks for which an
// indexAccessor implementation exists.
type indexAccessingChunkIterator struct {
	len       int
	pos       int
	lastValue model.SamplePair
	acc       indexAccessor
}

func newIndexAccessingChunkIterator(len int, acc indexAccessor) *indexAccessingChunkIterator {
	return &indexAccessingChunkIterator{
		len:       len,
		pos:       -1,
		lastValue: model.ZeroSamplePair,
		acc:       acc,
	}
}

// Scan implements Iterator.
func (it *indexAccessingChunkIterator) Scan() bool {
	it.pos++
	if it.pos >= it.len {
		return false
	}
	it.lastValue = model.SamplePair{
		Timestamp: it.acc.timestampAtIndex(it.pos),
		Value:     it.acc.sampleValueAtIndex(it.pos),
	}
	return it.acc.err() == nil
}

// FindAtOrAfter implements Iterator.
func (it *indexAccessingChunkIterator) FindAtOrAfter(t model.Time) bool {
	i := sort.Search(it.len, func(i int) bool {
		return !it.acc.timestampAtIndex(i).Before(t)
	})
	if i == it.len || it.acc.err() != nil {
		return false
	}
	it.pos = i
	it.lastValue = model.SamplePair{
		Timestamp: it.acc.timestampAtIndex(i),
		Value:     it.acc.sampleValueAtIndex(i),
	}
	return true
}

// Value implements Iterator.
func (it *indexAccessingChunkIterator) Value() model.SamplePair {
	return it.lastValue
}

// Batch implements Iterator.
func (it *indexAccessingChunkIterator) Batch(size int) Batch {
	var batch Batch
	j := 0
	for j < size && it.pos < it.len {
		batch.Timestamps[j] = int64(it.acc.timestampAtIndex(it.pos))
		batch.Values[j] = float64(it.acc.sampleValueAtIndex(it.pos))
		it.pos++
		j++
	}
	// Interface contract is that you call Scan before calling Batch; therefore
	// without this decrement, you'd end up skipping samples.
	it.pos--
	batch.Index = 0
	batch.Length = j
	return batch
}

// Err implements Iterator.
func (it *indexAccessingChunkIterator) Err() error {
	return it.acc.err()
}

func reboundChunk(c Chunk, start, end model.Time) (Chunk, error) {
	itr := c.NewIterator(nil)
	if !itr.FindAtOrAfter(start) {
		return nil, ErrSliceNoDataInRange
	}
	pc, err := NewForEncoding(c.Encoding())
	if err != nil {
		return nil, err
	}
	for !itr.Value().Timestamp.After(end) {
		oc, err := pc.Add(itr.Value())
		if err != nil {
			return nil, err
		}
		if oc != nil {
			return nil, ErrSliceChunkOverflow
		}
		if !itr.Scan() {
			break
		}
	}
	err = itr.Err()
	if err != nil {
		return nil, err
	}
	if pc.Len() == 0 {
		return nil, ErrSliceNoDataInRange
	}
	return pc, nil
}
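
// reboundSketch is an illustrative sketch, not part of the original file; the
// function name is assumed for illustration. An encoding without a
// specialised Rebound fast path can delegate to reboundChunk, which
// re-encodes only the samples in [start, end] into a new chunk of the same
// encoding.
func reboundSketch(c Chunk, start, end model.Time) (Chunk, error) {
	// Returns ErrSliceNoDataInRange when no samples fall inside the range, and
	// ErrSliceChunkOverflow if the selected samples no longer fit in one chunk.
	return reboundChunk(c, start, end)
}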