loki/pkg/dataobj/section.go

package dataobj

import (
	"context"
	"io"
	"iter"
	"strconv"
)

// Sections is a slice of [Section] pointers. Use [Sections.Filter] and
// [Sections.Count] to select or count the sections matching a predicate.
type Sections []*Section

// Filter returns an iterator over the sections in s for which predicate
// reports true, visited in slice order.
//
// The first value of each yielded pair is the zero-based position of the
// section among the matches produced so far (0 for the first match, 1 for the
// second, and so on); it is not the section's position within s. Iteration
// ends as soon as the consumer's yield function returns false.
func (s Sections) Filter(predicate func(*Section) bool) iter.Seq2[int, *Section] {
	return func(yield func(int, *Section) bool) {
		// next is the index handed to the next matching section.
		next := 0

		for _, candidate := range s {
			if predicate(candidate) {
				if !yield(next, candidate) {
					// The consumer asked to stop early.
					return
				}
				next++
			}
		}
	}
}

// Count reports how many sections in s satisfy predicate. The predicate is
// invoked once per section, in slice order.
func (s Sections) Count(predicate func(*Section) bool) int {
	total := 0
	for _, sec := range s {
		if predicate(sec) {
			total++
		}
	}
	return total
}

// A Section is a subset of an [Object] that holds a specific type of data. Use
// section packages for higher-level abstractions around sections.
//
// A Section pairs the [SectionType] identifying its contents with the
// [SectionReader] used to read its data and metadata regions.
type Section struct {
	Type   SectionType   // The type denoting the kind of data held in the section.
	Reader SectionReader // The low-level reader for the section.

	// Tenant specifies the tenant that owns this section. Tenant is required
	// for sections which wholly contain tenant-specific data.
	Tenant string
}

// SectionType uniquely identifies a [Section] type.
type SectionType struct {
	// Namespace is a namespace for the section (e.g.,
	// "github.com/grafana/loki").
	Namespace string

	// Kind is the kind of section, scoped to the namespace (e.g., "logs").
	Kind string

	// Version is an optional section-specified value denoting an encoding
	// version of the section.
	Version uint32
}

// Equals reports whether o has the same namespace and kind as ty. The Version
// field is deliberately left out of the comparison.
func (ty SectionType) Equals(o SectionType) bool {
	if ty.Namespace != o.Namespace {
		return false
	}
	return ty.Kind == o.Kind
}

// String returns ty formatted as "<namespace>/<kind>@v<version>", with the
// version rendered as a base-10 number.
func (ty SectionType) String() string {
	version := strconv.FormatUint(uint64(ty.Version), 10)
	return ty.Namespace + "/" + ty.Kind + "@v" + version
}

// SectionReader is a low-level interface to read data ranges and metadata from
// a section.
//
// Section packages provide higher-level abstractions around [Section] using
// this interface.
type SectionReader interface {
	// ExtensionData returns optional encoded information about the section
	// stored at the file level, provided through the [SectionWriter]. Sections
	// can use this for retrieving critical information that must be known
	// without needing to read the metadata first.
	//
	// ExtensionData will be nil if no extension data is available.
	ExtensionData() []byte

	// DataRange opens a reader of length bytes from the data region of a
	// section. The offset argument determines where in the data region reading
	// should start.
	//
	// DataRange returns an error if the read fails or if offset+length goes
	// beyond the readable data region. The returned reader is only valid as
	// long as the provided ctx is not canceled.
	DataRange(ctx context.Context, offset, length int64) (io.ReadCloser, error)

	// MetadataRange opens a reader of length bytes from the metadata region of
	// a section. The offset argument determines where in the metadata region
	// reading should start.
	//
	// MetadataRange returns an error if the read fails or if offset+length goes
	// beyond the readable metadata region. The returned reader is only valid
	// as long as the provided ctx is not canceled.
	MetadataRange(ctx context.Context, offset, length int64) (io.ReadCloser, error)

	// DataSize returns the total size of the data region of a section. DataSize
	// returns 0 for sections with no data region.
	DataSize() int64

	// MetadataSize returns the total size of the metadata region of a section.
	// MetadataSize returns 0 for sections with no metadata region.
	MetadataSize() int64
}

// A SectionBuilder accumulates data for a single in-progress section.
//
// Each section package provides an implementation of SectionBuilder that
// includes utilities to buffer data into that section. Callers should use
// Bytes or EstimatedSize to determine when enough data has been accumulated
// into a section.
//
// NOTE(review): Bytes and EstimatedSize are not declared on this interface;
// presumably they are provided by the concrete builders — confirm.
type SectionBuilder interface {
	// Type returns the SectionType representing the section being built.
	// Implementations are responsible for guaranteeing that no two
	// SectionBuilders return the same SectionType for different encodings.
	//
	// The returned Type is encoded directly into data objects. Implementations
	// that change SectionType values should be careful to continue supporting
	// old values for backwards compatibility.
	Type() SectionType

	// Flush encodes and flushes the section to w. Encodings that rely on byte
	// offsets should be relative to the first byte of the section's data.
	//
	// Flush returns the number of bytes written to w, and any error encountered
	// while encoding or flushing.
	//
	// After Flush is called, the SectionBuilder is reset to a fresh state and
	// can be reused.
	Flush(w SectionWriter) (n int64, err error)

	// Reset resets the SectionBuilder to a fresh state.
	Reset()
}

// SectionWriter writes data object sections to an underlying stream, such as a
// data object.
type SectionWriter interface {
	// WriteSection writes a section to the underlying data stream, partitioned
	// by section data and section metadata. It returns n, the sum of bytes
	// written from both input slices (0 <= n <= len(data)+len(metadata)), and
	// any error encountered that caused the write to stop early.
	//
	// The opts argument provides additional information about the section being
	// written. If opts is nil, the section is written without any additional
	// context.
	//
	// Implementations of WriteSection:
	//
	//   - Must return an error if the write stops early.
	//   - Must not modify the slices passed to it, even temporarily.
	//   - Must not retain references to slices after WriteSection returns.
	//
	// The physical layout of data and metadata is not defined: they may be
	// written non-contiguously, interleaved, or in any order.
	WriteSection(opts *WriteSectionOptions, data, metadata []byte) (n int64, err error)
}

// WriteSectionOptions provides additional options when writing sections with
// [SectionWriter.WriteSection].
type WriteSectionOptions struct {
	// Tenant that owns the written data and metadata. Tenant must be set for
	// sections that are wholly owned by a single tenant.
	Tenant string

	// ExtensionData is an optional field for section information to store at
	// the file level. To minimize the cost of opening data objects, sections
	// should only use this field for information that's required to start
	// reading section metadata, and should keep the payload as small as
	// possible.
	//
	// ExtensionData is not counted in the value of n returned by
	// [SectionWriter.WriteSection].
	//
	// Implementations of [SectionWriter] must not retain references to this
	// slice after WriteSection returns.
	ExtensionData []byte
}