loki/pkg/dataobj/sections/logs/decoder.go

refactor(dataobj): invert dependency between dataobj and sections (#17762)

Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck:

* Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones.
* The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases.

This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections:

* The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`.
* Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data.
* All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`).
* Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections.

For the most part, this change is a no-op, with two exceptions:

1. Section cutting is now performed by the caller; however, this shouldn't result in any issues.
2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged.
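
The file below is one piece of that self-contained section logic: the read side of the logs section. The inversion itself is easiest to see in miniature, so here is a hedged sketch of the pattern the PR describes, not the actual Loki API: every name in it (ObjectBuilder, SectionWriter, logsBuilder) is invented for illustration. The generic object layer accepts opaque sections, and the caller, not the section, decides when to cut.

// Toy model of the inverted dependency: the object builder knows nothing
// about concrete section types, and cutting is the caller's decision.
// All names are illustrative; this is not the Loki API.
package main

import "fmt"

// SectionWriter is all the generic object layer sees: an opaque section
// that can serialize itself.
type SectionWriter interface {
	Flush() ([]byte, error)
}

// ObjectBuilder assembles an object out of opaque sections.
type ObjectBuilder struct {
	sections [][]byte
}

func (b *ObjectBuilder) Append(sw SectionWriter) error {
	data, err := sw.Flush()
	if err != nil {
		return err
	}
	b.sections = append(b.sections, data)
	return nil
}

// logsBuilder stands in for a section package's builder; all encoding
// logic for the section lives here, not in the object layer.
type logsBuilder struct {
	lines []string
}

func (lb *logsBuilder) Append(line string) { lb.lines = append(lb.lines, line) }

func (lb *logsBuilder) EstimatedSize() int {
	n := 0
	for _, l := range lb.lines {
		n += len(l)
	}
	return n
}

// Flush serializes and resets the builder, "cutting" a section.
func (lb *logsBuilder) Flush() ([]byte, error) {
	out := []byte(fmt.Sprint(lb.lines))
	lb.lines = nil
	return out, nil
}

func main() {
	var (
		obj  ObjectBuilder
		logs logsBuilder
	)
	for _, line := range []string{"foo", "bar", "baz"} {
		logs.Append(line)
		// The caller decides when to cut: here, whenever the section
		// crosses an arbitrary size threshold.
		if logs.EstimatedSize() >= 6 {
			if err := obj.Append(&logs); err != nil {
				panic(err)
			}
		}
	}
	fmt.Printf("object holds %d logs sections\n", len(obj.sections))
}

In the real code, the logs section package plays the role of logsBuilder, and the decoder below is its read-side counterpart.
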
package logs

import (
	"bytes"
	"context"
	"fmt"
	"io"

	"github.com/grafana/loki/v3/pkg/dataobj"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/bufpool"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/windowing"
)

// newDecoder creates a new [decoder] for the given [dataobj.SectionReader].
func newDecoder(reader dataobj.SectionReader) *decoder {
	return &decoder{sr: reader}
}

type decoder struct {
	sr dataobj.SectionReader
}

// Columns describes the set of columns in the section.
func (rd *decoder) Columns(ctx context.Context) ([]*logsmd.ColumnDesc, error) {
	rc, err := rd.sr.Metadata(ctx)
	if err != nil {
		return nil, fmt.Errorf("reading logs section metadata: %w", err)
	}
	defer rc.Close()
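
	// Borrow a pooled reader for decoding so each call avoids allocating a
	// fresh buffer.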
	br := bufpool.GetReader(rc)
	defer bufpool.PutReader(br)

	md, err := decodeLogsMetadata(br)
	if err != nil {
		return nil, err
	}
	return md.Columns, nil
}

// Pages retrieves the set of pages for the provided columns. The order of
// page lists emitted by the sequence matches the order of columns provided:
// the first page list corresponds to the first column, and so on.
func (rd *decoder) Pages(ctx context.Context, columns []*logsmd.ColumnDesc) result.Seq[[]*logsmd.PageDesc] {
	return result.Iter(func(yield func([]*logsmd.PageDesc) bool) error {
		results := make([][]*logsmd.PageDesc, len(columns))

		columnInfo := func(c *logsmd.ColumnDesc) (uint64, uint64) {
			return c.GetInfo().MetadataOffset, c.GetInfo().MetadataSize
		}
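
		// windowing.Iter coalesces columns whose metadata regions sit close
		// together into windows of at most windowing.S3WindowSize, so each
		// window below costs one ranged read against object storage rather
		// than one request per column.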
		for window := range windowing.Iter(columns, columnInfo, windowing.S3WindowSize) {
			if len(window) == 0 {
				continue
			}

			var (
				windowOffset = window.Start().GetInfo().MetadataOffset
				windowSize   = (window.End().GetInfo().MetadataOffset + window.End().GetInfo().MetadataSize) - windowOffset
			)

			rc, err := rd.sr.DataRange(ctx, int64(windowOffset), int64(windowSize))
			if err != nil {
				return fmt.Errorf("reading column data: %w", err)
			}
			data, err := readAndClose(rc, windowSize)
			if err != nil {
				return fmt.Errorf("read column metadata: %w", err)
			}
			for _, wp := range window {
				// Find the slice in the data for this column.
				var (
					columnOffset = wp.Data.GetInfo().MetadataOffset
					dataOffset   = columnOffset - windowOffset
				)

				r := bytes.NewReader(data[dataOffset : dataOffset+wp.Data.GetInfo().MetadataSize])
				md, err := decodeLogsColumnMetadata(r)
				if err != nil {
					return err
				}

				// wp.Index is the position of the column in the original columns
				// slice; this retains the proper order of data in results.
				results[wp.Index] = md.Pages
			}
		}

		for _, data := range results {
			if !yield(data) {
				return nil
			}
		}
		return nil
	})
}

// readAndClose reads exactly size bytes from rc and then closes it.
func readAndClose(rc io.ReadCloser, size uint64) ([]byte, error) {
	defer rc.Close()

	data := make([]byte, size)
	if _, err := io.ReadFull(rc, data); err != nil {
		return nil, fmt.Errorf("read column data: %w", err)
	}
	return data, nil
}

// ReadPages reads the provided set of pages, iterating over their data
// matching the argument order. If an error is encountered while retrieving
// pages, an error is emitted from the sequence and iteration stops.
func (rd *decoder) ReadPages(ctx context.Context, pages []*logsmd.PageDesc) result.Seq[dataset.PageData] {
	return result.Iter(func(yield func(dataset.PageData) bool) error {
		results := make([]dataset.PageData, len(pages))

		pageInfo := func(p *logsmd.PageDesc) (uint64, uint64) {
			return p.GetInfo().DataOffset, p.GetInfo().DataSize
		}

		// TODO(rfratto): If there are many windows, it may make sense to read them
		// in parallel.
		for window := range windowing.Iter(pages, pageInfo, windowing.S3WindowSize) {
			if len(window) == 0 {
				continue
			}

			var (
				windowOffset = window.Start().GetInfo().DataOffset
				windowSize   = (window.End().GetInfo().DataOffset + window.End().GetInfo().DataSize) - windowOffset
			)

			rc, err := rd.sr.DataRange(ctx, int64(windowOffset), int64(windowSize))
			if err != nil {
				return fmt.Errorf("reading page data: %w", err)
			}
			data, err := readAndClose(rc, windowSize)
			if err != nil {
				return fmt.Errorf("read page data: %w", err)
			}
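
			// Each page is handed out as a subslice of the window buffer
			// just read; no per-page copy is made.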
			for _, wp := range window {
				// Find the slice in the data for this page.
				var (
					pageOffset = wp.Data.GetInfo().DataOffset
					dataOffset = pageOffset - windowOffset
				)

				// wp.Index is the position of the page in the original pages slice;
				// this retains the proper order of data in results.
				results[wp.Index] = dataset.PageData(data[dataOffset : dataOffset+wp.Data.GetInfo().DataSize])
			}
		}

		for _, data := range results {
			if !yield(data) {
				return nil
			}
		}
		return nil
	})
}
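
Both Pages and ReadPages lean on the same windowing trick, so a standalone illustration may help. The sketch below rebuilds the coalescing logic with invented types (region, a local buffer standing in for ranged reads); none of these names come from Loki, and the real windowing package differs in detail.

// Toy illustration of the windowing pattern above: coalesce byte regions
// that fit inside one window into a single ranged read, then slice each
// region back out of the shared buffer. All types are invented for the
// example.
package main

import "fmt"

type region struct{ off, size int }

// windows greedily groups regions (assumed sorted by offset) so each group
// spans at most maxWindow bytes from the first region's start to the last
// region's end.
func windows(regions []region, maxWindow int) [][]region {
	var out [][]region
	var cur []region
	for _, r := range regions {
		if len(cur) > 0 && r.off+r.size-cur[0].off > maxWindow {
			out = append(out, cur)
			cur = nil
		}
		cur = append(cur, r)
	}
	if len(cur) > 0 {
		out = append(out, cur)
	}
	return out
}

func main() {
	// Pretend blob is the remote object; a real reader would issue one
	// ranged GET per window instead of slicing a local buffer.
	blob := []byte("0123456789abcdefghij")
	regions := []region{{0, 3}, {4, 2}, {10, 4}, {15, 5}}

	for _, w := range windows(regions, 8) {
		start := w[0].off
		end := w[len(w)-1].off + w[len(w)-1].size
		data := blob[start:end] // one "request" covers the whole window

		for _, r := range w {
			// Same arithmetic as the decoder: region offset minus window
			// offset locates the region inside the shared buffer.
			fmt.Printf("%q\n", data[r.off-start:r.off-start+r.size])
		}
	}
}

Running it prints the four regions served by three coalesced reads, mirroring how the decoder turns many small page and column-metadata fetches into a handful of ranged requests.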