mirror of https://github.com/grafana/loki
chore(dataobj-consumer): Sort logs object-wide (#19231)
Sorting logs globally (object-wide per tenant) removes overlapping time ranges of sections in the objects. Sections contain logs from multiple streams, and the ingest lag of streams may vary. This means that although the logs of individual sections are sorted by timestamp, the overall sorting of logs is not guaranteed. Therefore sections are sorted with a k-way merge (SortMerge), which may over-query data in cases where the query would otherwise reach its result limit early. Signed-off-by: Christian Haudum <christian.haudum@gmail.com>pull/19254/head
parent
37eddabac7
commit
a3017f2b13
@ -0,0 +1,90 @@ |
||||
package logsobj |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"math" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/dataobj" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/internal/result" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/sections/logs" |
||||
"github.com/grafana/loki/v3/pkg/util/loser" |
||||
) |
||||
|
||||
// sortMergeIterator returns an iterator that performs a k-way merge of records from multiple logs sections.
|
||||
// It requires that the input sections are sorted sorted by the same order.
|
||||
func sortMergeIterator(ctx context.Context, sections []*dataobj.Section) (result.Seq[logs.Record], error) { |
||||
sequences := make([]*sectionSequence, 0, len(sections)) |
||||
for _, s := range sections { |
||||
sec, err := logs.Open(ctx, s) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to open logs section: %w", err) |
||||
} |
||||
|
||||
ds, err := logs.MakeColumnarDataset(sec) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("creating columnar dataset: %w", err) |
||||
} |
||||
|
||||
columns, err := result.Collect(ds.ListColumns(ctx)) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
r := dataset.NewReader(dataset.ReaderOptions{ |
||||
Dataset: ds, |
||||
Columns: columns, |
||||
Prefetch: true, |
||||
}) |
||||
|
||||
sequences = append(sequences, §ionSequence{ |
||||
section: sec, |
||||
DatasetSequence: logs.NewDatasetSequence(r, 8<<10), |
||||
}) |
||||
} |
||||
|
||||
maxValue := result.Value(dataset.Row{ |
||||
Index: math.MaxInt, |
||||
Values: []dataset.Value{ |
||||
dataset.Int64Value(math.MaxInt64), // StreamID
|
||||
dataset.Int64Value(math.MinInt64), // Timestamp
|
||||
}, |
||||
}) |
||||
|
||||
tree := loser.New(sequences, maxValue, sectionSequenceAt, rowResultLess, sectionSequenceClose) |
||||
|
||||
return result.Iter( |
||||
func(yield func(logs.Record) bool) error { |
||||
defer tree.Close() |
||||
for tree.Next() { |
||||
seq := tree.Winner() |
||||
|
||||
row, err := sectionSequenceAt(seq).Value() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
var record logs.Record |
||||
err = logs.DecodeRow(seq.section.Columns(), row, &record, nil) |
||||
if err != nil || !yield(record) { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
}), nil |
||||
} |
||||
|
||||
// sectionSequence adapts a single logs section to the loser.Sequence
// interface by embedding logs.DatasetSequence, keeping a handle to the
// section so its column metadata can be used when decoding rows.
type sectionSequence struct {
	logs.DatasetSequence
	section *logs.Section // retained for Columns() during row decoding
}

// Compile-time check that *sectionSequence satisfies loser.Sequence.
var _ loser.Sequence = (*sectionSequence)(nil)
|
||||
func sectionSequenceAt(seq *sectionSequence) result.Result[dataset.Row] { return seq.At() } |
||||
func sectionSequenceClose(seq *sectionSequence) { seq.Close() } |
||||
|
||||
func rowResultLess(a, b result.Result[dataset.Row]) bool { |
||||
return result.Compare(a, b, logs.CompareRows) < 0 |
||||
} |
||||
@ -0,0 +1,100 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"context" |
||||
"io" |
||||
"log" |
||||
"os" |
||||
"time" |
||||
|
||||
gokitlog "github.com/go-kit/log" |
||||
"github.com/grafana/loki/v3/pkg/dataobj" |
||||
"github.com/grafana/loki/v3/pkg/dataobj/consumer/logsobj" |
||||
"github.com/grafana/loki/v3/pkg/scratch" |
||||
) |
||||
|
||||
func main() { |
||||
args := os.Args[1:] |
||||
if len(args) < 1 { |
||||
log.Fatal("requires at least 1 argument: dataobj") |
||||
} |
||||
|
||||
ctx := context.Background() |
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) |
||||
defer cancel() |
||||
|
||||
fp, err := os.Open(args[0]) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
defer fp.Close() |
||||
|
||||
fi, err := fp.Stat() |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
|
||||
orig, err := dataobj.FromReaderAt(fp, fi.Size()) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
|
||||
cfg := logsobj.BuilderConfig{ |
||||
TargetPageSize: 64 << 10, |
||||
MaxPageRows: 1000, |
||||
TargetObjectSize: 512 << 20, |
||||
TargetSectionSize: 512 << 20, |
||||
BufferSize: 16 << 20, |
||||
SectionStripeMergeLimit: 8, |
||||
} |
||||
scr, err := scratch.NewFilesystem(gokitlog.NewNopLogger(), os.TempDir()) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
b, err := logsobj.NewBuilder(cfg, scr) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
|
||||
start := time.Now() |
||||
sortedObj, closer, err := b.CopyAndSort(orig) |
||||
duration := time.Since(start) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
defer closer.Close() |
||||
|
||||
log.Printf("Took %s\n", duration) |
||||
|
||||
log.Println("== ORIIGNAL DATAOBJ") |
||||
for _, s := range sortedObj.Sections() { |
||||
log.Println(" ", s.Type.String(), s.Tenant) |
||||
} |
||||
|
||||
log.Println("== SORTED DATAOBJ") |
||||
for _, s := range sortedObj.Sections() { |
||||
log.Println(" ", s.Type.String(), s.Tenant) |
||||
} |
||||
|
||||
fw, err := os.CreateTemp("", fi.Name()+"-sorted") |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
defer fw.Close() |
||||
|
||||
reader, err := sortedObj.Reader(ctx) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
defer reader.Close() |
||||
|
||||
start = time.Now() |
||||
// Copy the sorted data from reader to the output file
|
||||
written, err := io.Copy(fw, reader) |
||||
duration = time.Since(start) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
|
||||
log.Printf("Written %d bytes to %s in %s\n", written, fw.Name(), duration) |
||||
} |
||||
Loading…
Reference in new issue