Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/logql/bench/generator.go

487 lines
14 KiB

package bench
import (
"encoding/json"
"fmt"
"iter"
"maps"
"math/rand"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/grafana/loki/v3/pkg/logproto"
)
// Package-level constants for the generator's on-disk layout and log levels.
const (
// DefaultDataDir is the default data directory path.
// NOTE(review): not referenced in the visible part of this file; presumably
// used by callers as the dataDir argument of SaveConfig/LoadConfig — confirm.
DefaultDataDir = "./data"
// configFileName is the file name that SaveConfig and LoadConfig join onto
// the data directory to locate the persisted GeneratorConfig.
configFileName = "generator.json"
errorLevel = "error" // Constant for the error level string
)
// Batch groups the log streams that are produced together in one generation
// round.
type Batch struct {
	Streams []logproto.Stream
}

// Size reports the byte size of the batch: for every stream the length of its
// label string, plus for every entry the length of the log line and the
// lengths of each structured-metadata name and value.
func (b Batch) Size() int {
	total := 0
	for i := range b.Streams {
		s := &b.Streams[i]
		total += len(s.Labels)
		for k := range s.Entries {
			e := &s.Entries[k]
			total += len(e.Line)
			for _, md := range e.StructuredMetadata {
				total += len(md.Name)
				total += len(md.Value)
			}
		}
	}
	return total
}
// LabelConfig configures the cardinality of generated labels.
//
// When stream labels are built, each integer field is passed to rand.Intn, so
// it is the number of distinct values (0..N-1) the corresponding label can
// take and is expected to be positive. Each slice field is sampled by a random
// index and is expected to be non-empty. The ranges in the field comments are
// suggestions; nothing in this file enforces them.
type LabelConfig struct {
Clusters int // 1-10 clusters
Namespaces int // 10-100 namespaces
Services int // 100-1000 services
Pods int // 1000-10000 pods
Containers int // 1-5 containers per pod
LogLevels []string // Log levels to use
EnvTypes []string // Environment types
Regions []string // Regions
Datacenters []string // Datacenters
}
// defaultLabelConfig is the default label configuration with reasonable
// cardinality values. It is the LabelConfig of defaultGeneratorConfig and is
// therefore what DefaultOpt hands out.
var defaultLabelConfig = LabelConfig{
Clusters: 5,
Namespaces: 50,
Services: 200,
Pods: 5000,
Containers: 3,
LogLevels: []string{"debug", "info", "warn", "error"},
EnvTypes: []string{"prod", "staging", "dev"},
Regions: []string{"us-east-1", "us-west-2", "eu-west-1", "ap-southeast-1"},
Datacenters: []string{"dc1", "dc2", "dc3"},
}
// GeneratorConfig contains all configuration for the log generator.
//
// The struct carries no JSON tags, so SaveConfig and LoadConfig serialize it
// under its Go field names.
type GeneratorConfig struct {
// StartTime is the timestamp from which each stream's entries are laid out.
StartTime time.Time
// TimeSpread is the span after StartTime that is divided evenly between a
// stream's base entries.
TimeSpread time.Duration
// DenseIntervals defines periods of high log density
// Each interval will have 10x more logs than normal periods
DenseIntervals []DenseInterval
// LabelConfig controls the cardinality of the generated stream labels.
LabelConfig LabelConfig
NumStreams int // Number of streams to generate per batch; 0 falls back to the default
Seed int64 // Source of randomness
}
// DenseInterval represents a period of high log volume.
// The period begins at Start and lasts for Duration.
type DenseInterval struct {
Start time.Time
Duration time.Duration
}
// defaultGeneratorConfig is the default generator configuration with sensible
// values: one day of logs starting at 2024-01-01 00:00 UTC, with two dense
// windows (10:00 for one hour and 15:00 for thirty minutes). DefaultOpt reads
// its values from here, and generateStreamMetadata falls back to its
// NumStreams when the configured value is zero.
var defaultGeneratorConfig = GeneratorConfig{
StartTime: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC),
TimeSpread: 24 * time.Hour,
LabelConfig: defaultLabelConfig,
NumStreams: 250, // Default to 250 streams per batch
Seed: 1, // Default to seed 1 for reproducibility
DenseIntervals: []DenseInterval{
{
Start: time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC),
Duration: time.Hour,
},
{
Start: time.Date(2024, 1, 1, 15, 0, 0, 0, time.UTC),
Duration: 30 * time.Minute,
},
},
}
// NewRand returns a fresh *rand.Rand whose source is seeded with c.Seed, so
// two calls on the same configuration produce identical random sequences.
func (c *GeneratorConfig) NewRand() *rand.Rand {
	source := rand.NewSource(c.Seed)
	return rand.New(source)
}
// StreamMetadata holds the consistent properties of a stream.
//
// Labels is the pre-rendered label set string (of the form
// `{cluster="...", namespace="...", ...}`) that is copied onto every
// logproto.Stream built for this stream. App is the application whose
// LogGenerator and OTELResource are used when the stream's entries are
// generated.
type StreamMetadata struct {
Labels string
App Application
}
// Generator represents a log generator with configuration.
//
// All randomness is drawn from the single rnd field, so the output depends on
// the order in which the generator's methods consume it.
// NOTE(review): no locking is visible around rnd or streamsMeta; presumably a
// Generator is meant to be used from one goroutine at a time — confirm.
type Generator struct {
config GeneratorConfig
rnd *rand.Rand
streamsMeta []StreamMetadata // Pre-generated stream metadata used across batches
apps map[string]Application // Map of available applications by name
}
// Opt represents configuration options for the generator.
//
// Opt is used as a value-style builder: every With* method has a value
// receiver, modifies its own copy and returns it, so calls can be chained and
// the Opt they were called on is left untouched.
type Opt struct {
	startTime      time.Time
	timeSpread     time.Duration
	denseIntervals []DenseInterval
	labelConfig    LabelConfig
	numStreams     int   // Number of streams to generate per batch
	seed           int64 // Source of randomness
}

// WithStartTime sets the start time for log generation.
func (o Opt) WithStartTime(t time.Time) Opt {
	o.startTime = t
	return o
}

// WithTimeSpread sets the time spread for log generation.
func (o Opt) WithTimeSpread(d time.Duration) Opt {
	o.timeSpread = d
	return o
}

// WithDenseInterval adds a dense interval to the configuration.
//
// The existing intervals are copied into a fresh slice before the new one is
// appended. A plain append could write into spare capacity of a backing array
// that is still shared with the receiver (or with another Opt derived from
// it), so two Opts built from the same parent would silently overwrite each
// other's last interval.
func (o Opt) WithDenseInterval(start time.Time, duration time.Duration) Opt {
	intervals := make([]DenseInterval, 0, len(o.denseIntervals)+1)
	intervals = append(intervals, o.denseIntervals...)
	o.denseIntervals = append(intervals, DenseInterval{
		Start:    start,
		Duration: duration,
	})
	return o
}

// WithLabelCardinality configures the cardinality of different labels.
// Only the integer counts are replaced; the value lists (log levels,
// environments, regions, datacenters) of the current label config are kept.
func (o Opt) WithLabelCardinality(clusters, namespaces, services, pods, containers int) Opt {
	o.labelConfig.Clusters = clusters
	o.labelConfig.Namespaces = namespaces
	o.labelConfig.Services = services
	o.labelConfig.Pods = pods
	o.labelConfig.Containers = containers
	return o
}

// WithLabelConfig sets the entire label configuration.
func (o Opt) WithLabelConfig(cfg LabelConfig) Opt {
	o.labelConfig = cfg
	return o
}

// WithNumStreams sets the number of streams to generate per batch.
func (o Opt) WithNumStreams(n int) Opt {
	o.numStreams = n
	return o
}

// WithSeed sets the seed for random number generation.
func (o Opt) WithSeed(seed int64) Opt {
	o.seed = seed
	return o
}
// DefaultOpt returns the default options, taken field by field from
// defaultGeneratorConfig (including the seed, so the default lives in exactly
// one place).
//
// The dense intervals are copied so that the returned Opt does not share a
// backing array with the package-level default configuration.
func DefaultOpt() Opt {
	def := defaultGeneratorConfig
	// s[:0:0] yields a zero-capacity slice of the same type, which forces
	// append to allocate a fresh backing array.
	intervals := append(def.DenseIntervals[:0:0], def.DenseIntervals...)
	return Opt{
		startTime:      def.StartTime,
		timeSpread:     def.TimeSpread,
		denseIntervals: intervals,
		labelConfig:    def.LabelConfig,
		numStreams:     def.NumStreams,
		seed:           def.Seed,
	}
}
// NewGenerator builds a Generator from opt: the options are copied into a
// GeneratorConfig, the random source is seeded with opt.seed, and every entry
// of defaultApplications is registered under its Name.
func NewGenerator(opt Opt) *Generator {
	// Register the available applications by name.
	registry := make(map[string]Application)
	for _, app := range defaultApplications {
		registry[app.Name] = app
	}

	cfg := GeneratorConfig{
		StartTime:      opt.startTime,
		TimeSpread:     opt.timeSpread,
		DenseIntervals: opt.denseIntervals,
		LabelConfig:    opt.labelConfig,
		NumStreams:     opt.numStreams,
		Seed:           opt.seed,
	}

	gen := new(Generator)
	gen.config = cfg
	gen.rnd = rand.New(rand.NewSource(opt.seed))
	gen.apps = registry
	return gen
}
// generateStreamMetadata pre-generates metadata for all streams and stores it
// in g.streamsMeta, replacing whatever was there before.
//
// Applications are assigned round-robin over the sorted application names;
// the label values are drawn from g.rnd in a fixed order (cluster, namespace,
// service, pod, container, env, region, datacenter), so the result is
// reproducible for a given seed.
//
// Unset settings fall back to defaultGeneratorConfig: a non-positive stream
// count or label cardinality, or an empty value list, would otherwise make
// make/rand.Intn panic. The fallback is applied to local values only;
// g.config is not modified. For fully populated configurations the sequence
// of random draws is unchanged.
//
// It assumes at least one application is registered (NewGenerator registers
// defaultApplications).
func (g *Generator) generateStreamMetadata() {
	defaults := defaultGeneratorConfig

	numStreams := g.config.NumStreams
	if numStreams <= 0 {
		numStreams = defaults.NumStreams
	}

	countOr := func(n, fallback int) int {
		if n > 0 {
			return n
		}
		return fallback
	}
	valuesOr := func(vals, fallback []string) []string {
		if len(vals) > 0 {
			return vals
		}
		return fallback
	}

	lc := g.config.LabelConfig
	clusters := countOr(lc.Clusters, defaults.LabelConfig.Clusters)
	namespaces := countOr(lc.Namespaces, defaults.LabelConfig.Namespaces)
	services := countOr(lc.Services, defaults.LabelConfig.Services)
	pods := countOr(lc.Pods, defaults.LabelConfig.Pods)
	containers := countOr(lc.Containers, defaults.LabelConfig.Containers)
	envTypes := valuesOr(lc.EnvTypes, defaults.LabelConfig.EnvTypes)
	regions := valuesOr(lc.Regions, defaults.LabelConfig.Regions)
	datacenters := valuesOr(lc.Datacenters, defaults.LabelConfig.Datacenters)

	g.streamsMeta = make([]StreamMetadata, numStreams)

	// Map iteration order is random; sort the names so the round-robin
	// assignment below is deterministic.
	appNames := make([]string, 0, len(g.apps))
	for name := range g.apps {
		appNames = append(appNames, name)
	}
	sort.Strings(appNames)

	for i := range g.streamsMeta {
		// Pick a deterministic application based on stream index.
		app := g.apps[appNames[i%len(appNames)]]

		// The order of these draws determines the generated data; keep it.
		cluster := fmt.Sprintf("cluster-%d", g.rnd.Intn(clusters))
		namespace := fmt.Sprintf("namespace-%d", g.rnd.Intn(namespaces))
		service := fmt.Sprintf("service-%d", g.rnd.Intn(services))
		pod := fmt.Sprintf("pod-%d", g.rnd.Intn(pods))
		container := fmt.Sprintf("container-%d", g.rnd.Intn(containers))
		env := envTypes[g.rnd.Intn(len(envTypes))]
		region := regions[g.rnd.Intn(len(regions))]
		dc := datacenters[g.rnd.Intn(len(datacenters))]

		// Build Loki label string.
		labels := fmt.Sprintf(
			`{cluster="%s", namespace="%s", service="%s", pod="%s", container="%s", env="%s", region="%s", datacenter="%s", service_name="%s"}`,
			cluster, namespace, service, pod, container, env, region, dc, app.Name,
		)

		g.streamsMeta[i] = StreamMetadata{
			Labels: labels,
			App:    app,
		}
	}
}
// Batches returns an iterator over generated log batches.
//
// Stream metadata is (re)generated when Batches is called, not when the
// iterator is consumed. Every batch reuses that metadata and holds one stream
// per metadata entry with freshly generated log entries. The sequence never
// ends by itself; the consumer must stop iterating.
func (g *Generator) Batches() iter.Seq[*Batch] {
	g.generateStreamMetadata()

	// nextBatch builds one batch: same labels as every other batch, new entries.
	nextBatch := func() *Batch {
		out := make([]logproto.Stream, 0, len(g.streamsMeta))
		for _, meta := range g.streamsMeta {
			out = append(out, logproto.Stream{
				Labels:  meta.Labels,
				Entries: g.generateEntriesForStream(meta),
			})
		}
		return &Batch{Streams: out}
	}

	return func(yield func(*Batch) bool) {
		// Keep producing until the consumer asks to stop.
		for yield(nextBatch()) {
		}
	}
}
// generateEntriesForStream creates log entries for a specific stream using the
// application from the stream metadata.
//
// The stream gets 10-99 base slots spaced evenly over the configured time
// spread, starting at the configured start time. A slot yields one entry, or
// ten when its timestamp lies inside a dense interval [Start, Start+Duration).
// Every entry carries structured metadata in a fixed order: level,
// detected_level, the resource attributes (sorted, prefixed with "resource_"),
// optionally one colliding unprefixed resource key, and optionally trace
// context.
//
// All randomness comes from g.rnd and the order of draws is part of the
// generated data, so statements that consume randomness must not be reordered.
func (g *Generator) generateEntriesForStream(meta StreamMetadata) []logproto.Entry {
	app := meta.App
	faker := NewFaker(g.rnd)

	// Number of base slots: Intn(90) is 0..89, so 10-99 per stream.
	baseEntries := 10 + g.rnd.Intn(90)
	entries := make([]logproto.Entry, 0, baseEntries)

	// Distance between consecutive base slots.
	spreadInterval := g.config.TimeSpread / time.Duration(baseEntries)

	// Prepare OTEL attributes for this stream from a copy of the application's
	// resource attributes, so the application's own map is never modified.
	otel := OTELAttributes{
		Resource: make(map[string]string),
	}
	maps.Copy(otel.Resource, app.OTELResource)

	// Collect templated values and resolve them in sorted key order, so the
	// random draws do not depend on map iteration order.
	var templateKeys []string
	for k, v := range otel.Resource {
		if strings.HasPrefix(v, "${") && strings.HasSuffix(v, "}") {
			templateKeys = append(templateKeys, k)
		}
	}
	sort.Strings(templateKeys)
	for _, k := range templateKeys {
		switch otel.Resource[k] {
		case "${HOSTNAME}":
			otel.Resource[k] = faker.Hostname()
		case "${BROKER_ID}":
			otel.Resource[k] = fmt.Sprintf("%d", g.rnd.Intn(10))
		}
	}

	// The resource attributes are fixed from here on, so the sorted key list
	// and the metadata derived from it are computed once per stream instead of
	// once per entry.
	resourceKeys := make([]string, 0, len(otel.Resource))
	for k := range otel.Resource {
		resourceKeys = append(resourceKeys, k)
	}
	sort.Strings(resourceKeys)
	resourceMeta := make([]logproto.LabelAdapter, 0, len(resourceKeys))
	for _, k := range resourceKeys {
		resourceMeta = append(resourceMeta, logproto.LabelAdapter{
			Name:  "resource_" + k,
			Value: otel.Resource[k],
		})
	}
	// Scratch space for the collision shuffle; resourceKeys itself must stay sorted.
	shuffled := make([]string, 0, len(resourceKeys))

	for i := range baseEntries {
		ts := g.config.StartTime.Add(time.Duration(i) * spreadInterval)

		// A dense interval is half-open: it includes its start and excludes
		// its end, so a slot that lands exactly on Start counts as dense.
		isDense := false
		for _, interval := range g.config.DenseIntervals {
			end := interval.Start.Add(interval.Duration)
			if !ts.Before(interval.Start) && ts.Before(end) {
				isDense = true
				break
			}
		}

		// Generate more entries during dense intervals.
		numEntries := 1
		if isDense {
			numEntries = 10 // 10x more logs during dense periods
		}

		for range numEntries {
			// Add small jitter within the spread interval. Int63n panics for
			// n <= 0, which happens when the time spread is zero, negative or
			// smaller than the slot count in nanoseconds; skip jitter then.
			var jitter time.Duration
			if spreadInterval > 0 {
				jitter = time.Duration(g.rnd.Int63n(int64(spreadInterval)))
			}
			entryTs := ts.Add(jitter)

			// Pick the log level uniformly at random from the configured levels.
			level := g.config.LabelConfig.LogLevels[g.rnd.Intn(len(g.config.LabelConfig.LogLevels))]

			// Generate trace context for some entries (about 30%).
			var traceCtx *OTELTraceContext
			if g.rnd.Float32() < 0.3 {
				traceCtx = &OTELTraceContext{
					TraceID: faker.TraceID(),
					SpanID:  faker.SpanID(),
				}
			}

			// Generate the log line using the application's generator.
			line := app.LogGenerator(level, entryTs, faker)

			// Create metadata in a deterministic order: levels first, then
			// the resource attributes in sorted order.
			metadata := make([]logproto.LabelAdapter, 0, 2+len(resourceMeta)+3)
			metadata = append(metadata,
				logproto.LabelAdapter{Name: "level", Value: level},
				logproto.LabelAdapter{Name: "detected_level", Value: level},
			)
			metadata = append(metadata, resourceMeta...)

			// For about 10% of entries add a colliding structured metadata
			// key: a randomly chosen resource key without the "resource_"
			// prefix, with the value taken from the application's original
			// (unresolved) resource map.
			if g.rnd.Float32() < 0.1 && len(otel.Resource) > 0 {
				shuffled = append(shuffled[:0], resourceKeys...)
				g.rnd.Shuffle(len(shuffled), func(a, b int) { shuffled[a], shuffled[b] = shuffled[b], shuffled[a] })
				metadata = append(metadata,
					logproto.LabelAdapter{Name: shuffled[0], Value: app.OTELResource[shuffled[0]]},
				)
			}

			// Finally add trace context if present.
			if traceCtx != nil {
				metadata = append(metadata,
					logproto.LabelAdapter{Name: "trace_id", Value: traceCtx.TraceID},
					logproto.LabelAdapter{Name: "span_id", Value: traceCtx.SpanID},
				)
			}

			entries = append(entries, logproto.Entry{
				Timestamp:          entryTs,
				Line:               line,
				StructuredMetadata: metadata,
			})
		}
	}
	return entries
}
// GenerateDataset collects whole batches until their combined Size reaches
// targetSize, marshals all collected streams as a single logproto.PushRequest
// and writes the result to outputFile (mode 0o644). Because only whole batches
// are added, the dataset is approximately, not exactly, targetSize bytes, and
// at least one batch is always generated.
func (g *Generator) GenerateDataset(targetSize int64, outputFile string) error {
	collected := make([]logproto.Stream, 0, g.config.NumStreams)

	var accumulated int64
	for batch := range g.Batches() {
		collected = append(collected, batch.Streams...)
		accumulated += int64(batch.Size())
		if accumulated >= targetSize {
			break
		}
	}

	req := logproto.PushRequest{Streams: collected}
	payload, marshalErr := req.Marshal()
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal dataset: %w", marshalErr)
	}

	return os.WriteFile(outputFile, payload, 0o644)
}
// OTELAttributes represents OpenTelemetry attributes for logs.
//
// generateEntriesForStream fills Resource from a copy of the application's
// OTELResource map and resolves templated values in it.
// NOTE(review): Trace is not set anywhere in the visible code (per-entry trace
// context is kept in a local variable instead) — confirm whether it is used.
type OTELAttributes struct {
Resource map[string]string // Resource attributes constant for the service
Trace *OTELTraceContext // Optional trace context
}
// OTELTraceContext represents OpenTelemetry trace context.
// When present for an entry, TraceID and SpanID are emitted as the
// "trace_id" and "span_id" structured-metadata values.
type OTELTraceContext struct {
TraceID string
SpanID string
}
// SaveConfig serializes config as JSON and writes it to the generator config
// file (configFileName) inside dataDir with mode 0o644. The directory itself
// is not created. Marshal and write failures are returned wrapped.
func SaveConfig(dataDir string, config *GeneratorConfig) error {
	encoded, marshalErr := json.Marshal(config)
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal generator config: %w", marshalErr)
	}

	target := filepath.Join(dataDir, configFileName)
	writeErr := os.WriteFile(target, encoded, 0o644)
	if writeErr != nil {
		return fmt.Errorf("failed to write generator config: %w", writeErr)
	}
	return nil
}
// LoadConfig reads the generator config file (configFileName) from dataDir and
// decodes its JSON content into a new GeneratorConfig. Read and decode
// failures are returned wrapped, together with a nil config.
func LoadConfig(dataDir string) (*GeneratorConfig, error) {
	raw, err := os.ReadFile(filepath.Join(dataDir, configFileName))
	if err != nil {
		return nil, fmt.Errorf("failed to read generator config: %w", err)
	}

	cfg := new(GeneratorConfig)
	if err = json.Unmarshal(raw, cfg); err != nil {
		return nil, fmt.Errorf("failed to unmarshal generator config: %w", err)
	}
	return cfg, nil
}