// This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics.
// We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency.
// NOTE: many changes have been made to the original code for our use-case.
package instance

import (
	"bytes"
	"context"
	"errors"
	"flag"
	"fmt"
	"math"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/oklog/run"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/model/timestamp"
	"github.com/prometheus/prometheus/scrape"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/storage/remote"
	"gopkg.in/yaml.v2"

	"github.com/grafana/loki/pkg/ruler/storage/util"
	"github.com/grafana/loki/pkg/ruler/storage/wal"
	"github.com/grafana/loki/pkg/util/build"
)

func init() {
	remote.UserAgent = fmt.Sprintf("LokiRulerWAL/%s", build.Version)
}

var (
	remoteWriteMetricName = "queue_highest_sent_timestamp_seconds"
)

// Default configuration values
var (
	DefaultConfig = Config{
		Dir:                 "ruler-wal",
		TruncateFrequency:   60 * time.Minute,
		MinAge:              5 * time.Minute,
		MaxAge:              4 * time.Hour,
		RemoteFlushDeadline: 1 * time.Minute,
	}
)

// Config describes an individual WAL instance. It has its own WAL directory
// and its own set of remote_write rules.
type Config struct {
	Tenant      string
	Name        string
	RemoteWrite []*config.RemoteWriteConfig

	Dir string `yaml:"dir"`

	// How frequently the WAL should be truncated.
	TruncateFrequency time.Duration `yaml:"truncate_frequency,omitempty"`

	// Minimum and maximum time that series should exist in the WAL for.
	MinAge time.Duration `yaml:"min_age,omitempty"`
	MaxAge time.Duration `yaml:"max_age,omitempty"`

	RemoteFlushDeadline time.Duration `yaml:"remote_flush_deadline,omitempty"`
}

// UnmarshalYAML implements yaml.Unmarshaler.
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
	*c = DefaultConfig

	type plain Config
	return unmarshal((*plain)(c))
}

// MarshalYAML implements yaml.Marshaler.
func (c Config) MarshalYAML() (interface{}, error) {
	// We want users to be able to marshal instance.Configs directly without
	// *needing* to call instance.MarshalConfig, so we call it internally
	// here and return a map.
	bb, err := MarshalConfig(&c, false)
	if err != nil {
		return nil, err
	}

	// Use a yaml.MapSlice rather than a map[string]interface{} so
	// order of keys is retained compared to just calling MarshalConfig.
	var m yaml.MapSlice
	if err := yaml.Unmarshal(bb, &m); err != nil {
		return nil, err
	}
	return m, nil
}

// ApplyDefaults validates the config. Defaults for unset fields are applied
// at unmarshal time (see UnmarshalYAML); this method only checks that the
// required values are present and consistent.
func (c *Config) ApplyDefaults() error {
	switch {
	case c.Name == "":
		return errors.New("missing instance name")
	case c.TruncateFrequency <= 0:
		return errors.New("wal_truncate_frequency must be greater than 0s")
	case c.RemoteFlushDeadline <= 0:
		return errors.New("remote_flush_deadline must be greater than 0s")
	case c.MinAge > c.MaxAge:
		return errors.New("min_wal_time must be less than max_wal_time")
	}

	for _, cfg := range c.RemoteWrite {
		if cfg == nil {
			return fmt.Errorf("empty or null remote write config section")
		}
	}

	return nil
}
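// A minimal usage sketch (illustrative, not part of the original agent code):
// because UnmarshalYAML seeds the struct with DefaultConfig before decoding,
// any key omitted from the YAML keeps its default value. The YAML document
// below is hypothetical.
//
//	var cfg Config
//	raw := []byte("dir: /data/ruler-wal\n")
//	if err := yaml.Unmarshal(raw, &cfg); err != nil {
//		// handle error
//	}
//	// cfg.Dir == "/data/ruler-wal", while cfg.TruncateFrequency == 60m,
//	// cfg.MinAge == 5m and cfg.MaxAge == 4h (all defaults).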
// Clone makes a deep copy of the config.
func (c *Config) Clone() (Config, error) {
	bb, err := MarshalConfig(c, false)
	if err != nil {
		return Config{}, err
	}
	cp, err := UnmarshalConfig(bytes.NewReader(bb))
	if err != nil {
		return Config{}, err
	}

	// Some tests will trip up on this; the marshal/unmarshal cycle might set
	// an empty slice to nil. Set it back to an empty slice if we detect this
	// happening.
	if cp.RemoteWrite == nil && c.RemoteWrite != nil {
		cp.RemoteWrite = []*config.RemoteWriteConfig{}
	}

	return *cp, nil
}

func (c *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&c.Dir, "ruler.wal.dir", DefaultConfig.Dir, "Directory to store the WAL and/or recover from WAL.")
	f.DurationVar(&c.TruncateFrequency, "ruler.wal.truncate-frequency", DefaultConfig.TruncateFrequency, "How often to run the WAL truncation.")
	f.DurationVar(&c.MinAge, "ruler.wal.min-age", DefaultConfig.MinAge, "Minimum age that samples must exist in the WAL before being truncated.")
	f.DurationVar(&c.MaxAge, "ruler.wal.max-age", DefaultConfig.MaxAge, "Maximum age that samples may exist in the WAL before being forcibly truncated.")
}

type walStorageFactory func(reg prometheus.Registerer) (walStorage, error)

// Instance is an individual metrics collector and remote_writer.
type Instance struct {
	initialized bool

	// All fields in the following block may be accessed and modified by
	// concurrently running goroutines.
	//
	// Note that all Prometheus components listed here may be nil at any
	// given time; methods reading them should take care to do nil checks.
	mut         sync.Mutex
	cfg         Config
	wal         walStorage
	remoteStore *remote.Storage
	storage     storage.Storage

	logger log.Logger

	reg    prometheus.Registerer
	newWal walStorageFactory

	vc *MetricValueCollector

	tenant string
}

// New creates a new Instance with a directory for storing the WAL. The instance
// will not start until Run is called on the instance.
func New(reg prometheus.Registerer, cfg Config, metrics *wal.Metrics, logger log.Logger) (*Instance, error) {
	logger = log.With(logger, "instance", cfg.Name)

	instWALDir := filepath.Join(cfg.Dir, cfg.Tenant)

	newWal := func(reg prometheus.Registerer) (walStorage, error) {
		return wal.NewStorage(logger, metrics, reg, instWALDir)
	}

	return newInstance(cfg, reg, logger, newWal, cfg.Tenant)
}

func newInstance(cfg Config, reg prometheus.Registerer, logger log.Logger, newWal walStorageFactory, tenant string) (*Instance, error) {
	vc := NewMetricValueCollector(prometheus.DefaultGatherer, remoteWriteMetricName)

	i := &Instance{
		cfg:    cfg,
		logger: logger,
		vc:     vc,

		reg:    reg,
		newWal: newWal,

		tenant: tenant,
	}

	return i, nil
}

func (i *Instance) Storage() storage.Storage {
	i.mut.Lock()
	defer i.mut.Unlock()
	return i.storage
}
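// A lifecycle sketch (hypothetical wiring; the registry, the wal.Metrics
// value and the logger come from the caller):
//
//	inst, err := New(prometheus.DefaultRegisterer, cfg, walMetrics, logger)
//	if err != nil {
//		// handle error
//	}
//	go func() { _ = inst.Run(ctx) }() // Run blocks until ctx is canceled
//	// once inst.Ready() reports true, appenders can be handed out:
//	app := inst.Appender(ctx)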
// Run starts the instance, initializing Prometheus components, and will
// continue to run until an error happens during execution or the provided
// context is cancelled.
//
// Run may be re-called after exiting, as components will be reinitialized each
// time Run is called.
func (i *Instance) Run(ctx context.Context) error {
	// i.cfg may change at any point in the middle of this method but not in a way
	// that affects any of the code below; rather than grabbing a mutex every time
	// we want to read the config, we'll simplify the access and just grab a copy
	// now.
	i.mut.Lock()
	cfg := i.cfg
	i.mut.Unlock()

	level.Debug(i.logger).Log("msg", "initializing instance", "name", cfg.Name)

	// trackingReg wraps the registerer for the instance to make sure that if Run
	// exits, any metrics Prometheus registers are removed and can be
	// re-registered if Run is called again.
	trackingReg := util.WrapWithUnregisterer(i.reg)
	defer trackingReg.UnregisterAll()

	if err := i.initialize(ctx, trackingReg, &cfg); err != nil {
		level.Error(i.logger).Log("msg", "failed to initialize instance", "err", err)
		return fmt.Errorf("failed to initialize instance: %w", err)
	}

	// The actors defined here are defined in the order we want them to shut down.
	// Primarily, we want to ensure that the following shutdown order is
	// maintained:
	//		1. The truncation loop stops
	//		2. WAL storage is closed (see Stop)
	//		3. Remote write storage is closed (see Stop)
	// This is done to allow the instance to write stale markers for all active
	// series.
	rg := runGroupWithContext(ctx)

	{
		// Truncation loop
		ctx, contextCancel := context.WithCancel(context.Background())
		defer contextCancel()
		rg.Add(
			func() error {
				i.truncateLoop(ctx, i.wal, &cfg)
				level.Info(i.logger).Log("msg", "truncation loop stopped")
				return nil
			},
			func(err error) {
				level.Info(i.logger).Log("msg", "stopping truncation loop...")
				contextCancel()
			},
		)
	}

	level.Debug(i.logger).Log("msg", "running instance", "name", cfg.Name)
	err := rg.Run()
	if err != nil {
		level.Error(i.logger).Log("msg", "agent instance stopped with error", "err", err)
	}
	return err
}

type noopScrapeManager struct{}

func (n noopScrapeManager) Get() (*scrape.Manager, error) {
	return nil, nil
}

// initialize sets up the various Prometheus components with their initial
// settings. initialize will be called each time the Instance is run. Prometheus
// components cannot be reused after they are stopped, so we need to recreate them
// each run.
func (i *Instance) initialize(_ context.Context, reg prometheus.Registerer, cfg *Config) error {
	i.mut.Lock()
	defer i.mut.Unlock()

	// explicitly set this in case this function is called multiple times
	i.initialized = false

	var err error

	i.wal, err = i.newWal(reg)
	if err != nil {
		return fmt.Errorf("error creating WAL: %w", err)
	}

	// Set up the remote storage
	remoteLogger := log.With(i.logger, "component", "remote")
	i.remoteStore = remote.NewStorage(remoteLogger, reg, i.wal.StartTime, i.wal.Directory(), cfg.RemoteFlushDeadline, noopScrapeManager{})
	err = i.remoteStore.ApplyConfig(&config.Config{
		RemoteWriteConfigs: cfg.RemoteWrite,
	})
	if err != nil {
		return fmt.Errorf("failed applying config to remote storage: %w", err)
	}

	i.storage = storage.NewFanout(i.logger, i.wal, i.remoteStore)
	i.initialized = true

	return nil
}
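// A sketch of the data flow wired up by initialize (illustrative; the
// remote storage tails the WAL directory rather than receiving appends
// directly):
//
//	Instance.Appender ──► wal.Storage ──► WAL segments on disk
//	                                          │ (watched by)
//	                                          ▼
//	                                  remote.Storage ──► remote_write endpoints
//
// i.storage wraps both components in a storage.Fanout so that queries and
// Close reach each of them.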
// Update accepts a new Config for the Instance and will dynamically update any
// running Prometheus components with the new values from Config. Update will
// return an ErrInvalidUpdate if the Update could not be applied.
func (i *Instance) Update(c Config) (err error) {
	i.mut.Lock()
	defer i.mut.Unlock()

	// It's only (currently) valid to update remote_write, so if any other field
	// has changed here, return the error.
	switch {
	// This first case should never happen in practice but it's included here for
	// completeness' sake.
	case i.cfg.Name != c.Name:
		err = errImmutableField{Field: "name"}
	case i.cfg.TruncateFrequency != c.TruncateFrequency:
		err = errImmutableField{Field: "wal_truncate_frequency"}
	case i.cfg.RemoteFlushDeadline != c.RemoteFlushDeadline:
		err = errImmutableField{Field: "remote_flush_deadline"}
	}
	if err != nil {
		return ErrInvalidUpdate{Inner: err}
	}

	// Check to see if the components exist yet.
	if i.remoteStore == nil {
		return ErrInvalidUpdate{
			Inner: fmt.Errorf("cannot dynamically update because instance is not running"),
		}
	}

	// NOTE(rfratto): Prometheus applies configs in a specific order to ensure
	// flow from service discovery down to the WAL continues working properly.
	//
	// Keep the following order below:
	//
	// 1. Local config
	// 2. Remote Store
	// 3. Scrape Manager
	// 4. Discovery Manager

	originalConfig := i.cfg
	defer func() {
		if err != nil {
			i.cfg = originalConfig
		}
	}()
	i.cfg = c

	err = i.remoteStore.ApplyConfig(&config.Config{
		RemoteWriteConfigs: c.RemoteWrite,
	})
	if err != nil {
		return fmt.Errorf("error applying new remote_write configs: %w", err)
	}

	return nil
}

// Ready indicates if the instance is ready for processing.
func (i *Instance) Ready() bool {
	i.mut.Lock()
	defer i.mut.Unlock()
	return i.initialized
}

// StorageDirectory returns the directory where this Instance is writing series
// and samples to for the WAL.
func (i *Instance) StorageDirectory() string {
	return i.wal.Directory()
}

// Appender returns a storage.Appender from the instance's WAL
func (i *Instance) Appender(ctx context.Context) storage.Appender {
	return i.wal.Appender(ctx)
}

// Stop stops the WAL
func (i *Instance) Stop() error {
	level.Info(i.logger).Log("msg", "stopping WAL instance", "user", i.Tenant())

	// close WAL first to prevent any further appends
	if err := i.wal.Close(); err != nil {
		level.Error(i.logger).Log("msg", "error stopping WAL instance", "user", i.Tenant(), "err", err)
		return err
	}

	if err := i.remoteStore.Close(); err != nil {
		level.Error(i.logger).Log("msg", "error stopping remote storage instance", "user", i.Tenant(), "err", err)
		return err
	}

	return nil
}

// Tenant returns the tenant name of the instance
func (i *Instance) Tenant() string {
	return i.tenant
}

func (i *Instance) truncateLoop(ctx context.Context, wal walStorage, cfg *Config) {
	// Track the last timestamp we truncated for to prevent segments from getting
	// deleted until at least some new data has been sent.
	var lastTs int64 = math.MinInt64

	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(cfg.TruncateFrequency):
			// The timestamp ts is used to determine which series are not receiving
			// samples and may be deleted from the WAL. Their most recent append
			// timestamp is compared to ts, and if that timestamp is older than ts,
			// they are considered inactive and may be deleted.
			//
			// Subtracting a duration from ts will delay when it will be considered
			// inactive and scheduled for deletion.
			ts := i.getRemoteWriteTimestamp() - i.cfg.MinAge.Milliseconds()
			if ts < 0 {
				ts = 0
			}

			// Network issues can prevent the result of getRemoteWriteTimestamp from
			// changing. We don't want data in the WAL to grow forever, so we set a cap
			// on the maximum age data can be. If our ts is older than this cutoff point,
			// we'll shift it forward to start deleting very stale data.
			if maxTS := timestamp.FromTime(time.Now().Add(-i.cfg.MaxAge)); ts < maxTS {
				ts = maxTS
			}

			if ts == lastTs {
				level.Debug(i.logger).Log("msg", "not truncating the WAL, remote_write timestamp is unchanged", "ts", ts)
				continue
			}
			lastTs = ts

			level.Debug(i.logger).Log("msg", "truncating the WAL", "ts", ts)
			err := wal.Truncate(ts)
			if err != nil {
				// The only issue here is larger disk usage and a greater replay time,
				// so we'll only log this as a warning.
				level.Warn(i.logger).Log("msg", "could not truncate WAL", "err", err)
			}
		}
	}
}
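// A worked sketch of the cutoff computed in truncateLoop above (hypothetical
// values, using the defaults MinAge=5m and MaxAge=4h):
//
//	lastSent := i.getRemoteWriteTimestamp()           // e.g. 10 minutes ago
//	ts := lastSent - (5 * time.Minute).Milliseconds() // MinAge guard: 15m ago
//	// if remote write has stalled for more than 4h, the MaxAge cap wins:
//	if maxTS := timestamp.FromTime(time.Now().Add(-4 * time.Hour)); ts < maxTS {
//		ts = maxTS // delete anything older than 4h regardless
//	}
//	_ = wal.Truncate(ts) // series with no appends newer than ts are dropped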
// getRemoteWriteTimestamp looks up the last successful remote write timestamp.
// This is passed to wal.Storage for its truncation. If no remote write sections
// are configured, getRemoteWriteTimestamp returns the current time.
func (i *Instance) getRemoteWriteTimestamp() int64 {
	i.mut.Lock()
	defer i.mut.Unlock()

	if len(i.cfg.RemoteWrite) == 0 {
		return timestamp.FromTime(time.Now())
	}

	lbls := make([]string, len(i.cfg.RemoteWrite))
	for idx := 0; idx < len(lbls); idx++ {
		lbls[idx] = i.cfg.RemoteWrite[idx].Name
	}

	vals, err := i.vc.GetValues("remote_name", lbls...)
	if err != nil {
		level.Error(i.logger).Log("msg", "could not get remote write timestamps", "err", err)
		return 0
	}
	if len(vals) == 0 {
		return 0
	}

	// We use the lowest value from the metric since we don't want to delete any
	// segments from the WAL until they've been written by all of the remote_write
	// configurations.
	ts := int64(math.MaxInt64)
	for _, val := range vals {
		ival := int64(val)
		if ival < ts {
			ts = ival
		}
	}

	// Convert to the millisecond precision which is used by the WAL
	return ts * 1000
}

// walStorage is an interface satisfied by wal.Storage, and created for testing.
type walStorage interface {
	// walStorage implements Queryable/ChunkQueryable for compatibility, but is unused.
	storage.Queryable
	storage.ChunkQueryable

	Directory() string

	StartTime() (int64, error)
	WriteStalenessMarkers(remoteTsFunc func() int64) error
	Appender(context.Context) storage.Appender
	Truncate(mint int64) error

	Close() error
}

// MetricValueCollector wraps around a Gatherer and provides utilities for
// pulling metric values from a given metric name and label matchers.
//
// This is used by the agent instances to find the most recent timestamp
// successfully remote_written to for purposes of safely truncating the WAL.
//
// MetricValueCollector is only intended for use with Gauges and Counters.
type MetricValueCollector struct {
	g     prometheus.Gatherer
	match string
}

// NewMetricValueCollector creates a new MetricValueCollector.
func NewMetricValueCollector(g prometheus.Gatherer, match string) *MetricValueCollector {
	return &MetricValueCollector{
		g:     g,
		match: match,
	}
}

// GetValues looks through all the tracked metrics and returns all values
// for metrics that match some key value pair.
func (vc *MetricValueCollector) GetValues(label string, labelValues ...string) ([]float64, error) {
	vals := []float64{}

	families, err := vc.g.Gather()
	if err != nil {
		return nil, err
	}

	for _, family := range families {
		if !strings.Contains(family.GetName(), vc.match) {
			continue
		}

		for _, m := range family.GetMetric() {
			matches := false
			for _, l := range m.GetLabel() {
				if l.GetName() != label {
					continue
				}

				v := l.GetValue()
				for _, match := range labelValues {
					if match == v {
						matches = true
						break
					}
				}
				break
			}
			if !matches {
				continue
			}

			var value float64
			if m.Gauge != nil {
				value = m.Gauge.GetValue()
			} else if m.Counter != nil {
				value = m.Counter.GetValue()
			} else if m.Untyped != nil {
				value = m.Untyped.GetValue()
			} else {
				return nil, errors.New("tracking unexpected metric type")
			}

			vals = append(vals, value)
		}
	}

	return vals, nil
}
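// A minimal sketch of how the collector is used by getRemoteWriteTimestamp
// above (the remote_name values are hypothetical):
//
//	vc := NewMetricValueCollector(prometheus.DefaultGatherer, remoteWriteMetricName)
//	vals, err := vc.GetValues("remote_name", "rw-a", "rw-b")
//	// vals now holds queue_highest_sent_timestamp_seconds (in seconds) for
//	// each matching remote_write queue; the caller keeps the minimum and
//	// multiplies by 1000 for the WAL's millisecond precision.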
type runGroupContext struct {
	cancel context.CancelFunc

	g *run.Group
}

// runGroupWithContext creates a new run.Group that will be stopped if the
// context gets canceled in addition to the normal behavior of stopping
// when any of the actors stop.
func runGroupWithContext(ctx context.Context) *runGroupContext {
	ctx, cancel := context.WithCancel(ctx)

	var g run.Group
	g.Add(func() error {
		<-ctx.Done()
		return nil
	}, func(_ error) {
		cancel()
	})

	return &runGroupContext{cancel: cancel, g: &g}
}

func (rg *runGroupContext) Add(execute func() error, interrupt func(error)) {
	rg.g.Add(execute, interrupt)
}

func (rg *runGroupContext) Run() error   { return rg.g.Run() }
func (rg *runGroupContext) Stop(_ error) { rg.cancel() }
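// A usage sketch mirroring how Run wires actors into the group (doWork and
// stopWork are illustrative placeholders):
//
//	rg := runGroupWithContext(ctx)
//	rg.Add(
//		func() error { doWork(); return nil }, // execute: runs until done
//		func(error) { stopWork() },            // interrupt: asked to stop
//	)
//	err := rg.Run() // returns when any actor returns or ctx is canceled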