mirror of https://github.com/grafana/loki
Unify ring managers across components (#10931)
**What this PR does / why we need it**:

This PR creates a re-usable `RingManager` implementation that is then used by

* Query Scheduler ring
* Index Gateway ring
* Bloom Gateway ring
* Bloom Compactor ring

**Which issue(s) this PR fixes**:

Every component had its own implementation of the ring manager, which only really differed in the log messages containing the component name.

---------

Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
parent: 89829065e7
commit: 2abb472e7e
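The unified implementation itself is not part of the hunks below; this diff only shows the per-component ring managers being deleted. As a rough illustration of the idea, here is a minimal, hypothetical sketch (names such as `ringexample` and `announce` are assumptions for illustration, not the API the PR actually introduces): the component name becomes data instead of being baked into each file's log and error strings.

package ringexample

import (
	"fmt"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
)

// RingManager sketches the unification: the per-component implementations
// deleted below differ only in the component name embedded in their
// messages, so a shared implementation can carry that name as a field.
type RingManager struct {
	name   string // e.g. "scheduler", "index-gateway", "bloom-compactor"
	logger log.Logger
}

// announce replaces per-component literals such as
// "bloom-compactor is JOINING in the ring" found in the deleted files.
func (rm *RingManager) announce(state string) {
	level.Info(rm.logger).Log("msg", fmt.Sprintf("%s is %s in the ring", rm.name, state))
}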
@@ -1,208 +0,0 @@
package bloomcompactor

import (
	"context"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	// ringAutoForgetUnhealthyPeriods is the number of consecutive timeout periods after which
	// an unhealthy instance in the ring is automatically removed.
	ringAutoForgetUnhealthyPeriods = 10

	// ringNameForServer is the name of the ring used by the bloom-compactor server.
	ringNameForServer = "bloom-compactor"
	// start with a single instance
	ringNumTokens   = 1
	ringCheckPeriod = 3 * time.Second

	// ringKey is the key under which we register different instances of bloom-compactor in the KVStore.
	ringKey = "bloom-compactor"

	replicationFactor = 1
)

type RingManager struct {
	services.Service

	cfg    Config
	logger log.Logger

	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	RingLifecycler *ring.BasicLifecycler
	Ring           *ring.Ring
}

func NewRingManager(cfg Config, logger log.Logger, registerer prometheus.Registerer) (*RingManager, error) {
	rm := &RingManager{
		cfg: cfg, logger: logger,
	}

	// Instantiate the KV store.
	ringStore, err := kv.NewClient(
		rm.cfg.RingCfg.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "bloom-compactor-ring-manager"),
		rm.logger,
	)
	if err != nil {
		return nil, errors.Wrap(err, "bloom-compactor ring manager failed to create KV store client")
	}

	lifecyclerCfg, err := rm.cfg.RingCfg.ToLifecyclerConfig(ringNumTokens, rm.logger)
	if err != nil {
		return nil, errors.Wrap(err, "invalid ring lifecycler config")
	}

	// Define lifecycler delegates in reverse order (the last delegate to be called is defined
	// first, because they are chained via the "next delegate").
	delegate := ring.BasicLifecyclerDelegate(rm)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.logger)
	delegate = ring.NewTokensPersistencyDelegate(rm.cfg.RingCfg.TokensFilePath, ring.JOINING, delegate, rm.logger)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.RingCfg.HeartbeatTimeout, delegate, rm.logger)

	rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, ringKey, ringStore, delegate, rm.logger, registerer)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create bloom-compactor ring manager lifecycler")
	}

	// Instantiate the ring.
	ringCfg := rm.cfg.RingCfg.ToRingConfig(replicationFactor)
	rm.Ring, err = ring.NewWithStoreClientAndStrategy(
		ringCfg,
		ringNameForServer,
		ringKey,
		ringStore,
		ring.NewIgnoreUnhealthyInstancesReplicationStrategy(),
		prometheus.WrapRegistererWithPrefix("loki_", registerer),
		rm.logger,
	)
	if err != nil {
		return nil, errors.Wrap(err, "bloom-compactor ring manager failed to create ring client")
	}

	svcs := []services.Service{rm.RingLifecycler, rm.Ring}
	rm.subservices, err = services.NewManager(svcs...)
	if err != nil {
		return nil, errors.Wrap(err, "new bloom services manager in server mode")
	}

	rm.subservicesWatcher = services.NewFailureWatcher()
	rm.subservicesWatcher.WatchManager(rm.subservices)
	rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping)

	return rm, nil
}

// starting implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) starting(ctx context.Context) (err error) {
	// In case this function returns an error, we want to unregister the instance
	// from the ring. We do it by ensuring dependencies are gracefully stopped if they
	// were already started.
	defer func() {
		if err == nil || rm.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil {
			level.Error(rm.logger).Log("msg", "failed to gracefully stop bloom-compactor ring manager dependencies", "err", stopErr)
		}
	}()

	if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil {
		return errors.Wrap(err, "unable to start bloom-compactor ring manager subservices")
	}

	// The BasicLifecycler does not automatically move the state to ACTIVE, so that any additional
	// work can be done before becoming ACTIVE. For the bloom-compactor we don't currently have
	// any additional work, so we can become ACTIVE right away.

	// Wait until the ring client has detected this instance in the JOINING state, to make sure
	// that when we run the initial sync we already know the tokens assigned to this instance.
	level.Info(rm.logger).Log("msg", "waiting until bloom-compactor is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(rm.logger).Log("msg", "bloom-compactor is JOINING in the ring")

	if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client has detected this instance in the ACTIVE state, to make sure
	// that when we run the loop it won't be detected as a ring topology change.
	level.Info(rm.logger).Log("msg", "waiting until bloom-compactor is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(rm.logger).Log("msg", "bloom-compactor is ACTIVE in the ring")

	return nil
}

// running implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) running(ctx context.Context) error {
	t := time.NewTicker(ringCheckPeriod)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil
		case err := <-rm.subservicesWatcher.Chan():
			return errors.Wrap(err, "running bloom-compactor ring manager subservice failed")
		case <-t.C:
			continue
		}
	}
}

// stopping implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) stopping(_ error) error {
	level.Debug(rm.logger).Log("msg", "stopping bloom-compactor ring manager")
	return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
}

// ServeHTTP serves the ring status page of the bloom-compactor ring.
func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	rm.Ring.ServeHTTP(w, req)
}

func (rm *RingManager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, _ string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
	// When we initialize the bloom-compactor instance in the ring, we want to start from a clean
	// situation, so whatever the state is, we set it to JOINING, while we keep existing tokens
	// (if any) or the ones loaded from file.
	var tokens []uint32
	if instanceExists {
		tokens = instanceDesc.GetTokens()
	}

	takenTokens := ringDesc.GetTokens()
	gen := ring.NewRandomTokenGenerator()
	newTokens := gen.GenerateTokens(ringNumTokens-len(tokens), takenTokens)

	// Token sorting is enforced by the parent caller.
	tokens = append(tokens, newTokens...)

	return ring.JOINING, tokens
}

func (rm *RingManager) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {
}

func (rm *RingManager) OnRingInstanceStopping(_ *ring.BasicLifecycler) {
}

func (rm *RingManager) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}
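The delegate chain built in NewRingManager above recurs verbatim in every ring manager in this diff, and its reverse-order construction is easy to misread. A self-contained sketch of the same wiring (the function name and parameters are illustrative, not code from the PR):

package ringexample

import (
	"time"

	"github.com/go-kit/log"
	"github.com/grafana/dskit/ring"
)

// buildDelegate mirrors the chain from the file above. Each constructor wraps
// the delegate passed to it, so the delegate defined last (AutoForget) sits
// outermost and is invoked first, forwarding to TokensPersistency, then
// LeaveOnStopping, then the base delegate (the ring manager itself).
func buildDelegate(base ring.BasicLifecyclerDelegate, tokensFilePath string, heartbeatTimeout time.Duration, logger log.Logger) ring.BasicLifecyclerDelegate {
	const autoForgetPeriods = 10 // matches ringAutoForgetUnhealthyPeriods above

	d := base
	d = ring.NewLeaveOnStoppingDelegate(d, logger)
	d = ring.NewTokensPersistencyDelegate(tokensFilePath, ring.JOINING, d, logger)
	d = ring.NewAutoForgetDelegate(autoForgetPeriods*heartbeatTimeout, d, logger)
	return d
}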
@@ -1,29 +0,0 @@
package scheduler

import (
	"github.com/grafana/dskit/ring"
)

func (rm *RingManager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, _ string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
	// When we initialize the scheduler instance in the ring, we want to start from a clean
	// situation, so whatever the state is, we set it to JOINING, while we keep existing tokens
	// (if any) or the ones loaded from file.
	var tokens []uint32
	if instanceExists {
		tokens = instanceDesc.GetTokens()
	}

	takenTokens := ringDesc.GetTokens()
	gen := ring.NewRandomTokenGenerator()
	newTokens := gen.GenerateTokens(ringNumTokens-len(tokens), takenTokens)

	// Token sorting is enforced by the parent caller.
	tokens = append(tokens, newTokens...)

	return ring.JOINING, tokens
}

func (rm *RingManager) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {}
func (rm *RingManager) OnRingInstanceStopping(_ *ring.BasicLifecycler)              {}
func (rm *RingManager) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}
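A note on the token arithmetic in OnRingInstanceRegister: because the scheduler ring uses a single token (ringNumTokens = 1, defined in the next file), a restarting instance that recovered its token from the ring or the tokens file generates nothing new. A worked trace:

// GenerateTokens(ringNumTokens-len(tokens), takenTokens) with ringNumTokens = 1:
//   restarted instance with a recovered token: len(tokens) == 1 -> 1-1 = 0 new tokens
//   fresh instance:                            len(tokens) == 0 -> 1-0 = 1 new token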
@@ -1,252 +0,0 @@
package scheduler

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	// ringAutoForgetUnhealthyPeriods is the number of consecutive timeout periods after which
	// an unhealthy instance in the ring is automatically removed.
	ringAutoForgetUnhealthyPeriods = 10

	// ringKey is the key under which we store the schedulers ring in the KVStore.
	ringKey = "scheduler"

	// ringNameForServer is the name of the ring used by the scheduler server.
	ringNameForServer = "scheduler"

	// ringReplicationFactor should be 2 because we want 2 schedulers.
	ringReplicationFactor = 2

	// ringNumTokens sets our single token in the ring;
	// we only need to insert 1 token to be used for leader election purposes.
	ringNumTokens = 1

	// ringCheckPeriod is how often we check the ring to see if this instance is still in
	// the replicaset of instances acting as schedulers.
	ringCheckPeriod = 3 * time.Second
)

// RingManagerMode defines the different modes in which the RingManager executes.
//
// The RingManager and its modes are only relevant if Scheduler discovery is done using the ring.
type RingManagerMode int

const (
	// RingManagerModeReader is the RingManager mode executed by Loki components that want to discover Scheduler instances.
	// The RingManager in reader mode will have its own ring key-value store client, but it won't try to register itself in the ring.
	RingManagerModeReader RingManagerMode = iota

	// RingManagerModeMember is the RingManager mode executed by the Schedulers to register themselves in the ring.
	RingManagerModeMember
)

// RingManager is a component instantiated before all the others and is responsible for the ring setup.
//
// All Loki components that are involved with the Schedulers (including the Schedulers themselves)
// require a RingManager. However, the components that are clients of the Schedulers run it in reader
// mode, while the Schedulers themselves run the manager in member mode.
type RingManager struct {
	services.Service

	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	RingLifecycler *ring.BasicLifecycler
	Ring           *ring.Ring
	managerMode    RingManagerMode

	cfg Config

	log log.Logger
}

// NewRingManager is the recommended way of instantiating a RingManager.
//
// The other functions assume the RingManager was instantiated through this function.
func NewRingManager(managerMode RingManagerMode, cfg Config, log log.Logger, registerer prometheus.Registerer) (*RingManager, error) {
	rm := &RingManager{
		cfg: cfg, log: log, managerMode: managerMode,
	}

	if !cfg.UseSchedulerRing {
		return nil, fmt.Errorf("ring manager shouldn't be invoked when ring is not used for discovering schedulers")
	}

	// Instantiate the KV store used by both modes.
	ringStore, err := kv.NewClient(
		rm.cfg.SchedulerRing.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "scheduler"),
		rm.log,
	)
	if err != nil {
		return nil, errors.Wrap(err, "scheduler ring manager create KV store client")
	}

	// Instantiate the ring used by both modes.
	ringCfg := rm.cfg.SchedulerRing.ToRingConfig(ringReplicationFactor)
	rm.Ring, err = ring.NewWithStoreClientAndStrategy(
		ringCfg,
		ringNameForServer,
		ringKey,
		ringStore,
		ring.NewIgnoreUnhealthyInstancesReplicationStrategy(),
		prometheus.WrapRegistererWithPrefix("cortex_", registerer),
		rm.log,
	)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create ring client for scheduler ring manager")
	}

	if managerMode == RingManagerModeMember {
		if err := rm.startMemberMode(ringStore, registerer); err != nil {
			return nil, err
		}
		return rm, nil
	}

	if err := rm.startReaderMode(); err != nil {
		return nil, err
	}
	return rm, nil
}

func (rm *RingManager) startMemberMode(ringStore kv.Client, registerer prometheus.Registerer) error {
	lifecyclerCfg, err := rm.cfg.SchedulerRing.ToLifecyclerConfig(ringNumTokens, rm.log)
	if err != nil {
		return errors.Wrap(err, "invalid ring lifecycler config")
	}

	delegate := ring.BasicLifecyclerDelegate(rm)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.log)
	delegate = ring.NewTokensPersistencyDelegate(rm.cfg.SchedulerRing.TokensFilePath, ring.JOINING, delegate, rm.log)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.SchedulerRing.HeartbeatTimeout, delegate, rm.log)

	rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, ringKey, ringStore, delegate, rm.log, registerer)
	if err != nil {
		return errors.Wrap(err, "failed to create ring lifecycler for scheduler ring manager")
	}

	svcs := []services.Service{rm.RingLifecycler, rm.Ring}
	rm.subservices, err = services.NewManager(svcs...)
	if err != nil {
		return errors.Wrap(err, "failed to create services manager for scheduler ring manager in member mode")
	}

	rm.subservicesWatcher = services.NewFailureWatcher()
	rm.subservicesWatcher.WatchManager(rm.subservices)
	rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping)

	return nil
}

func (rm *RingManager) startReaderMode() error {
	var err error

	svcs := []services.Service{rm.Ring}
	rm.subservices, err = services.NewManager(svcs...)
	if err != nil {
		return errors.Wrap(err, "failed to create services manager for scheduler ring manager in reader mode")
	}

	rm.subservicesWatcher = services.NewFailureWatcher()
	rm.subservicesWatcher.WatchManager(rm.subservices)

	rm.Service = services.NewIdleService(func(ctx context.Context) error {
		return services.StartManagerAndAwaitHealthy(ctx, rm.subservices)
	}, func(failureCase error) error {
		return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
	})

	return nil
}

// starting implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) starting(ctx context.Context) (err error) {
	// In case this function returns an error, we want to unregister the instance
	// from the ring. We do it by ensuring dependencies are gracefully stopped if they
	// were already started.
	defer func() {
		if err == nil || rm.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil {
			level.Error(rm.log).Log("msg", "failed to gracefully stop scheduler ring manager dependencies", "err", stopErr)
		}
	}()

	if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil {
		return errors.Wrap(err, "unable to start scheduler ring manager subservices")
	}

	// The BasicLifecycler does not automatically move the state to ACTIVE, so that any additional
	// work can be done before becoming ACTIVE. For the schedulers we don't currently have any
	// additional work, so we can become ACTIVE right away.

	// Wait until the ring client has detected this instance in the JOINING state, to make sure
	// that when we run the initial sync we already know the tokens assigned to this instance.
	level.Info(rm.log).Log("msg", "waiting until scheduler is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(rm.log).Log("msg", "scheduler is JOINING in the ring")

	if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client has detected this instance in the ACTIVE state, to make sure
	// that when we run the loop it won't be detected as a ring topology change.
	level.Info(rm.log).Log("msg", "waiting until scheduler is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(rm.log).Log("msg", "scheduler is ACTIVE in the ring")

	return nil
}

// running implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) running(ctx context.Context) error {
	t := time.NewTicker(ringCheckPeriod)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil
		case err := <-rm.subservicesWatcher.Chan():
			return errors.Wrap(err, "running scheduler ring manager subservice failed")
		case <-t.C:
			continue
		}
	}
}

// stopping implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) stopping(_ error) error {
	level.Debug(rm.log).Log("msg", "stopping scheduler ring manager")
	return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
}

// ServeHTTP serves the HTTP route /scheduler/ring.
func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if rm.cfg.UseSchedulerRing {
		rm.Ring.ServeHTTP(w, req)
	} else {
		_, _ = w.Write([]byte("QueryScheduler running with '-query-scheduler.use-scheduler-ring' set to false."))
	}
}
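A hedged usage sketch of the two modes (the helper below is illustrative, not code from the PR; it only reuses the Config type, RingManagerMode values, and NewRingManager signature defined above): schedulers register themselves via member mode, while their clients only watch the ring via reader mode.

package scheduler

import (
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
)

// newRingManagerForRole is a hypothetical helper showing how a caller would
// pick a mode: only schedulers register themselves in the ring.
func newRingManagerForRole(isScheduler bool, cfg Config, logger log.Logger, reg prometheus.Registerer) (*RingManager, error) {
	mode := RingManagerModeReader
	if isScheduler {
		mode = RingManagerModeMember
	}
	return NewRingManager(mode, cfg, logger, reg)
}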
@@ -1,29 +0,0 @@
package indexgateway

import (
	"github.com/grafana/dskit/ring"
)

func (rm *RingManager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, _ string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
	// When we initialize the index gateway instance in the ring, we want to start from a clean
	// situation, so whatever the state is, we set it to JOINING, while we keep existing tokens
	// (if any) or the ones loaded from file.
	var tokens []uint32
	if instanceExists {
		tokens = instanceDesc.GetTokens()
	}

	takenTokens := ringDesc.GetTokens()
	gen := ring.NewRandomTokenGenerator()
	newTokens := gen.GenerateTokens(ringNumTokens-len(tokens), takenTokens)

	// Token sorting is enforced by the parent caller.
	tokens = append(tokens, newTokens...)

	return ring.JOINING, tokens
}

func (rm *RingManager) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {}
func (rm *RingManager) OnRingInstanceStopping(_ *ring.BasicLifecycler)              {}
func (rm *RingManager) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}
@@ -1,235 +0,0 @@
package indexgateway

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	ringAutoForgetUnhealthyPeriods = 10
	ringNameForServer              = "index-gateway"
	ringNumTokens                  = 128
	ringCheckPeriod                = 3 * time.Second

	// RingIdentifier is used as a unique name to register the Index Gateway ring.
	RingIdentifier = "index-gateway"

	// RingKey is the name of the key used to register the different Index Gateway instances in the key-value store.
	RingKey = "index-gateway"
)

// ManagerMode defines the different modes in which the RingManager executes.
//
// The RingManager and its modes are only relevant if the IndexGateway is running in ring mode.
type ManagerMode int

const (
	// ClientMode is the RingManager mode executed by Loki components that are clients of the IndexGateway.
	// The RingManager in client mode will have its own ring key-value store client, but it won't try to register itself in the ring.
	ClientMode ManagerMode = iota

	// ServerMode is the RingManager mode executed by the IndexGateway.
	// The RingManager in server mode will register itself in the ring.
	ServerMode
)

// RingManager is a component instantiated before all the others and is responsible for the ring setup.
//
// All Loki components that are involved with the IndexGateway (including the IndexGateway itself)
// require a RingManager. However, the components that are clients of the IndexGateway run it in
// client mode, while the IndexGateway itself runs the manager in server mode.
type RingManager struct {
	services.Service

	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	RingLifecycler *ring.BasicLifecycler
	Ring           *ring.Ring
	Mode           ManagerMode

	cfg Config

	log log.Logger
}

// NewRingManager is the recommended way of instantiating a RingManager.
//
// The other functions assume the RingManager was instantiated through this function.
func NewRingManager(mode ManagerMode, cfg Config, log log.Logger, registerer prometheus.Registerer) (*RingManager, error) {
	rm := &RingManager{
		cfg: cfg, log: log, Mode: mode,
	}

	if cfg.Mode != RingMode {
		return nil, fmt.Errorf("ring manager shouldn't be invoked when index gateway is not in ring mode")
	}

	// Instantiate the KV store used by both modes.
	ringStore, err := kv.NewClient(
		rm.cfg.Ring.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "index-gateway-ring-manager"),
		rm.log,
	)
	if err != nil {
		return nil, errors.Wrap(err, "index gateway ring manager create KV store client")
	}

	// Instantiate the ring used by both modes.
	ringCfg := rm.cfg.Ring.ToRingConfig(rm.cfg.Ring.ReplicationFactor)
	rm.Ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("loki_", registerer), rm.log)
	if err != nil {
		return nil, errors.Wrap(err, "index gateway ring manager create ring client")
	}

	if mode == ServerMode {
		if err := rm.startServerMode(ringStore, registerer); err != nil {
			return nil, err
		}
		return rm, nil
	}

	if err := rm.startClientMode(); err != nil {
		return nil, err
	}
	return rm, nil
}

func (rm *RingManager) startServerMode(ringStore kv.Client, registerer prometheus.Registerer) error {
	lifecyclerCfg, err := rm.cfg.Ring.ToLifecyclerConfig(ringNumTokens, rm.log)
	if err != nil {
		return errors.Wrap(err, "invalid ring lifecycler config")
	}

	delegate := ring.BasicLifecyclerDelegate(rm)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.log)
	delegate = ring.NewTokensPersistencyDelegate(rm.cfg.Ring.TokensFilePath, ring.JOINING, delegate, rm.log)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.Ring.HeartbeatTimeout, delegate, rm.log)

	rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, RingKey, ringStore, delegate, rm.log, registerer)
	if err != nil {
		return errors.Wrap(err, "index gateway ring manager create ring lifecycler")
	}

	svcs := []services.Service{rm.RingLifecycler, rm.Ring}
	rm.subservices, err = services.NewManager(svcs...)
	if err != nil {
		return errors.Wrap(err, "new index gateway services manager in server mode")
	}

	rm.subservicesWatcher = services.NewFailureWatcher()
	rm.subservicesWatcher.WatchManager(rm.subservices)
	rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping)

	return nil
}

func (rm *RingManager) startClientMode() error {
	var err error

	svcs := []services.Service{rm.Ring}
	rm.subservices, err = services.NewManager(svcs...)
	if err != nil {
		return errors.Wrap(err, "new index gateway services manager in client mode")
	}

	rm.subservicesWatcher = services.NewFailureWatcher()
	rm.subservicesWatcher.WatchManager(rm.subservices)

	rm.Service = services.NewIdleService(func(ctx context.Context) error {
		return services.StartManagerAndAwaitHealthy(ctx, rm.subservices)
	}, func(failureCase error) error {
		return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
	})

	return nil
}

// starting implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) starting(ctx context.Context) (err error) {
	// In case this function returns an error, we want to unregister the instance
	// from the ring. We do it by ensuring dependencies are gracefully stopped if they
	// were already started.
	defer func() {
		if err == nil || rm.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil {
			level.Error(rm.log).Log("msg", "failed to gracefully stop index gateway ring manager dependencies", "err", stopErr)
		}
	}()

	if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil {
		return errors.Wrap(err, "unable to start index gateway ring manager subservices")
	}

	// The BasicLifecycler does not automatically move the state to ACTIVE, so that any additional
	// work can be done before becoming ACTIVE. For the index gateway we don't currently have any
	// additional work, so we can become ACTIVE right away.

	// Wait until the ring client has detected this instance in the JOINING state, to make sure
	// that when we run the initial sync we already know the tokens assigned to this instance.
	level.Info(rm.log).Log("msg", "waiting until index gateway is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(rm.log).Log("msg", "index gateway is JOINING in the ring")

	if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client has detected this instance in the ACTIVE state, to make sure
	// that when we run the loop it won't be detected as a ring topology change.
	level.Info(rm.log).Log("msg", "waiting until index gateway is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(rm.log).Log("msg", "index gateway is ACTIVE in the ring")

	return nil
}

// running implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) running(ctx context.Context) error {
	t := time.NewTicker(ringCheckPeriod)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil
		case err := <-rm.subservicesWatcher.Chan():
			return errors.Wrap(err, "running index gateway ring manager subservice failed")
		case <-t.C:
			continue
		}
	}
}

// stopping implements the Lifecycler interface and is one of the lifecycle hooks.
func (rm *RingManager) stopping(_ error) error {
	level.Debug(rm.log).Log("msg", "stopping index gateway ring manager")
	return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
}

// ServeHTTP serves the HTTP route /indexgateway/ring.
func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if rm.cfg.Mode == RingMode {
		rm.Ring.ServeHTTP(w, req)
	} else {
		_, _ = w.Write([]byte("IndexGateway running with 'useIndexGatewayRing' disabled."))
	}
}
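Because RingManager implements http.Handler via ServeHTTP, exposing the ring status page is a one-liner. A minimal sketch, assuming a plain net/http mux rather than Loki's actual API registration (the helper name is hypothetical; the route path is the one named in the ServeHTTP comment above):

package indexgateway

import "net/http"

// registerRingRoute mounts the ring status page on a standard mux.
func registerRingRoute(mux *http.ServeMux, rm *RingManager) {
	mux.Handle("/indexgateway/ring", rm)
}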
@@ -1,4 +1,4 @@
-package util
+package ring

 import (
 	"testing"