Ruler/loki rule validator (#2589)

* revendors cortex

* loki multitenantmanager

* vendoring compat

* go mod
Owen Diehl 5 years ago committed by GitHub
parent cd63a535d2
commit 164f5cd0ae
57 files changed:
1. go.mod (6)
2. go.sum (9)
3. pkg/distributor/distributor_test.go (9)
4. pkg/querier/querier_mock_test.go (9)
5. pkg/querier/queryrange/codec.go (6)
6. pkg/querier/queryrange/roundtrip.go (3)
7. pkg/ruler/manager/compat.go (21)
8. pkg/ruler/ruler.go (4)
9. vendor/github.com/bradfitz/gomemcache/memcache/memcache.go (39)
10. vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager.go (16)
11. vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_metrics.go (8)
12. vendor/github.com/cortexproject/cortex/pkg/alertmanager/multitenant.go (48)
13. vendor/github.com/cortexproject/cortex/pkg/api/api.go (22)
14. vendor/github.com/cortexproject/cortex/pkg/chunk/cache/memcached_client.go (66)
15. vendor/github.com/cortexproject/cortex/pkg/chunk/chunk_store.go (31)
16. vendor/github.com/cortexproject/cortex/pkg/chunk/series_store.go (2)
17. vendor/github.com/cortexproject/cortex/pkg/cortex/cortex.go (10)
18. vendor/github.com/cortexproject/cortex/pkg/cortex/modules.go (22)
19. vendor/github.com/cortexproject/cortex/pkg/distributor/distributor.go (5)
20. vendor/github.com/cortexproject/cortex/pkg/distributor/ha_tracker.go (2)
21. vendor/github.com/cortexproject/cortex/pkg/ingester/ingester.go (10)
22. vendor/github.com/cortexproject/cortex/pkg/ingester/wal.go (2)
23. vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_balanced_set.go (2)
24. vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_queryable.go (7)
25. vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_replicated_set.go (37)
26. vendor/github.com/cortexproject/cortex/pkg/querier/distributor_queryable.go (25)
27. vendor/github.com/cortexproject/cortex/pkg/querier/querier.go (77)
28. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/query_range.go (13)
29. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/queryrange.pb.go (393)
30. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/queryrange.proto (5)
31. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/results_cache.go (21)
32. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/roundtrip.go (8)
33. vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/split_by_interval.go (8)
34. vendor/github.com/cortexproject/cortex/pkg/ring/basic_lifecycler.go (7)
35. vendor/github.com/cortexproject/cortex/pkg/ring/kv/etcd/etcd.go (38)
36. vendor/github.com/cortexproject/cortex/pkg/ring/lifecycler.go (7)
37. vendor/github.com/cortexproject/cortex/pkg/ring/model.go (6)
38. vendor/github.com/cortexproject/cortex/pkg/ring/ring.go (13)
39. vendor/github.com/cortexproject/cortex/pkg/ruler/api.go (25)
40. vendor/github.com/cortexproject/cortex/pkg/ruler/manager.go (60)
41. vendor/github.com/cortexproject/cortex/pkg/ruler/ruler.go (22)
42. vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/util.go (6)
43. vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_stores.go (95)
44. vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway.go (56)
45. vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_filters.go (60)
46. vendor/github.com/cortexproject/cortex/pkg/storegateway/sharding_strategy.go (201)
47. vendor/github.com/cortexproject/cortex/pkg/util/extract_forwarded.go (53)
48. vendor/github.com/cortexproject/cortex/pkg/util/log.go (6)
49. vendor/github.com/cortexproject/cortex/pkg/util/push/push.go (17)
50. vendor/github.com/cortexproject/cortex/pkg/util/validation/limits.go (11)
51. vendor/github.com/sony/gobreaker/.travis.yml (15)
52. vendor/github.com/sony/gobreaker/LICENSE (21)
53. vendor/github.com/sony/gobreaker/README.md (128)
54. vendor/github.com/sony/gobreaker/go.mod (5)
55. vendor/github.com/sony/gobreaker/go.sum (7)
56. vendor/github.com/sony/gobreaker/gobreaker.go (344)
57. vendor/modules.txt (7)

@ -10,7 +10,7 @@ require (
github.com/cespare/xxhash/v2 v2.1.1
github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448 // indirect
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/cortexproject/cortex v1.3.1-0.20200824064113-84c06a83898e
github.com/cortexproject/cortex v1.3.1-0.20200901164804-97d13c1ef16f
github.com/davecgh/go-spew v1.1.1
github.com/docker/docker v17.12.0-ce-rc1.0.20200706150819-a40b877fbb9e+incompatible
github.com/docker/go-metrics v0.0.0-20181218153428-b84716841b82 // indirect
@ -80,3 +80,7 @@ replace github.com/gocql/gocql => github.com/grafana/gocql v0.0.0-20200605141915
// Same as Cortex, we can't upgrade to grpc 1.30.0 until go.etcd.io/etcd will support it.
replace google.golang.org/grpc => google.golang.org/grpc v1.29.1
// Same as Cortex
// Using a 3rd-party branch for custom dialer - see https://github.com/bradfitz/gomemcache/pull/86
replace github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab

@ -182,8 +182,6 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b h1:L/QXpzIa3pOvUGt1D1lA5KjYhPBAN/3iWdP7xeFS9F0=
github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b/go.mod h1:H0wQNHz2YrLsuXOZozoeDmnHXkNCRmMW0gwFWDfEZDA=
github.com/c-bata/go-prompt v0.2.2/go.mod h1:VzqtzE2ksDBcdln8G7mk2RX9QyGjH+OVqOCSiVIqS34=
github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee h1:BnPxIde0gjtTnc9Er7cxvBk8DHLWhEux0SxayC8dP6I=
github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
@ -237,8 +235,8 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbp
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cortexproject/cortex v0.6.1-0.20200228110116-92ab6cbe0995/go.mod h1:3Xa3DjJxtpXqxcMGdk850lcIRb81M0fyY1MQ6udY134=
github.com/cortexproject/cortex v1.2.1-0.20200805064754-d8edc95e2c91/go.mod h1:PVPxNLrxKH+yc8asaJOxuz7TiRmMizFfnSMOnRzM6oM=
github.com/cortexproject/cortex v1.3.1-0.20200824064113-84c06a83898e h1:f9Si1KNKw1ulAUnb++STgNSoRca+KABemwPtIq2HcF4=
github.com/cortexproject/cortex v1.3.1-0.20200824064113-84c06a83898e/go.mod h1:w0J/iHsF4Hh2KxmmpuYIBo/I9t8dxKztThqr+683p7M=
github.com/cortexproject/cortex v1.3.1-0.20200901164804-97d13c1ef16f h1:g+MHBeXc4V6JRVzAYzvZUJFfxsCXHmuRpz5M10mvl30=
github.com/cortexproject/cortex v1.3.1-0.20200901164804-97d13c1ef16f/go.mod h1:ub8BpRZrRa02BOM8NJTnI2YklxW/mGhEkJDrhsDfcfg=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/cznic/b v0.0.0-20180115125044-35e9bbe41f07/go.mod h1:URriBxXwVq5ijiJ12C7iIZqlA69nTlI+LgI6/pwftG8=
@ -1073,6 +1071,7 @@ github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIK
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ=
github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY=
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a/go.mod h1:LeFCbQYJ3KJlPs/FvPz2dy1tkpxyeNESVyCNNzRXFR0=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
@ -1107,6 +1106,8 @@ github.com/thanos-io/thanos v0.13.1-0.20200731083140-69b87607decf h1:yq9nWz5Iv6e
github.com/thanos-io/thanos v0.13.1-0.20200731083140-69b87607decf/go.mod h1:G8caR6G7pSDreRDvFm9wFuyjEBztmr8Ag3kBYpa/fEc=
github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763 h1:c84P3YUu8bxLWE2csCSK4XJNi5FxcC+HL4WDNDEbTwA=
github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763/go.mod h1:KyW0a93tsh7v4hXAwo2CVAIRYuZT1Kkf4e04gisQjAg=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY=
github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=

@ -307,3 +307,12 @@ func (r mockRing) IngesterCount() int {
func (r mockRing) Subring(key uint32, n int) ring.ReadRing {
return r
}
func (r mockRing) HasInstance(instanceID string) bool {
for _, ing := range r.ingesters {
if ing.Addr != instanceID {
return true
}
}
return false
}

@ -319,6 +319,15 @@ func (r *readRingMock) Subring(key uint32, n int) ring.ReadRing {
return r
}
func (r *readRingMock) HasInstance(instanceID string) bool {
for _, ing := range r.replicationSet.Ingesters {
if ing.Addr != instanceID {
return true
}
}
return false
}
func mockReadRingWithOneActiveIngester() *readRingMock {
return newReadRingMock([]ring.IngesterDesc{
{Addr: "test", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{1, 2, 3}},

@ -72,6 +72,8 @@ func (r *LokiRequest) LogToSpan(sp opentracing.Span) {
)
}
func (*LokiRequest) GetCachingOptions() (res queryrange.CachingOptions) { return }
func (r *LokiSeriesRequest) GetEnd() int64 {
return r.EndTs.UnixNano() / (int64(time.Millisecond) / int64(time.Nanosecond))
}
@ -108,6 +110,8 @@ func (r *LokiSeriesRequest) LogToSpan(sp opentracing.Span) {
)
}
func (*LokiSeriesRequest) GetCachingOptions() (res queryrange.CachingOptions) { return }
func (r *LokiLabelNamesRequest) GetEnd() int64 {
return r.EndTs.UnixNano() / (int64(time.Millisecond) / int64(time.Nanosecond))
}
@ -143,6 +147,8 @@ func (r *LokiLabelNamesRequest) LogToSpan(sp opentracing.Span) {
)
}
func (*LokiLabelNamesRequest) GetCachingOptions() (res queryrange.CachingOptions) { return }
func (codec) DecodeRequest(_ context.Context, r *http.Request) (queryrange.Request, error) {
if err := r.ParseForm(); err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())

@ -354,6 +354,9 @@ func NewMetricTripperware(
codec,
extractor,
nil,
func(r queryrange.Request) bool {
return !r.GetCachingOptions().Disabled
},
registerer,
)
if err != nil {

@ -63,6 +63,21 @@ func engineQueryFunc(engine *logql.Engine, delay time.Duration) rules.QueryFunc
}
// MultiTenantManagerAdapter will wrap a MultiTenantManager which validates loki rules
func MultiTenantManagerAdapter(mgr ruler.MultiTenantManager) *MultiTenantManager {
return &MultiTenantManager{mgr}
}
// MultiTenantManager wraps a cortex MultiTenantManager but validates loki rules
type MultiTenantManager struct {
ruler.MultiTenantManager
}
// ValidateRuleGroup validates a rulegroup
func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error {
return validateGroups(grp)
}
func MemstoreTenantManager(
cfg ruler.Config,
engine *logql.Engine,
@ -148,13 +163,13 @@ func (GroupLoader) parseRules(content []byte) (*rulefmt.RuleGroups, []error) {
return nil, errs
}
return &groups, validateGroup(&groups)
return &groups, validateGroups(groups.Groups...)
}
func validateGroup(grps *rulefmt.RuleGroups) (errs []error) {
func validateGroups(grps ...rulefmt.RuleGroup) (errs []error) {
set := map[string]struct{}{}
for i, g := range grps.Groups {
for i, g := range grps {
if g.Name == "" {
errs = append(errs, errors.Errorf("group %d: Groupname must not be empty", i))
}

@ -15,7 +15,7 @@ type Config struct {
func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore cRules.RuleStore) (*ruler.Ruler, error) {
manager, err := ruler.NewDefaultMultiTenantManager(
mgr, err := ruler.NewDefaultMultiTenantManager(
cfg.Config,
manager.MemstoreTenantManager(
cfg.Config,
@ -30,7 +30,7 @@ func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logge
}
return ruler.NewRuler(
cfg.Config,
manager,
manager.MultiTenantManagerAdapter(mgr),
reg,
logger,
ruleStore,

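Taken together, pkg/ruler/manager/compat.go and pkg/ruler/ruler.go form a small decorator: Cortex's DefaultMultiTenantManager keeps handling rule-group lifecycle, while the Loki wrapper intercepts only ValidateRuleGroup so groups are checked as Loki rules before they are accepted. A minimal, self-contained sketch of the pattern (the types and names below are simplified stand-ins, not Loki's actual interfaces):

package main

import (
	"errors"
	"fmt"
)

// RuleGroup and MultiTenantManager are simplified stand-ins for the types in
// github.com/prometheus/prometheus/pkg/rulefmt and github.com/cortexproject/cortex/pkg/ruler.
type RuleGroup struct{ Name string }

type MultiTenantManager interface {
	ValidateRuleGroup(RuleGroup) []error
}

// cortexManager plays the role of ruler.DefaultMultiTenantManager.
type cortexManager struct{}

func (cortexManager) ValidateRuleGroup(RuleGroup) []error { return nil }

// lokiManager mirrors MultiTenantManagerAdapter: it embeds the wrapped manager
// and shadows only ValidateRuleGroup with Loki-specific checks.
type lokiManager struct{ MultiTenantManager }

func (lokiManager) ValidateRuleGroup(g RuleGroup) []error {
	if g.Name == "" {
		return []error{errors.New("group name must not be empty")}
	}
	return nil // LogQL expression checks would run here
}

func main() {
	var mgr MultiTenantManager = lokiManager{cortexManager{}}
	fmt.Println(mgr.ValidateRuleGroup(RuleGroup{Name: ""})) // [group name must not be empty]
}
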
@ -112,7 +112,6 @@ var (
resultTouched = []byte("TOUCHED\r\n")
resultClientErrorPrefix = []byte("CLIENT_ERROR ")
versionPrefix = []byte("VERSION")
)
// New returns a memcache client using the provided server(s)
@ -132,6 +131,8 @@ func NewFromSelector(ss ServerSelector) *Client {
// Client is a memcache client.
// It is safe for unlocked use by multiple concurrent goroutines.
type Client struct {
// Dialer specifies a custom dialer used to dial new connections to a server.
DialTimeout func(network, address string, timeout time.Duration) (net.Conn, error)
// Timeout specifies the socket read/write timeout.
// If zero, DefaultTimeout is used.
Timeout time.Duration
@ -259,8 +260,10 @@ func (c *Client) dial(addr net.Addr) (net.Conn, error) {
cn net.Conn
err error
}
nc, err := net.DialTimeout(addr.Network(), addr.String(), c.netTimeout())
if c.DialTimeout == nil {
c.DialTimeout = net.DialTimeout
}
nc, err := c.DialTimeout(addr.Network(), addr.String(), c.netTimeout())
if err == nil {
return nc, nil
}
@ -399,30 +402,6 @@ func (c *Client) flushAllFromAddr(addr net.Addr) error {
})
}
// ping sends the version command to the given addr
func (c *Client) ping(addr net.Addr) error {
return c.withAddrRw(addr, func(rw *bufio.ReadWriter) error {
if _, err := fmt.Fprintf(rw, "version\r\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
line, err := rw.ReadSlice('\n')
if err != nil {
return err
}
switch {
case bytes.HasPrefix(line, versionPrefix):
break
default:
return fmt.Errorf("memcache: unexpected response line from ping: %q", string(line))
}
return nil
})
}
func (c *Client) touchFromAddr(addr net.Addr, keys []string, expiration int32) error {
return c.withAddrRw(addr, func(rw *bufio.ReadWriter) error {
for _, key := range keys {
@ -669,12 +648,6 @@ func (c *Client) DeleteAll() error {
})
}
// Ping checks all instances if they are alive. Returns error if any
// of them is down.
func (c *Client) Ping() error {
return c.selector.Each(c.ping)
}
// Increment atomically increments key by delta. The return value is
// the new value after being incremented or an error. If the value
// didn't exist in memcached the error is ErrCacheMiss. The value in

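The change to the vendored gomemcache client (the themihai fork selected by the go.mod replace above) adds a DialTimeout hook so callers can substitute their own dialer. A hedged sketch of how a caller might use it; the server address and the instrumentation inside the dialer are placeholders:

package main

import (
	"net"
	"time"

	// Resolved to github.com/themihai/gomemcache by the replace directive in go.mod.
	"github.com/bradfitz/gomemcache/memcache"
)

func main() {
	c := memcache.New("127.0.0.1:11211")

	// When DialTimeout is nil the client falls back to net.DialTimeout,
	// as shown in the diff above; here we wrap it to add instrumentation.
	c.DialTimeout = func(network, address string, timeout time.Duration) (net.Conn, error) {
		start := time.Now()
		conn, err := net.DialTimeout(network, address, timeout)
		_ = time.Since(start) // record a dial-latency metric here
		return conn, err
	}

	_ = c.Set(&memcache.Item{Key: "example", Value: []byte("value")})
}
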
@ -67,20 +67,16 @@ type Alertmanager struct {
mux *http.ServeMux
registry *prometheus.Registry
// The Dispatcher is the only component we need to recreate when we call ApplyConfig.
// Given its metrics don't have any variable labels we need to re-use the same metrics.
dispatcherMetrics *dispatch.DispatcherMetrics
activeMtx sync.Mutex
active bool
}
var (
webReload = make(chan chan error)
// In order to workaround a bug in the alertmanager, which doesn't register the
// metrics in the input registry but to the global default one, we do define a
// singleton dispatcher metrics instance that is going to be shared across all
// tenants alertmanagers.
// TODO change this once the vendored alertmanager will have this PR merged into:
// https://github.com/prometheus/alertmanager/pull/2200
dispatcherMetrics = dispatch.NewDispatcherMetrics(prometheus.NewRegistry())
)
func init() {
@ -158,6 +154,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
Silences: am.silences,
StatusFunc: am.marker.Status,
Peer: cfg.Peer,
Registry: am.registry,
Logger: log.With(am.logger, "component", "api"),
GroupFunc: func(f1 func(*dispatch.Route) bool, f2 func(*types.Alert, time.Time) bool) (dispatch.AlertGroups, map[model.Fingerprint][]string) {
return am.dispatcher.Groups(f1, f2)
@ -172,6 +169,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
ui.Register(router, webReload, log.With(am.logger, "component", "ui"))
am.mux = am.api.Register(router, am.cfg.ExternalURL.Path)
am.dispatcherMetrics = dispatch.NewDispatcherMetrics(am.registry)
return am, nil
}
@ -240,7 +238,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {
am.marker,
timeoutFunc,
log.With(am.logger, "component", "dispatcher"),
dispatcherMetrics,
am.dispatcherMetrics,
)
go am.dispatcher.Run()

@ -62,11 +62,11 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
numNotifications: prometheus.NewDesc(
"cortex_alertmanager_notifications_total",
"The total number of attempted notifications.",
[]string{"user"}, nil),
[]string{"user", "integration"}, nil),
numFailedNotifications: prometheus.NewDesc(
"cortex_alertmanager_notifications_failed_total",
"The total number of failed notifications.",
[]string{"user"}, nil),
[]string{"user", "integration"}, nil),
notificationLatencySeconds: prometheus.NewDesc(
"cortex_alertmanager_notification_latency_seconds",
"The latency of notifications in seconds.",
@ -186,8 +186,8 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCountersPerUser(out, m.alertsReceived, "alertmanager_alerts_received_total")
data.SendSumOfCountersPerUser(out, m.alertsInvalid, "alertmanager_alerts_invalid_total")
data.SendSumOfCountersPerUser(out, m.numNotifications, "alertmanager_notifications_total")
data.SendSumOfCountersPerUser(out, m.numFailedNotifications, "alertmanager_notifications_failed_total")
data.SendSumOfCountersPerUserWithLabels(out, m.numNotifications, "alertmanager_notifications_total", "integration")
data.SendSumOfCountersPerUserWithLabels(out, m.numFailedNotifications, "alertmanager_notifications_failed_total", "integration")
data.SendSumOfHistograms(out, m.notificationLatencySeconds, "alertmanager_notification_latency_seconds")
data.SendSumOfGaugesPerUserWithLabels(out, m.markerAlerts, "alertmanager_alerts", "state")

@ -135,7 +135,7 @@ func newMultitenantAlertmanagerMetrics(reg prometheus.Registerer) *multitenantAl
m.invalidConfig = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "alertmanager_config_invalid",
Help: "Whenever the Alertmanager config is invalid for a user.",
Help: "Boolean set to 1 whenever the Alertmanager config is invalid for a user.",
}, []string{"user"})
return m
@ -175,6 +175,10 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L
return nil, fmt.Errorf("unable to create Alertmanager data directory %q: %s", cfg.DataDir, err)
}
if cfg.ExternalURL.URL == nil {
return nil, fmt.Errorf("unable to create Alertmanager because the external URL has not been configured")
}
var fallbackConfig []byte
if cfg.FallbackConfigFile != "" {
fallbackConfig, err = ioutil.ReadFile(cfg.FallbackConfigFile)
@ -459,11 +463,47 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re
userAM, ok := am.alertmanagers[userID]
am.alertmanagersMtx.Unlock()
if !ok || !userAM.IsActive() {
http.Error(w, "no Alertmanager for this user ID", http.StatusNotFound)
if ok {
if !userAM.IsActive() {
http.Error(w, "the Alertmanager is not configured", http.StatusNotFound)
return
}
userAM.mux.ServeHTTP(w, req)
return
}
if am.fallbackConfig != "" {
userAM, err = am.alertmanagerFromFallbackConfig(userID)
if err != nil {
http.Error(w, "Failed to initialize the Alertmanager", http.StatusInternalServerError)
return
}
userAM.mux.ServeHTTP(w, req)
return
}
userAM.mux.ServeHTTP(w, req)
http.Error(w, "the Alertmanager is not configured", http.StatusNotFound)
}
func (am *MultitenantAlertmanager) alertmanagerFromFallbackConfig(userID string) (*Alertmanager, error) {
// Upload an empty config so that the Alertmanager is not de-activated in the next poll.
cfgDesc := alerts.ToProto("", nil, userID)
err := am.store.SetAlertConfig(context.Background(), cfgDesc)
if err != nil {
return nil, err
}
// Calling setConfig with an empty configuration will use the fallback config.
err = am.setConfig(cfgDesc)
if err != nil {
return nil, err
}
am.alertmanagersMtx.Lock()
defer am.alertmanagersMtx.Unlock()
return am.alertmanagers[userID], nil
}
// GetStatusHandler returns the status handler for this multi-tenant

@ -65,17 +65,29 @@ type API struct {
authMiddleware middleware.Interface
server *server.Server
logger log.Logger
sourceIPs *middleware.SourceIPExtractor
}
func New(cfg Config, s *server.Server, logger log.Logger) (*API, error) {
func New(cfg Config, serverCfg server.Config, s *server.Server, logger log.Logger) (*API, error) {
// Ensure the encoded path is used. Required for the rules API
s.HTTP.UseEncodedPath()
var sourceIPs *middleware.SourceIPExtractor
if serverCfg.LogSourceIPs {
var err error
sourceIPs, err = middleware.NewSourceIPs(serverCfg.LogSourceIPsHeader, serverCfg.LogSourceIPsRegex)
if err != nil {
// This should have already been caught in the Server creation
return nil, err
}
}
api := &API{
cfg: cfg,
authMiddleware: cfg.HTTPAuthMiddleware,
server: s,
logger: logger,
sourceIPs: sourceIPs,
}
// If no authentication middleware is present in the config, use the default authentication middleware.
@ -161,12 +173,12 @@ func (a *API) RegisterAPI(cfg interface{}) {
// RegisterDistributor registers the endpoints associated with the distributor.
func (a *API) RegisterDistributor(d *distributor.Distributor, pushConfig distributor.Config) {
a.RegisterRoute("/api/v1/push", push.Handler(pushConfig, d.Push), true)
a.RegisterRoute("/api/v1/push", push.Handler(pushConfig, a.sourceIPs, d.Push), true)
a.RegisterRoute("/distributor/all_user_stats", http.HandlerFunc(d.AllUserStatsHandler), false)
a.RegisterRoute("/distributor/ha_tracker", d.HATracker, false)
// Legacy Routes
a.RegisterRoute(a.cfg.LegacyHTTPPrefix+"/push", push.Handler(pushConfig, d.Push), true)
a.RegisterRoute(a.cfg.LegacyHTTPPrefix+"/push", push.Handler(pushConfig, a.sourceIPs, d.Push), true)
a.RegisterRoute("/all_user_stats", http.HandlerFunc(d.AllUserStatsHandler), false)
a.RegisterRoute("/ha-tracker", d.HATracker, false)
}
@ -177,12 +189,12 @@ func (a *API) RegisterIngester(i *ingester.Ingester, pushConfig distributor.Conf
a.RegisterRoute("/ingester/flush", http.HandlerFunc(i.FlushHandler), false)
a.RegisterRoute("/ingester/shutdown", http.HandlerFunc(i.ShutdownHandler), false)
a.RegisterRoute("/ingester/push", push.Handler(pushConfig, i.Push), true) // For testing and debugging.
a.RegisterRoute("/ingester/push", push.Handler(pushConfig, a.sourceIPs, i.Push), true) // For testing and debugging.
// Legacy Routes
a.RegisterRoute("/flush", http.HandlerFunc(i.FlushHandler), false)
a.RegisterRoute("/shutdown", http.HandlerFunc(i.ShutdownHandler), false)
a.RegisterRoute("/push", push.Handler(pushConfig, i.Push), true) // For testing and debugging.
a.RegisterRoute("/push", push.Handler(pushConfig, a.sourceIPs, i.Push), true) // For testing and debugging.
}
// RegisterPurger registers the endpoints associated with the Purger/DeleteStore. They do not exactly

@ -16,6 +16,7 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sony/gobreaker"
"github.com/thanos-io/thanos/pkg/discovery/dns"
"github.com/cortexproject/cortex/pkg/util"
@ -35,6 +36,8 @@ type serverSelector interface {
// memcachedClient is a memcache client that gets its server list from SRV
// records, and periodically updates that ServerList.
type memcachedClient struct {
sync.Mutex
name string
*memcache.Client
serverList serverSelector
@ -44,6 +47,11 @@ type memcachedClient struct {
addresses []string
provider *dns.Provider
cbs map[ /*address*/ string]*gobreaker.CircuitBreaker
cbFailures uint
cbTimeout time.Duration
cbInterval time.Duration
quit chan struct{}
wait sync.WaitGroup
@ -61,6 +69,9 @@ type MemcachedClientConfig struct {
MaxIdleConns int `yaml:"max_idle_conns"`
UpdateInterval time.Duration `yaml:"update_interval"`
ConsistentHash bool `yaml:"consistent_hash"`
CBFailures uint `yaml:"circuit_breaker_consecutive_failures"`
CBTimeout time.Duration `yaml:"circuit_breaker_timeout"` // reset error count after this long
CBInterval time.Duration `yaml:"circuit_breaker_interval"` // remain closed for this long after CBFailures errors
}
// RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet
@ -72,6 +83,9 @@ func (cfg *MemcachedClientConfig) RegisterFlagsWithPrefix(prefix, description st
f.DurationVar(&cfg.Timeout, prefix+"memcached.timeout", 100*time.Millisecond, description+"Maximum time to wait before giving up on memcached requests.")
f.DurationVar(&cfg.UpdateInterval, prefix+"memcached.update-interval", 1*time.Minute, description+"Period with which to poll DNS for memcache servers.")
f.BoolVar(&cfg.ConsistentHash, prefix+"memcached.consistent-hash", true, description+"Use consistent hashing to distribute to memcache servers.")
f.UintVar(&cfg.CBFailures, prefix+"memcached.circuit-breaker-consecutive-failures", 0, description+"Trip circuit-breaker after this number of consecutive dial failures (if zero then circuit-breaker is disabled).")
f.DurationVar(&cfg.CBTimeout, prefix+"memcached.circuit-breaker-timeout", 10*time.Second, description+"Duration circuit-breaker remains open after tripping (if zero then 60 seconds is used).")
f.DurationVar(&cfg.CBInterval, prefix+"memcached.circuit-breaker-interval", 10*time.Second, description+"Reset circuit-breaker counts after this long (if zero then never reset).")
}
// NewMemcachedClient creates a new MemcacheClient that gets its server list
@ -93,12 +107,17 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
}, r))
newClient := &memcachedClient{
name: name,
Client: client,
serverList: selector,
hostname: cfg.Host,
service: cfg.Service,
logger: logger,
provider: dns.NewProvider(logger, dnsProviderRegisterer, dns.GolangResolverType),
cbs: make(map[string]*gobreaker.CircuitBreaker),
cbFailures: cfg.CBFailures,
cbInterval: cfg.CBInterval,
cbTimeout: cfg.CBTimeout,
quit: make(chan struct{}),
numServers: promauto.With(r).NewGauge(prometheus.GaugeOpts{
@ -108,6 +127,9 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
ConstLabels: prometheus.Labels{"name": name},
}),
}
if cfg.CBFailures > 0 {
newClient.Client.DialTimeout = newClient.dialViaCircuitBreaker
}
if len(cfg.Addresses) > 0 {
util.WarnExperimentalUse("DNS-based memcached service discovery")
@ -124,6 +146,36 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
return newClient
}
func (c *memcachedClient) circuitBreakerStateChange(name string, from gobreaker.State, to gobreaker.State) {
level.Info(c.logger).Log("msg", "circuit-breaker state change", "name", name, "from", from, "to", to)
}
func (c *memcachedClient) dialViaCircuitBreaker(network, address string, timeout time.Duration) (net.Conn, error) {
c.Lock()
cb := c.cbs[address]
if cb == nil {
cb = gobreaker.NewCircuitBreaker(gobreaker.Settings{
Name: c.name + ":" + address,
Interval: c.cbInterval,
Timeout: c.cbTimeout,
OnStateChange: c.circuitBreakerStateChange,
ReadyToTrip: func(counts gobreaker.Counts) bool {
return uint(counts.ConsecutiveFailures) > c.cbFailures
},
})
c.cbs[address] = cb
}
c.Unlock()
conn, err := cb.Execute(func() (interface{}, error) {
return net.DialTimeout(network, address, timeout)
})
if err != nil {
return nil, err
}
return conn.(net.Conn), nil
}
// Stop the memcache client.
func (c *memcachedClient) Stop() {
close(c.quit)
@ -186,6 +238,20 @@ func (c *memcachedClient) updateMemcacheServers() error {
}
}
if len(servers) > 0 {
// Copy across circuit-breakers for current set of addresses, thus
// leaving behind any for servers we won't talk to again
c.Lock()
newCBs := make(map[string]*gobreaker.CircuitBreaker, len(servers))
for _, address := range servers {
if cb, exists := c.cbs[address]; exists {
newCBs[address] = cb
}
}
c.cbs = newCBs
c.Unlock()
}
// ServerList deterministically maps keys to _index_ of the server list.
// Since DNS returns records in different order each time, we sort to
// guarantee best possible match between nodes.

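The new circuit-breaker options route every dial through sony/gobreaker, keyed by server address. The semantics of the three settings (consecutive-failure threshold, open-state timeout, closed-state count reset interval) can be seen in a standalone sketch; the threshold of 3 and the breaker name below are arbitrary:

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/sony/gobreaker"
)

func main() {
	cb := gobreaker.NewCircuitBreaker(gobreaker.Settings{
		Name:     "memcached:127.0.0.1:11211", // address-scoped breaker, as in dialViaCircuitBreaker
		Interval: 10 * time.Second,            // closed-state failure counts reset after this long
		Timeout:  10 * time.Second,            // how long the breaker stays open once tripped
		ReadyToTrip: func(counts gobreaker.Counts) bool {
			return counts.ConsecutiveFailures > 3 // arbitrary threshold for this sketch
		},
	})

	for i := 0; i < 6; i++ {
		_, err := cb.Execute(func() (interface{}, error) {
			return nil, errors.New("dial failed") // stand-in for a failing net.DialTimeout
		})
		fmt.Println(i, cb.State(), err)
	}
	// After the 4th consecutive failure the breaker opens; the remaining
	// calls fail fast with gobreaker.ErrOpenState without dialing at all.
}
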
@ -460,16 +460,6 @@ func (c *baseStore) lookupIdsByMetricNameMatcher(ctx context.Context, from, thro
} else if matcher.Type == labels.MatchEqual {
labelName = matcher.Name
queries, err = c.schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, matcher.Name, matcher.Value)
} else if matcher.Type == labels.MatchRegexp && len(FindSetMatches(matcher.Value)) > 0 {
set := FindSetMatches(matcher.Value)
for _, v := range set {
var qs []IndexQuery
qs, err = c.schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, matcher.Name, v)
if err != nil {
break
}
queries = append(queries, qs...)
}
} else {
labelName = matcher.Name
queries, err = c.schema.GetReadQueriesForMetricLabel(from, through, userID, metricName, matcher.Name)
@ -550,6 +540,14 @@ func (c *baseStore) parseIndexEntries(_ context.Context, entries []IndexEntry, m
return nil, nil
}
matchSet := map[string]struct{}{}
if matcher != nil && matcher.Type == labels.MatchRegexp {
set := FindSetMatches(matcher.Value)
for _, v := range set {
matchSet[v] = struct{}{}
}
}
result := make([]string, 0, len(entries))
for _, entry := range entries {
chunkKey, labelValue, _, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value)
@ -557,6 +555,19 @@ func (c *baseStore) parseIndexEntries(_ context.Context, entries []IndexEntry, m
return nil, err
}
// If the matcher is like a set (=~"a|b|c|d|...") and
// the label value is not in that set, move on.
if len(matchSet) > 0 {
if _, ok := matchSet[string(labelValue)]; !ok {
continue
}
// If it's in the set, add it to the result; we don't need to run the
// matcher on it again.
result = append(result, chunkKey)
continue
}
if matcher != nil && !matcher.Matches(string(labelValue)) {
continue
}

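The refactor moves set-style regex matchers (=~"a|b|c") out of index-query generation and into parseIndexEntries, where matching label values are filtered with a map lookup instead of a regexp evaluation per entry. A self-contained sketch of that fast path, assuming the matcher value is a plain alternation with no other regex metacharacters (which is what FindSetMatches detects):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// A set-style matcher value, e.g. from cluster=~"frontend|backend|worker".
	matcherValue := "frontend|backend|worker"

	// Equivalent in spirit to FindSetMatches: split a pure alternation into its members.
	matchSet := map[string]struct{}{}
	for _, v := range strings.Split(matcherValue, "|") {
		matchSet[v] = struct{}{}
	}

	// While parsing index entries, label values outside the set are skipped
	// with a map lookup instead of running the regexp matcher per entry.
	for _, labelValue := range []string{"frontend", "db", "worker"} {
		if _, ok := matchSet[labelValue]; ok {
			fmt.Println("keep", labelValue)
		} else {
			fmt.Println("skip", labelValue)
		}
	}
}
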
@ -36,7 +36,7 @@ var (
Namespace: "cortex",
Name: "chunk_store_index_lookups_per_query",
Help: "Distribution of #index lookups per query.",
Buckets: prometheus.DefBuckets,
Buckets: prometheus.ExponentialBuckets(1, 2, 5),
})
preIntersectionPerQuery = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: "cortex",

@ -188,8 +188,16 @@ func (c *Config) Validate(log log.Logger) error {
return errors.Wrap(err, "invalid query_range config")
}
if err := c.TableManager.Validate(); err != nil {
return errors.Wrap(err, "invalid table_manager config")
return errors.Wrap(err, "invalid table-manager config")
}
if err := c.StoreGateway.Validate(c.LimitsConfig); err != nil {
return errors.Wrap(err, "invalid store-gateway config")
}
if c.Storage.Engine == storage.StorageEngineBlocks && c.Querier.SecondStoreEngine != storage.StorageEngineChunks && len(c.Schema.Configs) > 0 {
level.Warn(log).Log("schema configuration is not used by the blocks storage engine, and will have no effect")
}
return nil
}

@ -72,7 +72,7 @@ func (t *Cortex) initAPI() (services.Service, error) {
t.Cfg.API.ServerPrefix = t.Cfg.Server.PathPrefix
t.Cfg.API.LegacyHTTPPrefix = t.Cfg.HTTPPrefix
a, err := api.New(t.Cfg.API, t.Server, util.Logger)
a, err := api.New(t.Cfg.API, t.Cfg.Server, t.Server, util.Logger)
if err != nil {
return nil, err
}
@ -112,7 +112,6 @@ func (t *Cortex) initServer() (services.Service, error) {
func (t *Cortex) initRing() (serv services.Service, err error) {
t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.RuntimeConfig)
t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Ring, err = ring.New(t.Cfg.Ingester.LifecyclerConfig.RingConfig, "ingester", ring.IngesterRingKey, prometheus.DefaultRegisterer)
if err != nil {
return nil, err
@ -153,7 +152,6 @@ func (t *Cortex) initOverrides() (serv services.Service, err error) {
func (t *Cortex) initDistributor() (serv services.Service, err error) {
t.Cfg.Distributor.DistributorRing.ListenPort = t.Cfg.Server.GRPCListenPort
t.Cfg.Distributor.DistributorRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
// Check whether the distributor can join the distributors ring, which is
// whenever it's not running as an internal dependency (ie. querier or
@ -297,7 +295,6 @@ func (t *Cortex) tsdbIngesterConfig() {
func (t *Cortex) initIngester() (serv services.Service, err error) {
t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.RuntimeConfig)
t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Cfg.Ingester.LifecyclerConfig.ListenPort = t.Cfg.Server.GRPCListenPort
t.Cfg.Ingester.ShardByAllLabels = t.Cfg.Distributor.ShardByAllLabels
t.tsdbIngesterConfig()
@ -504,7 +501,6 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
}
t.Cfg.Ruler.Ring.ListenPort = t.Cfg.Server.GRPCListenPort
t.Cfg.Ruler.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer)
queryable, engine := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, rulerRegisterer)
@ -557,7 +553,6 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) {
func (t *Cortex) initCompactor() (serv services.Service, err error) {
t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort
t.Cfg.Compactor.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util.Logger, prometheus.DefaultRegisterer)
if err != nil {
@ -575,7 +570,6 @@ func (t *Cortex) initStoreGateway() (serv services.Service, err error) {
}
t.Cfg.StoreGateway.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort
t.Cfg.StoreGateway.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.StoreGateway, err = storegateway.NewStoreGateway(t.Cfg.StoreGateway, t.Cfg.BlocksStorage, t.Overrides, t.Cfg.Server.LogLevel, util.Logger, prometheus.DefaultRegisterer)
if err != nil {
@ -594,6 +588,14 @@ func (t *Cortex) initMemberlistKV() (services.Service, error) {
ring.GetCodec(),
}
t.MemberlistKV = memberlist.NewKVInitService(&t.Cfg.MemberlistKV)
// Update the config.
t.Cfg.Distributor.DistributorRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Cfg.StoreGateway.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Cfg.Compactor.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.Cfg.Ruler.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
return t.MemberlistKV, nil
}
@ -655,15 +657,15 @@ func (t *Cortex) setupModuleManager() error {
Store: {Overrides, DeleteRequestsStore},
Ingester: {Overrides, Store, API, RuntimeConfig, MemberlistKV},
Flusher: {Store, API},
Querier: {Overrides, Distributor, Store, Ring, API, StoreQueryable},
Querier: {Overrides, Distributor, Store, Ring, API, StoreQueryable, MemberlistKV},
StoreQueryable: {Overrides, Store},
QueryFrontend: {API, Overrides, DeleteRequestsStore},
TableManager: {API},
Ruler: {Overrides, Distributor, Store, StoreQueryable, RulerStorage},
Configs: {API},
AlertManager: {API},
Compactor: {API},
StoreGateway: {API, Overrides},
Compactor: {API, MemberlistKV},
StoreGateway: {API, Overrides, MemberlistKV},
Purger: {Store, DeleteRequestsStore, API},
All: {QueryFrontend, Querier, Ingester, Distributor, TableManager, Purger, StoreGateway, Ruler},
}

@ -364,6 +364,7 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie
if err != nil {
return nil, err
}
source := util.GetSourceIPsFromOutgoingCtx(ctx)
var firstPartialErr error
removeReplica := false
@ -538,6 +539,10 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie
if sp := opentracing.SpanFromContext(ctx); sp != nil {
localCtx = opentracing.ContextWithSpan(localCtx, sp)
}
// Get clientIP(s) from Context and add it to localCtx
localCtx = util.AddSourceIPsToOutgoingContext(localCtx, source)
return d.send(localCtx, ingester, timeseries, metadata, req.Source)
}, func() { client.ReuseSlice(req.Timeseries) })
if err != nil {

@ -94,7 +94,7 @@ type HATrackerConfig struct {
// more than this duration
FailoverTimeout time.Duration `yaml:"ha_tracker_failover_timeout"`
KVStore kv.Config
KVStore kv.Config `yaml:"kvstore" doc:"description=Backend storage to use for the ring. Please be aware that memberlist is not supported by the HA tracker since gossip propagation is too slow for HA purposes."`
}
// RegisterFlags adds the flags required to config this to the given FlagSet.

@ -432,11 +432,6 @@ func (i *Ingester) Push(ctx context.Context, req *client.WriteRequest) (*client.
}
}
if firstPartialErr != nil {
// grpcForwardableError turns the error into a string so it no longer references `req`
return &client.WriteResponse{}, grpcForwardableError(userID, firstPartialErr.code, firstPartialErr)
}
if record != nil {
// Log the record only if there was no error in ingestion.
if err := i.wal.Log(record); err != nil {
@ -445,6 +440,11 @@ func (i *Ingester) Push(ctx context.Context, req *client.WriteRequest) (*client.
recordPool.Put(record)
}
if firstPartialErr != nil {
// grpcForwardableError turns the error into a string so it no longer references `req`
return &client.WriteResponse{}, grpcForwardableError(userID, firstPartialErr.code, firstPartialErr)
}
return &client.WriteResponse{}, nil
}

@ -179,7 +179,7 @@ func (w *walWrapper) Stop() {
}
func (w *walWrapper) Log(record *WALRecord) error {
if record == nil {
if record == nil || (len(record.Series) == 0 && len(record.Samples) == 0) {
return nil
}
select {

@ -58,7 +58,7 @@ func (s *blocksStoreBalancedSet) resolve(ctx context.Context) error {
return nil
}
func (s *blocksStoreBalancedSet) GetClientsFor(blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error) {
func (s *blocksStoreBalancedSet) GetClientsFor(_ string, blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error) {
addresses := s.dnsProvider.Addresses()
if len(addresses) == 0 {
return nil, fmt.Errorf("no address resolved for the store-gateway service addresses %s", strings.Join(s.serviceAddresses, ","))

@ -57,7 +57,7 @@ type BlocksStoreSet interface {
// GetClientsFor returns the store gateway clients that should be used to
// query the set of blocks in input. The exclude parameter is the map of
// blocks -> store-gateway addresses that should be excluded.
GetClientsFor(blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error)
GetClientsFor(userID string, blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error)
}
// BlocksFinder is the interface used to find blocks for a given user and time range.
@ -82,6 +82,7 @@ type BlocksStoreClient interface {
// BlocksStoreLimits is the interface that should be implemented by the limits provider.
type BlocksStoreLimits interface {
MaxChunksPerQuery(userID string) int
StoreGatewayTenantShardSize(userID string) int
}
type blocksStoreQueryableMetrics struct {
@ -193,7 +194,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa
reg.MustRegister(storesRing)
}
stores, err = newBlocksStoreReplicationSet(storesRing, querierCfg.StoreGatewayClient, logger, reg)
stores, err = newBlocksStoreReplicationSet(storesRing, gatewayCfg.ShardingStrategy, limits, querierCfg.StoreGatewayClient, logger, reg)
if err != nil {
return nil, errors.Wrap(err, "failed to create store set")
}
@ -368,7 +369,7 @@ func (q *blocksStoreQuerier) selectSorted(sp *storage.SelectHints, matchers ...*
for attempt := 1; attempt <= maxFetchSeriesAttempts; attempt++ {
// Find the set of store-gateway instances having the blocks. The exclude parameter is the
// map of blocks queried so far, with the list of store-gateway addresses for each block.
clients, err := q.stores.GetClientsFor(remainingBlocks, attemptedBlocks)
clients, err := q.stores.GetClientsFor(q.userID, remainingBlocks, attemptedBlocks)
if err != nil {
// If it's a retry and we get an error, it means there are no more store-gateways left
// from which running another attempt, so we're just stopping retrying.

@ -12,6 +12,7 @@ import (
"github.com/cortexproject/cortex/pkg/ring"
"github.com/cortexproject/cortex/pkg/ring/client"
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
"github.com/cortexproject/cortex/pkg/storegateway"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/services"
"github.com/cortexproject/cortex/pkg/util/tls"
@ -22,18 +23,29 @@ import (
type blocksStoreReplicationSet struct {
services.Service
storesRing *ring.Ring
clientsPool *client.Pool
storesRing *ring.Ring
clientsPool *client.Pool
shardingStrategy string
limits BlocksStoreLimits
// Subservices manager.
subservices *services.Manager
subservicesWatcher *services.FailureWatcher
}
func newBlocksStoreReplicationSet(storesRing *ring.Ring, tlsCfg tls.ClientConfig, logger log.Logger, reg prometheus.Registerer) (*blocksStoreReplicationSet, error) {
func newBlocksStoreReplicationSet(
storesRing *ring.Ring,
shardingStrategy string,
limits BlocksStoreLimits,
tlsCfg tls.ClientConfig,
logger log.Logger,
reg prometheus.Registerer,
) (*blocksStoreReplicationSet, error) {
s := &blocksStoreReplicationSet{
storesRing: storesRing,
clientsPool: newStoreGatewayClientPool(client.NewRingServiceDiscovery(storesRing), tlsCfg, logger, reg),
storesRing: storesRing,
clientsPool: newStoreGatewayClientPool(client.NewRingServiceDiscovery(storesRing), tlsCfg, logger, reg),
shardingStrategy: shardingStrategy,
limits: limits,
}
var err error
@ -72,17 +84,26 @@ func (s *blocksStoreReplicationSet) stopping(_ error) error {
return services.StopManagerAndAwaitStopped(context.Background(), s.subservices)
}
func (s *blocksStoreReplicationSet) GetClientsFor(blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error) {
func (s *blocksStoreReplicationSet) GetClientsFor(userID string, blockIDs []ulid.ULID, exclude map[ulid.ULID][]string) (map[BlocksStoreClient][]ulid.ULID, error) {
shards := map[string][]ulid.ULID{}
// If shuffle sharding is enabled, we should build a subring for the user,
// otherwise we just use the full ring.
var userRing ring.ReadRing
if s.shardingStrategy == storegateway.ShardingStrategyShuffle {
userRing = storegateway.GetShuffleShardingSubring(s.storesRing, userID, s.limits)
} else {
userRing = s.storesRing
}
// Find the replication set of each block we need to query.
for _, blockID := range blockIDs {
// Buffer internally used by the ring (give extra room for a JOINING + LEAVING instance).
// Do not reuse the same buffer across multiple Get() calls because we do retain the
// returned replication set.
buf := make([]ring.IngesterDesc, 0, s.storesRing.ReplicationFactor()+2)
buf := make([]ring.IngesterDesc, 0, userRing.ReplicationFactor()+2)
set, err := s.storesRing.Get(cortex_tsdb.HashBlockID(blockID), ring.BlocksRead, buf)
set, err := userRing.Get(cortex_tsdb.HashBlockID(blockID), ring.BlocksRead, buf)
if err != nil {
return nil, errors.Wrapf(err, "failed to get store-gateway replication set owning the block %s", blockID.String())
}

@ -137,8 +137,9 @@ func (q *distributorQuerier) streamingSelect(minT, maxT int64, matchers []*label
return storage.ErrSeriesSet(err)
}
if len(results.Timeseries) != 0 {
return newTimeSeriesSeriesSet(results.Timeseries)
sets := []storage.SeriesSet(nil)
if len(results.Timeseries) > 0 {
sets = append(sets, newTimeSeriesSeriesSet(results.Timeseries))
}
serieses := make([]storage.Series, 0, len(results.Chunkseries))
@ -156,15 +157,27 @@ func (q *distributorQuerier) streamingSelect(minT, maxT int64, matchers []*label
return storage.ErrSeriesSet(err)
}
series := &chunkSeries{
serieses = append(serieses, &chunkSeries{
labels: ls,
chunks: chunks,
chunkIteratorFunc: q.chunkIterFn,
}
serieses = append(serieses, series)
mint: minT,
maxt: maxT,
})
}
if len(serieses) > 0 {
sets = append(sets, series.NewConcreteSeriesSet(serieses))
}
return series.NewConcreteSeriesSet(serieses)
if len(sets) == 0 {
return storage.EmptySeriesSet()
}
if len(sets) == 1 {
return sets[0]
}
// Sets need to be sorted. Both series.NewConcreteSeriesSet and newTimeSeriesSeriesSet take care of that.
return storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
}
func (q *distributorQuerier) LabelValues(name string) ([]string, storage.Warnings, error) {

@ -370,31 +370,23 @@ func (q querier) mergeSeriesSets(sets []storage.SeriesSet) storage.SeriesSet {
chunks := []chunk.Chunk(nil)
for _, set := range sets {
if !set.Next() {
// nothing in this set. If it has no error, we can ignore it completely.
// If there is error, we have to report it.
err := set.Err()
if err != nil {
otherSets = append(otherSets, storage.ErrSeriesSet(err))
}
continue
}
nonChunkSeries := []storage.Series(nil)
s := set.At()
if sc, ok := s.(SeriesWithChunks); ok {
chunks = append(chunks, sc.Chunks()...)
// SeriesSet may have some series backed up by chunks, and some not.
for set.Next() {
s := set.At()
// iterate over remaining series in this set, and store chunks
// Here we assume that all remaining series in the set are also backed-up by chunks.
// If not, there will be panics.
for set.Next() {
s = set.At()
chunks = append(chunks, s.(SeriesWithChunks).Chunks()...)
if sc, ok := s.(SeriesWithChunks); ok {
chunks = append(chunks, sc.Chunks()...)
} else {
nonChunkSeries = append(nonChunkSeries, s)
}
} else {
// We already called set.Next() once, but we want to return same result from At() also
// to the query engine.
otherSets = append(otherSets, &seriesSetWithFirstSeries{set: set, firstSeries: s})
}
if err := set.Err(); err != nil {
otherSets = append(otherSets, storage.ErrSeriesSet(err))
} else if len(nonChunkSeries) > 0 {
otherSets = append(otherSets, &sliceSeriesSet{series: nonChunkSeries, ix: -1})
}
}
@ -413,42 +405,29 @@ func (q querier) mergeSeriesSets(sets []storage.SeriesSet) storage.SeriesSet {
return storage.NewMergeSeriesSet(otherSets, storage.ChainedSeriesMerge)
}
// This series set ignores first 'Next' call and simply returns cached result
// to avoid doing the work required to compute it twice.
type seriesSetWithFirstSeries struct {
firstNextCalled bool
firstSeries storage.Series
set storage.SeriesSet
type sliceSeriesSet struct {
series []storage.Series
ix int
}
func (pss *seriesSetWithFirstSeries) Next() bool {
if pss.firstNextCalled {
pss.firstSeries = nil
return pss.set.Next()
}
pss.firstNextCalled = true
return true
func (s *sliceSeriesSet) Next() bool {
s.ix++
return s.ix < len(s.series)
}
func (pss *seriesSetWithFirstSeries) At() storage.Series {
if pss.firstSeries != nil {
return pss.firstSeries
func (s *sliceSeriesSet) At() storage.Series {
if s.ix < 0 || s.ix >= len(s.series) {
return nil
}
return pss.set.At()
return s.series[s.ix]
}
func (pss *seriesSetWithFirstSeries) Err() error {
if pss.firstSeries != nil {
return nil
}
return pss.set.Err()
func (s *sliceSeriesSet) Err() error {
return nil
}
func (pss *seriesSetWithFirstSeries) Warnings() storage.Warnings {
if pss.firstSeries != nil {
return nil
}
return pss.set.Warnings()
func (s *sliceSeriesSet) Warnings() storage.Warnings {
return nil
}
type storeQueryable struct {

@ -9,6 +9,7 @@ import (
"net/url"
"sort"
"strconv"
"strings"
"time"
"github.com/gogo/protobuf/proto"
@ -38,7 +39,7 @@ var (
PrometheusCodec Codec = &prometheusCodec{}
// Name of the cache control header.
cachecontrolHeader = "Cache-Control"
cacheControlHeader = "Cache-Control"
)
// Codec is used to encode/decode query range requests and responses so they can be passed down to middlewares.
@ -72,6 +73,8 @@ type Request interface {
GetStep() int64
// GetQuery returns the query of the request.
GetQuery() string
// GetCachingOptions returns the caching options.
GetCachingOptions() CachingOptions
// WithStartEnd clone the current request with different start and end timestamp.
WithStartEnd(int64, int64) Request
// WithQuery clone the current request with a different query.
@ -205,6 +208,14 @@ func (prometheusCodec) DecodeRequest(_ context.Context, r *http.Request) (Reques
result.Query = r.FormValue("query")
result.Path = r.URL.Path
for _, value := range r.Header.Values(cacheControlHeader) {
if strings.Contains(value, noStoreValue) {
result.CachingOptions.Disabled = true
break
}
}
return &result, nil
}

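With CachingOptions threaded through the codec, a client can opt a single query out of the results cache by sending Cache-Control: no-store (the noStoreValue constant referenced above); the shouldCache predicate added in Loki's roundtrip.go consults the same CachingOptions. A hedged example against a Prometheus-compatible query-range endpoint; the frontend URL and query parameters are placeholders:

package main

import (
	"log"
	"net/http"
)

func main() {
	// Placeholder frontend address and query parameters.
	url := "http://query-frontend:8080/api/v1/query_range?query=up&start=1598918400&end=1598922000&step=15"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Fatal(err)
	}
	// "no-store" flips CachingOptions.Disabled in DecodeRequest above,
	// so the results cache neither serves nor stores this response.
	req.Header.Set("Cache-Control", "no-store")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	log.Println(resp.Status)
}
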
@ -33,12 +33,13 @@ var _ = time.Kitchen
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
type PrometheusRequest struct {
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"`
End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"`
Step int64 `protobuf:"varint,4,opt,name=step,proto3" json:"step,omitempty"`
Timeout time.Duration `protobuf:"bytes,5,opt,name=timeout,proto3,stdduration" json:"timeout"`
Query string `protobuf:"bytes,6,opt,name=query,proto3" json:"query,omitempty"`
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"`
End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"`
Step int64 `protobuf:"varint,4,opt,name=step,proto3" json:"step,omitempty"`
Timeout time.Duration `protobuf:"bytes,5,opt,name=timeout,proto3,stdduration" json:"timeout"`
Query string `protobuf:"bytes,6,opt,name=query,proto3" json:"query,omitempty"`
CachingOptions CachingOptions `protobuf:"bytes,7,opt,name=cachingOptions,proto3" json:"cachingOptions"`
}
func (m *PrometheusRequest) Reset() { *m = PrometheusRequest{} }
@ -115,6 +116,13 @@ func (m *PrometheusRequest) GetQuery() string {
return ""
}
func (m *PrometheusRequest) GetCachingOptions() CachingOptions {
if m != nil {
return m.CachingOptions
}
return CachingOptions{}
}
type PrometheusResponseHeader struct {
Name string `protobuf:"bytes,1,opt,name=Name,json=name,proto3" json:"-"`
Values []string `protobuf:"bytes,2,rep,name=Values,json=values,proto3" json:"-"`
@ -455,6 +463,49 @@ func (m *Extent) GetResponse() *types.Any {
return nil
}
type CachingOptions struct {
Disabled bool `protobuf:"varint,1,opt,name=disabled,proto3" json:"disabled,omitempty"`
}
func (m *CachingOptions) Reset() { *m = CachingOptions{} }
func (*CachingOptions) ProtoMessage() {}
func (*CachingOptions) Descriptor() ([]byte, []int) {
return fileDescriptor_79b02382e213d0b2, []int{7}
}
func (m *CachingOptions) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
}
func (m *CachingOptions) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
if deterministic {
return xxx_messageInfo_CachingOptions.Marshal(b, m, deterministic)
} else {
b = b[:cap(b)]
n, err := m.MarshalToSizedBuffer(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func (m *CachingOptions) XXX_Merge(src proto.Message) {
xxx_messageInfo_CachingOptions.Merge(m, src)
}
func (m *CachingOptions) XXX_Size() int {
return m.Size()
}
func (m *CachingOptions) XXX_DiscardUnknown() {
xxx_messageInfo_CachingOptions.DiscardUnknown(m)
}
var xxx_messageInfo_CachingOptions proto.InternalMessageInfo
func (m *CachingOptions) GetDisabled() bool {
if m != nil {
return m.Disabled
}
return false
}
func init() {
proto.RegisterType((*PrometheusRequest)(nil), "queryrange.PrometheusRequest")
proto.RegisterType((*PrometheusResponseHeader)(nil), "queryrange.PrometheusResponseHeader")
@ -463,62 +514,66 @@ func init() {
proto.RegisterType((*SampleStream)(nil), "queryrange.SampleStream")
proto.RegisterType((*CachedResponse)(nil), "queryrange.CachedResponse")
proto.RegisterType((*Extent)(nil), "queryrange.Extent")
proto.RegisterType((*CachingOptions)(nil), "queryrange.CachingOptions")
}
func init() { proto.RegisterFile("queryrange.proto", fileDescriptor_79b02382e213d0b2) }
var fileDescriptor_79b02382e213d0b2 = []byte{
// 792 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x54, 0x4f, 0x8f, 0xdb, 0x44,
0x14, 0xcf, 0xac, 0x1d, 0x67, 0x33, 0xa9, 0xd2, 0xed, 0xb4, 0x02, 0x67, 0x25, 0xec, 0xc8, 0xe2,
0x10, 0x24, 0xea, 0x48, 0x41, 0x48, 0x5c, 0x40, 0x5b, 0xd3, 0x45, 0x80, 0x10, 0xaa, 0x66, 0x2b,
0x0e, 0x5c, 0xd0, 0x24, 0x7e, 0x38, 0x6e, 0xe3, 0x3f, 0x1d, 0x8f, 0x51, 0x73, 0x40, 0x42, 0x7c,
0x02, 0x8e, 0x7c, 0x04, 0x90, 0xf8, 0x00, 0x7c, 0x03, 0x7a, 0xdc, 0x63, 0xc5, 0xc1, 0xb0, 0xd9,
0x0b, 0xf2, 0xa9, 0x1f, 0x01, 0x79, 0x66, 0x9c, 0xb8, 0x94, 0x53, 0x2f, 0x9b, 0xf7, 0x7e, 0xef,
0xdf, 0xef, 0xfd, 0x3c, 0x6f, 0xf1, 0xc9, 0x93, 0x12, 0xf8, 0x96, 0xb3, 0x34, 0x02, 0x3f, 0xe7,
0x99, 0xc8, 0x08, 0x3e, 0x20, 0xa7, 0x77, 0xa3, 0x58, 0xac, 0xcb, 0xa5, 0xbf, 0xca, 0x92, 0x79,
0x94, 0x45, 0xd9, 0x5c, 0xa6, 0x2c, 0xcb, 0x6f, 0xa5, 0x27, 0x1d, 0x69, 0xa9, 0xd2, 0x53, 0x27,
0xca, 0xb2, 0x68, 0x03, 0x87, 0xac, 0xb0, 0xe4, 0x4c, 0xc4, 0x59, 0xaa, 0xe3, 0x67, 0x9d, 0x76,
0xab, 0x8c, 0x0b, 0x78, 0x9a, 0xf3, 0xec, 0x11, 0xac, 0x84, 0xf6, 0xe6, 0xf9, 0xe3, 0x68, 0x1e,
0xa7, 0x11, 0x14, 0x02, 0xf8, 0x7c, 0xb5, 0x89, 0x21, 0x6d, 0x43, 0xba, 0xc3, 0xe4, 0xbf, 0x13,
0x58, 0xba, 0x55, 0x21, 0xef, 0x77, 0x84, 0x6f, 0x3d, 0xe0, 0x59, 0x02, 0x62, 0x0d, 0x65, 0x41,
0xe1, 0x49, 0x09, 0x85, 0x20, 0x04, 0x9b, 0x39, 0x13, 0x6b, 0x1b, 0x4d, 0xd1, 0x6c, 0x48, 0xa5,
0x4d, 0xee, 0xe0, 0x7e, 0x21, 0x18, 0x17, 0xf6, 0xd1, 0x14, 0xcd, 0x0c, 0xaa, 0x1c, 0x72, 0x82,
0x0d, 0x48, 0x43, 0xdb, 0x90, 0x58, 0x63, 0x36, 0xb5, 0x85, 0x80, 0xdc, 0x36, 0x25, 0x24, 0x6d,
0xf2, 0x21, 0x1e, 0x88, 0x38, 0x81, 0xac, 0x14, 0x76, 0x7f, 0x8a, 0x66, 0xa3, 0xc5, 0xc4, 0x57,
0x94, 0xfc, 0x96, 0x92, 0x7f, 0x5f, 0x2f, 0x1d, 0x1c, 0x3f, 0xab, 0xdc, 0xde, 0xcf, 0x7f, 0xb9,
0x88, 0xb6, 0x35, 0xcd, 0x68, 0x29, 0xaf, 0x6d, 0x49, 0x3e, 0xca, 0xf1, 0x1e, 0x62, 0xbb, 0xcb,
0xbc, 0xc8, 0xb3, 0xb4, 0x80, 0x4f, 0x81, 0x85, 0xc0, 0xc9, 0x04, 0x9b, 0x5f, 0xb2, 0x04, 0xd4,
0x02, 0x41, 0xbf, 0xae, 0x5c, 0x74, 0x97, 0x9a, 0x29, 0x4b, 0x80, 0xbc, 0x85, 0xad, 0xaf, 0xd8,
0xa6, 0x84, 0xc2, 0x3e, 0x9a, 0x1a, 0x87, 0xa0, 0xf5, 0x9d, 0x04, 0xbd, 0x5f, 0x8f, 0x30, 0x79,
0xb5, 0x2d, 0xf1, 0xb0, 0x75, 0x21, 0x98, 0x28, 0x0b, 0xdd, 0x12, 0xd7, 0x95, 0x6b, 0x15, 0x12,
0xa1, 0xfa, 0x97, 0x7c, 0x82, 0xcd, 0xfb, 0x4c, 0x30, 0x29, 0xd0, 0x68, 0x71, 0xea, 0x77, 0x1e,
0xc9, 0xa1, 0x63, 0x93, 0x11, 0xbc, 0xd1, 0xec, 0x58, 0x57, 0xee, 0x38, 0x64, 0x82, 0xbd, 0x9b,
0x25, 0xb1, 0x80, 0x24, 0x17, 0x5b, 0x6a, 0x36, 0x3e, 0x79, 0x1f, 0x0f, 0xcf, 0x39, 0xcf, 0xf8,
0xc3, 0x6d, 0x0e, 0x52, 0xd9, 0x61, 0xf0, 0x66, 0x5d, 0xb9, 0xb7, 0xa1, 0x05, 0x3b, 0x15, 0xc3,
0x3d, 0x48, 0xde, 0xc1, 0x7d, 0x59, 0x26, 0x95, 0x1f, 0x06, 0xb7, 0xeb, 0xca, 0xbd, 0x29, 0xa3,
0x9d, 0xf4, 0xbe, 0x04, 0xc8, 0x39, 0x1e, 0x28, 0xa1, 0x0a, 0xbb, 0x3f, 0x35, 0x66, 0xa3, 0xc5,
0xdb, 0xff, 0x4f, 0xf6, 0x65, 0x55, 0x5b, 0xa9, 0x06, 0x6b, 0x55, 0xeb, 0xfd, 0x88, 0xf0, 0xf8,
0xe5, 0xcd, 0x88, 0x8f, 0x31, 0x85, 0xa2, 0xdc, 0x08, 0x49, 0x5e, 0x69, 0x35, 0xae, 0x2b, 0x17,
0xf3, 0x3d, 0x4a, 0x3b, 0x36, 0x39, 0xc3, 0x96, 0xca, 0x97, 0x5f, 0x63, 0xb4, 0xb0, 0xbb, 0x44,
0x2e, 0x58, 0x92, 0x6f, 0xe0, 0x42, 0x70, 0x60, 0x49, 0x30, 0xd6, 0x9a, 0x59, 0xaa, 0x9a, 0xea,
0x5f, 0xef, 0x0f, 0x84, 0x6f, 0x74, 0x13, 0xc9, 0xf7, 0xd8, 0xda, 0xb0, 0x25, 0x6c, 0x9a, 0x4f,
0xd5, 0xb4, 0xbc, 0xe5, 0xeb, 0x63, 0xf8, 0xa2, 0x41, 0x1f, 0xb0, 0x98, 0x07, 0xb4, 0xe9, 0xf5,
0x67, 0xe5, 0xbe, 0xce, 0x69, 0xa9, 0x36, 0xf7, 0x42, 0x96, 0x0b, 0xe0, 0x0d, 0x9f, 0x04, 0x04,
0x8f, 0x57, 0x54, 0x0f, 0x25, 0x1f, 0xe0, 0x41, 0x21, 0xe9, 0x14, 0x7a, 0xa5, 0x71, 0x3b, 0x5f,
0xb1, 0x3c, 0x2c, 0xa2, 0x5e, 0x1c, 0x6d, 0xd3, 0xbd, 0x47, 0x78, 0xfc, 0x31, 0x5b, 0xad, 0x21,
0xdc, 0xbf, 0xba, 0x09, 0x36, 0x1e, 0xc3, 0x56, 0xcb, 0x38, 0xa8, 0x2b, 0xb7, 0x71, 0x69, 0xf3,
0xa7, 0x39, 0x29, 0x78, 0x2a, 0x20, 0x15, 0xed, 0x18, 0xd2, 0x55, 0xee, 0x5c, 0x86, 0x82, 0x9b,
0x7a, 0x54, 0x9b, 0x4a, 0x5b, 0xc3, 0xfb, 0x0d, 0x61, 0x4b, 0x25, 0x11, 0xb7, 0x3d, 0xec, 0x66,
0x8c, 0x11, 0x0c, 0xeb, 0xca, 0x55, 0x40, 0x7b, 0xe3, 0x13, 0x75, 0xe3, 0xf2, 0xee, 0x15, 0x0b,
0x48, 0x43, 0x75, 0xec, 0x53, 0x7c, 0x2c, 0x38, 0x5b, 0xc1, 0x37, 0x71, 0xa8, 0x9f, 0x5d, 0xfb,
0x46, 0x24, 0xfc, 0x59, 0x48, 0x3e, 0xc2, 0xc7, 0x5c, 0xaf, 0xa3, 0x6f, 0xff, 0xce, 0x2b, 0xb7,
0x7f, 0x2f, 0xdd, 0x06, 0x37, 0xea, 0xca, 0xdd, 0x67, 0xd2, 0xbd, 0xf5, 0xb9, 0x79, 0x6c, 0x9c,
0x98, 0xc1, 0xd9, 0xe5, 0x95, 0xd3, 0x7b, 0x7e, 0xe5, 0xf4, 0x5e, 0x5c, 0x39, 0xe8, 0x87, 0x9d,
0x83, 0x7e, 0xd9, 0x39, 0xe8, 0xd9, 0xce, 0x41, 0x97, 0x3b, 0x07, 0xfd, 0xbd, 0x73, 0xd0, 0x3f,
0x3b, 0xa7, 0xf7, 0x62, 0xe7, 0xa0, 0x9f, 0xae, 0x9d, 0xde, 0xe5, 0xb5, 0xd3, 0x7b, 0x7e, 0xed,
0xf4, 0xbe, 0xee, 0xfc, 0x53, 0x5e, 0x5a, 0x72, 0xda, 0x7b, 0xff, 0x06, 0x00, 0x00, 0xff, 0xff,
0xa8, 0x2f, 0xc9, 0x09, 0xbb, 0x05, 0x00, 0x00,
// 838 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x55, 0x4f, 0x8f, 0xdb, 0x44,
0x14, 0x8f, 0x37, 0x8e, 0x93, 0xbc, 0xad, 0xd2, 0xed, 0xb4, 0x02, 0x67, 0x25, 0xec, 0xc8, 0xe2,
0xb0, 0x48, 0x6d, 0x22, 0x2d, 0x42, 0xe2, 0x02, 0xda, 0x9a, 0x2e, 0x2a, 0x08, 0x41, 0x35, 0x5b,
0x71, 0xe0, 0x82, 0x26, 0xf1, 0xc3, 0x71, 0x1b, 0xff, 0xe9, 0x78, 0x8c, 0x9a, 0x03, 0x12, 0xea,
0x27, 0xe0, 0xc8, 0x47, 0x00, 0x89, 0xef, 0x41, 0x8f, 0x7b, 0xac, 0x38, 0x18, 0x36, 0x7b, 0x41,
0x3e, 0xf5, 0x23, 0xa0, 0x99, 0xb1, 0x13, 0xef, 0x96, 0x53, 0x2f, 0xf1, 0xfb, 0xff, 0x7e, 0xef,
0x37, 0x33, 0x2f, 0x70, 0xf0, 0xac, 0x40, 0xbe, 0xe6, 0x2c, 0x09, 0x71, 0x9a, 0xf1, 0x54, 0xa4,
0x04, 0x76, 0x96, 0xc3, 0x7b, 0x61, 0x24, 0x96, 0xc5, 0x7c, 0xba, 0x48, 0xe3, 0x59, 0x98, 0x86,
0xe9, 0x4c, 0x85, 0xcc, 0x8b, 0x1f, 0x94, 0xa6, 0x14, 0x25, 0xe9, 0xd4, 0x43, 0x27, 0x4c, 0xd3,
0x70, 0x85, 0xbb, 0xa8, 0xa0, 0xe0, 0x4c, 0x44, 0x69, 0x52, 0xfb, 0x4f, 0x5a, 0xe5, 0x16, 0x29,
0x17, 0xf8, 0x3c, 0xe3, 0xe9, 0x13, 0x5c, 0x88, 0x5a, 0x9b, 0x65, 0x4f, 0xc3, 0x59, 0x94, 0x84,
0x98, 0x0b, 0xe4, 0xb3, 0xc5, 0x2a, 0xc2, 0xa4, 0x71, 0xd5, 0x15, 0xc6, 0xd7, 0x3b, 0xb0, 0x64,
0xad, 0x5d, 0xde, 0x8b, 0x3d, 0xb8, 0xf5, 0x88, 0xa7, 0x31, 0x8a, 0x25, 0x16, 0x39, 0xc5, 0x67,
0x05, 0xe6, 0x82, 0x10, 0x30, 0x33, 0x26, 0x96, 0xb6, 0x31, 0x31, 0x8e, 0x86, 0x54, 0xc9, 0xe4,
0x0e, 0xf4, 0x72, 0xc1, 0xb8, 0xb0, 0xf7, 0x26, 0xc6, 0x51, 0x97, 0x6a, 0x85, 0x1c, 0x40, 0x17,
0x93, 0xc0, 0xee, 0x2a, 0x9b, 0x14, 0x65, 0x6e, 0x2e, 0x30, 0xb3, 0x4d, 0x65, 0x52, 0x32, 0xf9,
0x04, 0xfa, 0x22, 0x8a, 0x31, 0x2d, 0x84, 0xdd, 0x9b, 0x18, 0x47, 0xfb, 0xc7, 0xe3, 0xa9, 0x86,
0x34, 0x6d, 0x20, 0x4d, 0x1f, 0xd4, 0x43, 0xfb, 0x83, 0x97, 0xa5, 0xdb, 0xf9, 0xf5, 0x6f, 0xd7,
0xa0, 0x4d, 0x8e, 0x6c, 0xad, 0xe8, 0xb5, 0x2d, 0x85, 0x47, 0x2b, 0xe4, 0x21, 0x8c, 0x16, 0x6c,
0xb1, 0x8c, 0x92, 0xf0, 0x9b, 0x4c, 0x66, 0xe6, 0x76, 0x5f, 0xd5, 0x3e, 0x9c, 0xb6, 0x4e, 0xe7,
0xb3, 0x2b, 0x11, 0xbe, 0x29, 0x8b, 0xd3, 0x6b, 0x79, 0xde, 0x63, 0xb0, 0xdb, 0x1c, 0xe4, 0x59,
0x9a, 0xe4, 0xf8, 0x10, 0x59, 0x80, 0x9c, 0x8c, 0xc1, 0xfc, 0x9a, 0xc5, 0xa8, 0xa9, 0xf0, 0x7b,
0x55, 0xe9, 0x1a, 0xf7, 0xa8, 0x99, 0xb0, 0x18, 0xc9, 0x7b, 0x60, 0x7d, 0xcb, 0x56, 0x05, 0xe6,
0xf6, 0xde, 0xa4, 0xbb, 0x73, 0x5a, 0x3f, 0x2a, 0xa3, 0xf7, 0xfb, 0x1e, 0x90, 0x37, 0xcb, 0x12,
0x0f, 0xac, 0x33, 0xc1, 0x44, 0x91, 0xd7, 0x25, 0xa1, 0x2a, 0x5d, 0x2b, 0x57, 0x16, 0x5a, 0x7f,
0xc9, 0xe7, 0x60, 0x3e, 0x60, 0x82, 0x29, 0xaa, 0xaf, 0x0d, 0xb4, 0xab, 0x28, 0x23, 0xfc, 0x77,
0xe4, 0x40, 0x55, 0xe9, 0x8e, 0x02, 0x26, 0xd8, 0xdd, 0x34, 0x8e, 0x04, 0xc6, 0x99, 0x58, 0x53,
0x53, 0xea, 0xe4, 0x23, 0x18, 0x9e, 0x72, 0x9e, 0xf2, 0xc7, 0xeb, 0x0c, 0xd5, 0x19, 0x0d, 0xfd,
0x77, 0xab, 0xd2, 0xbd, 0x8d, 0x8d, 0xb1, 0x95, 0x31, 0xdc, 0x1a, 0xc9, 0x07, 0xd0, 0x53, 0x69,
0xea, 0x0c, 0x87, 0xfe, 0xed, 0xaa, 0x74, 0x6f, 0x2a, 0x6f, 0x2b, 0xbc, 0xa7, 0x0c, 0xe4, 0x14,
0xfa, 0x9a, 0xa8, 0xdc, 0xee, 0x4d, 0xba, 0x47, 0xfb, 0xc7, 0xef, 0xff, 0x3f, 0xd8, 0xab, 0xac,
0x36, 0x54, 0xf5, 0x97, 0x3a, 0xd7, 0x7b, 0x61, 0xc0, 0xe8, 0xea, 0x64, 0x64, 0x0a, 0x40, 0x31,
0x2f, 0x56, 0x42, 0x81, 0xd7, 0x5c, 0x8d, 0xaa, 0xd2, 0x05, 0xbe, 0xb5, 0xd2, 0x96, 0x4c, 0x4e,
0xc0, 0xd2, 0xf1, 0xea, 0x34, 0xf6, 0x8f, 0xed, 0x36, 0x90, 0x33, 0x16, 0x67, 0x2b, 0x3c, 0x13,
0x1c, 0x59, 0xec, 0x8f, 0x6a, 0xce, 0x2c, 0x9d, 0x4d, 0xeb, 0xaf, 0xf7, 0xa7, 0x01, 0x37, 0xda,
0x81, 0xe4, 0x27, 0xb0, 0x56, 0x6c, 0x8e, 0x2b, 0x79, 0x54, 0xb2, 0xe4, 0xad, 0x69, 0xfd, 0xac,
0xbe, 0x92, 0xd6, 0x47, 0x2c, 0xe2, 0x3e, 0x95, 0xb5, 0xfe, 0x2a, 0xdd, 0xb7, 0x79, 0xa4, 0xba,
0xcc, 0xfd, 0x80, 0x65, 0x02, 0xb9, 0xc4, 0x13, 0xa3, 0xe0, 0xd1, 0x82, 0xd6, 0x4d, 0xc9, 0xc7,
0xd0, 0xcf, 0x15, 0x9c, 0xbc, 0x1e, 0x69, 0xd4, 0xf4, 0xd7, 0x28, 0x77, 0x83, 0xe8, 0x1b, 0x47,
0x9b, 0x70, 0xef, 0x09, 0x8c, 0xe4, 0xc5, 0xc7, 0x60, 0x7b, 0xeb, 0xc6, 0xd0, 0x7d, 0x8a, 0xeb,
0x9a, 0xc6, 0x7e, 0x55, 0xba, 0x52, 0xa5, 0xf2, 0x47, 0x3e, 0x4e, 0x7c, 0x2e, 0x30, 0x11, 0x4d,
0x1b, 0xd2, 0x66, 0xee, 0x54, 0xb9, 0xfc, 0x9b, 0x75, 0xab, 0x26, 0x94, 0x36, 0x82, 0xf7, 0x87,
0x01, 0x96, 0x0e, 0x22, 0x6e, 0xb3, 0x22, 0x64, 0x9b, 0xae, 0x3f, 0xac, 0x4a, 0x57, 0x1b, 0x9a,
0x6d, 0x31, 0xd6, 0xdb, 0x42, 0x6d, 0x10, 0x8d, 0x02, 0x93, 0x40, 0xaf, 0x8d, 0x09, 0x0c, 0x04,
0x67, 0x0b, 0xfc, 0x3e, 0x0a, 0xea, 0x6b, 0xd7, 0xdc, 0x11, 0x65, 0xfe, 0x22, 0x20, 0x9f, 0xc2,
0x80, 0xd7, 0xe3, 0xd4, 0x5b, 0xe4, 0xce, 0x1b, 0x5b, 0xe4, 0x7e, 0xb2, 0xf6, 0x6f, 0x54, 0xa5,
0xbb, 0x8d, 0xa4, 0x5b, 0xe9, 0x4b, 0x73, 0xd0, 0x3d, 0x30, 0xbd, 0xbb, 0x9a, 0x9a, 0xdd, 0xeb,
0x27, 0x87, 0x30, 0x08, 0xa2, 0x9c, 0xcd, 0x57, 0x18, 0x28, 0xe0, 0x03, 0xba, 0xd5, 0xfd, 0x93,
0xf3, 0x0b, 0xa7, 0xf3, 0xea, 0xc2, 0xe9, 0xbc, 0xbe, 0x70, 0x8c, 0x9f, 0x37, 0x8e, 0xf1, 0xdb,
0xc6, 0x31, 0x5e, 0x6e, 0x1c, 0xe3, 0x7c, 0xe3, 0x18, 0xff, 0x6c, 0x1c, 0xe3, 0xdf, 0x8d, 0xd3,
0x79, 0xbd, 0x71, 0x8c, 0x5f, 0x2e, 0x9d, 0xce, 0xf9, 0xa5, 0xd3, 0x79, 0x75, 0xe9, 0x74, 0xbe,
0x6b, 0xfd, 0x19, 0xcc, 0x2d, 0x85, 0xed, 0xc3, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0x24, 0x32,
0xd7, 0x62, 0x33, 0x06, 0x00, 0x00,
}
func (this *PrometheusRequest) Equal(that interface{}) bool {
@ -558,6 +613,9 @@ func (this *PrometheusRequest) Equal(that interface{}) bool {
if this.Query != that1.Query {
return false
}
if !this.CachingOptions.Equal(&that1.CachingOptions) {
return false
}
return true
}
func (this *PrometheusResponseHeader) Equal(that interface{}) bool {
@ -767,11 +825,35 @@ func (this *Extent) Equal(that interface{}) bool {
}
return true
}
func (this *CachingOptions) Equal(that interface{}) bool {
if that == nil {
return this == nil
}
that1, ok := that.(*CachingOptions)
if !ok {
that2, ok := that.(CachingOptions)
if ok {
that1 = &that2
} else {
return false
}
}
if that1 == nil {
return this == nil
} else if this == nil {
return false
}
if this.Disabled != that1.Disabled {
return false
}
return true
}
func (this *PrometheusRequest) GoString() string {
if this == nil {
return "nil"
}
s := make([]string, 0, 10)
s := make([]string, 0, 11)
s = append(s, "&queryrange.PrometheusRequest{")
s = append(s, "Path: "+fmt.Sprintf("%#v", this.Path)+",\n")
s = append(s, "Start: "+fmt.Sprintf("%#v", this.Start)+",\n")
@ -779,6 +861,7 @@ func (this *PrometheusRequest) GoString() string {
s = append(s, "Step: "+fmt.Sprintf("%#v", this.Step)+",\n")
s = append(s, "Timeout: "+fmt.Sprintf("%#v", this.Timeout)+",\n")
s = append(s, "Query: "+fmt.Sprintf("%#v", this.Query)+",\n")
s = append(s, "CachingOptions: "+strings.Replace(this.CachingOptions.GoString(), `&`, ``, 1)+",\n")
s = append(s, "}")
return strings.Join(s, "")
}
@ -875,6 +958,16 @@ func (this *Extent) GoString() string {
s = append(s, "}")
return strings.Join(s, "")
}
func (this *CachingOptions) GoString() string {
if this == nil {
return "nil"
}
s := make([]string, 0, 5)
s = append(s, "&queryrange.CachingOptions{")
s = append(s, "Disabled: "+fmt.Sprintf("%#v", this.Disabled)+",\n")
s = append(s, "}")
return strings.Join(s, "")
}
func valueToGoStringQueryrange(v interface{}, typ string) string {
rv := reflect.ValueOf(v)
if rv.IsNil() {
@ -903,6 +996,16 @@ func (m *PrometheusRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
_ = i
var l int
_ = l
{
size, err := m.CachingOptions.MarshalToSizedBuffer(dAtA[:i])
if err != nil {
return 0, err
}
i -= size
i = encodeVarintQueryrange(dAtA, i, uint64(size))
}
i--
dAtA[i] = 0x3a
if len(m.Query) > 0 {
i -= len(m.Query)
copy(dAtA[i:], m.Query)
@ -910,12 +1013,12 @@ func (m *PrometheusRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i--
dAtA[i] = 0x32
}
n1, err1 := github_com_gogo_protobuf_types.StdDurationMarshalTo(m.Timeout, dAtA[i-github_com_gogo_protobuf_types.SizeOfStdDuration(m.Timeout):])
if err1 != nil {
return 0, err1
n2, err2 := github_com_gogo_protobuf_types.StdDurationMarshalTo(m.Timeout, dAtA[i-github_com_gogo_protobuf_types.SizeOfStdDuration(m.Timeout):])
if err2 != nil {
return 0, err2
}
i -= n1
i = encodeVarintQueryrange(dAtA, i, uint64(n1))
i -= n2
i = encodeVarintQueryrange(dAtA, i, uint64(n2))
i--
dAtA[i] = 0x2a
if m.Step != 0 {
@ -1241,6 +1344,39 @@ func (m *Extent) MarshalToSizedBuffer(dAtA []byte) (int, error) {
return len(dAtA) - i, nil
}
func (m *CachingOptions) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
n, err := m.MarshalToSizedBuffer(dAtA[:size])
if err != nil {
return nil, err
}
return dAtA[:n], nil
}
func (m *CachingOptions) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
func (m *CachingOptions) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
_ = i
var l int
_ = l
if m.Disabled {
i--
if m.Disabled {
dAtA[i] = 1
} else {
dAtA[i] = 0
}
i--
dAtA[i] = 0x8
}
return len(dAtA) - i, nil
}
func encodeVarintQueryrange(dAtA []byte, offset int, v uint64) int {
offset -= sovQueryrange(v)
base := offset
@ -1277,6 +1413,8 @@ func (m *PrometheusRequest) Size() (n int) {
if l > 0 {
n += 1 + l + sovQueryrange(uint64(l))
}
l = m.CachingOptions.Size()
n += 1 + l + sovQueryrange(uint64(l))
return n
}
@ -1410,6 +1548,18 @@ func (m *Extent) Size() (n int) {
return n
}
func (m *CachingOptions) Size() (n int) {
if m == nil {
return 0
}
var l int
_ = l
if m.Disabled {
n += 2
}
return n
}
func sovQueryrange(x uint64) (n int) {
return (math_bits.Len64(x|1) + 6) / 7
}
@ -1427,6 +1577,7 @@ func (this *PrometheusRequest) String() string {
`Step:` + fmt.Sprintf("%v", this.Step) + `,`,
`Timeout:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Timeout), "Duration", "duration.Duration", 1), `&`, ``, 1) + `,`,
`Query:` + fmt.Sprintf("%v", this.Query) + `,`,
`CachingOptions:` + strings.Replace(strings.Replace(this.CachingOptions.String(), "CachingOptions", "CachingOptions", 1), `&`, ``, 1) + `,`,
`}`,
}, "")
return s
@ -1522,6 +1673,16 @@ func (this *Extent) String() string {
}, "")
return s
}
func (this *CachingOptions) String() string {
if this == nil {
return "nil"
}
s := strings.Join([]string{`&CachingOptions{`,
`Disabled:` + fmt.Sprintf("%v", this.Disabled) + `,`,
`}`,
}, "")
return s
}
func valueToStringQueryrange(v interface{}) string {
rv := reflect.ValueOf(v)
if rv.IsNil() {
@ -1713,6 +1874,39 @@ func (m *PrometheusRequest) Unmarshal(dAtA []byte) error {
}
m.Query = string(dAtA[iNdEx:postIndex])
iNdEx = postIndex
case 7:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field CachingOptions", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowQueryrange
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
msglen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
if msglen < 0 {
return ErrInvalidLengthQueryrange
}
postIndex := iNdEx + msglen
if postIndex < 0 {
return ErrInvalidLengthQueryrange
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
if err := m.CachingOptions.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
default:
iNdEx = preIndex
skippy, err := skipQueryrange(dAtA[iNdEx:])
@ -2588,6 +2782,79 @@ func (m *Extent) Unmarshal(dAtA []byte) error {
}
return nil
}
func (m *CachingOptions) Unmarshal(dAtA []byte) error {
l := len(dAtA)
iNdEx := 0
for iNdEx < l {
preIndex := iNdEx
var wire uint64
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowQueryrange
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
if wireType == 4 {
return fmt.Errorf("proto: CachingOptions: wiretype end group for non-group")
}
if fieldNum <= 0 {
return fmt.Errorf("proto: CachingOptions: illegal tag %d (wire type %d)", fieldNum, wire)
}
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Disabled", wireType)
}
var v int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowQueryrange
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
v |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
m.Disabled = bool(v != 0)
default:
iNdEx = preIndex
skippy, err := skipQueryrange(dAtA[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthQueryrange
}
if (iNdEx + skippy) < 0 {
return ErrInvalidLengthQueryrange
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
iNdEx += skippy
}
}
if iNdEx > l {
return io.ErrUnexpectedEOF
}
return nil
}
func skipQueryrange(dAtA []byte) (n int, err error) {
l := len(dAtA)
iNdEx := 0

@ -19,6 +19,7 @@ message PrometheusRequest {
int64 step = 4;
google.protobuf.Duration timeout = 5 [(gogoproto.stdduration) = true, (gogoproto.nullable) = false];
string query = 6;
CachingOptions cachingOptions = 7 [(gogoproto.nullable) = false];
}
message PrometheusResponseHeader {
@ -59,3 +60,7 @@ message Extent {
string trace_id = 4 [(gogoproto.jsontag) = "-"];
google.protobuf.Any response = 5 [(gogoproto.jsontag) = "response"];
}
message CachingOptions {
bool disabled = 1;
}
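
The new cachingOptions field gives each request an explicit switch to opt out of the results cache. A minimal sketch of how a caller might set it, using the generated Go types above (timestamps, step and query are placeholder values):

```go
package example

import (
	"github.com/cortexproject/cortex/pkg/querier/queryrange"
)

// newUncachedRequest builds a range request that opts out of the results cache.
// Start/End/Step are milliseconds, matching the existing PrometheusRequest fields.
func newUncachedRequest() *queryrange.PrometheusRequest {
	return &queryrange.PrometheusRequest{
		Path:  "/api/v1/query_range",
		Start: 1596000000000,
		End:   1596003600000,
		Step:  60000,
		Query: `sum(rate(http_requests_total[5m]))`,
		CachingOptions: queryrange.CachingOptions{
			Disabled: true, // the results cache middleware skips this request
		},
	}
}
```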

@ -27,8 +27,8 @@ import (
)
var (
// Value that cachecontrolHeader has if the response indicates that the results should not be cached.
noCacheValue = "no-store"
// Value that cacheControlHeader has if the response indicates that the results should not be cached.
noStoreValue = "no-store"
// ResultsCacheGenNumberHeaderName holds name of the header we want to set in http response
ResultsCacheGenNumberHeaderName = "Results-Cache-Gen-Number"
@ -102,6 +102,10 @@ func (t constSplitter) GenerateCacheKey(userID string, r Request) string {
return fmt.Sprintf("%s:%s:%d:%d", userID, r.GetQuery(), r.GetStep(), currentInterval)
}
// ShouldCacheFn checks whether the current request should go to the cache
// or not. If not, the request is just sent to the next handler.
type ShouldCacheFn func(r Request) bool
type resultsCache struct {
logger log.Logger
cfg ResultsCacheConfig
@ -113,6 +117,7 @@ type resultsCache struct {
extractor Extractor
merger Merger
cacheGenNumberLoader CacheGenNumberLoader
shouldCache ShouldCacheFn
}
// NewResultsCacheMiddleware creates results cache middleware from config.
@ -129,6 +134,7 @@ func NewResultsCacheMiddleware(
merger Merger,
extractor Extractor,
cacheGenNumberLoader CacheGenNumberLoader,
shouldCache ShouldCacheFn,
reg prometheus.Registerer,
) (Middleware, cache.Cache, error) {
c, err := cache.New(cfg.CacheConfig, reg, logger)
@ -151,6 +157,7 @@ func NewResultsCacheMiddleware(
extractor: extractor,
splitter: splitter,
cacheGenNumberLoader: cacheGenNumberLoader,
shouldCache: shouldCache,
}
}), c, nil
}
@ -161,6 +168,10 @@ func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) {
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
}
if s.shouldCache != nil && !s.shouldCache(r) {
return s.next.Do(ctx, r)
}
if s.cacheGenNumberLoader != nil {
ctx = cache.InjectCacheGenNumber(ctx, s.cacheGenNumberLoader.GetResultsCacheGenNumber(userID))
}
@ -201,10 +212,10 @@ func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) {
// shouldCacheResponse says whether the response should be cached or not.
func (s resultsCache) shouldCacheResponse(ctx context.Context, r Response) bool {
headerValues := getHeaderValuesWithName(r, cachecontrolHeader)
headerValues := getHeaderValuesWithName(r, cacheControlHeader)
for _, v := range headerValues {
if v == noCacheValue {
level.Debug(s.logger).Log("msg", fmt.Sprintf("%s header in response is equal to %s, not caching the response", cachecontrolHeader, noCacheValue))
if v == noStoreValue {
level.Debug(s.logger).Log("msg", fmt.Sprintf("%s header in response is equal to %s, not caching the response", cacheControlHeader, noStoreValue))
return false
}
}
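
The ShouldCacheFn hook makes the skip decision pluggable. A sketch of a custom implementation that honours the new per-request CachingOptions and additionally avoids caching queries ending within the last 10 minutes (the cut-off is an invented example, not something the middleware requires); it would be passed as the shouldCache argument to NewResultsCacheMiddleware:

```go
package example

import (
	"time"

	"github.com/cortexproject/cortex/pkg/querier/queryrange"
)

// shouldCache skips the cache when the request opts out via CachingOptions or
// when the query end time is recent enough that results are still changing.
func shouldCache(r queryrange.Request) bool {
	if r.GetCachingOptions().Disabled {
		return false
	}
	end := time.Unix(0, r.GetEnd()*int64(time.Millisecond)) // GetEnd is in milliseconds
	return time.Since(end) > 10*time.Minute
}
```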

@ -154,12 +154,16 @@ func NewTripperware(
queryRangeMiddleware = append(queryRangeMiddleware, InstrumentMiddleware("step_align", metrics), StepAlignMiddleware)
}
if cfg.SplitQueriesByInterval != 0 {
queryRangeMiddleware = append(queryRangeMiddleware, InstrumentMiddleware("split_by_interval", metrics), SplitByIntervalMiddleware(cfg.SplitQueriesByInterval, limits, codec, registerer))
staticIntervalFn := func(_ Request) time.Duration { return cfg.SplitQueriesByInterval }
queryRangeMiddleware = append(queryRangeMiddleware, InstrumentMiddleware("split_by_interval", metrics), SplitByIntervalMiddleware(staticIntervalFn, limits, codec, registerer))
}
var c cache.Cache
if cfg.CacheResults {
queryCacheMiddleware, cache, err := NewResultsCacheMiddleware(log, cfg.ResultsCacheConfig, constSplitter(cfg.SplitQueriesByInterval), limits, codec, cacheExtractor, cacheGenNumberLoader, registerer)
shouldCache := func(r Request) bool {
return !r.GetCachingOptions().Disabled
}
queryCacheMiddleware, cache, err := NewResultsCacheMiddleware(log, cfg.ResultsCacheConfig, constSplitter(cfg.SplitQueriesByInterval), limits, codec, cacheExtractor, cacheGenNumberLoader, shouldCache, registerer)
if err != nil {
return nil, nil, err
}

@ -8,8 +8,10 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
)
type IntervalFn func(r Request) time.Duration
// SplitByIntervalMiddleware creates a new Middleware that splits requests by a given interval.
func SplitByIntervalMiddleware(interval time.Duration, limits Limits, merger Merger, registerer prometheus.Registerer) Middleware {
func SplitByIntervalMiddleware(interval IntervalFn, limits Limits, merger Merger, registerer prometheus.Registerer) Middleware {
return MiddlewareFunc(func(next Handler) Handler {
return splitByInterval{
next: next,
@ -29,7 +31,7 @@ type splitByInterval struct {
next Handler
limits Limits
merger Merger
interval time.Duration
interval IntervalFn
// Metrics.
splitByCounter prometheus.Counter
@ -38,7 +40,7 @@ type splitByInterval struct {
func (s splitByInterval) Do(ctx context.Context, r Request) (Response, error) {
// First we're going to build new requests, one for each day, taking care
// to line up the boundaries with step.
reqs := splitQuery(r, s.interval)
reqs := splitQuery(r, s.interval(r))
s.splitByCounter.Add(float64(len(reqs)))
reqResps, err := DoRequests(ctx, s.next, reqs, s.limits)
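
Turning the split interval into a function lets the caller vary the split size per request. A sketch of one possible IntervalFn (the thresholds are invented for illustration; the roundtrip change above simply wraps the static config value):

```go
package example

import (
	"time"

	"github.com/cortexproject/cortex/pkg/querier/queryrange"
)

// dynamicInterval widens the split interval for very long range queries so a
// 30-day query is not fanned out into 30 sub-requests.
func dynamicInterval(r queryrange.Request) time.Duration {
	queryRange := time.Duration(r.GetEnd()-r.GetStart()) * time.Millisecond
	if queryRange > 7*24*time.Hour {
		return 7 * 24 * time.Hour // split week by week
	}
	return 24 * time.Hour // split day by day
}
```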

@ -381,11 +381,18 @@ func (l *BasicLifecycler) updateInstance(ctx context.Context, update func(*Desc,
instanceDesc = ringDesc.AddIngester(l.cfg.ID, l.cfg.Addr, l.cfg.Zone, l.GetTokens(), l.GetState())
}
prevTimestamp := instanceDesc.Timestamp
changed := update(ringDesc, &instanceDesc)
if ok && !changed {
return nil, false, nil
}
// Memberlist requires that the timestamp always change, so we update it here unless
// it was already updated by the callback function.
if instanceDesc.Timestamp == prevTimestamp {
instanceDesc.Timestamp = time.Now().Unix()
}
ringDesc.Ingesters[l.cfg.ID] = instanceDesc
return ringDesc, true, nil
})

@ -2,12 +2,15 @@ package etcd
import (
"context"
"crypto/tls"
"flag"
"fmt"
"time"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/pkg/transport"
"github.com/cortexproject/cortex/pkg/ring/kv/codec"
"github.com/cortexproject/cortex/pkg/util"
@ -16,9 +19,14 @@ import (
// Config for a new etcd.Client.
type Config struct {
Endpoints []string `yaml:"endpoints"`
DialTimeout time.Duration `yaml:"dial_timeout"`
MaxRetries int `yaml:"max_retries"`
Endpoints []string `yaml:"endpoints"`
DialTimeout time.Duration `yaml:"dial_timeout"`
MaxRetries int `yaml:"max_retries"`
EnableTLS bool `yaml:"tls_enabled"`
CertFile string `yaml:"tls_cert_path"`
KeyFile string `yaml:"tls_key_path"`
TrustedCAFile string `yaml:"tls_ca_path"`
InsecureSkipVerify bool `yaml:"tls_insecure_skip_verify"`
}
// Client implements ring.KVClient for etcd.
@ -34,10 +42,33 @@ func (cfg *Config) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
f.Var((*flagext.StringSlice)(&cfg.Endpoints), prefix+"etcd.endpoints", "The etcd endpoints to connect to.")
f.DurationVar(&cfg.DialTimeout, prefix+"etcd.dial-timeout", 10*time.Second, "The dial timeout for the etcd connection.")
f.IntVar(&cfg.MaxRetries, prefix+"etcd.max-retries", 10, "The maximum number of retries to do for failed ops.")
f.BoolVar(&cfg.EnableTLS, prefix+"etcd.tls-enabled", false, "Enable TLS.")
f.StringVar(&cfg.CertFile, prefix+"etcd.tls-cert-path", "", "The TLS certificate file path.")
f.StringVar(&cfg.KeyFile, prefix+"etcd.tls-key-path", "", "The TLS private key file path.")
f.StringVar(&cfg.TrustedCAFile, prefix+"etcd.tls-ca-path", "", "The trusted CA file path.")
f.BoolVar(&cfg.InsecureSkipVerify, prefix+"etcd.tls-insecure-skip-verify", false, "Skip validating server certificate.")
}
// GetTLS builds the TLS configuration from the configured certificate files, or returns nil when TLS is disabled.
func (cfg *Config) GetTLS() (*tls.Config, error) {
if !cfg.EnableTLS {
return nil, nil
}
tlsInfo := &transport.TLSInfo{
CertFile: cfg.CertFile,
KeyFile: cfg.KeyFile,
TrustedCAFile: cfg.TrustedCAFile,
InsecureSkipVerify: cfg.InsecureSkipVerify,
}
return tlsInfo.ClientConfig()
}
// New makes a new Client.
func New(cfg Config, codec codec.Codec) (*Client, error) {
tlsConfig, err := cfg.GetTLS()
if err != nil {
return nil, errors.Wrapf(err, "unable to initialise TLS configuration for etcd")
}
cli, err := clientv3.New(clientv3.Config{
Endpoints: cfg.Endpoints,
DialTimeout: cfg.DialTimeout,
@ -58,6 +89,7 @@ func New(cfg Config, codec codec.Codec) (*Client, error) {
DialKeepAliveTime: 10 * time.Second,
DialKeepAliveTimeout: 2 * cfg.DialTimeout,
PermitWithoutStream: true,
TLS: tlsConfig,
})
if err != nil {
return nil, err
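
The new TLS options are plain config fields, so enabling a secure etcd connection is a matter of pointing them at the certificate files. A sketch (endpoint and paths are placeholders):

```go
package example

import (
	"time"

	"github.com/cortexproject/cortex/pkg/ring/kv/codec"
	"github.com/cortexproject/cortex/pkg/ring/kv/etcd"
)

// newSecureEtcdClient wires the TLS fields added above into a client.
func newSecureEtcdClient(c codec.Codec) (*etcd.Client, error) {
	cfg := etcd.Config{
		Endpoints:     []string{"etcd.example.com:2379"},
		DialTimeout:   10 * time.Second,
		MaxRetries:    10,
		EnableTLS:     true,
		CertFile:      "/etc/etcd/certs/client.crt",
		KeyFile:       "/etc/etcd/certs/client.key",
		TrustedCAFile: "/etc/etcd/certs/ca.crt",
	}
	return etcd.New(cfg, c)
}
```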

@ -227,9 +227,10 @@ func (i *Lifecycler) CheckReady(ctx context.Context) error {
}
if err := ringDesc.Ready(time.Now(), i.cfg.RingConfig.HeartbeatTimeout); err != nil {
level.Warn(util.Logger).Log("msg", "found an existing ingester(s) with a problem in the ring, "+
"this ingester cannot complete joining and become ready until this problem is resolved. "+
"The /ring http endpoint on the distributor (or single binary) provides visibility into the ring.", "err", err)
level.Warn(util.Logger).Log("msg", "found an existing instance(s) with a problem in the ring, "+
"this instance cannot complete joining and become ready until this problem is resolved. "+
"The /ring http endpoint on the distributor (or single binary) provides visibility into the ring.",
"ring", i.RingName, "err", err)
return err
}

@ -102,15 +102,15 @@ func (d *Desc) Ready(now time.Time, heartbeatTimeout time.Duration) error {
numTokens := 0
for id, ingester := range d.Ingesters {
if now.Sub(time.Unix(ingester.Timestamp, 0)) > heartbeatTimeout {
return fmt.Errorf("ingester %s past heartbeat timeout", id)
return fmt.Errorf("instance %s past heartbeat timeout", id)
} else if ingester.State != ACTIVE {
return fmt.Errorf("ingester %s in state %v", id, ingester.State)
return fmt.Errorf("instance %s in state %v", id, ingester.State)
}
numTokens += len(ingester.Tokens)
}
if numTokens == 0 {
return fmt.Errorf("Not ready: no tokens in ring")
return fmt.Errorf("no tokens in ring")
}
return nil
}

@ -48,6 +48,9 @@ type ReadRing interface {
ReplicationFactor() int
IngesterCount() int
Subring(key uint32, n int) ReadRing
// HasInstance returns whether the ring contains an instance matching the provided instanceID.
HasInstance(instanceID string) bool
}
// Operation can be Read or Write
@ -475,3 +478,13 @@ func (r *Ring) GetInstanceState(instanceID string) (IngesterState, error) {
return instance.GetState(), nil
}
// HasInstance returns whether the ring contains an instance matching the provided instanceID.
func (r *Ring) HasInstance(instanceID string) bool {
r.mtx.RLock()
defer r.mtx.RUnlock()
instances := r.ringDesc.GetIngesters()
_, ok := instances[instanceID]
return ok
}

@ -273,29 +273,6 @@ var (
ErrBadRuleGroup = errors.New("unable to decode rule group")
)
// ValidateRuleGroup validates a rulegroup
func ValidateRuleGroup(g rulefmt.RuleGroup) []error {
var errs []error
for i, r := range g.Rules {
for _, err := range r.Validate() {
var ruleName string
if r.Alert.Value != "" {
ruleName = r.Alert.Value
} else {
ruleName = r.Record.Value
}
errs = append(errs, &rulefmt.Error{
Group: g.Name,
Rule: i,
RuleName: ruleName,
Err: err,
})
}
}
return errs
}
func marshalAndSend(output interface{}, w http.ResponseWriter, logger log.Logger) {
d, err := yaml.Marshal(&output)
if err != nil {
@ -464,7 +441,7 @@ func (r *Ruler) CreateRuleGroup(w http.ResponseWriter, req *http.Request) {
return
}
errs := ValidateRuleGroup(rg)
errs := r.manager.ValidateRuleGroup(rg)
if len(errs) > 0 {
for _, err := range errs {
level.Error(logger).Log("msg", "unable to validate rule group payload", "err", err.Error())

@ -12,6 +12,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/pkg/rulefmt"
promRules "github.com/prometheus/prometheus/rules"
"github.com/weaveworks/common/user"
"golang.org/x/net/context/ctxhttp"
@ -37,9 +38,12 @@ type DefaultMultiTenantManager struct {
notifiersMtx sync.Mutex
notifiers map[string]*rulerNotifier
managersTotal prometheus.Gauge
registry prometheus.Registerer
logger log.Logger
managersTotal prometheus.Gauge
lastReloadSuccessful *prometheus.GaugeVec
lastReloadSuccessfulTimestamp *prometheus.GaugeVec
configUpdatesTotal *prometheus.CounterVec
registry prometheus.Registerer
logger log.Logger
}
func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg prometheus.Registerer, logger log.Logger) (*DefaultMultiTenantManager, error) {
@ -66,6 +70,21 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg
Name: "ruler_managers_total",
Help: "Total number of managers registered and running in the ruler",
}),
lastReloadSuccessful: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "ruler_config_last_reload_successful",
Help: "Boolean set to 1 whenever the last configuration reload attempt was successful.",
}, []string{"user"}),
lastReloadSuccessfulTimestamp: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "ruler_config_last_reload_successful_seconds",
Help: "Timestamp of the last successful configuration reload.",
}, []string{"user"}),
configUpdatesTotal: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Namespace: "cortex",
Name: "ruler_config_updates_total",
Help: "Total number of config updates triggered by a user",
}, []string{"user"}),
registry: reg,
logger: logger,
}, nil
@ -86,6 +105,9 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou
if _, exists := ruleGroups[userID]; !exists {
go mngr.Stop()
delete(r.userManagers, userID)
r.lastReloadSuccessful.DeleteLabelValues(userID)
r.lastReloadSuccessfulTimestamp.DeleteLabelValues(userID)
r.configUpdatesTotal.DeleteLabelValues(userID)
level.Info(r.logger).Log("msg", "deleting rule manager", "user", userID)
}
}
@ -100,18 +122,19 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
// have been updated
update, files, err := r.mapper.MapRules(user, groups.Formatted())
if err != nil {
r.lastReloadSuccessful.WithLabelValues(user).Set(0)
level.Error(r.logger).Log("msg", "unable to map rule files", "user", user, "err", err)
return
}
if update {
level.Debug(r.logger).Log("msg", "updating rules", "user", user)
configUpdatesTotal.WithLabelValues(user).Inc()
r.configUpdatesTotal.WithLabelValues(user).Inc()
manager, exists := r.userManagers[user]
if !exists {
manager, err = r.newManager(ctx, user)
if err != nil {
configUpdateFailuresTotal.WithLabelValues(user, "rule-manager-creation-failure").Inc()
r.lastReloadSuccessful.WithLabelValues(user).Set(0)
level.Error(r.logger).Log("msg", "unable to create rule manager", "user", user, "err", err)
return
}
@ -122,10 +145,13 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
}
err = manager.Update(r.cfg.EvaluationInterval, files, nil)
if err != nil {
configUpdateFailuresTotal.WithLabelValues(user, "rules-update-failure").Inc()
r.lastReloadSuccessful.WithLabelValues(user).Set(0)
level.Error(r.logger).Log("msg", "unable to update rule manager", "user", user, "err", err)
return
}
r.lastReloadSuccessful.WithLabelValues(user).Set(1)
r.lastReloadSuccessfulTimestamp.WithLabelValues(user).SetToCurrentTime()
}
}
@ -221,3 +247,25 @@ func (r *DefaultMultiTenantManager) Stop() {
r.userManagerMtx.Unlock()
level.Info(r.logger).Log("msg", "all user managers stopped")
}
func (*DefaultMultiTenantManager) ValidateRuleGroup(g rulefmt.RuleGroup) []error {
var errs []error
for i, r := range g.Rules {
for _, err := range r.Validate() {
var ruleName string
if r.Alert.Value != "" {
ruleName = r.Alert.Value
} else {
ruleName = r.Record.Value
}
errs = append(errs, &rulefmt.Error{
Group: g.Name,
Rule: i,
RuleName: ruleName,
Err: err,
})
}
}
return errs
}
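
Moving ValidateRuleGroup onto the manager lets callers such as the ruler API validate payloads through the MultiTenantManager interface. A sketch of that flow, assuming the vendored Prometheus rulefmt.Parse which returns the parsed groups plus any parse errors:

```go
package example

import (
	"fmt"

	"github.com/cortexproject/cortex/pkg/ruler"
	"github.com/prometheus/prometheus/pkg/rulefmt"
)

// validateGroups parses a YAML rule group payload and runs the manager-level
// validation over every group it contains.
func validateGroups(m ruler.MultiTenantManager, payload []byte) error {
	groups, errs := rulefmt.Parse(payload) // assumption: Parse(content []byte) (*RuleGroups, []error)
	if len(errs) > 0 {
		return fmt.Errorf("parse errors: %v", errs)
	}
	for _, g := range groups.Groups {
		if errs := m.ValidateRuleGroup(g); len(errs) > 0 {
			return fmt.Errorf("group %q is invalid: %v", g.Name, errs)
		}
	}
	return nil
}
```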

@ -17,6 +17,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/pkg/rulefmt"
promRules "github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/util/strutil"
"github.com/weaveworks/common/user"
@ -38,16 +39,6 @@ var (
Name: "ruler_ring_check_errors_total",
Help: "Number of errors that have occurred when checking the ring for ownership",
})
configUpdatesTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "cortex",
Name: "ruler_config_updates_total",
Help: "Total number of config updates triggered by a user",
}, []string{"user"})
configUpdateFailuresTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "cortex",
Name: "ruler_config_update_failures_total",
Help: "Total number of config update failures triggered by a user",
}, []string{"user", "reason"})
)
// Config is the configuration for the recording rules server.
@ -147,6 +138,8 @@ type MultiTenantManager interface {
GetRules(userID string) []*promRules.Group
// Stop stops all Manager components.
Stop()
// ValidateRuleGroup validates a rulegroup
ValidateRuleGroup(rulefmt.RuleGroup) []error
}
// Ruler evaluates rules.
@ -212,6 +205,10 @@ func NewRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer,
if err = enableSharding(ruler, ringStore); err != nil {
return nil, errors.Wrap(err, "setup ruler sharding ring")
}
if reg != nil {
reg.MustRegister(ruler.ring)
}
}
ruler.Service = services.NewBasicService(ruler.starting, ruler.run, ruler.stopping)
@ -230,12 +227,13 @@ func enableSharding(r *Ruler, ringStore kv.Client) error {
delegate = ring.NewLeaveOnStoppingDelegate(delegate, r.logger)
delegate = ring.NewAutoForgetDelegate(r.cfg.Ring.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, r.logger)
r.lifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ring.RulerRingKey, ring.RulerRingKey, ringStore, delegate, r.logger, r.registry)
rulerRingName := "ruler"
r.lifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, rulerRingName, ring.RulerRingKey, ringStore, delegate, r.logger, r.registry)
if err != nil {
return errors.Wrap(err, "failed to initialize ruler's lifecycler")
}
r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), ring.RulerRingKey, ring.RulerRingKey, ringStore, &ring.DefaultReplicationStrategy{})
r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), rulerRingName, ring.RulerRingKey, ringStore, &ring.DefaultReplicationStrategy{})
if err != nil {
return errors.Wrap(err, "failed to initialize ruler's ring")
}

@ -15,3 +15,9 @@ func HashBlockID(id ulid.ULID) uint32 {
}
return h
}
// HashTenantID returns a 32-bit hash of the tenant ID useful for
// ring-based sharding.
func HashTenantID(id string) uint32 {
return client.HashAdd32a(client.HashNew32a(), id)
}

@ -41,7 +41,7 @@ type BucketStores struct {
logLevel logging.Level
bucketStoreMetrics *BucketStoreMetrics
metaFetcherMetrics *MetadataFetcherMetrics
filters []block.MetadataFilter
shardingStrategy ShardingStrategy
// Index cache shared across all tenants.
indexCache storecache.IndexCache
@ -54,12 +54,14 @@ type BucketStores struct {
stores map[string]*store.BucketStore
// Metrics.
syncTimes prometheus.Histogram
syncLastSuccess prometheus.Gauge
syncTimes prometheus.Histogram
syncLastSuccess prometheus.Gauge
tenantsDiscovered prometheus.Gauge
tenantsSynced prometheus.Gauge
}
// NewBucketStores makes a new BucketStores.
func NewBucketStores(cfg tsdb.BlocksStorageConfig, filters []block.MetadataFilter, bucketClient objstore.Bucket, limits *validation.Overrides, logLevel logging.Level, logger log.Logger, reg prometheus.Registerer) (*BucketStores, error) {
func NewBucketStores(cfg tsdb.BlocksStorageConfig, shardingStrategy ShardingStrategy, bucketClient objstore.Bucket, limits *validation.Overrides, logLevel logging.Level, logger log.Logger, reg prometheus.Registerer) (*BucketStores, error) {
cachingBucket, err := tsdb.CreateCachingBucket(cfg.BucketStore.ChunksCache, cfg.BucketStore.MetadataCache, bucketClient, logger, reg)
if err != nil {
return nil, errors.Wrapf(err, "create caching bucket")
@ -78,7 +80,7 @@ func NewBucketStores(cfg tsdb.BlocksStorageConfig, filters []block.MetadataFilte
cfg: cfg,
limits: limits,
bucket: cachingBucket,
filters: filters,
shardingStrategy: shardingStrategy,
stores: map[string]*store.BucketStore{},
logLevel: logLevel,
bucketStoreMetrics: NewBucketStoreMetrics(),
@ -93,6 +95,14 @@ func NewBucketStores(cfg tsdb.BlocksStorageConfig, filters []block.MetadataFilte
Name: "cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds",
Help: "Unix timestamp of the last successful blocks sync.",
}),
tenantsDiscovered: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "cortex_bucket_stores_tenants_discovered",
Help: "Number of tenants discovered in the bucket.",
}),
tenantsSynced: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "cortex_bucket_stores_tenants_synced",
Help: "Number of tenants synced.",
}),
}
// Init the index cache.
@ -147,6 +157,22 @@ func (u *BucketStores) syncUsersBlocks(ctx context.Context, f func(context.Conte
errs := tsdb_errors.MultiError{}
errsMx := sync.Mutex{}
// Scan users in the bucket. In case of error, it may return a subset of users. If we sync a subset of users
// during a periodic sync, we may end up unloading blocks for users that still belong to this store-gateway,
// so we prefer not to run the sync at all.
userIDs, err := u.scanUsers(ctx)
if err != nil {
return err
}
includeUserIDs := make(map[string]struct{})
for _, userID := range u.shardingStrategy.FilterUsers(ctx, userIDs) {
includeUserIDs[userID] = struct{}{}
}
u.tenantsDiscovered.Set(float64(len(userIDs)))
u.tenantsSynced.Set(float64(len(includeUserIDs)))
// Create a pool of workers which will synchronize blocks. The pool size
// is limited in order to avoid to concurrently sync a lot of tenants in
// a large cluster.
@ -165,28 +191,32 @@ func (u *BucketStores) syncUsersBlocks(ctx context.Context, f func(context.Conte
}()
}
// Iterate the bucket, lazily create a bucket store for each new user found
// Lazily create a bucket store for each new user found
// and submit a sync job for each user.
err := u.bucket.Iter(ctx, "", func(s string) error {
user := strings.TrimSuffix(s, "/")
for _, userID := range userIDs {
// If we don't have a store for the tenant yet, then we should skip it if it's not
// included in the store-gateway shard. If we already have it, we need to sync it
// anyway to make sure all its blocks are unloaded and metrics updated correctly
// (but bucket API calls are skipped thanks to the objstore client adapter).
if _, included := includeUserIDs[userID]; !included && u.getStore(userID) == nil {
continue
}
bs, err := u.getOrCreateStore(user)
bs, err := u.getOrCreateStore(userID)
if err != nil {
return err
errsMx.Lock()
errs.Add(err)
errsMx.Unlock()
continue
}
select {
case jobs <- job{userID: user, store: bs}:
return nil
case jobs <- job{userID: userID, store: bs}:
// Nothing to do. Will loop to push more jobs.
case <-ctx.Done():
return ctx.Err()
}
})
if err != nil {
errsMx.Lock()
errs.Add(err)
errsMx.Unlock()
}
// Wait until all workers completed.
@ -217,6 +247,22 @@ func (u *BucketStores) Series(req *storepb.SeriesRequest, srv storepb.Store_Seri
})
}
// scanUsers in the bucket and return the list of found users. If an error occurs while
// iterating the bucket, it may return both an error and a subset of the users in the bucket.
func (u *BucketStores) scanUsers(ctx context.Context) ([]string, error) {
var users []string
// Iterate the bucket to find all users in the bucket. Due to how the bucket listing
// caching works, it's more likely to have a cache hit if there's no delay while
// iterating the bucket, so we load all users into memory first and process them later.
err := u.bucket.Iter(ctx, "", func(s string) error {
users = append(users, strings.TrimSuffix(s, "/"))
return nil
})
return users, err
}
func (u *BucketStores) getStore(userID string) *store.BucketStore {
u.storesMu.RLock()
store := u.stores[userID]
@ -247,15 +293,22 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
userBkt := tsdb.NewUserBucketClient(userID, u.bucket)
// Wrap the bucket reader to skip iterating the bucket at all if the user doesn't
// belong to the store-gateway shard. We need to run the BucketStore synching anyway
// in order to unload previous tenants in case of a resharding leading to tenants
// moving out from the store-gateway shard and also make sure both MetaFetcher and
// BucketStore metrics are correctly updated.
fetcherBkt := NewShardingBucketReaderAdapter(userID, u.shardingStrategy, userBkt)
fetcherReg := prometheus.NewRegistry()
fetcher, err := block.NewMetaFetcher(
userLogger,
u.cfg.BucketStore.MetaSyncConcurrency,
userBkt,
fetcherBkt,
filepath.Join(u.cfg.BucketStore.SyncDir, userID), // The fetcher stores cached metas in the "meta-syncer/" sub directory
fetcherReg,
// The input filters MUST be before the ones we create here (order matters).
append(u.filters, []block.MetadataFilter{
// The sharding strategy filter MUST be before the ones we create here (order matters).
append([]block.MetadataFilter{NewShardingMetadataFilterAdapter(userID, u.shardingStrategy)}, []block.MetadataFilter{
block.NewConsistencyDelayMetaFilter(userLogger, u.cfg.BucketStore.ConsistencyDelay, fetcherReg),
block.NewIgnoreDeletionMarkFilter(userLogger, userBkt, u.cfg.BucketStore.IgnoreDeletionMarksDelay),
// The duplicate filter has been intentionally omitted because it could cause troubles with

@ -3,6 +3,8 @@ package storegateway
import (
"context"
"flag"
"fmt"
"strings"
"time"
"github.com/go-kit/kit/log"
@ -10,7 +12,6 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/thanos-io/thanos/pkg/block"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/thanos-io/thanos/pkg/objstore"
"github.com/thanos-io/thanos/pkg/store/storepb"
@ -37,12 +38,25 @@ const (
// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
// in the ring will be automatically removed.
ringAutoForgetUnhealthyPeriods = 10
// Supported sharding strategies.
ShardingStrategyDefault = "default"
ShardingStrategyShuffle = "shuffle-sharding"
)
var (
supportedShardingStrategies = []string{ShardingStrategyDefault, ShardingStrategyShuffle}
// Validation errors.
errInvalidShardingStrategy = errors.New("invalid sharding strategy")
errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater than 0")
)
// Config holds the store gateway config.
type Config struct {
ShardingEnabled bool `yaml:"sharding_enabled"`
ShardingRing RingConfig `yaml:"sharding_ring" doc:"description=The hash ring configuration. This option is required only if blocks sharding is enabled."`
ShardingEnabled bool `yaml:"sharding_enabled"`
ShardingRing RingConfig `yaml:"sharding_ring" doc:"description=The hash ring configuration. This option is required only if blocks sharding is enabled."`
ShardingStrategy string `yaml:"sharding_strategy"`
}
// RegisterFlags registers the Config flags.
@ -50,6 +64,22 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.ShardingRing.RegisterFlags(f)
f.BoolVar(&cfg.ShardingEnabled, "experimental.store-gateway.sharding-enabled", false, "Shard blocks across multiple store gateway instances."+sharedOptionWithQuerier)
f.StringVar(&cfg.ShardingStrategy, "experimental.store-gateway.sharding-strategy", ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", ")))
}
// Validate the Config.
func (cfg *Config) Validate(limits validation.Limits) error {
if cfg.ShardingEnabled {
if !util.StringsContain(supportedShardingStrategies, cfg.ShardingStrategy) {
return errInvalidShardingStrategy
}
if cfg.ShardingStrategy == ShardingStrategyShuffle && limits.StoreGatewayTenantShardSize <= 0 {
return errInvalidTenantShardSize
}
}
return nil
}
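
The validation ties the new strategy flag to the tenant shard size limit. A sketch of the happy path (field values are illustrative):

```go
package example

import (
	"github.com/cortexproject/cortex/pkg/storegateway"
	"github.com/cortexproject/cortex/pkg/util/validation"
)

// validateShuffleSharding returns nil because the default tenant shard size is
// positive; with StoreGatewayTenantShardSize left at 0 it would return
// errInvalidTenantShardSize instead.
func validateShuffleSharding() error {
	cfg := storegateway.Config{
		ShardingEnabled:  true,
		ShardingStrategy: storegateway.ShardingStrategyShuffle,
	}
	limits := validation.Limits{StoreGatewayTenantShardSize: 3}
	return cfg.Validate(limits)
}
```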
// StoreGateway is the Cortex service responsible to expose an API over the bucket
@ -98,7 +128,6 @@ func NewStoreGateway(gatewayCfg Config, storageCfg cortex_tsdb.BlocksStorageConf
func newStoreGateway(gatewayCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, bucketClient objstore.Bucket, ringStore kv.Client, limits *validation.Overrides, logLevel logging.Level, logger log.Logger, reg prometheus.Registerer) (*StoreGateway, error) {
var err error
var filters []block.MetadataFilter
g := &StoreGateway{
gatewayCfg: gatewayCfg,
@ -115,6 +144,9 @@ func newStoreGateway(gatewayCfg Config, storageCfg cortex_tsdb.BlocksStorageConf
g.bucketSync.WithLabelValues(syncReasonPeriodic)
g.bucketSync.WithLabelValues(syncReasonRingChange)
// Init sharding strategy.
var shardingStrategy ShardingStrategy
if gatewayCfg.ShardingEnabled {
lifecyclerCfg, err := gatewayCfg.ShardingRing.ToLifecyclerConfig()
if err != nil {
@ -143,12 +175,20 @@ func newStoreGateway(gatewayCfg Config, storageCfg cortex_tsdb.BlocksStorageConf
reg.MustRegister(g.ring)
}
// Filter blocks by the shard of this store-gateway instance if the
// sharding is enabled.
filters = append(filters, NewShardingMetadataFilter(g.ring, lifecyclerCfg.Addr, logger))
// Instantiate the right strategy.
switch gatewayCfg.ShardingStrategy {
case ShardingStrategyDefault:
shardingStrategy = NewDefaultShardingStrategy(g.ring, lifecyclerCfg.Addr, logger)
case ShardingStrategyShuffle:
shardingStrategy = NewShuffleShardingStrategy(g.ring, lifecyclerCfg.ID, lifecyclerCfg.Addr, limits, logger)
default:
return nil, errInvalidShardingStrategy
}
} else {
shardingStrategy = NewNoShardingStrategy()
}
g.stores, err = NewBucketStores(storageCfg, filters, bucketClient, limits, logLevel, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": "store-gateway"}, reg))
g.stores, err = NewBucketStores(storageCfg, shardingStrategy, bucketClient, limits, logLevel, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": "store-gateway"}, reg))
if err != nil {
return nil, errors.Wrap(err, "create bucket stores")
}

@ -1,60 +0,0 @@
package storegateway
import (
"context"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/oklog/ulid"
"github.com/thanos-io/thanos/pkg/block/metadata"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/cortexproject/cortex/pkg/ring"
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
)
const (
shardExcludedMeta = "shard-excluded"
)
// ShardingMetadataFilter represents struct that allows sharding using the ring.
// Not go-routine safe.
type ShardingMetadataFilter struct {
r *ring.Ring
instanceAddr string
logger log.Logger
}
// NewShardingMetadataFilter creates ShardingMetadataFilter.
func NewShardingMetadataFilter(r *ring.Ring, instanceAddr string, logger log.Logger) *ShardingMetadataFilter {
return &ShardingMetadataFilter{
r: r,
instanceAddr: instanceAddr,
logger: logger,
}
}
// Filter filters out blocks not included within the current shard.
func (f *ShardingMetadataFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error {
// Buffer internally used by the ring (give extra room for a JOINING + LEAVING instance).
buf := make([]ring.IngesterDesc, 0, f.r.ReplicationFactor()+2)
for blockID := range metas {
key := cortex_tsdb.HashBlockID(blockID)
set, err := f.r.Get(key, ring.BlocksSync, buf)
// If there are no healthy instances in the replication set or
// the replication set for this block doesn't include this instance
// then we filter it out.
if err != nil || !set.Includes(f.instanceAddr) {
if err != nil {
level.Warn(f.logger).Log("msg", "failed to get replication set for block", "block", blockID.String(), "err", err)
}
synced.WithLabelValues(shardExcludedMeta).Inc()
delete(metas, blockID)
}
}
return nil
}

@ -0,0 +1,201 @@
package storegateway
import (
"context"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/oklog/ulid"
"github.com/thanos-io/thanos/pkg/block"
"github.com/thanos-io/thanos/pkg/block/metadata"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/thanos-io/thanos/pkg/objstore"
"github.com/cortexproject/cortex/pkg/ring"
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
)
const (
shardExcludedMeta = "shard-excluded"
)
type ShardingStrategy interface {
// FilterUsers whose blocks should be loaded by the store-gateway. Returns the list of user IDs
// that should be synced by the store-gateway.
FilterUsers(ctx context.Context, userIDs []string) []string
// FilterBlocks that should be loaded by the store-gateway.
FilterBlocks(ctx context.Context, userID string, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error
}
// ShardingLimits is the interface that should be implemented by the limits provider,
// limiting the scope of the limits to the ones required by sharding strategies.
type ShardingLimits interface {
StoreGatewayTenantShardSize(userID string) int
}
// NoShardingStrategy is a no-op strategy. When this strategy is used, no tenant/block is filtered out.
type NoShardingStrategy struct{}
func NewNoShardingStrategy() *NoShardingStrategy {
return &NoShardingStrategy{}
}
func (s *NoShardingStrategy) FilterUsers(_ context.Context, userIDs []string) []string {
return userIDs
}
func (s *NoShardingStrategy) FilterBlocks(_ context.Context, _ string, _ map[ulid.ULID]*metadata.Meta, _ *extprom.TxGaugeVec) error {
return nil
}
// DefaultShardingStrategy is a sharding strategy based on the hash ring formed by store-gateways.
// Not go-routine safe.
type DefaultShardingStrategy struct {
r *ring.Ring
instanceAddr string
logger log.Logger
}
// NewDefaultShardingStrategy creates DefaultShardingStrategy.
func NewDefaultShardingStrategy(r *ring.Ring, instanceAddr string, logger log.Logger) *DefaultShardingStrategy {
return &DefaultShardingStrategy{
r: r,
instanceAddr: instanceAddr,
logger: logger,
}
}
// FilterUsers implements ShardingStrategy.
func (s *DefaultShardingStrategy) FilterUsers(_ context.Context, userIDs []string) []string {
return userIDs
}
// FilterBlocks implements ShardingStrategy.
func (s *DefaultShardingStrategy) FilterBlocks(_ context.Context, _ string, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error {
filterBlocksByRingSharding(s.r, s.instanceAddr, metas, synced, s.logger)
return nil
}
// ShuffleShardingStrategy is a shuffle sharding strategy, based on the hash ring formed by store-gateways,
// where each tenant's blocks are sharded across a subset of store-gateway instances.
type ShuffleShardingStrategy struct {
r *ring.Ring
instanceID string
instanceAddr string
limits ShardingLimits
logger log.Logger
}
// NewShuffleShardingStrategy makes a new ShuffleShardingStrategy.
func NewShuffleShardingStrategy(r *ring.Ring, instanceID, instanceAddr string, limits ShardingLimits, logger log.Logger) *ShuffleShardingStrategy {
return &ShuffleShardingStrategy{
r: r,
instanceID: instanceID,
instanceAddr: instanceAddr,
limits: limits,
logger: logger,
}
}
// FilterUsers implements ShardingStrategy.
func (s *ShuffleShardingStrategy) FilterUsers(_ context.Context, userIDs []string) []string {
var filteredIDs []string
for _, userID := range userIDs {
subRing := GetShuffleShardingSubring(s.r, userID, s.limits)
// Include the user only if it belongs to this store-gateway shard.
if subRing.HasInstance(s.instanceID) {
filteredIDs = append(filteredIDs, userID)
}
}
return filteredIDs
}
// FilterBlocks implements ShardingStrategy.
func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, userID string, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error {
subRing := GetShuffleShardingSubring(s.r, userID, s.limits)
filterBlocksByRingSharding(subRing, s.instanceAddr, metas, synced, s.logger)
return nil
}
func filterBlocksByRingSharding(r ring.ReadRing, instanceAddr string, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec, logger log.Logger) {
// Buffer internally used by the ring (give extra room for a JOINING + LEAVING instance).
buf := make([]ring.IngesterDesc, 0, r.ReplicationFactor()+2)
for blockID := range metas {
key := cortex_tsdb.HashBlockID(blockID)
set, err := r.Get(key, ring.BlocksSync, buf)
// If there are no healthy instances in the replication set or
// the replication set for this block doesn't include this instance
// then we filter it out.
if err != nil || !set.Includes(instanceAddr) {
if err != nil {
level.Warn(logger).Log("msg", "excluded block because failed to get replication set", "block", blockID.String(), "err", err)
}
synced.WithLabelValues(shardExcludedMeta).Inc()
delete(metas, blockID)
}
}
}
// GetShuffleShardingSubring returns the subring to be used for a given user. This function
// should be used both by store-gateway and querier in order to guarantee the same logic is used.
func GetShuffleShardingSubring(ring *ring.Ring, userID string, limits ShardingLimits) ring.ReadRing {
shardSize := limits.StoreGatewayTenantShardSize(userID)
// A shard size of 0 means shuffle sharding is disabled for this specific user,
// so we just return the full ring so that blocks will be sharded across all store-gateways.
if shardSize <= 0 {
return ring
}
return ring.Subring(cortex_tsdb.HashTenantID(userID), shardSize)
}
type shardingMetadataFilterAdapter struct {
userID string
strategy ShardingStrategy
}
func NewShardingMetadataFilterAdapter(userID string, strategy ShardingStrategy) block.MetadataFilter {
return &shardingMetadataFilterAdapter{
userID: userID,
strategy: strategy,
}
}
// Filter implements block.MetadataFilter.
func (a *shardingMetadataFilterAdapter) Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error {
return a.strategy.FilterBlocks(ctx, a.userID, metas, synced)
}
type shardingBucketReaderAdapter struct {
objstore.InstrumentedBucketReader
userID string
strategy ShardingStrategy
}
func NewShardingBucketReaderAdapter(userID string, strategy ShardingStrategy, wrapped objstore.InstrumentedBucketReader) objstore.InstrumentedBucketReader {
return &shardingBucketReaderAdapter{
InstrumentedBucketReader: wrapped,
userID: userID,
strategy: strategy,
}
}
// Iter implements objstore.BucketReader.
func (a *shardingBucketReaderAdapter) Iter(ctx context.Context, dir string, f func(string) error) error {
// Skip iterating the bucket if the tenant doesn't belong to the shard. From the caller
// perspective, this will look like the tenant has no blocks in the storage.
if len(a.strategy.FilterUsers(ctx, []string{a.userID})) == 0 {
return nil
}
return a.InstrumentedBucketReader.Iter(ctx, dir, f)
}
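
GetShuffleShardingSubring is meant to be shared by the store-gateway and the querier so both sides agree on which instances own a tenant. A sketch of that ownership check (the querier-side wiring is not part of this diff):

```go
package example

import (
	"github.com/cortexproject/cortex/pkg/ring"
	"github.com/cortexproject/cortex/pkg/storegateway"
)

// ownsTenant hashes the tenant into its shuffle-sharding subring and asks
// whether the given instance is part of it, mirroring FilterUsers above.
func ownsTenant(r *ring.Ring, limits storegateway.ShardingLimits, userID, instanceID string) bool {
	subRing := storegateway.GetShuffleShardingSubring(r, userID, limits)
	return subRing.HasInstance(instanceID)
}
```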

@ -0,0 +1,53 @@
package util
import (
"context"
"google.golang.org/grpc/metadata"
)
// ipAddressesKey is key for the GRPC metadata where the IP addresses are stored
const ipAddressesKey = "github.com/cortexproject/cortex/util/extract_forwarded/x-forwarded-for"
// GetSourceIPsFromOutgoingCtx extracts the source field from the GRPC context
func GetSourceIPsFromOutgoingCtx(ctx context.Context) string {
md, ok := metadata.FromOutgoingContext(ctx)
if !ok {
return ""
}
ipAddresses, ok := md[ipAddressesKey]
if !ok {
return ""
}
return ipAddresses[0]
}
// GetSourceIPsFromIncomingCtx extracts the source field from the GRPC context
func GetSourceIPsFromIncomingCtx(ctx context.Context) string {
md, ok := metadata.FromIncomingContext(ctx)
if !ok {
return ""
}
ipAddresses, ok := md[ipAddressesKey]
if !ok {
return ""
}
return ipAddresses[0]
}
// AddSourceIPsToOutgoingContext adds the given source to the GRPC context
func AddSourceIPsToOutgoingContext(ctx context.Context, source string) context.Context {
if source != "" {
ctx = metadata.AppendToOutgoingContext(ctx, ipAddressesKey, source)
}
return ctx
}
// AddSourceIPsToIncomingContext adds the given source to the GRPC context
func AddSourceIPsToIncomingContext(ctx context.Context, source string) context.Context {
if source != "" {
md := metadata.Pairs(ipAddressesKey, source)
ctx = metadata.NewIncomingContext(ctx, md)
}
return ctx
}
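
These helpers are meant to be used as a pair: the component that receives the HTTP request attaches the caller's address to the gRPC metadata, and downstream components read it back for logging. A sketch of the round trip (the address is a placeholder):

```go
package example

import (
	"context"

	"github.com/cortexproject/cortex/pkg/util"
)

// propagateSourceIP attaches a source address to the outgoing context and
// reads it back, as a downstream gRPC client/server pair would.
func propagateSourceIP(ctx context.Context) string {
	ctx = util.AddSourceIPsToOutgoingContext(ctx, "203.0.113.10")
	return util.GetSourceIPsFromOutgoingCtx(ctx) // "203.0.113.10"
}
```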

@ -132,6 +132,12 @@ func WithTraceID(traceID string, l log.Logger) log.Logger {
return log.With(l, "traceID", traceID)
}
// WithSourceIPs returns a Logger that has information about the source IPs in
// its details.
func WithSourceIPs(sourceIPs string, l log.Logger) log.Logger {
return log.With(l, "sourceIPs", sourceIPs)
}
// CheckFatal prints an error and exits with error code 1 if err is non-nil
func CheckFatal(location string, err error) {
if err != nil {

@ -6,6 +6,7 @@ import (
"github.com/go-kit/kit/log/level"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/middleware"
"github.com/cortexproject/cortex/pkg/distributor"
"github.com/cortexproject/cortex/pkg/ingester/client"
@ -13,12 +14,20 @@ import (
)
// Handler is a http.Handler which accepts WriteRequests.
func Handler(cfg distributor.Config, push func(context.Context, *client.WriteRequest) (*client.WriteResponse, error)) http.Handler {
func Handler(cfg distributor.Config, sourceIPs *middleware.SourceIPExtractor, push func(context.Context, *client.WriteRequest) (*client.WriteResponse, error)) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
logger := util.WithContext(ctx, util.Logger)
if sourceIPs != nil {
source := sourceIPs.Get(r)
if source != "" {
ctx = util.AddSourceIPsToOutgoingContext(ctx, source)
logger = util.WithSourceIPs(source, logger)
}
}
compressionType := util.CompressionTypeFor(r.Header.Get("X-Prometheus-Remote-Write-Version"))
var req client.PreallocWriteRequest
_, err := util.ParseProtoReader(r.Context(), r.Body, int(r.ContentLength), cfg.MaxRecvMsgSize, &req, compressionType)
logger := util.WithContext(r.Context(), util.Logger)
_, err := util.ParseProtoReader(ctx, r.Body, int(r.ContentLength), cfg.MaxRecvMsgSize, &req, compressionType)
if err != nil {
level.Error(logger).Log("err", err.Error())
http.Error(w, err.Error(), http.StatusBadRequest)
@ -28,7 +37,7 @@ func Handler(cfg distributor.Config, push func(context.Context, *client.WriteReq
req.Source = client.API
}
if _, err := push(r.Context(), &req.WriteRequest); err != nil {
if _, err := push(ctx, &req.WriteRequest); err != nil {
resp, ok := httpgrpc.HTTPResponseFromError(err)
if !ok {
http.Error(w, err.Error(), http.StatusInternalServerError)

@ -62,6 +62,9 @@ type Limits struct {
CardinalityLimit int `yaml:"cardinality_limit"`
MaxCacheFreshness time.Duration `yaml:"max_cache_freshness"`
// Store-gateway.
StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size"`
// Config for overrides, convenient if it goes here. [Deprecated in favor of RuntimeConfig flag in cortex.Config]
PerTenantOverrideConfig string `yaml:"per_tenant_override_config"`
PerTenantOverridePeriod time.Duration `yaml:"per_tenant_override_period"`
@ -108,6 +111,9 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides. [deprecated, use -runtime-config.file instead]")
f.DurationVar(&l.PerTenantOverridePeriod, "limits.per-user-override-period", 10*time.Second, "Period with which to reload the overrides. [deprecated, use -runtime-config.reload-period instead]")
// Store-gateway.
f.IntVar(&l.StoreGatewayTenantShardSize, "experimental.store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
}
// Validate the limits config and returns an error if the validation
@ -340,6 +346,11 @@ func (o *Overrides) SubringSize(userID string) int {
return o.getOverridesForUser(userID).SubringSize
}
// StoreGatewayTenantShardSize returns the store-gateway shard size for a given user.
func (o *Overrides) StoreGatewayTenantShardSize(userID string) int {
return o.getOverridesForUser(userID).StoreGatewayTenantShardSize
}
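To make the new limit concrete, it can be set globally with the `-experimental.store-gateway.tenant-shard-size` flag or per tenant in the runtime overrides file. A minimal sketch of such an override, assuming Cortex's usual runtime-config layout and a hypothetical tenant ID:

```yaml
# Hypothetical runtime overrides file; "tenant-a" is an example tenant ID.
overrides:
  tenant-a:
    store_gateway_tenant_shard_size: 3   # 0 disables shuffle sharding for this tenant
```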
func (o *Overrides) getOverridesForUser(userID string) *Limits {
if o.tenantLimits != nil {
l := o.tenantLimits(userID)

@ -0,0 +1,15 @@
language: go
go:
- 1.10.x
- 1.11.x
- 1.12.x
sudo: false
before_install:
- go get -u golang.org/x/lint/golint
- go get github.com/axw/gocov/gocov
- go get github.com/mattn/goveralls
script:
- test -z "`gofmt -l .`"
- test -z "`golint ./...`"
- $GOPATH/bin/goveralls -service=travis-ci
- cd example && go build -o http_breaker && ./http_breaker

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright 2015 Sony Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@ -0,0 +1,128 @@
gobreaker
=========
[![GoDoc](https://godoc.org/github.com/sony/gobreaker?status.svg)](http://godoc.org/github.com/sony/gobreaker)
[![Build Status](https://travis-ci.org/sony/gobreaker.svg?branch=master)](https://travis-ci.org/sony/gobreaker)
[![Coverage Status](https://coveralls.io/repos/sony/gobreaker/badge.svg?branch=master&service=github)](https://coveralls.io/github/sony/gobreaker?branch=master)
[gobreaker][repo-url] implements the [Circuit Breaker pattern](https://msdn.microsoft.com/en-us/library/dn589784.aspx) in Go.
Installation
------------
```
go get github.com/sony/gobreaker
```
Usage
-----
The struct `CircuitBreaker` is a state machine to prevent sending requests that are likely to fail.
The function `NewCircuitBreaker` creates a new `CircuitBreaker`.
```go
func NewCircuitBreaker(st Settings) *CircuitBreaker
```
You can configure `CircuitBreaker` by the struct `Settings`:
```go
type Settings struct {
Name string
MaxRequests uint32
Interval time.Duration
Timeout time.Duration
ReadyToTrip func(counts Counts) bool
OnStateChange func(name string, from State, to State)
}
```
- `Name` is the name of the `CircuitBreaker`.
- `MaxRequests` is the maximum number of requests allowed to pass through
when the `CircuitBreaker` is half-open.
If `MaxRequests` is 0, `CircuitBreaker` allows only 1 request.
- `Interval` is the cyclic period of the closed state
for `CircuitBreaker` to clear the internal `Counts`, described later in this section.
If `Interval` is 0, `CircuitBreaker` doesn't clear the internal `Counts` during the closed state.
- `Timeout` is the period of the open state,
after which the state of `CircuitBreaker` becomes half-open.
If `Timeout` is 0, the timeout value of `CircuitBreaker` is set to 60 seconds.
- `ReadyToTrip` is called with a copy of `Counts` whenever a request fails in the closed state.
If `ReadyToTrip` returns true, `CircuitBreaker` will be placed into the open state.
If `ReadyToTrip` is `nil`, default `ReadyToTrip` is used.
Default `ReadyToTrip` returns true when the number of consecutive failures is more than 5.
- `OnStateChange` is called whenever the state of `CircuitBreaker` changes.
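For illustration, a breaker like the `cb` used in the example further down could be configured with a failure-ratio trip condition like this (the thresholds are arbitrary examples, not library defaults):

```go
import (
	"time"

	"github.com/sony/gobreaker"
)

var cb = gobreaker.NewCircuitBreaker(gobreaker.Settings{
	Name:        "HTTP GET",
	MaxRequests: 3,
	Interval:    time.Minute,
	Timeout:     30 * time.Second,
	// Trip once at least 3 requests have been seen and 60% or more of them failed.
	ReadyToTrip: func(counts gobreaker.Counts) bool {
		failureRatio := float64(counts.TotalFailures) / float64(counts.Requests)
		return counts.Requests >= 3 && failureRatio >= 0.6
	},
})
```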
The struct `Counts` holds the numbers of requests and their successes/failures:
```go
type Counts struct {
Requests uint32
TotalSuccesses uint32
TotalFailures uint32
ConsecutiveSuccesses uint32
ConsecutiveFailures uint32
}
```
`CircuitBreaker` clears the internal `Counts` either
on the change of the state or at the closed-state intervals.
`Counts` ignores the results of the requests sent before clearing.
`CircuitBreaker` can wrap any function to send a request:
```go
func (cb *CircuitBreaker) Execute(req func() (interface{}, error)) (interface{}, error)
```
The method `Execute` runs the given request if `CircuitBreaker` accepts it.
`Execute` returns an error instantly if `CircuitBreaker` rejects the request.
Otherwise, `Execute` returns the result of the request.
If a panic occurs in the request, `CircuitBreaker` handles it as an error
and causes the same panic again.
Example
-------
```go
var cb *gobreaker.CircuitBreaker
func Get(url string) ([]byte, error) {
body, err := cb.Execute(func() (interface{}, error) {
resp, err := http.Get(url)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
return body, nil
})
if err != nil {
return nil, err
}
return body.([]byte), nil
}
```
See [example](https://github.com/sony/gobreaker/blob/master/example) for details.
License
-------
The MIT License (MIT)
See [LICENSE](https://github.com/sony/gobreaker/blob/master/LICENSE) for details.
[repo-url]: https://github.com/sony/gobreaker

@ -0,0 +1,5 @@
module github.com/sony/gobreaker
go 1.12
require github.com/stretchr/testify v1.3.0

@ -0,0 +1,7 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

@ -0,0 +1,344 @@
// Package gobreaker implements the Circuit Breaker pattern.
// See https://msdn.microsoft.com/en-us/library/dn589784.aspx.
package gobreaker
import (
"errors"
"fmt"
"sync"
"time"
)
// State is a type that represents a state of CircuitBreaker.
type State int
// These constants are states of CircuitBreaker.
const (
StateClosed State = iota
StateHalfOpen
StateOpen
)
var (
// ErrTooManyRequests is returned when the circuit breaker state is half-open and the number of requests exceeds maxRequests.
ErrTooManyRequests = errors.New("too many requests")
// ErrOpenState is returned when the circuit breaker state is open.
ErrOpenState = errors.New("circuit breaker is open")
)
// String implements stringer interface.
func (s State) String() string {
switch s {
case StateClosed:
return "closed"
case StateHalfOpen:
return "half-open"
case StateOpen:
return "open"
default:
return fmt.Sprintf("unknown state: %d", s)
}
}
// Counts holds the numbers of requests and their successes/failures.
// CircuitBreaker clears the internal Counts either
// on the change of the state or at the closed-state intervals.
// Counts ignores the results of the requests sent before clearing.
type Counts struct {
Requests uint32
TotalSuccesses uint32
TotalFailures uint32
ConsecutiveSuccesses uint32
ConsecutiveFailures uint32
}
func (c *Counts) onRequest() {
c.Requests++
}
func (c *Counts) onSuccess() {
c.TotalSuccesses++
c.ConsecutiveSuccesses++
c.ConsecutiveFailures = 0
}
func (c *Counts) onFailure() {
c.TotalFailures++
c.ConsecutiveFailures++
c.ConsecutiveSuccesses = 0
}
func (c *Counts) clear() {
c.Requests = 0
c.TotalSuccesses = 0
c.TotalFailures = 0
c.ConsecutiveSuccesses = 0
c.ConsecutiveFailures = 0
}
// Settings configures CircuitBreaker:
//
// Name is the name of the CircuitBreaker.
//
// MaxRequests is the maximum number of requests allowed to pass through
// when the CircuitBreaker is half-open.
// If MaxRequests is 0, the CircuitBreaker allows only 1 request.
//
// Interval is the cyclic period of the closed state
// for the CircuitBreaker to clear the internal Counts.
// If Interval is 0, the CircuitBreaker doesn't clear internal Counts during the closed state.
//
// Timeout is the period of the open state,
// after which the state of the CircuitBreaker becomes half-open.
// If Timeout is 0, the timeout value of the CircuitBreaker is set to 60 seconds.
//
// ReadyToTrip is called with a copy of Counts whenever a request fails in the closed state.
// If ReadyToTrip returns true, the CircuitBreaker will be placed into the open state.
// If ReadyToTrip is nil, default ReadyToTrip is used.
// Default ReadyToTrip returns true when the number of consecutive failures is more than 5.
//
// OnStateChange is called whenever the state of the CircuitBreaker changes.
type Settings struct {
Name string
MaxRequests uint32
Interval time.Duration
Timeout time.Duration
ReadyToTrip func(counts Counts) bool
OnStateChange func(name string, from State, to State)
}
// CircuitBreaker is a state machine to prevent sending requests that are likely to fail.
type CircuitBreaker struct {
name string
maxRequests uint32
interval time.Duration
timeout time.Duration
readyToTrip func(counts Counts) bool
onStateChange func(name string, from State, to State)
mutex sync.Mutex
state State
generation uint64
counts Counts
expiry time.Time
}
// TwoStepCircuitBreaker is like CircuitBreaker but instead of surrounding a function
// with the breaker functionality, it only checks whether a request can proceed and
// expects the caller to report the outcome in a separate step using a callback.
type TwoStepCircuitBreaker struct {
cb *CircuitBreaker
}
// NewCircuitBreaker returns a new CircuitBreaker configured with the given Settings.
func NewCircuitBreaker(st Settings) *CircuitBreaker {
cb := new(CircuitBreaker)
cb.name = st.Name
cb.interval = st.Interval
cb.onStateChange = st.OnStateChange
if st.MaxRequests == 0 {
cb.maxRequests = 1
} else {
cb.maxRequests = st.MaxRequests
}
if st.Timeout == 0 {
cb.timeout = defaultTimeout
} else {
cb.timeout = st.Timeout
}
if st.ReadyToTrip == nil {
cb.readyToTrip = defaultReadyToTrip
} else {
cb.readyToTrip = st.ReadyToTrip
}
cb.toNewGeneration(time.Now())
return cb
}
// NewTwoStepCircuitBreaker returns a new TwoStepCircuitBreaker configured with the given Settings.
func NewTwoStepCircuitBreaker(st Settings) *TwoStepCircuitBreaker {
return &TwoStepCircuitBreaker{
cb: NewCircuitBreaker(st),
}
}
const defaultTimeout = time.Duration(60) * time.Second
func defaultReadyToTrip(counts Counts) bool {
return counts.ConsecutiveFailures > 5
}
// Name returns the name of the CircuitBreaker.
func (cb *CircuitBreaker) Name() string {
return cb.name
}
// State returns the current state of the CircuitBreaker.
func (cb *CircuitBreaker) State() State {
cb.mutex.Lock()
defer cb.mutex.Unlock()
now := time.Now()
state, _ := cb.currentState(now)
return state
}
// Execute runs the given request if the CircuitBreaker accepts it.
// Execute returns an error instantly if the CircuitBreaker rejects the request.
// Otherwise, Execute returns the result of the request.
// If a panic occurs in the request, the CircuitBreaker handles it as an error
// and causes the same panic again.
func (cb *CircuitBreaker) Execute(req func() (interface{}, error)) (interface{}, error) {
generation, err := cb.beforeRequest()
if err != nil {
return nil, err
}
defer func() {
e := recover()
if e != nil {
cb.afterRequest(generation, false)
panic(e)
}
}()
result, err := req()
cb.afterRequest(generation, err == nil)
return result, err
}
// Name returns the name of the TwoStepCircuitBreaker.
func (tscb *TwoStepCircuitBreaker) Name() string {
return tscb.cb.Name()
}
// State returns the current state of the TwoStepCircuitBreaker.
func (tscb *TwoStepCircuitBreaker) State() State {
return tscb.cb.State()
}
// Allow checks if a new request can proceed. It returns a callback that should be used to
// register the success or failure in a separate step. If the circuit breaker doesn't allow
// requests, it returns an error.
func (tscb *TwoStepCircuitBreaker) Allow() (done func(success bool), err error) {
generation, err := tscb.cb.beforeRequest()
if err != nil {
return nil, err
}
return func(success bool) {
tscb.cb.afterRequest(generation, success)
}, nil
}
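// Usage sketch for the two-step API (illustrative only; callUpstream is a
// hypothetical caller-side function, not part of this package):
//
//	tscb := NewTwoStepCircuitBreaker(Settings{Name: "upstream"})
//	done, err := tscb.Allow()
//	if err != nil {
//		return err // breaker is open, or half-open and already saturated
//	}
//	resp, upstreamErr := callUpstream()
//	done(upstreamErr == nil) // report the outcome as the second step
//	_ = resp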
// beforeRequest returns the current generation, or an error if the breaker
// rejects the request: ErrOpenState while open, or ErrTooManyRequests when
// half-open and maxRequests have already been admitted in this generation.
func (cb *CircuitBreaker) beforeRequest() (uint64, error) {
cb.mutex.Lock()
defer cb.mutex.Unlock()
now := time.Now()
state, generation := cb.currentState(now)
if state == StateOpen {
return generation, ErrOpenState
} else if state == StateHalfOpen && cb.counts.Requests >= cb.maxRequests {
return generation, ErrTooManyRequests
}
cb.counts.onRequest()
return generation, nil
}
// afterRequest records the result of a request, unless the generation changed
// while the request was running, in which case the result is discarded.
func (cb *CircuitBreaker) afterRequest(before uint64, success bool) {
cb.mutex.Lock()
defer cb.mutex.Unlock()
now := time.Now()
state, generation := cb.currentState(now)
if generation != before {
return
}
if success {
cb.onSuccess(state, now)
} else {
cb.onFailure(state, now)
}
}
func (cb *CircuitBreaker) onSuccess(state State, now time.Time) {
switch state {
case StateClosed:
cb.counts.onSuccess()
case StateHalfOpen:
cb.counts.onSuccess()
if cb.counts.ConsecutiveSuccesses >= cb.maxRequests {
cb.setState(StateClosed, now)
}
}
}
func (cb *CircuitBreaker) onFailure(state State, now time.Time) {
switch state {
case StateClosed:
cb.counts.onFailure()
if cb.readyToTrip(cb.counts) {
cb.setState(StateOpen, now)
}
case StateHalfOpen:
cb.setState(StateOpen, now)
}
}
// currentState returns the state and generation at the given time, rolling over
// to a new generation when a closed-state interval expires and moving from open
// to half-open once the timeout has passed.
func (cb *CircuitBreaker) currentState(now time.Time) (State, uint64) {
switch cb.state {
case StateClosed:
if !cb.expiry.IsZero() && cb.expiry.Before(now) {
cb.toNewGeneration(now)
}
case StateOpen:
if cb.expiry.Before(now) {
cb.setState(StateHalfOpen, now)
}
}
return cb.state, cb.generation
}
// setState switches to the given state, starts a new generation and invokes the
// OnStateChange callback, if any.
func (cb *CircuitBreaker) setState(state State, now time.Time) {
if cb.state == state {
return
}
prev := cb.state
cb.state = state
cb.toNewGeneration(now)
if cb.onStateChange != nil {
cb.onStateChange(cb.name, prev, state)
}
}
// toNewGeneration starts a new generation: it clears the counts and sets the
// expiry based on the current state (Interval when closed, Timeout when open,
// none when half-open).
func (cb *CircuitBreaker) toNewGeneration(now time.Time) {
cb.generation++
cb.counts.clear()
var zero time.Time
switch cb.state {
case StateClosed:
if cb.interval == 0 {
cb.expiry = zero
} else {
cb.expiry = now.Add(cb.interval)
}
case StateOpen:
cb.expiry = now.Add(cb.timeout)
default: // StateHalfOpen
cb.expiry = zero
}
}

@ -131,7 +131,7 @@ github.com/blang/semver
# github.com/bmatcuk/doublestar v1.2.2
## explicit
github.com/bmatcuk/doublestar
# github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b
# github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab
github.com/bradfitz/gomemcache/memcache
# github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee
## explicit
@ -158,7 +158,7 @@ github.com/coreos/go-systemd/journal
github.com/coreos/go-systemd/sdjournal
# github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f
github.com/coreos/pkg/capnslog
# github.com/cortexproject/cortex v1.3.1-0.20200824064113-84c06a83898e
# github.com/cortexproject/cortex v1.3.1-0.20200901164804-97d13c1ef16f
## explicit
github.com/cortexproject/cortex/pkg/alertmanager
github.com/cortexproject/cortex/pkg/alertmanager/alerts
@ -789,6 +789,8 @@ github.com/shurcooL/vfsgen
github.com/sirupsen/logrus
# github.com/soheilhy/cmux v0.1.4
github.com/soheilhy/cmux
# github.com/sony/gobreaker v0.4.1
github.com/sony/gobreaker
# github.com/spf13/afero v1.2.2
github.com/spf13/afero
github.com/spf13/afero/mem
@ -1452,3 +1454,4 @@ sigs.k8s.io/yaml
# github.com/satori/go.uuid => github.com/satori/go.uuid v1.2.0
# github.com/gocql/gocql => github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85
# google.golang.org/grpc => google.golang.org/grpc v1.29.1
# github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab
