Extract scheduler queue metrics into separate field

This makes it easier to pass the metrics to the scheduler at instantiation
(see the usage sketch after the changed-file list below).

Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
pull/8898/head
parent 99acb9b345
commit 3344d59fb5
Changed files (lines changed):

 19  pkg/lokifrontend/frontend/v1/frontend.go
  8  pkg/lokifrontend/frontend/v1/frontend_test.go
 11  pkg/scheduler/queue/dequeue_qos_test.go
 28  pkg/scheduler/queue/metrics.go
 15  pkg/scheduler/queue/queue.go
 16  pkg/scheduler/queue/queue_test.go
 17  pkg/scheduler/scheduler.go
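
For orientation, here is a minimal sketch of the call pattern this commit introduces. The queue.NewMetrics and queue.NewRequestQueue signatures and the RegisterQuerierConnection call are taken from the diff below; the import path, the registry setup, and the constants are assumptions for illustration only.

package main

import (
    "github.com/prometheus/client_golang/prometheus"

    // Assumed import path; the package changed here lives at pkg/scheduler/queue.
    "github.com/grafana/loki/pkg/scheduler/queue"
)

func main() {
    reg := prometheus.NewRegistry() // illustrative registry

    // Before this change, callers built the GaugeVec and CounterVec themselves and
    // passed them to NewRequestQueue one by one. Now a single *queue.Metrics is
    // created per component ("query_frontend" or "query_scheduler") and handed
    // over as a whole, so the same value can also be shared with the scheduler.
    qm := queue.NewMetrics("query_scheduler", reg)
    rq := queue.NewRequestQueue(100, 0, qm) // 100 outstanding per tenant, no forget delay

    rq.RegisterQuerierConnection("querier-1")
}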

pkg/lokifrontend/frontend/v1/frontend.go

@@ -80,18 +80,13 @@ type request struct {
 // New creates a new frontend. Frontend implements service, and must be started and stopped.
 func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) {
+    queueMetrics := queue.NewMetrics("query_frontend", registerer)
     f := &Frontend{
-        cfg:    cfg,
-        log:    log,
-        limits: limits,
-        queueLength: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
-            Name: "cortex_query_frontend_queue_length",
-            Help: "Number of queries in the queue.",
-        }, []string{"user"}),
-        discardedRequests: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
-            Name: "cortex_query_frontend_discarded_requests_total",
-            Help: "Total number of query requests discarded.",
-        }, []string{"user"}),
+        cfg:               cfg,
+        log:               log,
+        limits:            limits,
+        queueLength:       queueMetrics.QueueLength,
+        discardedRequests: queueMetrics.DiscardedRequests,
         queueDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
             Name: "cortex_query_frontend_queue_duration_seconds",
             Help: "Time spend by requests queued.",
@@ -99,7 +94,7 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) {
         }),
     }
-    f.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, f.queueLength, f.discardedRequests)
+    f.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, queueMetrics)
     f.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(f.cleanupInactiveUserMetrics)

     var err error

pkg/lokifrontend/frontend/v1/frontend_test.go

@@ -126,12 +126,10 @@ func TestFrontendCheckReady(t *testing.T) {
         {"no url, no clients is not ready", 0, "not ready: number of queriers connected to query-frontend is 0", false},
     } {
         t.Run(tt.name, func(t *testing.T) {
+            qm := queue.NewMetrics("query_frontend", nil)
             f := &Frontend{
-                log: log.NewNopLogger(),
-                requestQueue: queue.NewRequestQueue(5, 0,
-                    prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-                    prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-                ),
+                log:          log.NewNopLogger(),
+                requestQueue: queue.NewRequestQueue(5, 0, qm),
             }
             for i := 0; i < tt.connectedClients; i++ {
                 f.requestQueue.RegisterQuerierConnection("test")

pkg/scheduler/queue/dequeue_qos_test.go

@@ -9,7 +9,6 @@ import (
     "testing"
     "time"

-    "github.com/prometheus/client_golang/prometheus"
     "github.com/stretchr/testify/require"
     "go.uber.org/atomic"
 )
@@ -57,10 +56,7 @@ func BenchmarkQueryFairness(t *testing.B) {
     for _, useActor := range []bool{false, true} {
         t.Run(fmt.Sprintf("use hierarchical queues = %v", useActor), func(t *testing.B) {
-            requestQueue := NewRequestQueue(1024, 0,
-                prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-                prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-            )
+            requestQueue := NewRequestQueue(1024, 0, NewMetrics("query_scheduler", nil))
             enqueueRequestsForActor(t, []string{}, useActor, requestQueue, numSubRequestsActorA, 50*time.Millisecond)
             enqueueRequestsForActor(t, []string{"a"}, useActor, requestQueue, numSubRequestsActorA, 100*time.Millisecond)
             enqueueRequestsForActor(t, []string{"b"}, useActor, requestQueue, numSubRequestsActorB, 50*time.Millisecond)
@@ -135,10 +131,7 @@ func TestQueryFairnessAcrossSameLevel(t *testing.T) {
           456: [210]
     **/
-    requestQueue := NewRequestQueue(1024, 0,
-        prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-        prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-    )
+    requestQueue := NewRequestQueue(1024, 0, NewMetrics("query_scheduler", nil))
     _ = requestQueue.Enqueue("tenant1", []string{}, r(0), 0, nil)
     _ = requestQueue.Enqueue("tenant1", []string{}, r(1), 0, nil)
     _ = requestQueue.Enqueue("tenant1", []string{}, r(2), 0, nil)

pkg/scheduler/queue/metrics.go (new file)

@@ -0,0 +1,28 @@
+package queue
+
+import (
+    "github.com/prometheus/client_golang/prometheus"
+    "github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+type Metrics struct {
+    QueueLength       *prometheus.GaugeVec   // Per tenant and reason.
+    DiscardedRequests *prometheus.CounterVec // Per tenant.
+}
+
+func NewMetrics(subsystem string, registerer prometheus.Registerer) *Metrics {
+    return &Metrics{
+        QueueLength: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
+            Namespace: "cortex",
+            Subsystem: subsystem,
+            Name:      "queue_length",
+            Help:      "Number of queries in the queue.",
+        }, []string{"user"}),
+        DiscardedRequests: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
+            Namespace: "cortex",
+            Subsystem: subsystem,
+            Name:      "discarded_requests_total",
+            Help:      "Total number of query requests discarded.",
+        }, []string{"user"}),
+    }
+}
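
A side note on the nil registerer that the tests and benchmarks in this change pass to NewMetrics: promauto.With tolerates a nil prometheus.Registerer and then returns collectors that are not registered anywhere, so NewMetrics("query_scheduler", nil) yields a throwaway Metrics value with no registry involved. A hypothetical test-style sketch (the test name and label value are illustrative, not part of this commit):

package queue

import "testing"

// Illustrative only: demonstrates why a nil registerer is safe in the tests
// touched by this commit. promauto.With(nil) creates collectors that are not
// registered with any registry, so they can be incremented freely and dropped.
func TestNewMetricsWithNilRegisterer(t *testing.T) {
    m := NewMetrics("query_scheduler", nil)
    m.QueueLength.WithLabelValues("tenant-a").Inc()
    m.DiscardedRequests.WithLabelValues("tenant-a").Inc()
}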

pkg/scheduler/queue/queue.go

@@ -7,7 +7,6 @@ import (
     "github.com/grafana/dskit/services"
     "github.com/pkg/errors"
-    "github.com/prometheus/client_golang/prometheus"
     "go.uber.org/atomic"
 )
@@ -58,16 +57,14 @@ type RequestQueue struct {
     queues  *tenantQueues
     stopped bool

-    queueLength       *prometheus.GaugeVec   // Per tenant and reason.
-    discardedRequests *prometheus.CounterVec // Per tenant.
+    metrics *Metrics
 }

-func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, queueLength *prometheus.GaugeVec, discardedRequests *prometheus.CounterVec) *RequestQueue {
+func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, metrics *Metrics) *RequestQueue {
     q := &RequestQueue{
         queues:                  newTenantQueues(maxOutstandingPerTenant, forgetDelay),
         connectedQuerierWorkers: atomic.NewInt32(0),
-        queueLength:             queueLength,
-        discardedRequests:       discardedRequests,
+        metrics:                 metrics,
     }

     q.cond = contextCond{Cond: sync.NewCond(&q.mtx)}
@@ -97,7 +94,7 @@ func (q *RequestQueue) Enqueue(tenant string, path []string, req Request, maxQue
     select {
     case queue.Chan() <- req:
-        q.queueLength.WithLabelValues(tenant).Inc()
+        q.metrics.QueueLength.WithLabelValues(tenant).Inc()
         q.cond.Broadcast()
         // Call this function while holding a lock. This guarantees that no querier can fetch the request before function returns.
         if successFn != nil {
@@ -105,7 +102,7 @@ func (q *RequestQueue) Enqueue(tenant string, path []string, req Request, maxQue
         }
         return nil
     default:
-        q.discardedRequests.WithLabelValues(tenant).Inc()
+        q.metrics.DiscardedRequests.WithLabelValues(tenant).Inc()
         return ErrTooManyRequests
     }
 }
@@ -148,7 +145,7 @@ FindQueue:
             q.queues.deleteQueue(tenant)
         }

-        q.queueLength.WithLabelValues(tenant).Dec()
+        q.metrics.QueueLength.WithLabelValues(tenant).Dec()

         // Tell close() we've processed a request.
         q.cond.Broadcast()

pkg/scheduler/queue/queue_test.go

@@ -9,7 +9,6 @@ import (
     "time"

     "github.com/grafana/dskit/services"
-    "github.com/prometheus/client_golang/prometheus"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
 )
@@ -46,10 +45,7 @@ func BenchmarkGetNextRequest(b *testing.B) {
     queues := make([]*RequestQueue, 0, b.N)

     for n := 0; n < b.N; n++ {
-        queue := NewRequestQueue(maxOutstandingPerTenant, 0,
-            prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-            prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-        )
+        queue := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics("query_scheduler", nil))
         queues = append(queues, queue)

         for ix := 0; ix < queriers; ix++ {
@@ -107,10 +103,7 @@ func BenchmarkQueueRequest(b *testing.B) {
     requests := make([]string, 0, numTenants)

     for n := 0; n < b.N; n++ {
-        q := NewRequestQueue(maxOutstandingPerTenant, 0,
-            prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-            prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-        )
+        q := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics("query_scheduler", nil))

         for ix := 0; ix < queriers; ix++ {
             q.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix))
@@ -140,10 +133,7 @@ func BenchmarkQueueRequest(b *testing.B) {
 func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBecauseQuerierHasBeenForgotten(t *testing.T) {
     const forgetDelay = 3 * time.Second

-    queue := NewRequestQueue(1, forgetDelay,
-        prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
-        prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
-    )
+    queue := NewRequestQueue(1, forgetDelay, NewMetrics("query_scheduler", nil))

     // Start the queue service.
     ctx := context.Background()

pkg/scheduler/scheduler.go

@@ -140,6 +140,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
 // NewScheduler creates a new Scheduler.
 func NewScheduler(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Scheduler, error) {
+    queueMetrics := queue.NewMetrics("query_scheduler", registerer)
     s := &Scheduler{
         cfg:    cfg,
         log:    log,
@@ -147,19 +148,11 @@ func NewScheduler(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Scheduler, error) {
         pendingRequests:    map[requestKey]*schedulerRequest{},
         connectedFrontends: map[string]*connectedFrontend{},
-    }
-
-    s.queueLength = promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
-        Name: "cortex_query_scheduler_queue_length",
-        Help: "Number of queries in the queue.",
-    }, []string{"user"})
-    s.discardedRequests = promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
-        Name: "cortex_query_scheduler_discarded_requests_total",
-        Help: "Total number of query requests discarded.",
-    }, []string{"user"})
-    s.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, s.queueLength, s.discardedRequests)
+        queueLength:        queueMetrics.QueueLength,
+        discardedRequests:  queueMetrics.DiscardedRequests,
+        requestQueue:       queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, queueMetrics),
+    }

     s.queueDuration = promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
         Name: "cortex_query_scheduler_queue_duration_seconds",