package queue

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"go.uber.org/atomic"
)

const (
	// How frequently to check for disconnected queriers that should be forgotten.
	forgetCheckPeriod = 5 * time.Second
	anyQueue          = ""
)

var (
	ErrTooManyRequests = errors.New("too many outstanding requests")
	ErrStopped         = errors.New("queue is stopped")
	ErrQueueWasRemoved = errors.New("the queue has been removed or moved to another position")
)

// QueueIndex is an opaque type that allows resuming iteration over tenants
// between successive calls to RequestQueue.Dequeue.
type QueueIndex int // nolint:revive

// StartIndexWithLocalQueue is the index of the queue that starts iteration over local and sub queues.
var StartIndexWithLocalQueue QueueIndex = -2

// StartIndex is the index of the queue that starts iteration over sub queues.
var StartIndex QueueIndex = -1

// ReuseLastIndex modifies the index so that iteration restarts at the tenant
// whose queue was returned last.
func (ui QueueIndex) ReuseLastIndex() QueueIndex {
	if ui < StartIndex {
		return ui
	}
	return ui - 1
}

type Limits interface {
	// MaxConsumers returns the max number of consumers to use per tenant, or 0 to allow all consumers to consume from the queue.
	MaxConsumers(user string, allConsumers int) int
}
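
// A minimal sketch of a Limits implementation. staticLimits is a hypothetical
// name used only for illustration; real deployments derive this from per-tenant
// overrides:
//
//	type staticLimits struct{ maxConsumers int }
//
//	// MaxConsumers ignores the tenant and always returns the same cap.
//	func (l staticLimits) MaxConsumers(_ string, _ int) int { return l.maxConsumers }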

// Request is an item stored in the queue.
type Request any

// RequestChannel is a channel that queues Requests.
type RequestChannel chan Request

// RequestQueue holds incoming requests in per-tenant queues. It also assigns each tenant a specified number of queriers,
// and when a querier asks for the next request to handle (using Dequeue), it returns requests
// in a fair fashion.
type RequestQueue struct {
	services.Service

	connectedConsumers *atomic.Int32

	mtx     sync.Mutex
	cond    contextCond // Notified when a request is enqueued or dequeued, or when a querier disconnects.
	queues  *tenantQueues
	stopped bool

	metrics *Metrics
	pool    *SlicePool[Request]
}

func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, limits Limits, metrics *Metrics) *RequestQueue {
	q := &RequestQueue{
		queues:             newTenantQueues(maxOutstandingPerTenant, forgetDelay, limits),
		connectedConsumers: atomic.NewInt32(0),
		metrics:            metrics,
		pool:               NewSlicePool[Request](1<<6, 1<<10, 2), // Buckets are [64, 128, 256, 512, 1024].
	}

	q.cond = contextCond{Cond: sync.NewCond(&q.mtx)}
	q.Service = services.NewTimerService(forgetCheckPeriod, nil, q.forgetDisconnectedConsumers, q.stopping).WithName("request queue")

	return q
}
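
// A minimal construction sketch. The metrics value and the staticLimits type
// are assumptions for illustration; the Metrics constructor lives elsewhere
// in this package:
//
//	q := NewRequestQueue(100, time.Minute, staticLimits{maxConsumers: 0}, metrics)
//	if err := services.StartAndAwaitRunning(context.Background(), q); err != nil {
//		// Handle startup failure.
//	}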

// Enqueue puts the request into the queue.
// If the request is successfully enqueued, successFn is called with the lock held, before any querier can receive the request.
func (q *RequestQueue) Enqueue(tenant string, path []string, req Request, successFn func()) error {
	q.mtx.Lock()
	defer q.mtx.Unlock()

	if q.stopped {
		return ErrStopped
	}

	queue, err := q.queues.getOrAddQueue(tenant, path)
	if err != nil {
		return fmt.Errorf("no queue found: %w", err)
	}

	// Optimistically increase the queue counter for the tenant instead of doing separate
	// get and set operations, because _most_ of the time the increased value is
	// smaller than the max queue length.
	// We need to keep track of the queue length separately because the size of the
	// buffered channel is the same across all sub-queues, which would allow
	// enqueuing more items than are allowed at the tenant level.
	queueLen := q.queues.perUserQueueLen.Inc(tenant)
	if queueLen > q.queues.maxUserQueueSize {
		q.metrics.discardedRequests.WithLabelValues(tenant).Inc()
		// Decrement, because we already optimistically increased the counter.
		q.queues.perUserQueueLen.Dec(tenant)
		return ErrTooManyRequests
	}

	select {
	case queue.Chan() <- req:
		q.metrics.queueLength.WithLabelValues(tenant).Inc()
		q.metrics.enqueueCount.WithLabelValues(tenant, fmt.Sprint(len(path))).Inc()
		q.cond.Broadcast()
		// Call this function while holding the lock. This guarantees that no querier can fetch the request before the function returns.
		if successFn != nil {
			successFn()
		}
		return nil
	default:
		q.metrics.discardedRequests.WithLabelValues(tenant).Inc()
		// Decrement, because we already optimistically increased the counter.
		q.queues.perUserQueueLen.Dec(tenant)
		return ErrTooManyRequests
	}
}
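
// A minimal enqueue sketch. The tenant ID, path, and request value are
// assumptions for illustration:
//
//	err := q.Enqueue("tenant-1", []string{"sub-queue"}, req, func() {
//		// Runs while the queue lock is held, before any consumer can
//		// dequeue req; e.g. record that the request is now in flight.
//	})
//	if errors.Is(err, ErrTooManyRequests) {
//		// The tenant's queue is full: shed load or return 429 upstream.
//	}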

// ReleaseRequests returns items back to the slice pool.
// Must only be called in combination with DequeueMany().
func (q *RequestQueue) ReleaseRequests(items []Request) {
	q.pool.Put(items)
}

// DequeueMany consumes multiple items for a single tenant from the queue.
// It blocks until it dequeues at least one request, and continues reading
// until it collects `maxItems` requests or no more requests for this tenant are enqueued.
// The caller is responsible for returning the dequeued requests back to the
// pool by calling ReleaseRequests(items).
func (q *RequestQueue) DequeueMany(ctx context.Context, idx QueueIndex, consumerID string, maxItems int) ([]Request, QueueIndex, error) {
	items := q.pool.Get(maxItems)
	lastQueueName := anyQueue
	for {
		item, newIdx, newQueueName, isTenantQueueEmpty, err := q.dequeue(ctx, idx, lastQueueName, consumerID)
		if err != nil {
			// The consumer must receive the collected items if the tenant's queue was removed,
			// even if it has collected fewer than `maxItems` requests.
			if errors.Is(err, ErrQueueWasRemoved) {
				err = nil
			}
			return items, newIdx, err
		}
		lastQueueName = newQueueName
		items = append(items, item)
		idx = newIdx.ReuseLastIndex()
		if len(items) == maxItems || isTenantQueueEmpty {
			return items, newIdx, nil
		}
	}
}
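
// A minimal consumer-loop sketch for DequeueMany. The consumer ID, batch size,
// and handle function are assumptions for illustration:
//
//	idx := StartIndex
//	for {
//		items, newIdx, err := q.DequeueMany(ctx, idx, "consumer-1", 32)
//		for _, item := range items {
//			handle(item) // hypothetical request handler
//		}
//		q.ReleaseRequests(items) // return the batch to the slice pool
//		if err != nil {
//			break // ErrStopped or a context error
//		}
//		idx = newIdx
//	}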

// Dequeue finds the next tenant queue and takes the next request off of it. It blocks if there are no requests.
// By passing the tenant index from the previous call of this method, the querier guarantees that it iterates over all tenants fairly.
// Even if the consumer used QueueIndex.ReuseLastIndex to fetch a request from the same tenant's queue, there is no
// guarantee that the previously used queue is still at that position, because another consumer could have read
// the last request, after which the queue could be removed and another queue placed at that position.
func (q *RequestQueue) Dequeue(ctx context.Context, last QueueIndex, consumerID string) (Request, QueueIndex, error) {
	dequeue, queueIndex, _, _, err := q.dequeue(ctx, last, anyQueue, consumerID)
	return dequeue, queueIndex, err
}
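
// A minimal consumer-loop sketch for Dequeue. The consumer ID and handle
// function are assumptions for illustration:
//
//	idx := StartIndex
//	for {
//		req, newIdx, err := q.Dequeue(ctx, idx, "consumer-1")
//		if err != nil {
//			break // ErrStopped or a context error
//		}
//		idx = newIdx
//		handle(req) // hypothetical request handler
//	}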

func (q *RequestQueue) dequeue(ctx context.Context, last QueueIndex, wantedQueueName string, consumerID string) (Request, QueueIndex, string, bool, error) {
	q.mtx.Lock()
	defer q.mtx.Unlock()

	querierWait := false

FindQueue:
	// We need to wait if there are no tenants, or no pending requests for the given querier.
	// However, if `wantedQueueName` is not empty, the caller must not be blocked, because it wants to read exactly from that queue, not others.
	for (q.queues.hasNoTenantQueues() || querierWait) && ctx.Err() == nil && !q.stopped && wantedQueueName == anyQueue {
		querierWait = false
		q.cond.Wait(ctx)
	}

	// If the current consumer wants to read from a specific queue but has no queues available,
	// return an error to signal that the queue has already been removed.
	if q.queues.hasNoTenantQueues() && wantedQueueName != anyQueue {
		return nil, last, wantedQueueName, false, ErrQueueWasRemoved
	}

	if q.stopped {
		return nil, last, wantedQueueName, false, ErrStopped
	}

	if err := ctx.Err(); err != nil {
		return nil, last, wantedQueueName, false, err
	}

	queue, tenant, idx := q.queues.getNextQueueForConsumer(last, consumerID)
	last = idx
	if queue == nil {
		// It can happen that the consumer has other tenants' queues available,
		// which lets it pass the wait block above, while the queue at index `last+1`
		// has already been removed, for example because another consumer read the
		// last request from it. If this consumer wants to read from a specific
		// tenant queue, we must return ErrQueueWasRemoved.
		if wantedQueueName != anyQueue {
			return nil, last, wantedQueueName, false, ErrQueueWasRemoved
		}
		// Otherwise, if wantedQueueName is empty, this consumer goes back to the wait block;
		// since the `last` index has been updated, the next attempt will ask for the queue
		// at the new index returned by `getNextQueueForConsumer`.
		// There are no unexpired requests, so we can go back
		// and wait for more requests.
		querierWait = true
		goto FindQueue
	}

	if wantedQueueName != anyQueue && wantedQueueName != queue.Name() {
		// The consumer received a different tenant's queue: the wanted queue was already removed,
		// or another queue now sits at this index.
		return nil, last, queue.Name(), false, ErrQueueWasRemoved
	}
	// Pick the next request from the queue.
	request := queue.Dequeue()
	isTenantQueueEmpty := queue.Len() == 0
	if isTenantQueueEmpty {
		q.queues.deleteQueue(tenant)
	}

	q.queues.perUserQueueLen.Dec(tenant)
	q.metrics.queueLength.WithLabelValues(tenant).Dec()

	// Tell stopping() we've processed a request.
	q.cond.Broadcast()

	return request, last, queue.Name(), isTenantQueueEmpty, nil
}

func (q *RequestQueue) forgetDisconnectedConsumers(_ context.Context) error {
	q.mtx.Lock()
	defer q.mtx.Unlock()

	if q.queues.forgetDisconnectedConsumers(time.Now()) > 0 {
		// We need to notify the waiting goroutines, because removing some queriers
		// may have caused a resharding.
		q.cond.Broadcast()
	}

	return nil
}

func (q *RequestQueue) stopping(_ error) error {
	q.mtx.Lock()
	defer q.mtx.Unlock()

	for !q.queues.hasNoTenantQueues() && q.connectedConsumers.Load() > 0 {
		q.cond.Wait(context.Background())
	}

	// Only stop after dispatching enqueued requests.
	q.stopped = true

	// If there are still goroutines waiting in Dequeue, they get notified.
	q.cond.Broadcast()

	return nil
}

func (q *RequestQueue) RegisterConsumerConnection(querier string) {
	q.connectedConsumers.Inc()

	q.mtx.Lock()
	defer q.mtx.Unlock()
	q.queues.addConsumerToConnection(querier)
}

func (q *RequestQueue) UnregisterConsumerConnection(querier string) {
	q.connectedConsumers.Dec()

	q.mtx.Lock()
	defer q.mtx.Unlock()
	q.queues.removeConsumerConnection(querier, time.Now())
}

func (q *RequestQueue) NotifyConsumerShutdown(querierID string) {
	q.mtx.Lock()
	defer q.mtx.Unlock()
	q.queues.notifyQuerierShutdown(querierID)
}

func (q *RequestQueue) GetConnectedConsumersMetric() float64 {
	return float64(q.connectedConsumers.Load())
}
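
// A sketch of wiring this metric into Prometheus. The registry and metric name
// are assumptions for illustration; this file does not import the client library:
//
//	prometheus.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
//		Name: "query_scheduler_connected_consumers",
//	}, q.GetConnectedConsumersMetric))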

// contextCond is a *sync.Cond with the Wait() method overridden to support context-based waiting.
type contextCond struct {
	*sync.Cond

	// testHookBeforeWaiting is called before calling Cond.Wait() if it's not nil.
	// Yes, it's ugly, but the http package set the precedent:
	// https://github.com/golang/go/blob/6178d25fc0b28724b1b5aec2b1b74fc06d9294c7/src/net/http/client.go#L596-L601
	testHookBeforeWaiting func()
}

// Wait does c.Cond.Wait() but will also return if the provided context is done.
// All the documentation of sync.Cond.Wait() applies, but it's especially important to remember that the mutex of
// the cond must be held while Wait() is called (and the mutex will be held again once it returns).
func (c contextCond) Wait(ctx context.Context) {
	// The "condWait" goroutine does c.Cond.Wait() and signals through the condWait channel.
	condWait := make(chan struct{})
	go func() {
		if c.testHookBeforeWaiting != nil {
			c.testHookBeforeWaiting()
		}
		c.Cond.Wait()
		close(condWait)
	}()

	// The "waiting" goroutine signals that the condWait goroutine has started waiting.
	// Notice that a closed waiting channel implies that the goroutine above has started waiting
	// (because it has unlocked the mutex), but the other way around is not true:
	// - condWait may have unlocked and be waiting, but someone else locked the mutex faster than us:
	//   in this case that caller will eventually unlock, and we'll be able to enter here.
	// - condWait called Wait(), unlocked, received a broadcast, and locked again faster than we were able to lock here:
	//   in this case the condWait channel will be closed, and this goroutine will be waiting until we unlock.
	waiting := make(chan struct{})
	go func() {
		c.L.Lock()
		close(waiting)
		c.L.Unlock()
	}()

	select {
	case <-condWait:
		// We don't know whether the waiting goroutine is done or not, but we don't care:
		// it will be done once nobody is fighting for the mutex anymore.
	case <-ctx.Done():
		// In order to avoid leaking the condWait goroutine, we can send a broadcast.
		// Before sending the broadcast we need to make sure that the condWait goroutine is already waiting (or has already waited).
		select {
		case <-condWait:
			// No need to broadcast, as c.Cond.Wait() has returned already.
			return
		case <-waiting:
			// c.Cond.Wait() might still be waiting (or maybe not!), so we'll poke it just in case.
			c.Broadcast()
		}

		// Make sure we are not waiting anymore; we need to do that before returning, as the caller will need to unlock the mutex.
		<-condWait
	}
}
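
// A minimal usage sketch for contextCond, mirroring how dequeue waits above.
// The ready() predicate is a hypothetical condition to wait for:
//
//	var mtx sync.Mutex
//	cond := contextCond{Cond: sync.NewCond(&mtx)}
//
//	mtx.Lock()
//	for !ready() && ctx.Err() == nil {
//		cond.Wait(ctx) // returns with mtx held, even if ctx was canceled
//	}
//	mtx.Unlock()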