package queue

import (
	"context"
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/grafana/dskit/services"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)
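
// BenchmarkGetNextRequest measures how fast requests can be dequeued from a
// queue pre-filled with maxOutstandingPerTenant requests for each of
// numTenants tenants, spread across a fixed set of queriers.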
func BenchmarkGetNextRequest(b *testing.B) {
	const maxOutstandingPerTenant = 2
	const numTenants = 50
	const queriers = 5

	queues := make([]*RequestQueue, 0, b.N)

	for n := 0; n < b.N; n++ {
		queue := NewRequestQueue(maxOutstandingPerTenant, 0,
			prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
			prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
		)
		queues = append(queues, queue)

		for ix := 0; ix < queriers; ix++ {
			queue.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix))
		}

		for i := 0; i < maxOutstandingPerTenant; i++ {
			for j := 0; j < numTenants; j++ {
				userID := strconv.Itoa(j)

				err := queue.EnqueueRequest(userID, "request", 0, nil)
				if err != nil {
					b.Fatal(err)
				}
			}
		}
	}

	ctx := context.Background()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		idx := FirstUser()
		for j := 0; j < maxOutstandingPerTenant*numTenants; j++ {
			querier := ""
		b:
			// Find a querier with at least one request to avoid blocking in getNextRequestForQuerier.
			for _, q := range queues[i].queues.userQueues {
				for qid := range q.queriers {
					querier = qid
					break b
				}
			}

			_, nidx, err := queues[i].GetNextRequestForQuerier(ctx, idx, querier)
			if err != nil {
				b.Fatal(err)
			}
			idx = nidx
		}
	}
}
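
// BenchmarkQueueRequest measures the cost of enqueueing requests for many
// tenants up to the per-tenant outstanding limit.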
func BenchmarkQueueRequest(b *testing.B) {
	const maxOutstandingPerTenant = 2
	const numTenants = 50
	const queriers = 5

	queues := make([]*RequestQueue, 0, b.N)
	users := make([]string, 0, numTenants)
	requests := make([]string, 0, numTenants)

	for n := 0; n < b.N; n++ {
		q := NewRequestQueue(maxOutstandingPerTenant, 0,
			prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
			prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
		)

		for ix := 0; ix < queriers; ix++ {
			q.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix))
		}

		queues = append(queues, q)

		for j := 0; j < numTenants; j++ {
			requests = append(requests, fmt.Sprintf("%d-%d", n, j))
			users = append(users, strconv.Itoa(j))
		}
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for i := 0; i < maxOutstandingPerTenant; i++ {
			for j := 0; j < numTenants; j++ {
				err := queues[n].EnqueueRequest(users[j], requests[j], 0, nil)
				if err != nil {
					b.Fatal(err)
				}
			}
		}
	}
}
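
// This test verifies that a request for a tenant sharded to a crashed querier
// is resharded to another querier once the forget delay has expired.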
func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBecauseQuerierHasBeenForgotten(t *testing.T) {
	const forgetDelay = 3 * time.Second

	queue := NewRequestQueue(1, forgetDelay,
		prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
		prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}))

	// Start the queue service.
	ctx := context.Background()
	require.NoError(t, services.StartAndAwaitRunning(ctx, queue))
	t.Cleanup(func() {
		require.NoError(t, services.StopAndAwaitTerminated(ctx, queue))
	})

	// Two queriers connect.
	queue.RegisterQuerierConnection("querier-1")
	queue.RegisterQuerierConnection("querier-2")

	// Querier-2 waits for a new request.
	querier2wg := sync.WaitGroup{}
	querier2wg.Add(1)
	go func() {
		defer querier2wg.Done()
		_, _, err := queue.GetNextRequestForQuerier(ctx, FirstUser(), "querier-2")
		require.NoError(t, err)
	}()

	// Querier-1 crashes (no graceful shutdown notification).
	queue.UnregisterQuerierConnection("querier-1")

	// Enqueue a request from a user which would be assigned to querier-1.
	// NOTE: the "user-1" hash falls in the querier-1 shard.
	require.NoError(t, queue.EnqueueRequest("user-1", "request", 1, nil))

	startTime := time.Now()
	querier2wg.Wait()
	waitTime := time.Since(startTime)

	// We expect querier-2 to get the request only after the querier-1 forget delay has passed.
	assert.GreaterOrEqual(t, waitTime.Milliseconds(), forgetDelay.Milliseconds())
}
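
// TestContextCond verifies that contextCond.Wait wakes up both on Broadcast
// and on context cancellation, without missed wakeups.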
func TestContextCond(t *testing.T) {
	t.Run("wait until broadcast", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}

		doneWaiting := make(chan struct{})

		mtx.Lock()
		go func() {
			cond.Wait(context.Background())
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet")

		cond.Broadcast()
		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after broadcast")
	})

	t.Run("wait until context deadline", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}
		doneWaiting := make(chan struct{})

		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		mtx.Lock()
		go func() {
			cond.Wait(ctx)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet and didn't cancel the context")

		cancel()
		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context")
	})

	t.Run("wait on already canceled context", func(t *testing.T) {
		// This test represents the racy real-world scenario: we don't know whether the
		// goroutine will start waiting before the broadcast triggered by the context cancellation.
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}
		doneWaiting := make(chan struct{})

		alreadyCanceledContext, cancel := context.WithCancel(context.Background())
		cancel()

		mtx.Lock()
		go func() {
			cond.Wait(alreadyCanceledContext)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context")
	})

	t.Run("wait on already canceled context, but it takes a while to wait", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{
			Cond: sync.NewCond(mtx),
			testHookBeforeWaiting: func() {
				// This makes the waiting goroutine so slow that our Wait(ctx) will need to broadcast once it sees it waiting.
				time.Sleep(250 * time.Millisecond)
			},
		}
		doneWaiting := make(chan struct{})

		alreadyCanceledContext, cancel := context.WithCancel(context.Background())
		cancel()

		mtx.Lock()
		go func() {
			cond.Wait(alreadyCanceledContext)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanReceived(t, doneWaiting, time.Second, "cond.Wait did not return within a second on an already canceled context")
	})

	t.Run("lots of goroutines waiting at the same time, none of them misses its broadcast from cancel", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{
			Cond: sync.NewCond(mtx),
			testHookBeforeWaiting: func() {
				// Wait just a little bit to create every goroutine.
				time.Sleep(time.Millisecond)
			},
		}
		const goroutines = 100

		doneWaiting := make(chan struct{}, goroutines)
		release := make(chan struct{})

		ctx, cancel := context.WithCancel(context.Background())

		for i := 0; i < goroutines; i++ {
			go func() {
				<-release

				mtx.Lock()
				cond.Wait(ctx)
				mtx.Unlock()

				doneWaiting <- struct{}{}
			}()
		}
		go func() {
			<-release
			cancel()
		}()

		close(release)

		assert.Eventually(t, func() bool {
			return len(doneWaiting) == goroutines
		}, time.Second, 10*time.Millisecond)
	})
}
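
// assertChanReceived fails the test if nothing is received on c within timeout.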
func assertChanReceived(t *testing.T, c chan struct{}, timeout time.Duration, msg string) {
	t.Helper()

	select {
	case <-c:
	case <-time.After(timeout):
		t.Fatal(msg)
	}
}
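
// assertChanNotReceived fails the test if anything is received on c within wait.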
func assertChanNotReceived(t *testing.T, c chan struct{}, wait time.Duration, msg string, args ...interface{}) {
	t.Helper()

	select {
	case <-c:
		t.Fatalf(msg, args...)
	case <-time.After(wait):
		// OK!
	}
}