Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/queue/tenant_queues_test.go

527 lines
16 KiB

// SPDX-License-Identifier: AGPL-3.0-only
// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/scheduler/queue/user_queues_test.go
// Provenance-includes-license: Apache-2.0
// Provenance-includes-copyright: The Cortex Authors.
package queue
import (
"fmt"
"math"
"math/rand"
"sort"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestQueues(t *testing.T) {
uq := newTenantQueues(0, 0)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
uq.addConsumerToConnection("querier-1")
uq.addConsumerToConnection("querier-2")
q, u, lastUserIndex := uq.getNextQueueForConsumer(-1, "querier-1")
assert.Nil(t, q)
assert.Equal(t, "", u)
// Add queues: [one]
qOne := getOrAdd(t, uq, "one", 0)
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qOne, qOne)
// [one two]
qTwo := getOrAdd(t, uq, "two", 0)
assert.NotEqual(t, qOne, qTwo)
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qOne, qTwo, qOne)
confirmOrderForQuerier(t, uq, "querier-2", -1, qOne, qTwo, qOne)
// [one two three]
// confirm fifo by adding a third queue and iterating to it
qThree := getOrAdd(t, uq, "three", 0)
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qOne)
// Remove one: ["" two three]
uq.deleteQueue("one")
assert.NoError(t, isConsistent(uq))
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qTwo)
// "four" is added at the beginning of the list: [four two three]
qFour := getOrAdd(t, uq, "four", 0)
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qThree, qFour, qTwo, qThree)
// Remove two: [four "" three]
uq.deleteQueue("two")
assert.NoError(t, isConsistent(uq))
lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qFour, qThree, qFour)
// Remove three: [four]
uq.deleteQueue("three")
assert.NoError(t, isConsistent(uq))
// Remove four: []
uq.deleteQueue("four")
assert.NoError(t, isConsistent(uq))
q, _, _ = uq.getNextQueueForConsumer(lastUserIndex, "querier-1")
assert.Nil(t, q)
}
func TestQueuesOnTerminatingQuerier(t *testing.T) {
uq := newTenantQueues(0, 0)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
uq.addConsumerToConnection("querier-1")
uq.addConsumerToConnection("querier-2")
// Add queues: [one, two]
qOne := getOrAdd(t, uq, "one", 0)
qTwo := getOrAdd(t, uq, "two", 0)
confirmOrderForQuerier(t, uq, "querier-1", -1, qOne, qTwo, qOne, qTwo)
confirmOrderForQuerier(t, uq, "querier-2", -1, qOne, qTwo, qOne, qTwo)
// After notify shutdown for querier-2, it's expected to own no queue.
uq.notifyQuerierShutdown("querier-2")
q, u, _ := uq.getNextQueueForConsumer(-1, "querier-2")
assert.Nil(t, q)
assert.Equal(t, "", u)
// However, querier-1 still get queues because it's still running.
confirmOrderForQuerier(t, uq, "querier-1", -1, qOne, qTwo, qOne, qTwo)
// After disconnecting querier-2, it's expected to own no queue.
uq.removeConsumer("querier-2")
q, u, _ = uq.getNextQueueForConsumer(-1, "querier-2")
assert.Nil(t, q)
assert.Equal(t, "", u)
}
func TestQueuesWithQueriers(t *testing.T) {
uq := newTenantQueues(0, 0)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
queriers := 30
users := 1000
maxQueriersPerUser := 5
// Add some queriers.
for ix := 0; ix < queriers; ix++ {
qid := fmt.Sprintf("querier-%d", ix)
uq.addConsumerToConnection(qid)
// No querier has any queues yet.
q, u, _ := uq.getNextQueueForConsumer(-1, qid)
assert.Nil(t, q)
assert.Equal(t, "", u)
}
assert.NoError(t, isConsistent(uq))
// Add user queues.
for u := 0; u < users; u++ {
uid := fmt.Sprintf("user-%d", u)
getOrAdd(t, uq, uid, maxQueriersPerUser)
// Verify it has maxQueriersPerUser queriers assigned now.
qs := uq.mapping.GetByKey(uid).consumers
assert.Equal(t, maxQueriersPerUser, len(qs))
}
// After adding all users, verify results. For each querier, find out how many different users it handles,
// and compute mean and stdDev.
queriersMap := make(map[string]int)
for q := 0; q < queriers; q++ {
qid := fmt.Sprintf("querier-%d", q)
lastUserIndex := StartIndex
for {
_, _, newIx := uq.getNextQueueForConsumer(lastUserIndex, qid)
if newIx < lastUserIndex {
break
}
lastUserIndex = newIx
queriersMap[qid]++
}
}
mean := float64(0)
for _, c := range queriersMap {
mean += float64(c)
}
mean = mean / float64(len(queriersMap))
stdDev := float64(0)
for _, c := range queriersMap {
d := float64(c) - mean
stdDev += (d * d)
}
stdDev = math.Sqrt(stdDev / float64(len(queriersMap)))
t.Log("mean:", mean, "stddev:", stdDev)
assert.InDelta(t, users*maxQueriersPerUser/queriers, mean, 1)
assert.InDelta(t, stdDev, 0, mean*0.2)
}
func TestQueuesConsistency(t *testing.T) {
tests := map[string]struct {
forgetDelay time.Duration
}{
"without forget delay": {},
"with forget delay": {forgetDelay: time.Minute},
}
for testName, testData := range tests {
t.Run(testName, func(t *testing.T) {
uq := newTenantQueues(0, testData.forgetDelay)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
r := rand.New(rand.NewSource(time.Now().Unix()))
lastUserIndexes := map[string]QueueIndex{}
conns := map[string]int{}
for i := 0; i < 10000; i++ {
switch r.Int() % 6 {
case 0:
assert.NotNil(t, uq.getOrAddQueue(generateTenant(r), generateActor(r), 3))
case 1:
qid := generateQuerier(r)
_, _, luid := uq.getNextQueueForConsumer(lastUserIndexes[qid], qid)
lastUserIndexes[qid] = luid
case 2:
uq.deleteQueue(generateTenant(r))
case 3:
q := generateQuerier(r)
uq.addConsumerToConnection(q)
conns[q]++
case 4:
q := generateQuerier(r)
if conns[q] > 0 {
uq.removeConsumerConnection(q, time.Now())
conns[q]--
}
case 5:
q := generateQuerier(r)
uq.notifyQuerierShutdown(q)
}
assert.NoErrorf(t, isConsistent(uq), "last action %d", i)
}
})
}
}
func TestQueues_ForgetDelay(t *testing.T) {
const (
forgetDelay = time.Minute
maxQueriersPerUser = 1
numUsers = 100
)
now := time.Now()
uq := newTenantQueues(0, forgetDelay)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
// 3 queriers open 2 connections each.
for i := 1; i <= 3; i++ {
uq.addConsumerToConnection(fmt.Sprintf("querier-%d", i))
uq.addConsumerToConnection(fmt.Sprintf("querier-%d", i))
}
// Add user queues.
for i := 0; i < numUsers; i++ {
userID := fmt.Sprintf("user-%d", i)
getOrAdd(t, uq, userID, maxQueriersPerUser)
}
// We expect querier-1 to have some users.
querier1Users := getUsersByQuerier(uq, "querier-1")
require.NotEmpty(t, querier1Users)
// Gracefully shutdown querier-1.
uq.removeConsumerConnection("querier-1", now.Add(20*time.Second))
uq.removeConsumerConnection("querier-1", now.Add(21*time.Second))
uq.notifyQuerierShutdown("querier-1")
// We expect querier-1 has been removed.
assert.NotContains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
// We expect querier-1 users have been shuffled to other queriers.
for _, userID := range querier1Users {
assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID)
}
// Querier-1 reconnects.
uq.addConsumerToConnection("querier-1")
uq.addConsumerToConnection("querier-1")
// We expect the initial querier-1 users have got back to querier-1.
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
// Querier-1 abruptly terminates (no shutdown notification received).
uq.removeConsumerConnection("querier-1", now.Add(40*time.Second))
uq.removeConsumerConnection("querier-1", now.Add(41*time.Second))
// We expect querier-1 has NOT been removed.
assert.Contains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
// We expect the querier-1 users have not been shuffled to other queriers.
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
// Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet.
uq.forgetDisconnectedConsumers(now.Add(90 * time.Second))
assert.Contains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
// Try to forget disconnected queriers. This time querier-1 forget delay has passed.
uq.forgetDisconnectedConsumers(now.Add(105 * time.Second))
assert.NotContains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
// We expect querier-1 users have been shuffled to other queriers.
for _, userID := range querier1Users {
assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID)
}
}
func TestQueues_ForgetDelay_ShouldCorrectlyHandleQuerierReconnectingBeforeForgetDelayIsPassed(t *testing.T) {
const (
forgetDelay = time.Minute
maxQueriersPerUser = 1
numUsers = 100
)
now := time.Now()
uq := newTenantQueues(0, forgetDelay)
assert.NotNil(t, uq)
assert.NoError(t, isConsistent(uq))
// 3 queriers open 2 connections each.
for i := 1; i <= 3; i++ {
uq.addConsumerToConnection(fmt.Sprintf("querier-%d", i))
uq.addConsumerToConnection(fmt.Sprintf("querier-%d", i))
}
// Add user queues.
for i := 0; i < numUsers; i++ {
userID := fmt.Sprintf("user-%d", i)
getOrAdd(t, uq, userID, maxQueriersPerUser)
}
// We expect querier-1 to have some users.
querier1Users := getUsersByQuerier(uq, "querier-1")
require.NotEmpty(t, querier1Users)
// Querier-1 abruptly terminates (no shutdown notification received).
uq.removeConsumerConnection("querier-1", now.Add(40*time.Second))
uq.removeConsumerConnection("querier-1", now.Add(41*time.Second))
// We expect querier-1 has NOT been removed.
assert.Contains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
// We expect the querier-1 users have not been shuffled to other queriers.
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
// Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet.
uq.forgetDisconnectedConsumers(now.Add(90 * time.Second))
// Querier-1 reconnects.
uq.addConsumerToConnection("querier-1")
uq.addConsumerToConnection("querier-1")
assert.Contains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
// We expect the querier-1 users have not been shuffled to other queriers.
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
// Try to forget disconnected queriers far in the future, but there's no disconnected querier.
uq.forgetDisconnectedConsumers(now.Add(200 * time.Second))
assert.Contains(t, uq.consumers, "querier-1")
assert.NoError(t, isConsistent(uq))
for _, userID := range querier1Users {
assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID)
assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID)
}
}
func generateActor(r *rand.Rand) []string {
return []string{fmt.Sprint("actor-", r.Int()%10)}
}
func generateTenant(r *rand.Rand) string {
return fmt.Sprint("tenant-", r.Int()%5)
}
func generateQuerier(r *rand.Rand) string {
return fmt.Sprint("querier-", r.Int()%5)
}
func getOrAdd(t *testing.T, uq *tenantQueues, tenant string, maxQueriers int) Queue {
actor := []string{}
q := uq.getOrAddQueue(tenant, actor, maxQueriers)
assert.NotNil(t, q)
assert.NoError(t, isConsistent(uq))
assert.Equal(t, q, uq.getOrAddQueue(tenant, actor, maxQueriers))
return q
}
func confirmOrderForQuerier(t *testing.T, uq *tenantQueues, querier string, lastUserIndex QueueIndex, qs ...Queue) QueueIndex {
t.Helper()
var n Queue
for _, q := range qs {
n, _, lastUserIndex = uq.getNextQueueForConsumer(lastUserIndex, querier)
assert.Equal(t, q, n)
assert.NoError(t, isConsistent(uq))
}
return lastUserIndex
}
func isConsistent(uq *tenantQueues) error {
if len(uq.sortedConsumers) != len(uq.consumers) {
return fmt.Errorf("inconsistent number of sorted queriers and querier connections")
}
uc := 0
for _, u := range uq.mapping.Keys() {
q := uq.mapping.GetByKey(u)
if u != empty && q == nil {
return fmt.Errorf("user %s doesn't have queue", u)
}
if u == empty && q != nil {
return fmt.Errorf("user %s shouldn't have queue", u)
}
if u == empty {
continue
}
uc++
if q.maxQueriers == 0 && q.consumers != nil {
return fmt.Errorf("user %s has queriers, but maxQueriers=0", u)
}
if q.maxQueriers > 0 && len(uq.sortedConsumers) <= q.maxQueriers && q.consumers != nil {
return fmt.Errorf("user %s has queriers set despite not enough queriers available", u)
}
if q.maxQueriers > 0 && len(uq.sortedConsumers) > q.maxQueriers && len(q.consumers) != q.maxQueriers {
return fmt.Errorf("user %s has incorrect number of queriers, expected=%d, got=%d", u, len(q.consumers), q.maxQueriers)
}
}
if uc != uq.mapping.Len() {
return fmt.Errorf("inconsistent number of users list and user queues")
}
return nil
}
// getUsersByQuerier returns the list of users handled by the provided querierID.
func getUsersByQuerier(queues *tenantQueues, querierID string) []string {
var userIDs []string
for _, userID := range queues.mapping.Keys() {
q := queues.mapping.GetByKey(userID)
if q.consumers == nil {
// If it's nil then all queriers can handle this user.
userIDs = append(userIDs, userID)
continue
}
if _, ok := q.consumers[querierID]; ok {
userIDs = append(userIDs, userID)
}
}
return userIDs
}
func TestShuffleQueriers(t *testing.T) {
allQueriers := []string{"a", "b", "c", "d", "e"}
require.Nil(t, shuffleConsumersForTenants(12345, 10, allQueriers, nil))
require.Nil(t, shuffleConsumersForTenants(12345, len(allQueriers), allQueriers, nil))
r1 := shuffleConsumersForTenants(12345, 3, allQueriers, nil)
require.Equal(t, 3, len(r1))
// Same input produces same output.
r2 := shuffleConsumersForTenants(12345, 3, allQueriers, nil)
require.Equal(t, 3, len(r2))
require.Equal(t, r1, r2)
}
func TestShuffleQueriersCorrectness(t *testing.T) {
const queriersCount = 100
var allSortedQueriers []string
for i := 0; i < queriersCount; i++ {
allSortedQueriers = append(allSortedQueriers, fmt.Sprintf("%d", i))
}
sort.Strings(allSortedQueriers)
r := rand.New(rand.NewSource(time.Now().UnixNano()))
const tests = 1000
for i := 0; i < tests; i++ {
toSelect := r.Intn(queriersCount)
if toSelect == 0 {
toSelect = 3
}
selected := shuffleConsumersForTenants(r.Int63(), toSelect, allSortedQueriers, nil)
require.Equal(t, toSelect, len(selected))
sort.Strings(allSortedQueriers)
prevQuerier := ""
for _, q := range allSortedQueriers {
require.True(t, prevQuerier < q, "non-unique querier")
prevQuerier = q
ix := sort.SearchStrings(allSortedQueriers, q)
require.True(t, ix < len(allSortedQueriers) && allSortedQueriers[ix] == q, "selected querier is not between all queriers")
}
}
}