Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
loki/pkg/querier/worker/processor_manager.go

90 lines
1.9 KiB

package worker
import (
"context"
"strconv"
"sync"
"time"
"github.com/pkg/errors"
"go.uber.org/atomic"
"google.golang.org/grpc"
)
// notifyShutdownTimeout is the deadline applied to the context used when
// notifying the remote side that this manager is shutting down.
const notifyShutdownTimeout = 5 * time.Second
// processorManager owns the processor goroutines that share a single grpc
// connection and allows their number to be raised or lowered at runtime.
type processorManager struct {
	// p is the processor whose methods are invoked with conn and address,
	// both for running query streams and for the shutdown notification.
	p       processor
	conn    *grpc.ClientConn
	address string

	// ctx is the parent of every per-goroutine context, so cancelling it
	// cancels all goroutines at once.
	ctx context.Context
	// wg is incremented for each goroutine started and waited on in stop().
	wg sync.WaitGroup

	// cancelsMu guards cancels, which holds one cancel function per
	// goroutine that has been started and not yet asked to stop.
	cancelsMu sync.Mutex
	cancels   []context.CancelFunc

	// currentProcessors is incremented when a goroutine starts running and
	// decremented when it returns.
	currentProcessors *atomic.Int32
}
// newProcessorManager returns a processorManager bound to the given parent
// context, processor, grpc connection and address. It starts no goroutines;
// the running-processor counter is initialised to zero.
func newProcessorManager(ctx context.Context, p processor, conn *grpc.ClientConn, address string) *processorManager {
	pm := new(processorManager)
	pm.ctx = ctx
	pm.p = p
	pm.conn = conn
	pm.address = address
	pm.currentProcessors = atomic.NewInt32(0)
	return pm
}
// stop shuts the manager down: it notifies the remote side, cancels every
// processor goroutine, waits for all of them to return and finally closes the
// grpc connection.
func (pm *processorManager) stop() {
	// The shutdown notification (to the remote query-frontend or
	// query-scheduler) runs on a context derived from Background rather than
	// pm.ctx, so that it is not cancelled along with the main context. It is
	// bounded by notifyShutdownTimeout instead.
	timeoutCause := errors.New("notify shutdown timeout reached")
	notifyCtx, cancel := context.WithTimeoutCause(context.Background(), notifyShutdownTimeout, timeoutCause)
	defer cancel()
	pm.p.notifyShutdown(notifyCtx, pm.conn, pm.address)

	// Dropping the target concurrency to zero cancels every goroutine; then
	// block until each one has actually returned.
	pm.concurrency(0)
	pm.wg.Wait()

	// Best-effort close: the error is intentionally discarded.
	_ = pm.conn.Close()
}
func (pm *processorManager) concurrency(n int) {
pm.cancelsMu.Lock()
defer pm.cancelsMu.Unlock()
if n < 0 {
n = 0
}
for len(pm.cancels) < n {
Cleanup data race associated with workerID var (#11922) **What this PR does / why we need it**: A data race existed with the workerID variable, as it could be modified by multiple goroutines. Relates to: https://github.com/grafana/loki/issues/8586 -- Before fix: ``` go test -count=1 -race ./pkg/querier/worker ================== WARNING: DATA RACE Read at 0x00c000494108 by goroutine 229: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:81 +0x118 Previous write at 0x00c000494108 by goroutine 222: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:70 +0x108 github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 229 (running) created at: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:75 +0xcc github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() 
/Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 222 (running) created at: testing.(*T).Run() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x5e8 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:52 +0x1b0 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 ================== --- FAIL: TestResetConcurrency (0.02s) --- FAIL: TestResetConcurrency/concurrency_is_correct_when_numTargets_does_not_divide_evenly_into_maxConcurrent (0.01s) testing.go:1465: race detected during execution of test testing.go:1465: race detected during execution of test FAIL FAIL github.com/grafana/loki/pkg/querier/worker 4.626s FAIL ``` -- After fix: ``` go clean -testcache go test -count=1 -race ./pkg/querier/worker ok github.com/grafana/loki/pkg/querier/worker 6.034s ``` **Which issue(s) this PR fixes**: Fixes #<issue number> **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in 
`production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
workerID := len(pm.cancels) + 1
ctx, cancel := context.WithCancel(pm.ctx)
pm.cancels = append(pm.cancels, cancel)
pm.wg.Add(1)
Cleanup data race associated with workerID var (#11922) **What this PR does / why we need it**: A data race existed with the workerID variable, as it could be modified by multiple goroutines. Relates to: https://github.com/grafana/loki/issues/8586 -- Before fix: ``` go test -count=1 -race ./pkg/querier/worker ================== WARNING: DATA RACE Read at 0x00c000494108 by goroutine 229: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:81 +0x118 Previous write at 0x00c000494108 by goroutine 222: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:70 +0x108 github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 229 (running) created at: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:75 +0xcc github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() 
/Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 222 (running) created at: testing.(*T).Run() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x5e8 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:52 +0x1b0 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 ================== --- FAIL: TestResetConcurrency (0.02s) --- FAIL: TestResetConcurrency/concurrency_is_correct_when_numTargets_does_not_divide_evenly_into_maxConcurrent (0.01s) testing.go:1465: race detected during execution of test testing.go:1465: race detected during execution of test FAIL FAIL github.com/grafana/loki/pkg/querier/worker 4.626s FAIL ``` -- After fix: ``` go clean -testcache go test -count=1 -race ./pkg/querier/worker ok github.com/grafana/loki/pkg/querier/worker 6.034s ``` **Which issue(s) this PR fixes**: Fixes #<issue number> **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in 
`production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
go func(workerID int) {
defer pm.wg.Done()
pm.currentProcessors.Inc()
defer pm.currentProcessors.Dec()
pm.p.processQueriesOnSingleStream(ctx, pm.conn, pm.address, strconv.Itoa(workerID))
Cleanup data race associated with workerID var (#11922) **What this PR does / why we need it**: A data race existed with the workerID variable, as it could be modified by multiple goroutines. Relates to: https://github.com/grafana/loki/issues/8586 -- Before fix: ``` go test -count=1 -race ./pkg/querier/worker ================== WARNING: DATA RACE Read at 0x00c000494108 by goroutine 229: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:81 +0x118 Previous write at 0x00c000494108 by goroutine 222: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:70 +0x108 github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 229 (running) created at: github.com/grafana/loki/pkg/querier/worker.(*processorManager).concurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/processor_manager.go:75 +0xcc github.com/grafana/loki/pkg/querier/worker.(*querierWorker).resetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:267 +0x10c github.com/grafana/loki/pkg/querier/worker.(*querierWorker).AddressAdded() 
/Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker.go:219 +0x868 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency.func1() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:64 +0x1c8 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 Goroutine 222 (running) created at: testing.(*T).Run() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x5e8 github.com/grafana/loki/pkg/querier/worker.TestResetConcurrency() /Users/progers/dev/src/github.com/grafana/loki/pkg/querier/worker/worker_test.go:52 +0x1b0 testing.tRunner() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1595 +0x1b0 testing.(*T).Run.func1() /opt/homebrew/Cellar/go/1.21.6/libexec/src/testing/testing.go:1648 +0x40 ================== --- FAIL: TestResetConcurrency (0.02s) --- FAIL: TestResetConcurrency/concurrency_is_correct_when_numTargets_does_not_divide_evenly_into_maxConcurrent (0.01s) testing.go:1465: race detected during execution of test testing.go:1465: race detected during execution of test FAIL FAIL github.com/grafana/loki/pkg/querier/worker 4.626s FAIL ``` -- After fix: ``` go clean -testcache go test -count=1 -race ./pkg/querier/worker ok github.com/grafana/loki/pkg/querier/worker 6.034s ``` **Which issue(s) this PR fixes**: Fixes #<issue number> **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in 
`production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
}(workerID)
}
for len(pm.cancels) > n {
pm.cancels[0]()
pm.cancels = pm.cancels[1:]
}
}