mirror of https://github.com/grafana/loki
feat(compactor HS): add support for worker for processing of jobs from the compactor's job queue (#18165)
parent
4d8d05fdd7
commit
d05c4bc500
@ -0,0 +1,159 @@ |
||||
package jobqueue |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"sync" |
||||
"time" |
||||
|
||||
"github.com/go-kit/log/level" |
||||
"github.com/grafana/dskit/backoff" |
||||
"github.com/pkg/errors" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/compactor/client/grpc" |
||||
util_log "github.com/grafana/loki/v3/pkg/util/log" |
||||
) |
||||
|
||||
var (
	// connBackoffConfig bounds the retry delay a worker uses when it fails
	// to establish (or loses) the job queue stream to the compactor.
	connBackoffConfig = backoff.Config{
		MinBackoff: 500 * time.Millisecond,
		MaxBackoff: 5 * time.Second,
	}
)
||||
|
||||
// CompactorClient provides access to the compactor's job queue gRPC service.
type CompactorClient interface {
	// JobQueueClient returns a client for opening the bidirectional job
	// queue stream (see worker.start).
	JobQueueClient() grpc.JobQueueClient
}
||||
|
||||
// JobRunner executes a single job of a specific type.
type JobRunner interface {
	// Run processes the given job and returns the serialized job result.
	// A non-nil error marks the job as failed; ctx is cancelled when the
	// underlying stream to the compactor is closed.
	Run(ctx context.Context, job *grpc.Job) ([]byte, error)
}
||||
|
||||
// WorkerManager starts and stops a pool of workers which pull jobs from
// the compactor's job queue and dispatch them to registered JobRunners.
type WorkerManager struct {
	grpcClient CompactorClient
	// jobRunners maps a job type to the runner that processes it.
	// Registration must happen before Start; the map is not mutex-guarded.
	jobRunners map[grpc.JobType]JobRunner
	// cancel stops all workers started by Start.
	cancel context.CancelFunc
	// wg tracks running worker goroutines so Stop can wait for them.
	wg sync.WaitGroup
}
||||
|
||||
func NewWorkerManager(grpcClient CompactorClient) *WorkerManager { |
||||
return &WorkerManager{ |
||||
grpcClient: grpcClient, |
||||
jobRunners: make(map[grpc.JobType]JobRunner), |
||||
} |
||||
} |
||||
|
||||
func (w *WorkerManager) RegisterJobRunner(jobType grpc.JobType, jobRunner JobRunner) error { |
||||
if _, exists := w.jobRunners[jobType]; exists { |
||||
return ErrJobTypeAlreadyRegistered |
||||
} |
||||
|
||||
w.jobRunners[jobType] = jobRunner |
||||
return nil |
||||
} |
||||
|
||||
func (w *WorkerManager) Start(ctx context.Context, numWorkers int) { |
||||
ctx, cancel := context.WithCancel(ctx) |
||||
w.cancel = cancel |
||||
|
||||
for i := 0; i < numWorkers; i++ { |
||||
w.wg.Add(1) |
||||
go func() { |
||||
defer w.wg.Done() |
||||
newWorker(w.grpcClient, w.jobRunners).start(ctx) |
||||
}() |
||||
} |
||||
} |
||||
|
||||
func (w *WorkerManager) Stop() { |
||||
if w.cancel != nil { |
||||
w.cancel() |
||||
} |
||||
w.wg.Wait() |
||||
} |
||||
|
||||
// worker is a single job-processing loop. It opens a stream to the
// compactor's job queue and runs received jobs via the registered runners.
type worker struct {
	grpcClient CompactorClient
	// jobRunners is shared (read-only) with the owning WorkerManager.
	jobRunners map[grpc.JobType]JobRunner
}
||||
|
||||
func newWorker(grpcClient CompactorClient, jobRunners map[grpc.JobType]JobRunner) *worker { |
||||
return &worker{ |
||||
grpcClient: grpcClient, |
||||
jobRunners: jobRunners, |
||||
} |
||||
} |
||||
|
||||
func (w *worker) start(ctx context.Context) { |
||||
client := w.grpcClient.JobQueueClient() |
||||
|
||||
backoff := backoff.New(ctx, connBackoffConfig) |
||||
for backoff.Ongoing() { |
||||
c, err := client.Loop(ctx) |
||||
if err != nil { |
||||
level.Warn(util_log.Logger).Log("msg", "error contacting compactor", "err", err) |
||||
backoff.Wait() |
||||
continue |
||||
} |
||||
|
||||
if err := w.process(c); err != nil { |
||||
level.Error(util_log.Logger).Log("msg", "error running jobs", "err", err) |
||||
backoff.Wait() |
||||
continue |
||||
} |
||||
|
||||
backoff.Reset() |
||||
} |
||||
} |
||||
|
||||
// process pulls jobs from the established stream, processes them and sends back the job result to the stream.
// It only returns when c.Recv fails, so the returned error is always non-nil
// (including io.EOF when the server closes the stream cleanly).
func (w *worker) process(c grpc.JobQueue_LoopClient) error {
	// Build a child context so we can cancel the job when the stream is closed.
	ctx, cancel := context.WithCancelCause(c.Context())
	defer cancel(errors.New("job queue stream closed"))

	for {
		job, err := c.Recv()
		if err != nil {
			return err
		}

		// Execute the job on a "background" goroutine, so we go back to
		// blocking on c.Recv(). This allows us to detect the stream closing
		// and cancel the job execution. We don't process jobs in parallel
		// here, as we're running in a lock-step with the server - each Recv is
		// paired with a Send.
		go func() {
			// Echo the job's identity back so the server can correlate the result.
			jobResult := &grpc.JobResult{
				JobId:   job.Id,
				JobType: job.Type,
			}

			jobRunner, ok := w.jobRunners[job.Type]
			if !ok {
				// Unknown job type: report it as a failed job rather than
				// dropping it, so the server is not left waiting for a result.
				level.Error(util_log.Logger).Log("msg", "job runner for job type not registered", "jobType", job.Type)
				jobResult.Error = fmt.Sprintf("unknown job type %s", job.Type)
				if err := c.Send(jobResult); err != nil {
					level.Error(util_log.Logger).Log("msg", "error sending job result", "err", err)
				}
				return
			}

			// ctx is cancelled when the stream closes, letting the runner abort.
			jobResponse, err := jobRunner.Run(ctx, job)
			if err != nil {
				// Job failed: send the error back; the server decides on retries.
				level.Error(util_log.Logger).Log("msg", "error running job", "err", err)
				jobResult.Error = err.Error()
				if err := c.Send(jobResult); err != nil {
					level.Error(util_log.Logger).Log("msg", "error sending job result", "err", err)
				}
				return
			}

			// Success: return the runner's serialized response.
			jobResult.Result = jobResponse
			if err := c.Send(jobResult); err != nil {
				level.Error(util_log.Logger).Log("msg", "error sending job result", "err", err)
				return
			}
		}()
	}
}
@ -0,0 +1,180 @@ |
||||
package jobqueue |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/stretchr/testify/mock" |
||||
"github.com/stretchr/testify/require" |
||||
"go.uber.org/atomic" |
||||
"google.golang.org/grpc" |
||||
|
||||
compactor_grpc "github.com/grafana/loki/v3/pkg/compactor/client/grpc" |
||||
) |
||||
|
||||
// mockCompactorClient satisfies CompactorClient by wrapping a pre-built
// gRPC client connection (pointed at the in-process test server).
type mockCompactorClient struct {
	conn *grpc.ClientConn
}

// JobQueueClient returns a job queue client backed by the wrapped connection.
func (m mockCompactorClient) JobQueueClient() compactor_grpc.JobQueueClient {
	return compactor_grpc.NewJobQueueClient(m.conn)
}
||||
|
||||
// mockJobRunner is a testify-based mock implementation of JobRunner.
type mockJobRunner struct {
	mock.Mock
}

// Run records the call and returns whatever the test configured via On(...).
// The nil check avoids a failed type assertion when the configured result is nil.
func (m *mockJobRunner) Run(ctx context.Context, job *compactor_grpc.Job) ([]byte, error) {
	args := m.Called(ctx, job)
	if args.Get(0) == nil {
		return nil, args.Error(1)
	}
	return args.Get(0).([]byte), args.Error(1)
}
||||
|
||||
func TestWorkerManager(t *testing.T) { |
||||
// create a new job queue
|
||||
q := NewQueue() |
||||
conn, closer := setupGRPC(t, q) |
||||
defer closer() |
||||
|
||||
// create a mock job builder which would build only a single job
|
||||
mockJobBuilder := &mockBuilder{ |
||||
jobsToBuild: []*compactor_grpc.Job{ |
||||
{ |
||||
Id: "1", |
||||
Type: compactor_grpc.JOB_TYPE_DELETION, |
||||
}, |
||||
}, |
||||
} |
||||
|
||||
// register the job builder with the queue and start the queue
|
||||
require.NoError(t, q.RegisterBuilder(compactor_grpc.JOB_TYPE_DELETION, mockJobBuilder)) |
||||
require.NoError(t, q.Start(context.Background())) |
||||
require.Equal(t, int32(0), mockJobBuilder.jobsSentCount.Load()) |
||||
|
||||
jobRunner := &mockJobRunner{} |
||||
jobRunner.On("Run", mock.Anything, mock.Anything).Return(nil, nil) |
||||
|
||||
// create a new worker manager and register the mock job runner
|
||||
wm := NewWorkerManager(mockCompactorClient{conn}) |
||||
require.NoError(t, wm.RegisterJobRunner(compactor_grpc.JOB_TYPE_DELETION, jobRunner)) |
||||
|
||||
// trying to register job runner for same job type should throw an error
|
||||
require.Error(t, wm.RegisterJobRunner(compactor_grpc.JOB_TYPE_DELETION, &mockJobRunner{})) |
||||
|
||||
// start two workers so only one of them would get a job
|
||||
wm.Start(context.Background(), 2) |
||||
|
||||
// verify that the job builder got to send the job and that it got processed successfully
|
||||
require.Eventually(t, func() bool { |
||||
if mockJobBuilder.jobsSentCount.Load() != 1 { |
||||
return false |
||||
} |
||||
if mockJobBuilder.jobsSucceeded.Load() != 1 { |
||||
return false |
||||
} |
||||
return true |
||||
}, time.Second, time.Millisecond*100) |
||||
|
||||
// stop the worker manager
|
||||
wm.Stop() |
||||
} |
||||
|
||||
// TestWorker_ProcessJob verifies that a single worker processes a mix of
// valid and unknown-type jobs: the unknown type is reported as failed
// without breaking processing of subsequent valid jobs.
func TestWorker_ProcessJob(t *testing.T) {
	// create a new job queue with a short retry/check interval
	q := newQueue(50 * time.Millisecond)
	conn, closer := setupGRPC(t, q)
	defer closer()

	// create a mock job builder which would build a couple of jobs
	mockJobBuilder := &mockBuilder{
		jobsToBuild: []*compactor_grpc.Job{
			{
				Id:   "1",
				Type: compactor_grpc.JOB_TYPE_DELETION,
			},
			{
				Id:   "2",
				Type: compactor_grpc.JOB_TYPE_DELETION + 1, // an unknown job should not break anything in processing further valid jobs
			},
			{
				Id:   "3",
				Type: compactor_grpc.JOB_TYPE_DELETION,
			},
		},
	}

	// Job "1" succeeds on its first run; every later Run call fails.
	// NOTE(review): Times(3) presumably matches the queue retrying the
	// failing job "3" until it is marked failed — confirm against the
	// queue's retry count.
	jobRunner := &mockJobRunner{}
	jobRunner.On("Run", mock.Anything, mock.Anything).Return(nil, nil).Once()
	jobRunner.On("Run", mock.Anything, mock.Anything).Return(nil, fmt.Errorf("fail")).Times(3)

	// register the job builder with the queue and start the queue
	require.NoError(t, q.RegisterBuilder(compactor_grpc.JOB_TYPE_DELETION, mockJobBuilder))
	require.NoError(t, q.Start(context.Background()))
	require.Equal(t, int32(0), mockJobBuilder.jobsSentCount.Load())

	// build a worker and start it
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	go newWorker(mockCompactorClient{conn: conn}, map[compactor_grpc.JobType]JobRunner{
		compactor_grpc.JOB_TYPE_DELETION: jobRunner,
	}).start(ctx)

	// verify that the job builder got to send all 3 jobs and that both the valid jobs got processed
	require.Eventually(t, func() bool {
		if mockJobBuilder.jobsSentCount.Load() != 3 {
			return false
		}
		if mockJobBuilder.jobsSucceeded.Load() != 1 {
			return false
		}
		if mockJobBuilder.jobsFailed.Load() != 1 {
			return false
		}
		return true
	}, 2*time.Second, time.Millisecond*50)

	// ensure all configured mock expectations were consumed
	jobRunner.AssertExpectations(t)
}
||||
|
||||
func TestWorker_StreamClosure(t *testing.T) { |
||||
// build a queue
|
||||
q := NewQueue() |
||||
conn, closer := setupGRPC(t, q) |
||||
defer closer() |
||||
|
||||
// register a builder and start the queue
|
||||
require.NoError(t, q.RegisterBuilder(compactor_grpc.JOB_TYPE_DELETION, &mockBuilder{})) |
||||
require.NoError(t, q.Start(context.Background())) |
||||
|
||||
// build a worker
|
||||
worker := newWorker(mockCompactorClient{conn: conn}, map[compactor_grpc.JobType]JobRunner{ |
||||
compactor_grpc.JOB_TYPE_DELETION: &mockJobRunner{}, |
||||
}) |
||||
ctx, cancel := context.WithCancel(context.Background()) |
||||
defer cancel() |
||||
|
||||
var running atomic.Bool |
||||
// start the worker and ensure that it is running
|
||||
go func() { |
||||
running.Store(true) |
||||
defer running.Store(false) |
||||
|
||||
worker.start(ctx) |
||||
}() |
||||
|
||||
require.Eventually(t, func() bool { |
||||
return running.Load() |
||||
}, time.Second, time.Millisecond*100) |
||||
|
||||
// close the queue so that it closes the stream
|
||||
q.Close() |
||||
|
||||
// sleep for a while and ensure that the worker is still running
|
||||
time.Sleep(100 * time.Millisecond) |
||||
require.True(t, running.Load()) |
||||
} |
Loading…
Reference in new issue