mirror of https://github.com/grafana/grafana
Alerting: Read from both proto and simple DB instance stores on startup (#99855)
parent
e03656669a
commit
f45265b5f7
@ -0,0 +1,133 @@ |
||||
package state |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"maps" |
||||
"slices" |
||||
"time" |
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log" |
||||
"github.com/grafana/grafana/pkg/services/ngalert/models" |
||||
) |
||||
|
||||
// MultiInstanceReader merges results from two InstanceReaders.
|
||||
//
|
||||
// For each rule, it picks the state that has the newer LastEvalTime.
|
||||
type MultiInstanceReader struct { |
||||
ProtoDBReader InstanceReader |
||||
DBReader InstanceReader |
||||
logger log.Logger |
||||
} |
||||
|
||||
func NewMultiInstanceReader(logger log.Logger, r1, r2 InstanceReader) *MultiInstanceReader { |
||||
return &MultiInstanceReader{ |
||||
ProtoDBReader: r1, |
||||
DBReader: r2, |
||||
logger: logger, |
||||
} |
||||
} |
||||
|
||||
// FetchOrgIds merges org IDs from both readers.
|
||||
func (m *MultiInstanceReader) FetchOrgIds(ctx context.Context) ([]int64, error) { |
||||
orgsOne, err := m.ProtoDBReader.FetchOrgIds(ctx) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to fetch org IDs from ProtoDBReader: %w", err) |
||||
} |
||||
|
||||
orgsTwo, err := m.DBReader.FetchOrgIds(ctx) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to fetch org IDs from DBReader: %w", err) |
||||
} |
||||
|
||||
orgsSet := make(map[int64]struct{}) |
||||
|
||||
for _, orgID := range orgsOne { |
||||
orgsSet[orgID] = struct{}{} |
||||
} |
||||
for _, orgID := range orgsTwo { |
||||
orgsSet[orgID] = struct{}{} |
||||
} |
||||
|
||||
return slices.Collect(maps.Keys(orgsSet)), nil |
||||
} |
||||
|
||||
// ListAlertInstances fetches alert instances for a query from both readers,
|
||||
// groups them by rule UID, and returns the newest instances for each rule as a
|
||||
// single slice.
|
||||
func (m *MultiInstanceReader) ListAlertInstances(ctx context.Context, cmd *models.ListAlertInstancesQuery) ([]*models.AlertInstance, error) { |
||||
instancesOne, err := m.ProtoDBReader.ListAlertInstances(ctx, cmd) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to list alert instances from ProtoDBReader: %w", err) |
||||
} |
||||
|
||||
instancesTwo, err := m.DBReader.ListAlertInstances(ctx, cmd) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to list alert instances from DBReader: %w", err) |
||||
} |
||||
|
||||
byRuleOne := groupByRuleUID(instancesOne) |
||||
byRuleTwo := groupByRuleUID(instancesTwo) |
||||
|
||||
ruleSet := make(map[string]struct{}) |
||||
for uid := range byRuleOne { |
||||
ruleSet[uid] = struct{}{} |
||||
} |
||||
for uid := range byRuleTwo { |
||||
ruleSet[uid] = struct{}{} |
||||
} |
||||
|
||||
merged := make([]*models.AlertInstance, 0) |
||||
for ruleUID := range ruleSet { |
||||
sliceA := byRuleOne[ruleUID] |
||||
sliceB := byRuleTwo[ruleUID] |
||||
newer := getNewestAlertInstances(m.logger, sliceA, sliceB) |
||||
merged = append(merged, newer...) |
||||
} |
||||
|
||||
return merged, nil |
||||
} |
||||
|
||||
func groupByRuleUID(instances []*models.AlertInstance) map[string][]*models.AlertInstance { |
||||
result := make(map[string][]*models.AlertInstance) |
||||
for _, inst := range instances { |
||||
if inst == nil { |
||||
continue |
||||
} |
||||
result[inst.RuleUID] = append(result[inst.RuleUID], inst) |
||||
} |
||||
return result |
||||
} |
||||
|
||||
// getNewestAlertInstances returns the newest alert instances slice
|
||||
// by comparing the maximum LastEvalTime in each of them. If one is empty, returns the other.
|
||||
func getNewestAlertInstances(logger log.Logger, firstInstances, secondInstances []*models.AlertInstance) []*models.AlertInstance { |
||||
if len(firstInstances) == 0 { |
||||
return secondInstances |
||||
} |
||||
if len(secondInstances) == 0 { |
||||
return firstInstances |
||||
} |
||||
|
||||
maxFirst := maxLastEvalTime(firstInstances) |
||||
maxSecond := maxLastEvalTime(secondInstances) |
||||
|
||||
if maxSecond.After(maxFirst) { |
||||
logger.Debug("Newer alert instances found", "first_last_eval_time", maxFirst, "second_last_eval_time", maxSecond, "rule_uid", firstInstances[0].RuleUID) |
||||
return secondInstances |
||||
} |
||||
return firstInstances |
||||
} |
||||
|
||||
// maxLastEvalTime finds the maximum LastEvalTime among a slice of alert instances.
|
||||
func maxLastEvalTime(instances []*models.AlertInstance) time.Time { |
||||
var max time.Time |
||||
|
||||
for _, i := range instances { |
||||
if i != nil && i.LastEvalTime.After(max) { |
||||
max = i.LastEvalTime |
||||
} |
||||
} |
||||
|
||||
return max |
||||
} |
@ -0,0 +1,208 @@ |
||||
package state |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/stretchr/testify/mock" |
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log/logtest" |
||||
"github.com/grafana/grafana/pkg/services/ngalert/models" |
||||
) |
||||
|
||||
type mockInstanceReader struct { |
||||
mock.Mock |
||||
} |
||||
|
||||
func (m *mockInstanceReader) FetchOrgIds(ctx context.Context) ([]int64, error) { |
||||
args := m.Called(ctx) |
||||
return args.Get(0).([]int64), args.Error(1) |
||||
} |
||||
|
||||
func (m *mockInstanceReader) ListAlertInstances(ctx context.Context, cmd *models.ListAlertInstancesQuery) ([]*models.AlertInstance, error) { |
||||
args := m.Called(ctx, cmd) |
||||
return args.Get(0).([]*models.AlertInstance), args.Error(1) |
||||
} |
||||
|
||||
func TestMultiInstanceReader_FetchOrgIds(t *testing.T) { |
||||
tests := []struct { |
||||
name string |
||||
mockAOrgIDs []int64 |
||||
mockBOrgIDs []int64 |
||||
mockAError error |
||||
mockBError error |
||||
expectedOrgIDs []int64 |
||||
expectError bool |
||||
}{ |
||||
{ |
||||
name: "both readers empty, no errors", |
||||
mockAOrgIDs: []int64{}, |
||||
mockBOrgIDs: []int64{}, |
||||
expectedOrgIDs: []int64{}, |
||||
}, |
||||
{ |
||||
name: "simple union, no errors", |
||||
mockAOrgIDs: []int64{1, 2}, |
||||
mockBOrgIDs: []int64{2, 3}, |
||||
expectedOrgIDs: []int64{1, 2, 3}, |
||||
}, |
||||
{ |
||||
name: "error in readerA", |
||||
mockAError: errors.New("some error"), |
||||
expectError: true, |
||||
}, |
||||
{ |
||||
name: "error in readerB", |
||||
mockBError: errors.New("another error"), |
||||
expectError: true, |
||||
}, |
||||
} |
||||
|
||||
for _, tc := range tests { |
||||
t.Run(tc.name, func(t *testing.T) { |
||||
ctx := context.Background() |
||||
|
||||
readerA := &mockInstanceReader{} |
||||
if tc.mockAError != nil { |
||||
readerA.On("FetchOrgIds", mock.Anything).Return([]int64(nil), tc.mockAError).Once() |
||||
} else { |
||||
readerA.On("FetchOrgIds", mock.Anything).Return(tc.mockAOrgIDs, nil).Once() |
||||
} |
||||
|
||||
readerB := &mockInstanceReader{} |
||||
if tc.mockBError != nil { |
||||
readerB.On("FetchOrgIds", mock.Anything).Return([]int64(nil), tc.mockBError).Once() |
||||
} else { |
||||
readerB.On("FetchOrgIds", mock.Anything).Return(tc.mockBOrgIDs, nil).Once() |
||||
} |
||||
|
||||
multi := NewMultiInstanceReader(&logtest.Fake{}, readerA, readerB) |
||||
orgIDs, err := multi.FetchOrgIds(ctx) |
||||
|
||||
if tc.expectError { |
||||
require.Error(t, err) |
||||
return |
||||
} |
||||
require.NoError(t, err) |
||||
require.ElementsMatch(t, tc.expectedOrgIDs, orgIDs) |
||||
|
||||
readerA.AssertExpectations(t) |
||||
readerB.AssertExpectations(t) |
||||
}) |
||||
} |
||||
} |
||||
|
||||
func TestMultiInstanceReader_ListAlertInstances(t *testing.T) { |
||||
t1 := time.Unix(100, 0) |
||||
t2 := time.Unix(200, 0) |
||||
t3 := time.Unix(300, 0) |
||||
|
||||
ruleA := "rule-1" |
||||
ruleB := "rule-2" |
||||
|
||||
instAOld := &models.AlertInstance{AlertInstanceKey: models.AlertInstanceKey{RuleUID: ruleA}, LastEvalTime: t1} |
||||
instANew := &models.AlertInstance{AlertInstanceKey: models.AlertInstanceKey{RuleUID: ruleA}, LastEvalTime: t3} |
||||
instBMid := &models.AlertInstance{AlertInstanceKey: models.AlertInstanceKey{RuleUID: ruleB}, LastEvalTime: t2} |
||||
instASameTime := &models.AlertInstance{AlertInstanceKey: models.AlertInstanceKey{RuleUID: ruleA}, LastEvalTime: t1} |
||||
|
||||
tests := []struct { |
||||
name string |
||||
readerAInstances []*models.AlertInstance |
||||
readerBInstances []*models.AlertInstance |
||||
readerAError error |
||||
readerBError error |
||||
expectedInstances []*models.AlertInstance |
||||
expectError bool |
||||
}{ |
||||
{ |
||||
name: "both readers return empty lists", |
||||
readerAInstances: []*models.AlertInstance{}, |
||||
readerBInstances: []*models.AlertInstance{}, |
||||
expectedInstances: []*models.AlertInstance{}, |
||||
}, |
||||
{ |
||||
name: "when readerB is empty, use instances from readerA", |
||||
readerAInstances: []*models.AlertInstance{instAOld}, |
||||
readerBInstances: []*models.AlertInstance{}, |
||||
expectedInstances: []*models.AlertInstance{instAOld}, |
||||
}, |
||||
{ |
||||
name: "when readerA is empty, use instances from readerB", |
||||
readerAInstances: []*models.AlertInstance{}, |
||||
readerBInstances: []*models.AlertInstance{instANew}, |
||||
expectedInstances: []*models.AlertInstance{instANew}, |
||||
}, |
||||
{ |
||||
name: "when same rule exists in both readers, picks instances from reader with newer evaluation time", |
||||
readerAInstances: []*models.AlertInstance{instAOld}, |
||||
readerBInstances: []*models.AlertInstance{instANew}, |
||||
expectedInstances: []*models.AlertInstance{instANew}, |
||||
}, |
||||
{ |
||||
name: "combines instances across rules using newest evaluation time per rule", |
||||
readerAInstances: []*models.AlertInstance{instAOld, instBMid}, |
||||
readerBInstances: []*models.AlertInstance{instANew}, |
||||
expectedInstances: []*models.AlertInstance{instANew, instBMid}, |
||||
}, |
||||
{ |
||||
name: "error from readerA", |
||||
readerAError: errors.New("some error"), |
||||
expectError: true, |
||||
}, |
||||
{ |
||||
name: "error from readerB", |
||||
readerBError: errors.New("another error"), |
||||
expectError: true, |
||||
}, |
||||
{ |
||||
name: "nil instances are filtered out from results", |
||||
readerAInstances: []*models.AlertInstance{nil, instAOld}, |
||||
readerBInstances: []*models.AlertInstance{nil}, |
||||
expectedInstances: []*models.AlertInstance{instAOld}, |
||||
}, |
||||
{ |
||||
name: "when instances have equal evaluation times, picks instances from readerA", |
||||
readerAInstances: []*models.AlertInstance{instAOld}, |
||||
readerBInstances: []*models.AlertInstance{instASameTime}, |
||||
expectedInstances: []*models.AlertInstance{instAOld}, |
||||
}, |
||||
} |
||||
|
||||
for _, tc := range tests { |
||||
t.Run(tc.name, func(t *testing.T) { |
||||
ctx := context.Background() |
||||
|
||||
mockA := &mockInstanceReader{} |
||||
if tc.readerAError != nil { |
||||
mockA.On("ListAlertInstances", mock.Anything, mock.Anything).Return([]*models.AlertInstance(nil), tc.readerAError).Once() |
||||
} else { |
||||
mockA.On("ListAlertInstances", mock.Anything, mock.Anything).Return(tc.readerAInstances, nil).Once() |
||||
} |
||||
|
||||
mockB := &mockInstanceReader{} |
||||
if tc.readerBError != nil { |
||||
mockB.On("ListAlertInstances", mock.Anything, mock.Anything).Return([]*models.AlertInstance(nil), tc.readerBError).Once() |
||||
} else { |
||||
mockB.On("ListAlertInstances", mock.Anything, mock.Anything).Return(tc.readerBInstances, nil).Once() |
||||
} |
||||
|
||||
multi := NewMultiInstanceReader(&logtest.Fake{}, mockA, mockB) |
||||
cmd := &models.ListAlertInstancesQuery{RuleOrgID: 1234} |
||||
got, err := multi.ListAlertInstances(ctx, cmd) |
||||
|
||||
if tc.expectError { |
||||
require.Error(t, err) |
||||
return |
||||
} |
||||
require.NoError(t, err) |
||||
|
||||
require.ElementsMatch(t, tc.expectedInstances, got) |
||||
|
||||
mockA.AssertExpectations(t) |
||||
mockB.AssertExpectations(t) |
||||
}) |
||||
} |
||||
} |
Loading…
Reference in new issue