@ -43,13 +43,13 @@ type Manager struct {
externalURL * url . URL
}
func NewManager ( logger log . Logger , metrics * metrics . State , externalURL * url . URL ,
func NewManager ( metrics * metrics . State , externalURL * url . URL ,
ruleStore RuleReader , instanceStore InstanceStore , imageService image . ImageService , clock clock . Clock , historian Historian ) * Manager {
manager := & Manager {
cache : newCache ( ) ,
quit : make ( chan struct { } ) ,
ResendDelay : ResendDelay , // TODO: make this configurable
log : logger ,
log : log . New ( "ngalert.state.mana ger" ) ,
metrics : metrics ,
ruleStore : ruleStore ,
instanceStore : instanceStore ,
@ -72,7 +72,7 @@ func (st *Manager) Warm(ctx context.Context) {
orgIds , err := st . instanceStore . FetchOrgIds ( ctx )
if err != nil {
st . log . Error ( "u nable to fetch orgIds" , "error" , err . Error ( ) )
st . log . Error ( "U nable to fetch orgIds" , "error" , err )
}
statesCount := 0
@ -83,7 +83,7 @@ func (st *Manager) Warm(ctx context.Context) {
OrgID : orgId ,
}
if err := st . ruleStore . ListAlertRules ( ctx , & ruleCmd ) ; err != nil {
st . log . Error ( "unable to fetch previous state" , "msg " , err . Error ( ) )
st . log . Error ( "Unable to fetch previous state" , "error " , err )
}
ruleByUID := make ( map [ string ] * ngModels . AlertRule , len ( ruleCmd . Result ) )
@ -99,7 +99,7 @@ func (st *Manager) Warm(ctx context.Context) {
RuleOrgID : orgId ,
}
if err := st . instanceStore . ListAlertInstances ( ctx , & cmd ) ; err != nil {
st . log . Error ( "unable to fetch previous state" , "msg " , err . Error ( ) )
st . log . Error ( "Unable to fetch previous state" , "error " , err )
}
for _ , entry := range cmd . Result {
@ -118,7 +118,7 @@ func (st *Manager) Warm(ctx context.Context) {
lbs := map [ string ] string ( entry . Labels )
cacheID , err := entry . Labels . StringKey ( )
if err != nil {
st . log . Error ( "error getting cacheId for entry" , "msg " , err . Error ( ) )
st . log . Error ( "Error getting cacheId for entry" , "error " , err )
}
rulesStates . states [ cacheID ] = & State {
AlertRuleUID : entry . RuleUID ,
@ -137,7 +137,7 @@ func (st *Manager) Warm(ctx context.Context) {
}
}
st . cache . setAllStates ( states )
st . log . Info ( "State cache has been initialized" , "loaded_ states" , statesCount , "duration" , time . Since ( startTime ) )
st . log . Info ( "State cache has been initialized" , "states" , statesCount , "duration" , time . Since ( startTime ) )
}
func ( st * Manager ) Get ( orgID int64 , alertRuleUID , stateId string ) * State {
@ -147,15 +147,15 @@ func (st *Manager) Get(orgID int64, alertRuleUID, stateId string) *State {
// ResetStateByRuleUID deletes all entries in the state manager that match the given rule UID.
//
// The rule's states are removed from the in-memory cache via removeByRuleUID.
// When at least one state was removed, the rule's alert instances are also
// deleted from the instance store; a failure of that delete is logged and not
// returned to the caller. The states that were removed from the cache are
// returned (possibly empty).
func (st *Manager) ResetStateByRuleUID(ctx context.Context, ruleKey ngModels.AlertRuleKey) []*State {
	// The rule key context is attached to the logger once here, so the log
	// calls below must not repeat it as key/value arguments.
	logger := st.log.New(ruleKey.LogContext()...)
	logger.Debug("Resetting state of the rule")

	states := st.cache.removeByRuleUID(ruleKey.OrgID, ruleKey.UID)
	if len(states) > 0 {
		// Only touch the database when the cache actually held states for this rule.
		if err := st.instanceStore.DeleteAlertInstancesByRule(ctx, ruleKey); err != nil {
			// Log the error itself; previously one variant of this call dropped it.
			logger.Error("Failed to delete states that belong to a rule from database", "error", err)
		}
	}

	logger.Info("Rules state was reset", "states", len(states))
	return states
}
@ -163,17 +163,17 @@ func (st *Manager) ResetStateByRuleUID(ctx context.Context, ruleKey ngModels.Ale
// If extraLabels is not empty, those labels will be added to every state. The extraLabels take precedence over rule labels and result labels.
func ( st * Manager ) ProcessEvalResults ( ctx context . Context , evaluatedAt time . Time , alertRule * ngModels . AlertRule , results eval . Results , extraLabels data . Labels ) [ ] * State {
logger := st . log . New ( alertRule . GetKey ( ) . LogContext ( ) ... )
logger . Debug ( "s tate manager processing evaluation results" , "resultCount" , len ( results ) )
logger . Debug ( "S tate manager processing evaluation results" , "resultCount" , len ( results ) )
var states [ ] * State
processedResults := make ( map [ string ] * State , len ( results ) )
for _ , result := range results {
s := st . setNextState ( ctx , alertRule , result , extraLabels )
s := st . setNextState ( ctx , alertRule , result , extraLabels , logger )
states = append ( states , s )
processedResults [ s . CacheID ] = s
}
resolvedStates := st . staleResultsHandler ( ctx , evaluatedAt , alertRule , processedResults )
resolvedStates := st . staleResultsHandler ( ctx , evaluatedAt , alertRule , processedResults , logger )
if len ( states ) > 0 {
logger . Debug ( "s aving new states to the database" , "count" , len ( states ) )
logger . Debug ( "S aving new states to the database" , "count" , len ( states ) )
_ , _ = st . saveAlertStates ( ctx , states ... )
}
return append ( states , resolvedStates ... )
@ -211,7 +211,7 @@ func (st *Manager) maybeTakeScreenshot(
}
// Set the current state based on evaluation results
func ( st * Manager ) setNextState ( ctx context . Context , alertRule * ngModels . AlertRule , result eval . Result , extraLabels data . Labels ) * State {
func ( st * Manager ) setNextState ( ctx context . Context , alertRule * ngModels . AlertRule , result eval . Result , extraLabels data . Labels , logger log . Logger ) * State {
currentState := st . cache . getOrCreate ( ctx , st . log , alertRule , result , extraLabels , st . externalURL )
currentState . LastEvaluationTime = result . EvaluatedAt
@ -227,7 +227,7 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
oldState := currentState . State
oldReason := currentState . StateReason
st . log . Debug ( "setting alert state" , "uid" , alertRule . UID )
logger . Debug ( "Setting alert state" )
switch result . State {
case eval . Normal :
currentState . resultNormal ( alertRule , result )
@ -255,8 +255,7 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
err := st . maybeTakeScreenshot ( ctx , alertRule , currentState , oldState )
if err != nil {
st . log . Warn ( "failed to generate a screenshot for an alert instance" ,
"alert_rule" , alertRule . UID ,
logger . Warn ( "Failed to generate a screenshot for an alert instance" ,
"dashboard" , alertRule . DashboardUID ,
"panel" , alertRule . PanelID ,
"error" , err )
@ -287,10 +286,10 @@ func (st *Manager) recordMetrics() {
for {
select {
case <- ticker . C :
st . log . Debug ( "r ecording state cache metrics" , "now" , st . clock . Now ( ) )
st . log . Debug ( "R ecording state cache metrics" , "now" , st . clock . Now ( ) )
st . cache . recordMetrics ( st . metrics )
case <- st . quit :
st . log . Debug ( "s topping state cache metrics recording" , "now" , st . clock . Now ( ) )
st . log . Debug ( "S topping state cache metrics recording" , "now" , st . clock . Now ( ) )
ticker . Stop ( )
return
}
@ -305,7 +304,7 @@ func (st *Manager) Put(states []*State) {
// TODO: Is the `State` type necessary? Should it embed the instance?
func ( st * Manager ) saveAlertStates ( ctx context . Context , states ... * State ) ( saved , failed int ) {
st . log . Debug ( "s aving alert states" , "count" , len ( states ) )
st . log . Debug ( "S aving alert states" , "count" , len ( states ) )
instances := make ( [ ] ngModels . AlertInstance , 0 , len ( states ) )
type debugInfo struct {
@ -321,7 +320,7 @@ func (st *Manager) saveAlertStates(ctx context.Context, states ...*State) (saved
_ , hash , err := labels . StringAndHash ( )
if err != nil {
debug = append ( debug , debugInfo { s . OrgID , s . AlertRuleUID , s . State . String ( ) , s . Labels . String ( ) } )
st . log . Error ( "f ailed to save alert instance with invalid labels" , "orgID" , s . OrgID , "ruleUID " , s . AlertRuleUID , "error" , err )
st . log . Error ( "F ailed to save alert instance with invalid labels" , "orgID" , s . OrgID , "rule" , s . AlertRuleUID , "error" , err )
continue
}
fields := ngModels . AlertInstance {
@ -344,7 +343,7 @@ func (st *Manager) saveAlertStates(ctx context.Context, states ...*State) (saved
for _ , inst := range instances {
debug = append ( debug , debugInfo { inst . RuleOrgID , inst . RuleUID , string ( inst . CurrentState ) , data . Labels ( inst . Labels ) . String ( ) } )
}
st . log . Error ( "f ailed to save alert states" , "states" , debug , "error" , err )
st . log . Error ( "F ailed to save alert states" , "states" , debug , "error" , err )
return 0 , len ( debug )
}
@ -377,7 +376,7 @@ func (i InstanceStateAndReason) String() string {
return s
}
func ( st * Manager ) staleResultsHandler ( ctx context . Context , evaluatedAt time . Time , alertRule * ngModels . AlertRule , states map [ string ] * State ) [ ] * State {
func ( st * Manager ) staleResultsHandler ( ctx context . Context , evaluatedAt time . Time , alertRule * ngModels . AlertRule , states map [ string ] * State , logger log . Logger ) [ ] * State {
var resolvedStates [ ] * State
allStates := st . GetStatesForRuleUID ( alertRule . OrgID , alertRule . UID )
toDelete := make ( [ ] ngModels . AlertInstanceKey , 0 )
@ -385,12 +384,12 @@ func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Tim
for _ , s := range allStates {
// Is the cached state in our recently processed results? If not, is it stale?
if _ , ok := states [ s . CacheID ] ; ! ok && stateIsStale ( evaluatedAt , s . LastEvaluationTime , alertRule . IntervalSeconds ) {
st . log . Debug ( "removing stale state entry" , "org ID", s . Org ID, "aler tRul eUID " , s . AlertRuleUID , "cacheID ", s . CacheID )
logger . Info ( "Removing stale state entry" , "cache ID", s . Cache ID, "st ate" , s . State , "reason ", s . StateReason )
st . cache . deleteEntry ( s . OrgID , s . AlertRuleUID , s . CacheID )
ilbs := ngModels . InstanceLabels ( s . Labels )
_ , labelsHash , err := ilbs . StringAndHash ( )
if err != nil {
st . log . Error ( "u nable to get labelsHash" , "error" , err . Error ( ) , "orgID" , s . OrgID , "alertRuleUID" , s . AlertRuleUID )
logger . Error ( "U nable to get labelsHash" , "error" , err . Error ( ) , s . AlertRuleUID )
}
toDelete = append ( toDelete , ngModels . AlertInstanceKey { RuleOrgID : s . OrgID , RuleUID : s . AlertRuleUID , LabelsHash : labelsHash } )
@ -411,8 +410,7 @@ func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Tim
}
if err := st . instanceStore . DeleteAlertInstances ( ctx , toDelete ... ) ; err != nil {
st . log . Error ( "unable to delete stale instances from database" , "error" , err . Error ( ) ,
"orgID" , alertRule . OrgID , "alertRuleUID" , alertRule . UID , "count" , len ( toDelete ) )
logger . Error ( "Unable to delete stale instances from database" , "error" , err , "count" , len ( toDelete ) )
}
return resolvedStates
}