@ -397,122 +397,120 @@ func TestForStateRestore(t *testing.T) {
group . Eval ( context . TODO ( ) , evalTime )
}
exp := rule . ActiveAlerts ( )
for _ , aa := range exp {
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
sort . Slice ( exp , func ( i , j int ) bool {
return labels . Compare ( exp [ i ] . Labels , exp [ j ] . Labels ) < 0
} )
// Prometheus goes down here. We create new rules and groups.
type testInput struct {
name string
restoreDuration time . Duration
alerts [ ] * Alert
expectedAlerts [ ] * Alert
num int
noRestore bool
gracePeriod bool
downDuration time . Duration
before func ( )
}
tests := [ ] testInput {
{
// Normal restore (alerts were not firing).
name : "normal restore (alerts were not firing)" ,
restoreDuration : 15 * time . Minute ,
alerts : rule . ActiveAlerts ( ) ,
expectedAlerts : rule . ActiveAlerts ( ) ,
downDuration : 10 * time . Minute ,
} ,
{
// Testing Outage Tolerance.
name : "outage tolerance" ,
restoreDuration : 40 * time . Minute ,
noRestore : true ,
num : 2 ,
} ,
{
// No active alerts.
name : "no active alerts" ,
restoreDuration : 50 * time . Minute ,
alerts : [ ] * Alert { } ,
expectedAlerts : [ ] * Alert { } ,
} ,
{
name : "test the grace period" ,
restoreDuration : 25 * time . Minute ,
expectedAlerts : [ ] * Alert { } ,
gracePeriod : true ,
before : func ( ) {
for _ , duration := range [ ] time . Duration { 10 * time . Minute , 15 * time . Minute , 20 * time . Minute } {
evalTime := baseTime . Add ( duration )
group . Eval ( context . TODO ( ) , evalTime )
}
} ,
num : 2 ,
} ,
}
testFunc := func ( tst testInput ) {
newRule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
0 ,
labels . FromStrings ( "severity" , "critical" ) ,
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , false , nil ,
)
newGroup := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { newRule } ,
ShouldRestore : true ,
Opts : opts ,
} )
for _ , tt := range tests {
t . Run ( tt . name , func ( t * testing . T ) {
if tt . before != nil {
tt . before ( )
}
newGroups := make ( map [ string ] * Group )
newGroups [ "default;" ] = newGroup
newRule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
0 ,
labels . FromStrings ( "severity" , "critical" ) ,
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , false , nil ,
)
newGroup := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { newRule } ,
ShouldRestore : true ,
Opts : opts ,
} )
restoreTime := baseTime . Add ( tst . restoreDuration )
// First eval before restoration.
newGroup . Eval ( context . TODO ( ) , restoreTime )
// Restore happens here.
newGroup . RestoreForState ( restoreTime )
newGroups := make ( map [ string ] * Group )
newGroups [ "default;" ] = newGroup
got := newRule . ActiveAlerts ( )
for _ , aa := range got {
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
sort . Slice ( got , func ( i , j int ) bool {
return labels . Compare ( got [ i ] . Labels , got [ j ] . Labels ) < 0
} )
restoreTime := baseTime . Add ( tt . restoreDuration )
// First eval before restoration.
newGroup . Eval ( context . TODO ( ) , restoreTime )
// Restore happens here.
newGroup . RestoreForState ( restoreTime )
// Checking if we have restored it correctly.
switch {
case tst . noRestore :
require . Len ( t , got , tst . num )
for _ , e := range got {
require . Equal ( t , e . ActiveAt , restoreTime )
}
case tst . gracePeriod :
require . Len ( t , got , tst . num )
for _ , e := range got {
require . Equal ( t , opts . ForGracePeriod , e . ActiveAt . Add ( alertForDuration ) . Sub ( restoreTime ) )
got := newRule . ActiveAlerts ( )
for _ , aa := range got {
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
default :
exp := tst . alerts
require . Equal ( t , len ( exp ) , len ( got ) )
sortAlerts ( exp )
sortAlerts ( got )
for i , e := range exp {
require . Equal ( t , e . Labels , got [ i ] . Labels )
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64 ( e . ActiveAt . Unix ( ) + int64 ( tst . downDuration / time . Second ) - got [ i ] . ActiveAt . Unix ( ) )
require . Equal ( t , 0.0 , math . Abs ( activeAtDiff ) , "'for' state restored time is wrong" )
}
}
}
sort . Slice ( got , func ( i , j int ) bool {
return labels . Compare ( got [ i ] . Labels , got [ j ] . Labels ) < 0
} )
for _ , tst := range tests {
testFunc ( tst )
}
// Checking if we have restored it correctly.
switch {
case tt . noRestore :
require . Len ( t , got , tt . num )
for _ , e := range got {
require . Equal ( t , e . ActiveAt , restoreTime )
}
case tt . gracePeriod :
// Testing the grace period.
for _ , duration := range [ ] time . Duration { 10 * time . Minute , 15 * time . Minute , 20 * time . Minute } {
evalTime := baseTime . Add ( duration )
group . Eval ( context . TODO ( ) , evalTime )
require . Len ( t , got , tt . num )
for _ , e := range got {
require . Equal ( t , opts . ForGracePeriod , e . ActiveAt . Add ( alertForDuration ) . Sub ( restoreTime ) )
}
default :
exp := tt . expectedAlerts
require . Equal ( t , len ( exp ) , len ( got ) )
sortAlerts ( exp )
sortAlerts ( got )
for i , e := range exp {
require . Equal ( t , e . Labels , got [ i ] . Labels )
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64 ( e . ActiveAt . Unix ( ) + int64 ( tt . downDuration / time . Second ) - got [ i ] . ActiveAt . Unix ( ) )
require . Equal ( t , 0.0 , math . Abs ( activeAtDiff ) , "'for' state restored time is wrong" )
}
}
} )
}
testFunc ( testInput {
restoreDuration : 25 * time . Minute ,
alerts : [ ] * Alert { } ,
gracePeriod : true ,
num : 2 ,
} )
}
func TestStaleness ( t * testing . T ) {