@ -2,13 +2,16 @@ package notifier
import (
"context"
"crypto/md5"
"encoding/json"
"fmt"
"net/url"
"path/filepath"
"sync"
"time"
gokit_log "github.com/go-kit/kit/log"
"github.com/grafana/alerting-api/pkg/api"
apimodels "github.com/grafana/alerting-api/pkg/api"
"github.com/pkg/errors"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/nflog"
@ -33,9 +36,33 @@ import (
)
const (
workingDir = "alerting"
pollInterval = 1 * time . Minute
workingDir = "alerting"
// How long should we keep silences and notification entries on-disk after they've served their purpose.
retentionNotificationsAndSilences = 5 * 24 * time . Hour
// To start, the alertmanager needs at least one route defined.
// TODO: we should move this to Grafana settings and define this as the default.
alertmanagerDefaultConfiguration = `
{
"alertmanager_config" : {
"route" : {
"receiver" : "grafana-default-email"
} ,
"receivers" : [ {
"name" : "grafana-default-email" ,
"grafana_managed_receiver_configs" : [ {
"uid" : "" ,
"name" : "email receiver" ,
"type" : "email" ,
"isDefault" : true ,
"settings" : {
"addresses" : "<example@email.com>"
}
} ]
} ]
}
}
`
)
type Alertmanager struct {
@ -59,6 +86,7 @@ type Alertmanager struct {
dispatcherMetrics * dispatch . DispatcherMetrics
reloadConfigMtx sync . RWMutex
config [ ] byte
}
func init ( ) {
@ -105,8 +133,8 @@ func (am *Alertmanager) Init() (err error) {
func ( am * Alertmanager ) Run ( ctx context . Context ) error {
// Make sure dispatcher starts. We can tolerate future reload failures.
if err := am . SyncAndApplyConfigFromDatabase ( ) ; err != nil && ! errors . Is ( err , store . ErrNoAlertmanagerConfiguration ) {
return err
if err := am . SyncAndApplyConfigFromDatabase ( ) ; err != nil {
am . logger . Error ( errors . Wrap ( err , "unable to sync configuration" ) . Error ( ) )
}
for {
@ -114,14 +142,10 @@ func (am *Alertmanager) Run(ctx context.Context) error {
case <- ctx . Done ( ) :
am . StopAndWait ( )
return nil
case <- time . After ( 1 * time . Minute ) :
// TODO: once we have a check to skip reload on same config, uncomment this.
//if err := am.SyncAndApplyConfigFromDatabase(); err != nil {
// if err == store.ErrNoAlertmanagerConfiguration {
// am.logger.Warn(errors.Wrap(err, "unable to sync configuration").Error())
// }
// am.logger.Error(errors.Wrap(err, "unable to sync configuration").Error())
//}
case <- time . After ( pollInterval ) :
if err := am . SyncAndApplyConfigFromDatabase ( ) ; err != nil {
am . logger . Error ( errors . Wrap ( err , "unable to sync configuration" ) . Error ( ) )
}
}
}
}
@ -138,33 +162,54 @@ func (am *Alertmanager) StopAndWait() {
am . dispatcherWG . Wait ( )
}
// SyncAndApplyConfigFromDatabase picks the latest config from database and restarts
// the components with the new config.
func ( am * Alertmanager ) SyncAndApplyConfigFromDatabase ( ) error {
func ( am * Alertmanager ) SaveAndApplyConfig ( cfg * apimodels . PostableUserConfig ) error {
rawConfig , err := json . Marshal ( & cfg )
if err != nil {
return errors . Wrap ( err , "failed to serialize to the Alertmanager configuration" )
}
am . reloadConfigMtx . Lock ( )
defer am . reloadConfigMtx . Unlock ( )
// TODO: check if config is same as before using hashes and skip reload in case they are same.
cfg , err := am . getConfigFromDatabase ( )
if err != nil {
return errors . Wrap ( err , "get config from database" )
cmd := & ngmodels . SaveAlertmanagerConfigurationCmd {
AlertmanagerConfiguration : string ( rawConfig ) ,
ConfigurationVersion : fmt . Sprintf ( "v%d" , ngmodels . AlertConfigurationVersion ) ,
}
if err := am . Store . SaveAlertmanagerConfiguration ( cmd ) ; err != nil {
return errors . Wrap ( err , "failed to save Alertmanager configuration" )
}
return errors . Wrap ( am . applyConfig ( cfg ) , "reload from config" )
return errors . Wrap ( am . applyConfig ( cfg ) , "unable to reload configuration" )
}
func ( am * Alertmanager ) getConfigFromDatabase ( ) ( * api . PostableUserConfig , error ) {
// SyncAndApplyConfigFromDatabase picks the latest config from database and restarts
// the components with the new config.
func ( am * Alertmanager ) SyncAndApplyConfigFromDatabase ( ) error {
am . reloadConfigMtx . Lock ( )
defer am . reloadConfigMtx . Unlock ( )
// First, let's get the configuration we need from the database.
q := & ngmodels . GetLatestAlertmanagerConfigurationQuery { }
if err := am . Store . GetLatestAlertmanagerConfiguration ( q ) ; err != nil {
return nil , err
// If there's no configuration in the database, let's use the default configuration.
if errors . Is ( err , store . ErrNoAlertmanagerConfiguration ) {
q . Result = & ngmodels . AlertConfiguration { AlertmanagerConfiguration : alertmanagerDefaultConfiguration }
} else {
return errors . Wrap ( err , "unable to get Alertmanager configuration from the database" )
}
}
cfg , err := Load ( [ ] byte ( q . Result . AlertmanagerConfiguration ) )
if err != nil {
return err
}
// Then, let's parse and return the alertmanager configuration.
return Load ( q . Result . AlertmanagerConfiguration )
return errors . Wrap ( am . applyConfig ( cfg ) , "unable to reload configuration" )
}
// ApplyConfig applies a new configuration by re-initializing all components using the configuration provided.
func ( am * Alertmanager ) ApplyConfig ( cfg * api . PostableUserConfig ) error {
func ( am * Alertmanager ) ApplyConfig ( cfg * apimodels . PostableUserConfig ) error {
am . reloadConfigMtx . Lock ( )
defer am . reloadConfigMtx . Unlock ( )
@ -175,13 +220,30 @@ const defaultTemplate = "templates/default.tmpl"
// applyConfig applies a new configuration by re-initializing all components using the configuration provided.
// It is not safe to call concurrently.
func ( am * Alertmanager ) applyConfig ( cfg * api . PostableUserConfig ) error {
// First, we need to make sure we persist the templates to disk.
paths , _ , err := PersistTemplates ( cfg , am . WorkingDirPath ( ) )
func ( am * Alertmanager ) applyConfig ( cfg * apimodels . PostableUserConfig ) error {
// First, let's make sure this config is not already loaded
var configChanged bool
rawConfig , err := json . Marshal ( cfg . AlertmanagerConfig )
if err != nil {
// In theory, this should never happen.
return err
}
if md5 . Sum ( am . config ) != md5 . Sum ( rawConfig ) {
configChanged = true
}
// next, we need to make sure we persist the templates to disk.
paths , templatesChanged , err := PersistTemplates ( cfg , am . WorkingDirPath ( ) )
if err != nil {
return err
}
// If neither the configuration nor templates have changed, we've got nothing to do.
if ! configChanged && ! templatesChanged {
am . logger . Debug ( "neither config nor template have changed, skipping configuration sync." )
return nil
}
paths = append ( [ ] string { defaultTemplate } , paths ... )
// With the templates persisted, create the template list using the paths.
@ -217,6 +279,7 @@ func (am *Alertmanager) applyConfig(cfg *api.PostableUserConfig) error {
am . dispatcher . Run ( )
} ( )
am . config = rawConfig
return nil
}
@ -225,7 +288,7 @@ func (am *Alertmanager) WorkingDirPath() string {
}
// buildIntegrationsMap builds a map of name to the list of Grafana integration notifiers off of a list of receiver config.
func ( am * Alertmanager ) buildIntegrationsMap ( receivers [ ] * api . PostableApiReceiver , templates * template . Template ) ( map [ string ] [ ] notify . Integration , error ) {
func ( am * Alertmanager ) buildIntegrationsMap ( receivers [ ] * apimodels . PostableApiReceiver , templates * template . Template ) ( map [ string ] [ ] notify . Integration , error ) {
integrationsMap := make ( map [ string ] [ ] notify . Integration , len ( receivers ) )
for _ , receiver := range receivers {
integrations , err := am . buildReceiverIntegrations ( receiver , templates )
@ -244,7 +307,7 @@ type NotificationChannel interface {
}
// buildReceiverIntegrations builds a list of integration notifiers off of a receiver config.
func ( am * Alertmanager ) buildReceiverIntegrations ( receiver * api . PostableApiReceiver , tmpl * template . Template ) ( [ ] notify . Integration , error ) {
func ( am * Alertmanager ) buildReceiverIntegrations ( receiver * apimodels . PostableApiReceiver , tmpl * template . Template ) ( [ ] notify . Integration , error ) {
var integrations [ ] notify . Integration
for i , r := range receiver . GrafanaManagedReceivers {