The open and composable observability and data visualization platform. Visualize metrics, logs, and traces from multiple sources like Prometheus, Loki, Elasticsearch, InfluxDB, Postgres and many more.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
grafana/pkg/services/ngalert/sender/router.go

255 lines
8.2 KiB

package sender
import (
"context"
"errors"
"net/url"
"sync"
"time"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/store"
)
// AlertsRouter handles alerts generated during alert rule evaluation.
// Based on the rule's orgID and the configuration for that organization,
// it determines whether an alert needs to be sent to an external Alertmanager and/or the internal notifier.Alertmanager.
//
// After creating an AlertsRouter, you must call Run to keep the AlertsRouter's
// state synchronized with the alerting configuration.
type AlertsRouter struct {
logger log.Logger
// clock is the clock supplied to NewAlertsRouter.
clock clock.Clock
// adminConfigStore is queried for the admin configurations on every sync.
adminConfigStore store.AdminConfigurationStore
// AdminConfigMtx is write-locked by SyncAndApplyConfigFromDatabase while it
// updates SendAlertsTo, Senders and SendersCfgHash, and read-locked by the
// methods that look up a sender.
AdminConfigMtx sync.RWMutex
// SendAlertsTo holds, per organization ID, the Alertmanagers choice taken
// from the most recently synced admin configuration.
SendAlertsTo map[int64]models.AlertmanagersChoice
// Senders help us send alerts to external Alertmanagers. Keyed by organization ID.
Senders map[int64]*Sender
// SendersCfgHash holds, per organization ID, the hash of the configuration
// last applied to that organization's sender. It is compared against the
// stored configuration's hash to skip re-applying an unchanged configuration.
SendersCfgHash map[int64]string
// MultiOrgNotifier is used to look up the internal Alertmanager of an organization.
MultiOrgNotifier *notifier.MultiOrgAlertmanager
// appURL is the URL supplied to NewAlertsRouter.
appURL *url.URL
// disabledOrgs is the set of organization IDs whose admin configuration is
// skipped during sync.
disabledOrgs map[int64]struct{}
// adminConfigPollInterval is how long Run waits between two configuration syncs.
adminConfigPollInterval time.Duration
}
// NewAlertsRouter builds an AlertsRouter with empty per-organization state.
// No sender is created here: senders appear once the configuration has been
// synchronized from the given store.
func NewAlertsRouter(multiOrgNotifier *notifier.MultiOrgAlertmanager, store store.AdminConfigurationStore, clk clock.Clock, appURL *url.URL, disabledOrgs map[int64]struct{}, configPollInterval time.Duration) *AlertsRouter {
	return &AlertsRouter{
		logger:           log.New("alerts-router"),
		clock:            clk,
		adminConfigStore: store,

		AdminConfigMtx: sync.RWMutex{},
		SendAlertsTo:   make(map[int64]models.AlertmanagersChoice),
		Senders:        make(map[int64]*Sender),
		SendersCfgHash: make(map[int64]string),

		MultiOrgNotifier:        multiOrgNotifier,
		appURL:                  appURL,
		disabledOrgs:            disabledOrgs,
		adminConfigPollInterval: configPollInterval,
	}
}
// SyncAndApplyConfigFromDatabase reads every admin configuration from the
// store and reconciles the per-organization senders with it: senders are
// created, reconfigured or scheduled for shutdown as required, and the
// Alertmanagers choice of each organization is refreshed.
//
// It returns an error only when the configurations cannot be loaded; failures
// that affect a single organization are logged and the sync carries on.
func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
	d.logger.Debug("start of admin configuration sync")
	configs, err := d.adminConfigStore.GetAdminConfigurations()
	if err != nil {
		return err
	}
	d.logger.Debug("found admin configurations", "count", len(configs))

	// Organizations whose sender (if any) must survive this sync.
	keep := make(map[int64]struct{}, len(configs))

	d.AdminConfigMtx.Lock()
	for _, cfg := range configs {
		if _, disabled := d.disabledOrgs[cfg.OrgID]; disabled {
			d.logger.Debug("skipping starting sender for disabled org", "org", cfg.OrgID)
			continue
		}

		// Refresh the Alertmanagers choice and assume the sender is kept
		// until one of the cases below decides otherwise.
		d.SendAlertsTo[cfg.OrgID] = cfg.SendAlertsTo
		keep[cfg.OrgID] = struct{}{}

		running, hasSender := d.Senders[cfg.OrgID]
		noTargets := len(cfg.Alertmanagers) == 0

		// The cases are ordered; each one relies on the previous ones not matching.
		switch {
		case !hasSender && noTargets:
			// Nothing running and nothing to send to.
			d.logger.Debug("no external alertmanagers configured", "org", cfg.OrgID)

		case !hasSender && cfg.SendAlertsTo == models.InternalAlertmanager:
			// Nothing running and alerts stay internal.
			d.logger.Debug("alerts are handled internally", "org", cfg.OrgID)

		case !hasSender:
			// Nothing running but there are Alertmanager(s) to reach: start a sender.
			d.logger.Info("creating new sender for the external alertmanagers", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
			created, err := New()
			if err != nil {
				d.logger.Error("unable to start the sender", "err", err, "org", cfg.OrgID)
				continue
			}
			d.Senders[cfg.OrgID] = created
			created.Run()
			if err = created.ApplyConfig(cfg); err != nil {
				d.logger.Error("failed to apply configuration", "err", err, "org", cfg.OrgID)
				continue
			}
			d.SendersCfgHash[cfg.OrgID] = cfg.AsSHA256()

		case noTargets:
			// A sender is running but has nowhere to send: drop it from the
			// keep-set so it is stopped below.
			d.logger.Debug("no external alertmanager(s) configured, sender will be stopped", "org", cfg.OrgID)
			delete(keep, cfg.OrgID)

		case d.SendersCfgHash[cfg.OrgID] == cfg.AsSHA256():
			// A sender is running with exactly this configuration.
			d.logger.Debug("sender configuration is the same as the one running, no-op", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)

		default:
			// A sender is running with an outdated configuration.
			d.logger.Debug("applying new configuration to sender", "org", cfg.OrgID, "alertmanagers", cfg.Alertmanagers)
			if err := running.ApplyConfig(cfg); err != nil {
				d.logger.Error("failed to apply configuration", "err", err, "org", cfg.OrgID)
				continue
			}
			d.SendersCfgHash[cfg.OrgID] = cfg.AsSHA256()
		}
	}

	// Detach every sender that is no longer wanted while still holding the lock.
	retired := map[int64]*Sender{}
	for orgID, sender := range d.Senders {
		if _, wanted := keep[orgID]; wanted {
			continue
		}
		retired[orgID] = sender
		delete(d.Senders, orgID)
		delete(d.SendersCfgHash, orgID)
	}
	d.AdminConfigMtx.Unlock()

	// The detached senders are unreachable through the router now, so they
	// can be stopped without holding the lock.
	for orgID, sender := range retired {
		d.logger.Info("stopping sender", "org", orgID)
		sender.Stop()
		d.logger.Info("stopped sender", "org", orgID)
	}

	d.logger.Debug("finish of admin configuration sync")
	return nil
}
// Send routes the alerts produced by the rule identified by key to the
// organization's internal Alertmanager, to its external Alertmanager(s), or
// to both, depending on the organization's Alertmanagers choice.
//
// Alerts go to the internal Alertmanager unless the organization is set to
// use external Alertmanagers only and its sender has already discovered at
// least one of them. Alerts go to the external sender whenever one exists for
// the organization and the choice is not internal-only. Delivery failures are
// logged; nothing is returned to the caller.
func (d *AlertsRouter) Send(key models.AlertRuleKey, alerts definitions.PostableAlerts) {
	logger := d.logger.New("rule_uid", key.UID, "org", key.OrgID)
	if len(alerts.PostableAlerts) == 0 {
		logger.Debug("no alerts to notify about")
		return
	}

	// SendAlertsTo and Senders are written by SyncAndApplyConfigFromDatabase
	// under the write lock, so both must be read under the read lock. The lock
	// is taken once and kept until return: this gives a consistent view of the
	// two maps and keeps the sender from being detached while we hand it alerts.
	// AlertmanagersFor is deliberately not called here because it acquires the
	// same read lock, and sync.RWMutex forbids recursive read locking.
	d.AdminConfigMtx.RLock()
	defer d.AdminConfigMtx.RUnlock()
	choice := d.SendAlertsTo[key.OrgID]
	s, ok := d.Senders[key.OrgID]

	// Send alerts to local notifier if they need to be handled internally
	// or if no external AMs have been discovered yet.
	var localNotifierExist, externalNotifierExist bool
	if choice == models.ExternalAlertmanagers && ok && len(s.Alertmanagers()) > 0 {
		logger.Debug("no alerts to put in the notifier")
	} else {
		logger.Debug("sending alerts to local notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
		n, err := d.MultiOrgNotifier.AlertmanagerFor(key.OrgID)
		if err == nil {
			localNotifierExist = true
			if err := n.PutAlerts(alerts); err != nil {
				logger.Error("failed to put alerts in the local notifier", "count", len(alerts.PostableAlerts), "err", err)
			}
		} else {
			if errors.Is(err, notifier.ErrNoAlertmanagerForOrg) {
				logger.Debug("local notifier was not found")
			} else {
				logger.Error("local notifier is not available", "err", err)
			}
		}
	}

	// Send alerts to external Alertmanager(s) if we have a sender for this organization
	// and alerts are not being handled just internally.
	if ok && choice != models.InternalAlertmanager {
		logger.Debug("sending alerts to external notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
		s.SendAlerts(alerts)
		externalNotifierExist = true
	}

	if !localNotifierExist && !externalNotifierExist {
		// The logger takes key/value pairs, not a format string, so the
		// number of undelivered alerts is reported as a "count" field.
		logger.Error("no external or internal notifier - alerts not delivered", "count", len(alerts.PostableAlerts))
	}
}
// AlertmanagersFor reports the Alertmanager(s) discovered by the sender of the
// given organization. An organization without a sender yields an empty,
// non-nil slice.
func (d *AlertsRouter) AlertmanagersFor(orgID int64) []*url.URL {
	d.AdminConfigMtx.RLock()
	defer d.AdminConfigMtx.RUnlock()

	if sender, found := d.Senders[orgID]; found {
		return sender.Alertmanagers()
	}
	return []*url.URL{}
}
// DroppedAlertmanagersFor reports the Alertmanager(s) dropped by the sender of
// the given organization. An organization without a sender yields an empty,
// non-nil slice.
func (d *AlertsRouter) DroppedAlertmanagersFor(orgID int64) []*url.URL {
	d.AdminConfigMtx.RLock()
	defer d.AdminConfigMtx.RUnlock()

	if sender, found := d.Senders[orgID]; found {
		return sender.DroppedAlertmanagers()
	}
	return []*url.URL{}
}
// Run keeps the router in sync with the stored admin configuration: it waits
// for the poll interval, synchronizes, and repeats until ctx is cancelled.
// A failed sync is logged and retried after the next interval. On
// cancellation every sender is removed and stopped, and Run returns nil.
func (d *AlertsRouter) Run(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			// Shutting down: no external Alertmanager may receive alerts anymore.
			d.AdminConfigMtx.Lock()
			for orgID := range d.Senders {
				sender := d.Senders[orgID]
				// Remove the sender first so that it cannot be handed any
				// more alerts, then stop it.
				delete(d.Senders, orgID)
				sender.Stop()
			}
			d.AdminConfigMtx.Unlock()
			return nil

		case <-time.After(d.adminConfigPollInterval):
			err := d.SyncAndApplyConfigFromDatabase()
			if err != nil {
				d.logger.Error("unable to sync admin configuration", "err", err)
			}
		}
	}
}