Update migration to not delete existing Grafana alerts (#39541)

* keep existing unified alert rules untouched
* move silences and other alertmanager files to the organization directory (only if it is a single organization deployment)
* assign the existing notification settings and routes to the first organization
* create default notification settings for each organization in the case of a multi-org deployment
pull/39750/head
Yuriy Tseretyan 4 years ago committed by GitHub
parent eead8cd8e1
commit 5d0d7dcb3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 161
      pkg/services/sqlstore/migrations/ualert/ualert.go

@ -4,6 +4,8 @@ import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
pb "github.com/prometheus/alertmanager/silence/silencepb"
@ -88,37 +90,16 @@ func RerunDashAlertMigration(mg *migrator.Migrator) {
}
cloneMigTitle := fmt.Sprintf("clone %s", migTitle)
cloneRmMigTitle := fmt.Sprintf("clone %s", rmMigTitle)
_, migrationRun := logs[cloneMigTitle]
ngEnabled := mg.Cfg.IsNgAlertEnabled()
switch {
case ngEnabled && !migrationRun:
// Removes all unified alerting data. It is not recorded so when the feature
// flag is removed in future the "clone remove unified alerting data" migration will be run again.
mg.AddMigration(cloneRmMigTitle, &rmMigrationWithoutLogging{})
mg.AddMigration(cloneMigTitle, &migration{
seenChannelUIDs: make(map[string]struct{}),
migratedChannelsPerOrg: make(map[int64]map[*notificationChannel]struct{}),
portedChannelGroupsPerOrg: make(map[int64]map[string]string),
silences: make(map[int64][]*pb.MeshSilence),
})
case !ngEnabled && migrationRun:
// Remove the migration entry that creates unified alerting data. This is so when the feature
// flag is enabled in the future the migration "move dashboard alerts to unified alerting" will be run again.
mg.AddMigration(fmt.Sprintf(clearMigrationEntryTitle, cloneMigTitle), &clearMigrationEntry{
migrationID: cloneMigTitle,
})
if err != nil {
mg.Logger.Error("alert migration error: could not clear clone dashboard alert migration", "error", err)
}
// Removes all unified alerting data. It is not recorded so when the feature
// flag is enabled in future the "clone remove unified alerting data" migration will be run again.
mg.AddMigration(cloneRmMigTitle, &rmMigrationWithoutLogging{})
// The only use of this migration is when a user enabled ng-alerting before 8.2.
mg.AddMigration(cloneMigTitle, &upgradeNgAlerting{})
// if user disables the feature flag and enables it back.
// This migration does not need to be run because the original migration AddDashAlertMigration does what's needed
}
}
@ -451,3 +432,133 @@ type rmMigrationWithoutLogging = rmMigration
// SkipMigrationLog always returns true.
// NOTE(review): going by the name, this presumably tells the migrator not to
// record the migration in its log so that it can be executed again on a later
// start — confirm against the migrator package.
func (m *rmMigrationWithoutLogging) SkipMigrationLog() bool {
return true
}
// upgradeNgAlerting is a code migration that fixes up unified alerting data
// created before organization separation was introduced in 8.2: it reassigns
// alert_configuration rows stored with org_id 0 and moves or deletes the
// alertmanager files kept in the root of the alerting data directory.
type upgradeNgAlerting struct {
migrator.MigrationBase
}
// Compile-time check that *upgradeNgAlerting implements migrator.CodeMigration.
var _ migrator.CodeMigration = &upgradeNgAlerting{}
// Exec performs the upgrade in two steps. First the alert_configuration table
// is fixed up; if that fails the error is returned and the file system is left
// untouched. Otherwise the alertmanager files are moved or deleted based on
// the organization ID reported by the first step. The file step is best-effort
// and never fails the migration.
func (u *upgradeNgAlerting) Exec(sess *xorm.Session, migrator *migrator.Migrator) error {
	orgID, err := u.updateAlertConfigurations(sess, migrator)
	if err == nil {
		u.updateAlertmanagerFiles(orgID, migrator)
	}
	return err
}
// updateAlertConfigurations repairs alert_configuration rows that were stored
// with org_id = 0, i.e. written before 8.2 introduced organization separation.
//
// Return value:
//   - (0, nil) when no such rows exist, when the org table is empty, or when
//     more than one organization exists;
//   - (<id of the only organization>, nil) when exactly one organization exists;
//   - (0, err) when any query, update or insert fails.
func (u *upgradeNgAlerting) updateAlertConfigurations(sess *xorm.Session, migrator *migrator.Migrator) (int64, error) {
	// Rows with org_id == 0 exist only if the feature flag was enabled before 8.2.
	// When the feature is first enabled in 8.2 or later, "AddDashAlertMigration"
	// already handles organizations correctly and there is nothing to fix.
	legacyRows, err := sess.Table(&AlertConfiguration{}).Where("org_id = 0").Count()
	switch {
	case err != nil:
		return 0, fmt.Errorf("failed to query table alert_configuration: %w", err)
	case legacyRows == 0:
		return 0, nil // nothing to do
	}

	// Load every organization ID, lowest first.
	orgIDs := []int64{}
	err = sess.Table("org").OrderBy("id").Cols("id").Find(&orgIDs)
	if err != nil {
		return 0, fmt.Errorf("failed to query table org: %w", err)
	}
	if len(orgIDs) == 0 { // not expected in practice
		migrator.Logger.Info("No organizations are found. Nothing to migrate")
		return 0, nil
	}

	// org_id 0 does not usually point to any organization, so hand all of those
	// rows to the organization with the lowest ID.
	lowestOrgID := orgIDs[0]
	migrator.Logger.Info("Assigning all existing records from alert_configuration to the first organization", "org", lowestOrgID)
	if _, err = sess.Cols("org_id").Where("org_id = 0").Update(&AlertConfiguration{OrgID: lowestOrgID}); err != nil {
		return 0, fmt.Errorf("failed to update org_id for all rows in the table alert_configuration: %w", err)
	}

	// With a single organization it is safe to assume every configuration is its own.
	if len(orgIDs) == 1 {
		return lowestOrgID, nil
	}

	// With several organizations the owner of a configuration cannot be known,
	// so a default configuration is added for every organization. The previous
	// version could be restored if needed.
	migrator.Logger.Warn("Detected many organizations. The current alertmanager configuration will be replaced by the default one")
	defaults := make([]*AlertConfiguration, len(orgIDs))
	for i, id := range orgIDs {
		defaults[i] = &AlertConfiguration{
			AlertmanagerConfiguration: migrator.Cfg.UnifiedAlerting.DefaultConfiguration,
			// This migration targets a snapshot of the code, so it always
			// migrates to the v1 config.
			ConfigurationVersion: "v1",
			OrgID:                id,
		}
	}
	if _, err = sess.InsertMulti(defaults); err != nil {
		return 0, fmt.Errorf("failed to add default alertmanager configurations to every organization: %w", err)
	}
	return 0, nil
}
// updateAlertmanagerFiles looks in '<data_dir>/alerting' for the known
// alertmanager files ("__default__.tmpl", "silences", "notifications").
//
// Versions before 8.2 kept these files in the root of the alerting directory;
// since 8.2 they live in an organization-specific sub-directory. Therefore:
//   - when orgId is not 0, every known file is moved to '<data_dir>/alerting/<orgId>'
//     (a file that cannot be moved is deleted instead);
//   - when orgId is 0, every known file is deleted.
//
// The function is best-effort: failures are only logged, because the files in
// the root directory are no longer used and the worst outcome is leftovers.
func (u *upgradeNgAlerting) updateAlertmanagerFiles(orgId int64, migrator *migrator.Migrator) {
	knownFiles := map[string]struct{}{
		"__default__.tmpl": {},
		"silences":         {},
		"notifications":    {},
	}
	rootDir := filepath.Join(migrator.Cfg.DataPath, "alerting")
	orgDir := filepath.Join(rootDir, strconv.FormatInt(orgId, 10))

	// remove deletes a file from the root alerting directory, logging any failure.
	remove := func(name string) {
		target := filepath.Join(rootDir, name)
		migrator.Logger.Info("Deleting alerting configuration file", "file", name)
		if rmErr := os.Remove(target); rmErr != nil {
			migrator.Logger.Warn("Failed to delete file", "file", target, "error", rmErr)
		}
	}

	// relocate moves a file into the organization directory, falling back to
	// deletion when the directory cannot be created or the rename fails.
	relocate := func(name string) {
		if mkErr := os.MkdirAll(orgDir, 0750); mkErr != nil {
			migrator.Logger.Error("Failed to create alerting directory for organization. Skip moving the file and delete it instead", "target_dir", orgDir, "org_id", orgId, "error", mkErr, "file", name)
			remove(name)
			return
		}
		if mvErr := os.Rename(filepath.Join(rootDir, name), filepath.Join(orgDir, name)); mvErr != nil {
			migrator.Logger.Error("Failed to move alertmanager configuration file to organization.", "source_dir", rootDir, "target_dir", orgDir, "org_id", orgId, "error", mvErr, "file", name)
			remove(name)
		}
	}

	// Pick the per-file action once: delete without a target org, move otherwise.
	handle := relocate
	if orgId == 0 {
		handle = remove
	}

	entries, err := os.ReadDir(rootDir)
	if err != nil && !os.IsNotExist(err) {
		// A missing directory is fine; anything else is reported together with
		// the list of files that may have been left behind.
		names := make([]string, 0, len(knownFiles))
		for name := range knownFiles {
			names = append(names, name)
		}
		migrator.Logger.Warn("Failed to clean up alerting directory. There may be files that are not used anymore.", "path", rootDir, "files_to_delete", names, "error", err)
	}

	for _, entry := range entries {
		if _, known := knownFiles[entry.Name()]; !known {
			continue
		}
		handle(entry.Name())
	}
}
// SQL returns the fixed placeholder string "code migration"; the dialect
// argument is ignored.
// NOTE(review): presumably a placeholder because the actual work is done in
// Exec rather than by a SQL statement — confirm against the migrator package.
func (u *upgradeNgAlerting) SQL(migrator.Dialect) string {
return "code migration"
}

Loading…
Cancel
Save