The open and composable observability and data visualization platform. Visualize metrics, logs, and traces from multiple sources like Prometheus, Loki, Elasticsearch, InfluxDB, Postgres and many more.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
grafana/pkg/services/ngalert/api/api_prometheus.go

178 lines
5.7 KiB

package api
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
apiv1 "github.com/prometheus/client_golang/api/prometheus/v1"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
Inhouse alerting api (#33129) * init * autogens AM route * POST dashboards/db spec * POST alert-notifications spec * fix description * re inits vendor, updates grafana to master * go mod updates * alerting routes * renames to receivers * prometheus endpoints * align config endpoint with cortex, include templates * Change grafana receiver type * Update receivers.go * rename struct to stop swagger thrashing * add rules API * index html * standalone swagger ui html page * Update README.md * Expose GrafanaManagedAlert properties * Some fixes - /api/v1/rules/{Namespace} should return a map - update ExtendedUpsertAlertDefinitionCommand properties * am alerts routes * rename prom swagger section for clarity, remove example endpoints * Add missing json and yaml tags * folder perms * make folders POST again * fix grafana receiver type * rename fodler->namespace for perms * make ruler json again * PR fixes * silences * fix Ok -> Ack * Add id to POST /api/v1/silences (#9) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * Add POST /api/v1/alerts (#10) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * fix silences * Add testing endpoints * removes grpc replace directives * [wip] starts validation * pkg cleanup * go mod tidy * ignores vendor dir * Change response type for Cortex/Loki alerts * receiver unmarshaling tests * ability to split routes between AM & Grafana * api marshaling & validation * begins work on routing lib * [hack] ignores embedded field in generation * path specific datasource for alerting * align endpoint names with cloud * single route per Alerting config * removes unused routing pkg * regens spec * adds datasource param to ruler/prom route paths * Modifications for supporting migration * Apply suggestions from code review * hack for cleaning circular refs in swagger definition * generates files * minor fixes for prom endpoints * decorate prom apis with required: true where applicable * Revert "generates files" This reverts commit 
ef7e97558477d79bcad416e043b04dbd04a2c8f7. * removes server autogen * Update imported structs from ngalert * Fix listing rules response * Update github.com/prometheus/common dependency * Update get silence response * Update get silences response * adds ruler validation & backend switching * Fix GET /alertmanager/{DatasourceId}/config/api/v1/alerts response * Distinct gettable and postable grafana receivers * Remove permissions routes * Latest JSON specs * Fix testing routes * inline yaml annotation on apirulenode * yaml test & yamlv3 + comments * Fix yaml annotations for embedded type * Rename DatasourceId path parameter * Implement Backend.String() * backend zero value is a real backend * exports DiscoveryBase * Fix GO initialisms * Silences: Use PostableSilence as the base struct for creating silences * Use type alias instead of struct embedding * More fixes to alertmanager silencing routes * post and spec JSONs * Split rule config to postable/gettable * Fix empty POST /silences payload Recreating the generated JSON specs fixes the issue without further modifications * better yaml unmarshaling for nested yaml docs in cortex-am configs * regens spec * re-adds config.receivers * omitempty to align with prometheus API behavior * Prefix routes with /api * Update Alertmanager models * Make adjustments to follow the Alertmanager API * ruler: add for and annotations to grafana alert (#45) * Modify testing API routes * Fix grafana rule for field type * Move PostableUserConfig validation to this library * Fix PostableUserConfig YAML encoding/decoding * Use common fields for grafana and lotex rules * Add namespace id in GettableGrafanaRule * Apply suggestions from code review * fixup * more changes * Apply suggestions from code review * aligns structure pre merge * fix new imports & tests * updates tooling readme * goimports * lint * more linting!! 
* revive lint Co-authored-by: Sofia Papagiannaki <papagian@gmail.com> Co-authored-by: Domas <domasx2@gmail.com> Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: David Parrott <stomp.box.yo@gmail.com> Co-authored-by: Kyle Brandt <kyle@grafana.com>
5 years ago
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
// PrometheusSrv groups the dependencies of the handlers that report alert and
// rule statuses (RouteGetAlertStatuses and RouteGetRuleStatuses).
type PrometheusSrv struct {
// log is the logger available to the handlers.
log log.Logger
// manager provides the current alert states; the handlers query it with
// GetAll and GetStatesForRuleUID.
manager *state.Manager
// store is used to list an organisation's rule groups, to resolve a
// namespace by UID for the signed-in user, and to fetch a group's rules.
store store.RuleStore
}
// RouteGetAlertStatuses returns every alert state the state manager holds for
// the caller's organisation, wrapped in an AlertResponse.
//
// The handler always answers with HTTP 200 and status "success". The alerts
// list is initialised to an empty, non-nil slice so that an organisation
// without any states still yields a list.
func (srv PrometheusSrv) RouteGetAlertStatuses(c *models.ReqContext) response.Response {
	alertResponse := apimodels.AlertResponse{
		DiscoveryBase: apimodels.DiscoveryBase{
			Status: "success",
		},
		Data: apimodels.AlertDiscovery{
			Alerts: []*apimodels.Alert{},
		},
	}

	for _, alertState := range srv.manager.GetAll(c.OrgId) {
		// Copy the timestamp so each alert points at its own value.
		startsAt := alertState.StartsAt

		// The evaluation string is only reported while the state is alerting.
		valString := ""
		if len(alertState.Results) > 0 && alertState.State == eval.Alerting {
			valString = alertState.Results[0].EvaluationString
		}

		// Report the annotations carried by the state, as RouteGetRuleStatuses
		// does. Fall back to an empty map so the field stays a non-nil map
		// when a state has no annotations.
		annotations := alertState.Annotations
		if annotations == nil {
			annotations = map[string]string{}
		}

		alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{
			Labels:      map[string]string(alertState.Labels),
			Annotations: annotations,
			State:       alertState.State.String(),
			ActiveAt:    &startsAt,
			Value:       valString,
		})
	}

	return response.JSON(http.StatusOK, alertResponse)
}
func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Response {
ruleResponse := apimodels.RuleResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.RuleDiscovery{
RuleGroups: []*apimodels.RuleGroup{},
},
}
ruleGroupQuery := ngmodels.ListOrgRuleGroupsQuery{
OrgID: c.SignedInUser.OrgId,
}
if err := srv.store.GetOrgRuleGroups(&ruleGroupQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rule groups: %s", err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
for _, r := range ruleGroupQuery.Result {
if len(r) < 3 {
continue
}
groupId, namespaceUID, namespace := r[0], r[1], r[2]
if _, err := srv.store.GetNamespaceByUID(namespaceUID, c.SignedInUser.OrgId, c.SignedInUser); err != nil {
if errors.Is(err, models.ErrFolderAccessDenied) {
// do not include it in the response
continue
}
return toNamespaceErrorResponse(err)
}
alertRuleQuery := ngmodels.ListRuleGroupAlertRulesQuery{OrgID: c.SignedInUser.OrgId, NamespaceUID: namespaceUID, RuleGroup: groupId}
if err := srv.store.GetRuleGroupAlertRules(&alertRuleQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rules for group %s: %s", groupId, err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
newGroup := &apimodels.RuleGroup{
Name: groupId,
// This doesn't make sense in our architecture
Alerting: set query in rules response (#33010) * set query in rules response * Theme: tweaking dark theme colors (#33007) * Library Panels: Add library panel tab to share modal (#32953) * Explore: Scroll split panes in Explore independently (#32978) * Change default prometheus to latest and prometheus v1 to prometheus1 * Update README * Remove prometheus1 block as not used * Explore: Separatae scrolling in split view * Update snapshot * Allow skip migrations in tests via environment variable (#32958) * Dashboard: Fix issue where Slack notifications won't link to users (#32861) * DashboardPage: refactored styles from sass to emotion (#32955) * DashboardPage: refactored styles from sass to emotion * refactored dashboardPage component to be alot easier to read and understand * more refactoring... * more cleaning... * fixes frontend test * fixes frontend test- I hope * fixes frontend test- I hope * moves dashboard scss styles back to it's standalone file * GraphNG: use theme font family and size for axis labels (#33009) * GraphNG: use theme font family and size for axis labels * fix test * AlertingNG: Slack notification channel (#32675) * AlertingNG: Slack notification channel Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Add tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments and small refactoring Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * GraphNG: stacking (#30749) * First iteration * Dev dash * Re-use StackingMode type * Fix ts and api issues * Stacking work resurected * Fix overrides * Correct values in tooltip and updated test dashboard * Update dev dashboard * Apply correct bands for stacking * Merge fix * Update snapshot * Revert go.sum * Handle null values correctyl and make filleBelowTo and stacking mutual exclusive * Snapshots update * Graph->Time series stacking migration * Review comments * Indicate overrides in 
StandardEditorContext * Change stacking UI editor, migrate stacking to object option * Small refactor, fix for hiding series and dev dashboard * VizLegend: sets a min and max value of the seriesCount control in Storybook (#33022) * Alerting: Filter rules list (#32818) * Chore: Reduces strict errors (#33012) * Chore: reduces strict error in OptionPicker tests * Chore: reduces strict errors in FormDropdownCtrl * Chore: reduces has no initializer and is not definitely assigned in the constructor errors * Chore: reduces has no initializer and is not definitely assigned in the constructor errors * Chore: lowers strict count limit * Tests: updates snapshots * Tests: updates snapshots * Chore: updates after PR comments * Refactor: removes throw and changes signature for DashboardSrv.getCurrent * [Alerting]: Several modifications in alert rules (#32983) * [Alerting]: Use common properties for all rules * Add Labels in rules * Fix update ruleGroup API Return 400 Bad Request response when the request contains a UID that does not exist * Check permissions and return namespace id * Apply suggestions from code review Co-authored-by: gotjosh <josue@grafana.com> * WIP (#33025) * Chore: Bump strict error count limit (#33035) * set query in rules response Co-authored-by: Torkel Ödegaard <torkel@grafana.org> Co-authored-by: kay delaney <45561153+kaydelaney@users.noreply.github.com> Co-authored-by: Ivana Huckova <30407135+ivanahuckova@users.noreply.github.com> Co-authored-by: Dafydd <72009875+dafydd-t@users.noreply.github.com> Co-authored-by: n-wbrown <n-wbrown@users.noreply.github.com> Co-authored-by: Uchechukwu Obasi <obasiuche62@gmail.com> Co-authored-by: Leon Sorokin <leeoniya@gmail.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Dominik Prokop <dominik.prokop@grafana.com> Co-authored-by: Nathan Rodman <nathanrodman@gmail.com> Co-authored-by: Hugo Häggmark <hugo.haggmark@grafana.com> Co-authored-by: Sofia Papagiannaki 
<papagian@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: Marcus Efraimsson <marcus.efraimsson@gmail.com>
5 years ago
// so we use this field for passing to the frontend the namespace
File: namespace,
LastEvaluation: time.Time{},
EvaluationTime: 0, // TODO: see if we are able to pass this along with evaluation results
}
for _, rule := range alertRuleQuery.Result {
var queryStr string
encodedQuery, err := json.Marshal(rule.Data)
if err != nil {
queryStr = err.Error()
} else {
queryStr = string(encodedQuery)
}
alertingRule := apimodels.AlertingRule{
State: "inactive",
Name: rule.Title,
Query: queryStr,
Duration: rule.For.Seconds(),
Annotations: rule.Annotations,
}
newRule := apimodels.Rule{
Name: rule.Title,
Labels: rule.Labels,
Health: "ok",
Type: apiv1.RuleTypeAlerting,
LastEvaluation: time.Time{},
}
for _, alertState := range srv.manager.GetStatesForRuleUID(c.OrgId, rule.UID) {
activeAt := alertState.StartsAt
valString := ""
if len(alertState.Results) > 0 && alertState.State == eval.Alerting {
valString = alertState.Results[0].EvaluationString
}
alert := &apimodels.Alert{
Labels: map[string]string(alertState.Labels),
Annotations: alertState.Annotations,
State: alertState.State.String(),
ActiveAt: &activeAt,
Value: valString, // TODO: set this once it is added to the evaluation results
}
if alertState.LastEvaluationTime.After(newRule.LastEvaluation) {
newRule.LastEvaluation = alertState.LastEvaluationTime
newGroup.LastEvaluation = alertState.LastEvaluationTime
}
newRule.EvaluationTime = alertState.EvaluationDuration.Seconds()
switch alertState.State {
case eval.Normal:
case eval.Pending:
if alertingRule.State == "inactive" {
alertingRule.State = "pending"
}
case eval.Alerting:
alertingRule.State = "firing"
case eval.Error:
newRule.Health = "error"
case eval.NoData:
newRule.Health = "nodata"
}
if alertState.Error != nil {
newRule.LastError = alertState.Error.Error()
newRule.Health = "error"
}
alertingRule.Alerts = append(alertingRule.Alerts, alert)
}
alertingRule.Rule = newRule
newGroup.Rules = append(newGroup.Rules, alertingRule)
newGroup.Interval = float64(rule.IntervalSeconds)
}
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, newGroup)
}
return response.JSON(http.StatusOK, ruleResponse)
}