The open and composable observability and data visualization platform. Visualize metrics, logs, and traces from multiple sources like Prometheus, Loki, Elasticsearch, InfluxDB, Postgres and many more.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
grafana/pkg/tsdb/cloudwatch/cloudwatch.go

339 lines
11 KiB

package cloudwatch
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface"
"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
"github.com/aws/aws-sdk-go/service/cloudwatchlogs/cloudwatchlogsiface"
"github.com/aws/aws-sdk-go/service/resourcegroupstaggingapi/resourcegroupstaggingapiiface"
"github.com/grafana/grafana-aws-sdk/pkg/awsds"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/resource/httpadapter"
"github.com/grafana/grafana/pkg/infra/httpclient"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
ngalertmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/query"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/clients"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/kinds/dataquery"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/models"
"github.com/patrickmn/go-cache"
)
const tagValueCacheExpiration = time.Hour * 24
type DataQueryJson struct {
dataquery.CloudWatchAnnotationQuery
Type string `json:"type,omitempty"`
}
type DataSource struct {
Settings models.CloudWatchSettings
HTTPClient *http.Client
tagValueCache *cache.Cache
}
const (
defaultRegion = "default"
logsQueryMode = "Logs"
// QueryTypes
annotationQuery = "annotationQuery"
logAction = "logAction"
timeSeriesQuery = "timeSeriesQuery"
)
var logger = log.New("tsdb.cloudwatch")
func ProvideService(cfg *setting.Cfg, httpClientProvider httpclient.Provider, features featuremgmt.FeatureToggles) *CloudWatchService {
logger.Debug("Initializing")
executor := newExecutor(datasource.NewInstanceManager(NewInstanceSettings(httpClientProvider)), cfg, awsds.NewSessionCache(), features)
return &CloudWatchService{
Cfg: cfg,
Executor: executor,
}
}
type CloudWatchService struct {
Cfg *setting.Cfg
Executor *cloudWatchExecutor
}
type SessionCache interface {
GetSession(c awsds.SessionConfig) (*session.Session, error)
}
func newExecutor(im instancemgmt.InstanceManager, cfg *setting.Cfg, sessions SessionCache, features featuremgmt.FeatureToggles) *cloudWatchExecutor {
e := &cloudWatchExecutor{
im: im,
cfg: cfg,
sessions: sessions,
features: features,
}
e.resourceHandler = httpadapter.New(e.newResourceMux())
return e
}
func NewInstanceSettings(httpClientProvider httpclient.Provider) datasource.InstanceFactoryFunc {
return func(ctx context.Context, settings backend.DataSourceInstanceSettings) (instancemgmt.Instance, error) {
instanceSettings, err := models.LoadCloudWatchSettings(settings)
if err != nil {
return nil, fmt.Errorf("error reading settings: %w", err)
}
opts, err := settings.HTTPClientOptions(ctx)
if err != nil {
return nil, err
}
httpClient, err := httpClientProvider.New(opts)
if err != nil {
return nil, fmt.Errorf("error creating http client: %w", err)
}
return DataSource{
Settings: instanceSettings,
HTTPClient: httpClient,
tagValueCache: cache.New(tagValueCacheExpiration, tagValueCacheExpiration*5),
}, nil
}
}
// cloudWatchExecutor executes CloudWatch requests.
type cloudWatchExecutor struct {
im instancemgmt.InstanceManager
cfg *setting.Cfg
sessions SessionCache
features featuremgmt.FeatureToggles
regionCache sync.Map
resourceHandler backend.CallResourceHandler
}
func (e *cloudWatchExecutor) getRequestContext(ctx context.Context, pluginCtx backend.PluginContext, region string) (models.RequestContext, error) {
r := region
instance, err := e.getInstance(ctx, pluginCtx)
if region == defaultRegion {
if err != nil {
return models.RequestContext{}, err
}
r = instance.Settings.Region
}
ec2Client, err := e.getEC2Client(ctx, pluginCtx, defaultRegion)
if err != nil {
return models.RequestContext{}, err
}
sess, err := e.newSession(ctx, pluginCtx, r)
if err != nil {
return models.RequestContext{}, err
}
return models.RequestContext{
OAMAPIProvider: NewOAMAPI(sess),
MetricsClientProvider: clients.NewMetricsClient(NewMetricsAPI(sess), e.cfg),
LogsAPIProvider: NewLogsAPI(sess),
EC2APIProvider: ec2Client,
Settings: instance.Settings,
Features: e.features,
Logger: logger,
}, nil
}
func (e *cloudWatchExecutor) CallResource(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error {
return e.resourceHandler.CallResource(ctx, req, sender)
}
func (e *cloudWatchExecutor) QueryData(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
logger := logger.FromContext(ctx)
q := req.Queries[0]
var model DataQueryJson
err := json.Unmarshal(q.JSON, &model)
if err != nil {
return nil, err
}
_, fromAlert := req.Headers[ngalertmodels.FromAlertHeaderName]
fromExpression := req.GetHTTPHeader(query.HeaderFromExpression) != ""
// Public dashboard queries execute like alert queries, i.e. they execute on the backend, therefore, we need to handle them synchronously.
// Since `model.Type` is set during execution on the frontend by the query runner and isn't saved with the query, we are checking here is
// missing the `model.Type` property and if it is a log query in order to determine if it is a public dashboard query.
fromPublicDashboard := (model.Type == "" && model.QueryMode == logsQueryMode)
isSyncLogQuery := ((fromAlert || fromExpression) && model.QueryMode == logsQueryMode) || fromPublicDashboard
if isSyncLogQuery {
return executeSyncLogQuery(ctx, e, req)
}
var result *backend.QueryDataResponse
switch model.Type {
case annotationQuery:
result, err = e.executeAnnotationQuery(ctx, req.PluginContext, model, q)
case logAction:
result, err = e.executeLogActions(ctx, logger, req)
case timeSeriesQuery:
fallthrough
default:
result, err = e.executeTimeSeriesQuery(ctx, logger, req)
}
return result, err
}
func (e *cloudWatchExecutor) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) {
status := backend.HealthStatusOk
metricsTest := "Successfully queried the CloudWatch metrics API."
logsTest := "Successfully queried the CloudWatch logs API."
err := e.checkHealthMetrics(ctx, req.PluginContext)
if err != nil {
status = backend.HealthStatusError
metricsTest = fmt.Sprintf("CloudWatch metrics query failed: %s", err.Error())
}
err = e.checkHealthLogs(ctx, req.PluginContext)
if err != nil {
status = backend.HealthStatusError
logsTest = fmt.Sprintf("CloudWatch logs query failed: %s", err.Error())
}
return &backend.CheckHealthResult{
Status: status,
Message: fmt.Sprintf("1. %s\n2. %s", metricsTest, logsTest),
}, nil
}
func (e *cloudWatchExecutor) checkHealthMetrics(ctx context.Context, pluginCtx backend.PluginContext) error {
namespace := "AWS/Billing"
metric := "EstimatedCharges"
params := &cloudwatch.ListMetricsInput{
Namespace: &namespace,
MetricName: &metric,
}
session, err := e.newSession(ctx, pluginCtx, defaultRegion)
if err != nil {
return err
}
metricClient := clients.NewMetricsClient(NewMetricsAPI(session), e.cfg)
_, err = metricClient.ListMetricsWithPageLimit(ctx, params)
return err
}
func (e *cloudWatchExecutor) checkHealthLogs(ctx context.Context, pluginCtx backend.PluginContext) error {
session, err := e.newSession(ctx, pluginCtx, defaultRegion)
if err != nil {
return err
}
logsClient := NewLogsAPI(session)
_, err = logsClient.DescribeLogGroupsWithContext(ctx, &cloudwatchlogs.DescribeLogGroupsInput{Limit: aws.Int64(1)})
return err
}
func (e *cloudWatchExecutor) newSession(ctx context.Context, pluginCtx backend.PluginContext, region string) (*session.Session, error) {
instance, err := e.getInstance(ctx, pluginCtx)
if err != nil {
return nil, err
}
if region == defaultRegion {
if len(instance.Settings.Region) == 0 {
return nil, models.ErrMissingRegion
}
region = instance.Settings.Region
}
sess, err := e.sessions.GetSession(awsds.SessionConfig{
// https://github.com/grafana/grafana/issues/46365
// HTTPClient: instance.HTTPClient,
Settings: awsds.AWSDatasourceSettings{
Profile: instance.Settings.Profile,
Region: region,
AuthType: instance.Settings.AuthType,
AssumeRoleARN: instance.Settings.AssumeRoleARN,
ExternalID: instance.Settings.ExternalID,
Endpoint: instance.Settings.Endpoint,
DefaultRegion: instance.Settings.Region,
AccessKey: instance.Settings.AccessKey,
SecretKey: instance.Settings.SecretKey,
},
UserAgentName: aws.String("Cloudwatch"),
})
if err != nil {
return nil, err
}
// work around until https://github.com/grafana/grafana/issues/39089 is implemented
if e.cfg.SecureSocksDSProxy.Enabled && instance.Settings.SecureSocksProxyEnabled {
// only update the transport to try to avoid the issue mentioned here https://github.com/grafana/grafana/issues/46365
sess.Config.HTTPClient.Transport = instance.HTTPClient.Transport
}
return sess, nil
}
func (e *cloudWatchExecutor) getInstance(ctx context.Context, pluginCtx backend.PluginContext) (*DataSource, error) {
i, err := e.im.Get(ctx, pluginCtx)
if err != nil {
return nil, err
}
instance := i.(DataSource)
return &instance, nil
}
func (e *cloudWatchExecutor) getCWClient(ctx context.Context, pluginCtx backend.PluginContext, region string) (cloudwatchiface.CloudWatchAPI, error) {
sess, err := e.newSession(ctx, pluginCtx, region)
if err != nil {
return nil, err
}
return NewCWClient(sess), nil
}
func (e *cloudWatchExecutor) getCWLogsClient(ctx context.Context, pluginCtx backend.PluginContext, region string) (cloudwatchlogsiface.CloudWatchLogsAPI, error) {
sess, err := e.newSession(ctx, pluginCtx, region)
if err != nil {
return nil, err
}
logsClient := NewCWLogsClient(sess)
return logsClient, nil
}
func (e *cloudWatchExecutor) getEC2Client(ctx context.Context, pluginCtx backend.PluginContext, region string) (models.EC2APIProvider, error) {
sess, err := e.newSession(ctx, pluginCtx, region)
if err != nil {
return nil, err
}
return NewEC2Client(sess), nil
}
func (e *cloudWatchExecutor) getRGTAClient(ctx context.Context, pluginCtx backend.PluginContext, region string) (resourcegroupstaggingapiiface.ResourceGroupsTaggingAPIAPI,
error) {
sess, err := e.newSession(ctx, pluginCtx, region)
if err != nil {
return nil, err
}
return newRGTAClient(sess), nil
}
func isTerminated(queryStatus string) bool {
return queryStatus == "Complete" || queryStatus == "Cancelled" || queryStatus == "Failed" || queryStatus == "Timeout"
}