package screenshot import ( "context" "errors" "fmt" "net/url" "path" "strconv" "time" gocache "github.com/patrickmn/go-cache" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "golang.org/x/sync/singleflight" "github.com/grafana/grafana/pkg/components/imguploader" "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/rendering" "github.com/grafana/grafana/pkg/setting" ) const ( namespace = "grafana" subsystem = "screenshot" ) var ( DefaultTheme = models.ThemeDark DefaultTimeout = 15 * time.Second DefaultHeight = 500 DefaultWidth = 1000 ) var ( ErrScreenshotsUnavailable = errors.New("screenshots unavailable") ) // Screenshot represents a screenshot of a dashboard in Grafana. // // A screenshot can have a Path and an URL if the screenshot is stored on disk // and uploaded to a cloud storage service or made accessible via the Grafana // HTTP server. type Screenshot struct { Path string URL string } // ScreenshotOptions are the options for taking a screenshot. type ScreenshotOptions struct { DashboardUID string PanelID int64 Width int Height int Theme models.Theme Timeout time.Duration } // SetDefaults sets default values for missing or invalid options. func (s ScreenshotOptions) SetDefaults() ScreenshotOptions { if s.Width <= 0 { s.Width = DefaultWidth } if s.Height <= 0 { s.Height = DefaultHeight } switch s.Theme { case models.ThemeDark, models.ThemeLight: default: s.Theme = DefaultTheme } if s.Timeout <= 0 { s.Timeout = DefaultTimeout } return s } // ScreenshotService is an interface for taking screenshots. //go:generate mockgen -destination=mock.go -package=screenshot github.com/grafana/grafana/pkg/services/screenshot ScreenshotService type ScreenshotService interface { Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) } // CachableScreenshotService caches screenshots. type CachableScreenshotService struct { cache *gocache.Cache service ScreenshotService cacheHits prometheus.Counter cacheMisses prometheus.Counter } func NewCachableScreenshotService(r prometheus.Registerer, expiration time.Duration, service ScreenshotService) ScreenshotService { return &CachableScreenshotService{ cache: gocache.New(expiration, time.Minute), service: service, cacheHits: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "cache_hits_total", Namespace: namespace, Subsystem: subsystem, }), cacheMisses: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "cache_misses_total", Namespace: namespace, Subsystem: subsystem, }), } } // Take returns the screenshot from the cache or asks the service to take a // new screenshot and cache it before returning it. func (s *CachableScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { k := fmt.Sprintf("%s-%d-%s", opts.DashboardUID, opts.PanelID, opts.Theme) if v, ok := s.cache.Get(k); ok { defer s.cacheHits.Inc() return v.(*Screenshot), nil } defer s.cacheMisses.Inc() screenshot, err := s.service.Take(ctx, opts) if err != nil { return nil, err } s.cache.Set(k, screenshot, 0) return screenshot, nil } // NoopScreenshotService is a service that takes no-op screenshots. type NoopScreenshotService struct{} func (s *NoopScreenshotService) Take(_ context.Context, _ ScreenshotOptions) (*Screenshot, error) { return &Screenshot{}, nil } // ObservableScreenshotService is a service that records metrics about screenshots. type ObservableScreenshotService struct { service ScreenshotService duration prometheus.Histogram failures *prometheus.CounterVec successes prometheus.Counter } func NewObservableScreenshotService(r prometheus.Registerer, service ScreenshotService) ScreenshotService { return &ObservableScreenshotService{ service: service, duration: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ Name: "duration_seconds", Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 15}, Namespace: namespace, Subsystem: subsystem, }), failures: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "failures_total", Namespace: namespace, Subsystem: subsystem, }, []string{"reason"}), successes: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "successes_total", Namespace: namespace, Subsystem: subsystem, }), } } func (s *ObservableScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { start := time.Now() defer func() { s.duration.Observe(time.Since(start).Seconds()) }() screenshot, err := s.service.Take(ctx, opts) if err != nil { if errors.Is(err, dashboards.ErrDashboardNotFound) { defer s.failures.With(prometheus.Labels{ "reason": "dashboard_not_found", }).Inc() } else if errors.Is(err, context.Canceled) { defer s.failures.With(prometheus.Labels{ "reason": "context_canceled", }).Inc() } else { defer s.failures.With(prometheus.Labels{ "reason": "error", }).Inc() } } else { defer s.successes.Inc() } return screenshot, err } // RemoteRenderScreenshotService takes screenshots using a remote render service. type RemoteRenderScreenshotService struct { ds dashboards.DashboardService rs rendering.Service } func NewRemoteRenderScreenshotService(ds dashboards.DashboardService, rs rendering.Service) ScreenshotService { return &RemoteRenderScreenshotService{ ds: ds, rs: rs, } } // Take returns a screenshot or an error if either the dashboard does not exist or // the service failed to screenshot the dashboard. It uses both the context and the // timeout in ScreenshotOptions, however the context is used in any database queries // and the request to the remote render service, while the timeout in ScreenshotOptions // is passed to the remote render service where it is used as a client timeout. It is // not recommended to pass a context without a deadline. func (s *RemoteRenderScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { q := models.GetDashboardQuery{Uid: opts.DashboardUID} if err := s.ds.GetDashboard(ctx, &q); err != nil { return nil, err } u := url.URL{} u.Path = path.Join("d-solo", q.Result.Uid, q.Result.Slug) p := u.Query() p.Add("orgId", strconv.FormatInt(q.Result.OrgId, 10)) p.Add("panelId", strconv.FormatInt(opts.PanelID, 10)) u.RawQuery = p.Encode() opts = opts.SetDefaults() renderOpts := rendering.Opts{ AuthOpts: rendering.AuthOpts{ OrgID: q.Result.OrgId, OrgRole: models.ROLE_ADMIN, }, ErrorOpts: rendering.ErrorOpts{ ErrorConcurrentLimitReached: true, ErrorRenderUnavailable: true, }, TimeoutOpts: rendering.TimeoutOpts{ Timeout: opts.Timeout, }, Width: opts.Width, Height: opts.Height, Theme: opts.Theme, ConcurrentLimit: setting.AlertingRenderLimit, Path: u.String(), } result, err := s.rs.Render(ctx, renderOpts, nil) if err != nil { return nil, fmt.Errorf("failed to take screenshot: %w", err) } screenshot := Screenshot{Path: result.FilePath} return &screenshot, nil } type ScreenshotUnavailableService struct{} func (s *ScreenshotUnavailableService) Take(_ context.Context, _ ScreenshotOptions) (*Screenshot, error) { return nil, ErrScreenshotsUnavailable } // SingleFlightScreenshotService prevents duplicate screenshots. type SingleFlightScreenshotService struct { f singleflight.Group service ScreenshotService } func NewSingleFlightScreenshotService(service ScreenshotService) ScreenshotService { return &SingleFlightScreenshotService{service: service} } // Take returns a screenshot or an error. It ensures that at most one screenshot // can be taken at a time for the same dashboard and theme. Duplicate screenshots // wait for the first screenshot to complete and receive the same screenshot. func (s *SingleFlightScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { k := fmt.Sprintf("%s-%d-%s", opts.DashboardUID, opts.PanelID, opts.Theme) v, err, _ := s.f.Do(k, func() (interface{}, error) { return s.service.Take(ctx, opts) }) if err != nil { return nil, err } screenshot := v.(*Screenshot) return screenshot, err } // RateLimitScreenshotService ensures that at most N screenshots can be taken // at a time. type RateLimitScreenshotService struct { service ScreenshotService tokens chan struct{} } func NewRateLimitScreenshotService(service ScreenshotService, n int64) ScreenshotService { return &RateLimitScreenshotService{ service: service, tokens: make(chan struct{}, n), } } // Take returns a screenshot or an error. It ensures that at most N screenshots // can be taken at a time. The service has N tokens such that a token is consumed // at the start of a screenshot and returned when the screenshot has either // succeeded or failed. A screenshot can timeout if the context is canceled // while waiting for a token or while the screenshot is being taken. func (s *RateLimitScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { select { // the context is canceled case <-ctx.Done(): return nil, ctx.Err() // there is a token available case s.tokens <- struct{}{}: } // acquired token must be returned defer func() { <-s.tokens }() return s.service.Take(ctx, opts) } // UploadingScreenshotService uploads taken screenshots. type UploadingScreenshotService struct { service ScreenshotService uploader imguploader.ImageUploader uploadFailures prometheus.Counter uploadSuccesses prometheus.Counter } func NewUploadingScreenshotService(r prometheus.Registerer, service ScreenshotService, uploader imguploader.ImageUploader) ScreenshotService { return &UploadingScreenshotService{ service: service, uploader: uploader, uploadFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "upload_failures_total", Namespace: namespace, Subsystem: subsystem, }), uploadSuccesses: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "upload_successes_total", Namespace: namespace, Subsystem: subsystem, }), } } // Take uploads a screenshot with a path and returns a new screenshot with the // unmodified path and a URL. It returns the unmodified screenshot on error. func (s *UploadingScreenshotService) Take(ctx context.Context, opts ScreenshotOptions) (*Screenshot, error) { screenshot, err := s.service.Take(ctx, opts) if err != nil { return nil, err } url, err := s.uploader.Upload(ctx, screenshot.Path) if err != nil { defer s.uploadFailures.Inc() return screenshot, fmt.Errorf("failed to upload screenshot: %w", err) } screenshot.URL = url defer s.uploadSuccesses.Inc() return screenshot, nil }