|
|
|
@ -12,6 +12,7 @@ import ( |
|
|
|
|
"time" |
|
|
|
|
|
|
|
|
|
"github.com/google/uuid" |
|
|
|
|
"github.com/grafana/grafana-plugin-sdk-go/backend/httpclient" |
|
|
|
|
"github.com/grafana/grafana/pkg/api/routing" |
|
|
|
|
"github.com/grafana/grafana/pkg/infra/db" |
|
|
|
|
"github.com/grafana/grafana/pkg/infra/kvstore" |
|
|
|
@ -35,6 +36,7 @@ import ( |
|
|
|
|
"github.com/grafana/grafana/pkg/util" |
|
|
|
|
"github.com/prometheus/client_golang/prometheus" |
|
|
|
|
"go.opentelemetry.io/otel/attribute" |
|
|
|
|
"go.opentelemetry.io/otel/codes" |
|
|
|
|
"go.opentelemetry.io/otel/trace" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
@ -83,6 +85,7 @@ var _ cloudmigration.Service = (*Service)(nil) |
|
|
|
|
// builds the service, and api, and configures routes
|
|
|
|
|
func ProvideService( |
|
|
|
|
cfg *setting.Cfg, |
|
|
|
|
httpClientProvider *httpclient.Provider, |
|
|
|
|
features featuremgmt.FeatureToggles, |
|
|
|
|
db db.DB, |
|
|
|
|
dsService datasources.DataSourceService, |
|
|
|
@ -118,15 +121,29 @@ func ProvideService( |
|
|
|
|
} |
|
|
|
|
s.api = api.RegisterApi(routeRegister, s, tracer) |
|
|
|
|
|
|
|
|
|
s.objectStorage = objectstorage.NewS3() |
|
|
|
|
httpClientS3, err := httpClientProvider.New() |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("creating http client for S3: %w", err) |
|
|
|
|
} |
|
|
|
|
s.objectStorage = objectstorage.NewS3(httpClientS3, tracer) |
|
|
|
|
|
|
|
|
|
if !cfg.CloudMigration.IsDeveloperMode { |
|
|
|
|
c, err := gmsclient.NewGMSClient(cfg) |
|
|
|
|
httpClientGMS, err := httpClientProvider.New() |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("creating http client for GMS: %w", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
c, err := gmsclient.NewGMSClient(cfg, httpClientGMS) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("initializing GMS client: %w", err) |
|
|
|
|
} |
|
|
|
|
s.gmsClient = c |
|
|
|
|
s.gcomService = gcom.New(gcom.Config{ApiURL: cfg.GrafanaComAPIURL, Token: cfg.CloudMigration.GcomAPIToken}) |
|
|
|
|
|
|
|
|
|
httpClientGcom, err := httpClientProvider.New() |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("creating http client for GCOM: %w", err) |
|
|
|
|
} |
|
|
|
|
s.gcomService = gcom.New(gcom.Config{ApiURL: cfg.GrafanaComAPIURL, Token: cfg.CloudMigration.GcomAPIToken}, httpClientGcom) |
|
|
|
|
} else { |
|
|
|
|
s.gmsClient = gmsclient.NewInMemoryClient() |
|
|
|
|
s.gcomService = &gcomStub{policies: map[string]gcom.AccessPolicy{}, token: nil} |
|
|
|
@ -169,7 +186,8 @@ func (s *Service) GetToken(ctx context.Context) (gcom.TokenView, error) { |
|
|
|
|
RequestID: requestID, |
|
|
|
|
Region: instance.RegionSlug, |
|
|
|
|
AccessPolicyName: accessPolicyName, |
|
|
|
|
TokenName: accessTokenName}) |
|
|
|
|
TokenName: accessTokenName, |
|
|
|
|
}) |
|
|
|
|
if err != nil { |
|
|
|
|
return gcom.TokenView{}, fmt.Errorf("listing tokens: %w", err) |
|
|
|
|
} |
|
|
|
@ -279,9 +297,6 @@ func (s *Service) CreateToken(ctx context.Context) (cloudmigration.CreateAccessT |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) findAccessPolicyByName(ctx context.Context, regionSlug, accessPolicyName string) (*gcom.AccessPolicy, error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.findAccessPolicyByName") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
accessPolicies, err := s.gcomService.ListAccessPolicies(ctx, gcom.ListAccessPoliciesParams{ |
|
|
|
|
RequestID: tracing.TraceIDFromContext(ctx, false), |
|
|
|
|
Region: regionSlug, |
|
|
|
@ -341,7 +356,7 @@ func (s *Service) DeleteToken(ctx context.Context, tokenID string) error { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) GetSession(ctx context.Context, uid string) (*cloudmigration.CloudMigrationSession, error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.GetMigration") |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.GetSession") |
|
|
|
|
defer span.End() |
|
|
|
|
migration, err := s.store.GetMigrationSessionByUID(ctx, uid) |
|
|
|
|
if err != nil { |
|
|
|
@ -352,6 +367,9 @@ func (s *Service) GetSession(ctx context.Context, uid string) (*cloudmigration.C |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) GetSessionList(ctx context.Context) (*cloudmigration.CloudMigrationSessionListResponse, error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.GetSessionList") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
values, err := s.store.GetCloudMigrationSessionList(ctx) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("retrieving session list from store: %w", err) |
|
|
|
@ -370,7 +388,7 @@ func (s *Service) GetSessionList(ctx context.Context) (*cloudmigration.CloudMigr |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) CreateSession(ctx context.Context, cmd cloudmigration.CloudMigrationSessionRequest) (*cloudmigration.CloudMigrationSessionResponse, error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.createMigration") |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.CreateSession") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
base64Token := cmd.AuthToken |
|
|
|
@ -405,6 +423,9 @@ func (s *Service) CreateSession(ctx context.Context, cmd cloudmigration.CloudMig |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) DeleteSession(ctx context.Context, sessionUID string) (*cloudmigration.CloudMigrationSession, error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.DeleteSession") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
session, snapshots, err := s.store.DeleteMigrationSessionByUID(ctx, sessionUID) |
|
|
|
|
if err != nil { |
|
|
|
|
s.report(ctx, session, gmsclient.EventDisconnect, 0, err) |
|
|
|
@ -470,26 +491,36 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI |
|
|
|
|
s.cancelMutex.Unlock() |
|
|
|
|
}() |
|
|
|
|
|
|
|
|
|
ctx, cancelFunc := context.WithCancel(context.Background()) |
|
|
|
|
// Create context out the span context to ensure the trace is propagated
|
|
|
|
|
asyncCtx := trace.ContextWithSpanContext(context.Background(), span.SpanContext()) |
|
|
|
|
asyncCtx, asyncSpan := s.tracer.Start(asyncCtx, "CloudMigrationService.CreateSnapshotAsync") |
|
|
|
|
defer asyncSpan.End() |
|
|
|
|
|
|
|
|
|
asyncCtx, cancelFunc := context.WithCancel(asyncCtx) |
|
|
|
|
s.cancelFunc = cancelFunc |
|
|
|
|
|
|
|
|
|
s.report(ctx, session, gmsclient.EventStartBuildingSnapshot, 0, nil) |
|
|
|
|
s.report(asyncCtx, session, gmsclient.EventStartBuildingSnapshot, 0, nil) |
|
|
|
|
|
|
|
|
|
start := time.Now() |
|
|
|
|
err := s.buildSnapshot(ctx, signedInUser, initResp.MaxItemsPerPartition, initResp.Metadata, snapshot) |
|
|
|
|
err := s.buildSnapshot(asyncCtx, signedInUser, initResp.MaxItemsPerPartition, initResp.Metadata, snapshot) |
|
|
|
|
if err != nil { |
|
|
|
|
asyncSpan.SetStatus(codes.Error, "error building snapshot") |
|
|
|
|
asyncSpan.RecordError(err) |
|
|
|
|
s.log.Error("building snapshot", "err", err.Error()) |
|
|
|
|
|
|
|
|
|
// Update status to error with retries
|
|
|
|
|
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{ |
|
|
|
|
if err := s.updateSnapshotWithRetries(asyncCtx, cloudmigration.UpdateSnapshotCmd{ |
|
|
|
|
UID: snapshot.UID, |
|
|
|
|
SessionID: sessionUid, |
|
|
|
|
Status: cloudmigration.SnapshotStatusError, |
|
|
|
|
}); err != nil { |
|
|
|
|
s.log.Error("critical failure during snapshot creation - please report any error logs") |
|
|
|
|
asyncSpan.RecordError(err) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
s.report(ctx, session, gmsclient.EventDoneBuildingSnapshot, time.Since(start), err) |
|
|
|
|
span.SetStatus(codes.Ok, "snapshot built") |
|
|
|
|
s.report(asyncCtx, session, gmsclient.EventDoneBuildingSnapshot, time.Since(start), err) |
|
|
|
|
}() |
|
|
|
|
|
|
|
|
|
return &snapshot, nil |
|
|
|
@ -624,32 +655,48 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho |
|
|
|
|
s.cancelMutex.Unlock() |
|
|
|
|
}() |
|
|
|
|
|
|
|
|
|
ctx, cancelFunc := context.WithCancel(context.Background()) |
|
|
|
|
s.cancelFunc = cancelFunc |
|
|
|
|
// Create context out the span context to ensure the trace is propagated
|
|
|
|
|
asyncCtx := trace.ContextWithSpanContext(context.Background(), span.SpanContext()) |
|
|
|
|
asyncCtx, asyncSpan := s.tracer.Start(asyncCtx, "CloudMigrationService.UploadSnapshot") |
|
|
|
|
defer asyncSpan.End() |
|
|
|
|
|
|
|
|
|
s.report(ctx, session, gmsclient.EventStartUploadingSnapshot, 0, nil) |
|
|
|
|
asyncCtx, s.cancelFunc = context.WithCancel(asyncCtx) |
|
|
|
|
|
|
|
|
|
s.report(asyncCtx, session, gmsclient.EventStartUploadingSnapshot, 0, nil) |
|
|
|
|
|
|
|
|
|
start := time.Now() |
|
|
|
|
err := s.uploadSnapshot(ctx, session, snapshot, uploadUrl) |
|
|
|
|
err := s.uploadSnapshot(asyncCtx, session, snapshot, uploadUrl) |
|
|
|
|
if err != nil { |
|
|
|
|
asyncSpan.SetStatus(codes.Error, "error uploading snapshot") |
|
|
|
|
asyncSpan.RecordError(err) |
|
|
|
|
|
|
|
|
|
s.log.Error("uploading snapshot", "err", err.Error()) |
|
|
|
|
// Update status to error with retries
|
|
|
|
|
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{ |
|
|
|
|
if err := s.updateSnapshotWithRetries(asyncCtx, cloudmigration.UpdateSnapshotCmd{ |
|
|
|
|
UID: snapshot.UID, |
|
|
|
|
SessionID: sessionUid, |
|
|
|
|
Status: cloudmigration.SnapshotStatusError, |
|
|
|
|
}); err != nil { |
|
|
|
|
asyncSpan.RecordError(err) |
|
|
|
|
s.log.Error("critical failure during snapshot upload - please report any error logs") |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
s.report(ctx, session, gmsclient.EventDoneUploadingSnapshot, time.Since(start), err) |
|
|
|
|
s.report(asyncCtx, session, gmsclient.EventDoneUploadingSnapshot, time.Since(start), err) |
|
|
|
|
}() |
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) CancelSnapshot(ctx context.Context, sessionUid string, snapshotUid string) (err error) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.CancelSnapshot", |
|
|
|
|
trace.WithAttributes( |
|
|
|
|
attribute.String("sessionUid", sessionUid), |
|
|
|
|
attribute.String("snapshotUid", snapshotUid), |
|
|
|
|
), |
|
|
|
|
) |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
// The cancel func itself is protected by a mutex in the async threads, so it may or may not be set by the time CancelSnapshot is called
|
|
|
|
|
// Attempt to cancel and recover from the panic if the cancel function is nil
|
|
|
|
|
defer func() { |
|
|
|
@ -684,6 +731,9 @@ func (s *Service) report( |
|
|
|
|
d time.Duration, |
|
|
|
|
evtErr error, |
|
|
|
|
) { |
|
|
|
|
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.report") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
id, err := s.getLocalEventId(ctx) |
|
|
|
|
if err != nil { |
|
|
|
|
s.log.Error("failed to report event", "type", t, "error", err.Error()) |
|
|
|
@ -738,6 +788,9 @@ func (s *Service) getLocalEventId(ctx context.Context) (string, error) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (s *Service) deleteLocalFiles(snapshots []cloudmigration.CloudMigrationSnapshot) error { |
|
|
|
|
_, span := s.tracer.Start(context.Background(), "CloudMigrationService.deleteLocalFiles") |
|
|
|
|
defer span.End() |
|
|
|
|
|
|
|
|
|
var err error |
|
|
|
|
for _, snapshot := range snapshots { |
|
|
|
|
err = os.RemoveAll(snapshot.LocalDir) |
|
|
|
|