Patch: instrument the RBAC AuthZ service with Prometheus counters.

Files touched:
  * pkg/services/authz/rbac/metrics.go (new) - declares the requestCount and
    permissionCacheUsage counter vectors (namespace "iam", subsystem
    "authz_direct_db_service") and the newMetrics constructor.
  * pkg/services/authz/rbac/service.go - Service gains a metrics field,
    NewService takes a prometheus.Registerer, Check and List increment
    requestCount on each return path shown in the hunks, and
    getUserPermissions increments permissionCacheUsage on a permission-cache
    hit and on a miss.
  * pkg/services/authz/client.go and pkg/services/authz/server.go - pass the
    registerer through to rbac.NewService.
  * pkg/services/authz/rbac/service_test.go - setupService builds its metrics
    with newMetrics(nil).

Review notes:
  * requestCount is also incremented for valid, successful requests
    (is_error="false", valid="true"), so its Help text is reworded to cover
    all requests. The metric name is kept as submitted.
  * NOTE(review): List calls strconv.FormatBool but no hunk adds a strconv
    import to service.go - presumably it is already imported; confirm.
  * NOTE(review): the verb, group and resource label values are read straight
    from the request, including on the invalid-request path - confirm their
    cardinality is bounded.
  * NOTE(review): leading tabs and column alignment were reconstructed
    assuming gofmt layout - confirm against the target tree, or apply with
    `git apply --ignore-whitespace`. The metrics.go index line predates the
    Help text edit.

diff --git a/pkg/services/authz/client.go b/pkg/services/authz/client.go
index 9d7dcc6fee0..412a4e2acb9 100644
--- a/pkg/services/authz/client.go
+++ b/pkg/services/authz/client.go
@@ -8,6 +8,7 @@ import (
 	"github.com/fullstorydev/grpchan"
 	"github.com/fullstorydev/grpchan/inprocgrpc"
 	grpcAuth "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/auth"
+	"github.com/prometheus/client_golang/prometheus"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
 
@@ -38,6 +39,7 @@ func ProvideAuthZClient(
 	features featuremgmt.FeatureToggles,
 	grpcServer grpcserver.Provider,
 	tracer tracing.Tracer,
+	reg prometheus.Registerer,
 	db db.DB,
 	acService accesscontrol.Service,
 ) (authlib.AccessClient, error) {
@@ -69,6 +71,7 @@ func ProvideAuthZClient(
 			),
 			log.New("authz-grpc-server"),
 			tracer,
+			reg,
 			cache.NewLocalCache(cache.Config{Expiry: 5 * time.Minute, CleanupInterval: 10 * time.Minute}),
 		)
 		return newInProcLegacyClient(server, tracer)
diff --git a/pkg/services/authz/rbac/metrics.go b/pkg/services/authz/rbac/metrics.go
new file mode 100644
index 00000000000..3c3bb1e7dea
--- /dev/null
+++ b/pkg/services/authz/rbac/metrics.go
@@ -0,0 +1,39 @@
+package rbac
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+const (
+	metricsNamespace = "iam"
+	metricsSubSystem = "authz_direct_db_service"
+)
+
+type metrics struct {
+	requestCount         *prometheus.CounterVec
+	permissionCacheUsage *prometheus.CounterVec
+}
+
+func newMetrics(reg prometheus.Registerer) *metrics {
+	return &metrics{
+		requestCount: promauto.With(reg).NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace: metricsNamespace,
+				Subsystem: metricsSubSystem,
+				Name:      "invalid_request_count",
+				Help:      "AuthZ service request count by error status and request validity",
+			},
+			[]string{"is_error", "valid", "verb", "group", "resource"},
+		),
+		permissionCacheUsage: promauto.With(reg).NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace: metricsNamespace,
+				Subsystem: metricsSubSystem,
+				Name:      "permission_cache_usage",
+				Help:      "AuthZ service permission cache usage",
+			},
+			[]string{"cache_hit", "action"},
+		),
+	}
+}
diff --git a/pkg/services/authz/rbac/service.go b/pkg/services/authz/rbac/service.go
index a0d2c3b2408..9c8a82a8ee1 100644
--- a/pkg/services/authz/rbac/service.go
+++ b/pkg/services/authz/rbac/service.go
@@ -7,6 +7,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 	"golang.org/x/sync/singleflight"
@@ -46,8 +47,9 @@ type Service struct {
 
 	mapper mapper
 
-	logger log.Logger
-	tracer tracing.Tracer
+	logger  log.Logger
+	tracer  tracing.Tracer
+	metrics *metrics
 
 	// Deduplication of concurrent requests
 	sf *singleflight.Group
@@ -66,6 +68,7 @@ func NewService(
 	permissionStore store.PermissionStore,
 	logger log.Logger,
 	tracer tracing.Tracer,
+	reg prometheus.Registerer,
 	cache cache.Cache,
 ) *Service {
 	return &Service{
@@ -74,6 +77,7 @@ func NewService(
 		identityStore:   identityStore,
 		logger:          logger,
 		tracer:          tracer,
+		metrics:         newMetrics(reg),
 		mapper:          newMapper(),
 		idCache:         newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
 		permCache:       newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
@@ -94,6 +98,7 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
 	checkReq, err := s.validateCheckRequest(ctx, req)
 	if err != nil {
 		ctxLogger.Error("invalid request", "error", err)
+		s.metrics.requestCount.WithLabelValues("true", "false", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 		return deny, err
 	}
 	ctx = request.WithNamespace(ctx, req.GetNamespace())
@@ -101,14 +106,18 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
 	permissions, err := s.getIdentityPermissions(ctx, checkReq.Namespace, checkReq.IdentityType, checkReq.UserUID, checkReq.Action)
 	if err != nil {
 		ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
+		s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 		return deny, err
 	}
 
 	allowed, err := s.checkPermission(ctx, permissions, checkReq)
 	if err != nil {
 		ctxLogger.Error("could not check permission", "error", err)
+		s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 		return deny, err
 	}
+
+	s.metrics.requestCount.WithLabelValues("false", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 	return &authzv1.CheckResponse{Allowed: allowed}, nil
 }
 
@@ -120,6 +129,7 @@ func (s *Service) List(ctx context.Context, req *authzv1.ListRequest) (*authzv1.
 	listReq, err := s.validateListRequest(ctx, req)
 	if err != nil {
 		ctxLogger.Error("invalid request", "error", err)
+		s.metrics.requestCount.WithLabelValues("true", "false", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 		return &authzv1.ListResponse{}, err
 	}
 	ctx = request.WithNamespace(ctx, req.GetNamespace())
@@ -127,10 +137,13 @@ func (s *Service) List(ctx context.Context, req *authzv1.ListRequest) (*authzv1.
 	permissions, err := s.getIdentityPermissions(ctx, listReq.Namespace, listReq.IdentityType, listReq.UserUID, listReq.Action)
 	if err != nil {
 		ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
+		s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
 		return nil, err
 	}
 
-	return s.listPermission(ctx, permissions, listReq)
+	resp, err := s.listPermission(ctx, permissions, listReq)
+	s.metrics.requestCount.WithLabelValues(strconv.FormatBool(err != nil), "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
+	return resp, err
 }
 
 func (s *Service) validateCheckRequest(ctx context.Context, req *authzv1.CheckRequest) (*CheckRequest, error) {
@@ -286,8 +299,10 @@ func (s *Service) getUserPermissions(ctx context.Context, ns claims.NamespaceInf
 
 	userPermKey := userPermCacheKey(ns.Value, userIdentifiers.UID, action)
 	if cached, ok := s.permCache.Get(ctx, userPermKey); ok {
+		s.metrics.permissionCacheUsage.WithLabelValues("true", action).Inc()
 		return cached, nil
 	}
+	s.metrics.permissionCacheUsage.WithLabelValues("false", action).Inc()
 
 	res, err, _ := s.sf.Do(userPermKey+"_getUserPermissions", func() (interface{}, error) {
 		basicRoles, err := s.getUserBasicRole(ctx, ns, userIdentifiers)
diff --git a/pkg/services/authz/rbac/service_test.go b/pkg/services/authz/rbac/service_test.go
index 26b3cd02451..586f528f863 100644
--- a/pkg/services/authz/rbac/service_test.go
+++ b/pkg/services/authz/rbac/service_test.go
@@ -640,6 +640,7 @@ func setupService() *Service {
 		logger:         logger,
 		mapper:         newMapper(),
 		tracer:         tracing.NewNoopTracerService(),
+		metrics:        newMetrics(nil),
 		idCache:        newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
 		permCache:      newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
 		teamCache:      newCacheWrap[[]int64](cache, logger, shortCacheTTL),
diff --git a/pkg/services/authz/server.go b/pkg/services/authz/server.go
index 84a6b2dd9c3..868e7c2816b 100644
--- a/pkg/services/authz/server.go
+++ b/pkg/services/authz/server.go
@@ -12,12 +12,14 @@ import (
 	"github.com/grafana/grafana/pkg/services/authz/rbac/store"
 	"github.com/grafana/grafana/pkg/services/grpcserver"
 	"github.com/grafana/grafana/pkg/storage/legacysql"
+	"github.com/prometheus/client_golang/prometheus"
 )
 
 func RegisterRBACAuthZService(
 	handler grpcserver.Provider,
 	db legacysql.LegacyDatabaseProvider,
 	tracer tracing.Tracer,
+	reg prometheus.Registerer,
 	cache cache.Cache) {
 	server := rbac.NewService(
 		db,
@@ -25,6 +27,7 @@ func RegisterRBACAuthZService(
 		store.NewSQLPermissionStore(db, tracer),
 		log.New("authz-grpc-server"),
 		tracer,
+		reg,
 		cache,
 	)
 