AuthZ service: Add metrics (#99007)

* add metrics for authZ MT service

* remove metrics that are already tracked by the GRPC server metrics

Co-authored-by: Gabriel MABILLE <gamab@users.noreply.github.com>

* undo unneeded change

* test fix

---------

Co-authored-by: Gabriel MABILLE <gamab@users.noreply.github.com>
pull/99395/head^2
Ieva 6 months ago committed by GitHub
parent 437b7a565d
commit 33a53d170b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      pkg/services/authz/client.go
  2. 39
      pkg/services/authz/rbac/metrics.go
  3. 21
      pkg/services/authz/rbac/service.go
  4. 1
      pkg/services/authz/rbac/service_test.go
  5. 3
      pkg/services/authz/server.go

@ -8,6 +8,7 @@ import (
"github.com/fullstorydev/grpchan"
"github.com/fullstorydev/grpchan/inprocgrpc"
grpcAuth "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/auth"
"github.com/prometheus/client_golang/prometheus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
@ -38,6 +39,7 @@ func ProvideAuthZClient(
features featuremgmt.FeatureToggles,
grpcServer grpcserver.Provider,
tracer tracing.Tracer,
reg prometheus.Registerer,
db db.DB,
acService accesscontrol.Service,
) (authlib.AccessClient, error) {
@ -69,6 +71,7 @@ func ProvideAuthZClient(
),
log.New("authz-grpc-server"),
tracer,
reg,
cache.NewLocalCache(cache.Config{Expiry: 5 * time.Minute, CleanupInterval: 10 * time.Minute}),
)
return newInProcLegacyClient(server, tracer)

@ -0,0 +1,39 @@
package rbac
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Naming components shared by every collector created in newMetrics.
// They are passed as the Namespace and Subsystem of each CounterOpts, so the
// exported series are prefixed with "iam_authz_direct_db_service_".
const (
metricsNamespace = "iam"
metricsSubSystem = "authz_direct_db_service"
)
// metrics groups the Prometheus counters used by the RBAC authorization service.
type metrics struct {
// requestCount is a counter vector with the labels
// "is_error", "valid", "verb", "group" and "resource".
requestCount *prometheus.CounterVec
// permissionCacheUsage is a counter vector with the labels
// "cache_hit" and "action".
permissionCacheUsage *prometheus.CounterVec
}
// newMetrics creates the counters used by the RBAC authorization service and
// registers them with reg via promauto.
//
// reg may be nil: promauto then builds the collectors without registering them
// with any registry.
func newMetrics(reg prometheus.Registerer) *metrics {
	factory := promauto.With(reg)

	return &metrics{
		// The metric name is kept as "invalid_request_count" so that existing
		// series, dashboards and alerts keep working. The help text, however,
		// describes what the label set actually allows: requests are counted
		// regardless of outcome and told apart by the "valid" and "is_error"
		// labels, not only invalid ones.
		requestCount: factory.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: metricsNamespace,
				Subsystem: metricsSubSystem,
				Name:      "invalid_request_count",
				Help:      "AuthZ service request count, labelled by request validity and error outcome",
			},
			[]string{"is_error", "valid", "verb", "group", "resource"},
		),
		// Counts permission cache lookups, split by hit/miss and by action.
		permissionCacheUsage: factory.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: metricsNamespace,
				Subsystem: metricsSubSystem,
				Name:      "permission_cache_usage",
				Help:      "AuthZ service permission cache usage",
			},
			[]string{"cache_hit", "action"},
		),
	}
}

@ -7,6 +7,7 @@ import (
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/singleflight"
@ -46,8 +47,9 @@ type Service struct {
mapper mapper
logger log.Logger
tracer tracing.Tracer
logger log.Logger
tracer tracing.Tracer
metrics *metrics
// Deduplication of concurrent requests
sf *singleflight.Group
@ -66,6 +68,7 @@ func NewService(
permissionStore store.PermissionStore,
logger log.Logger,
tracer tracing.Tracer,
reg prometheus.Registerer,
cache cache.Cache,
) *Service {
return &Service{
@ -74,6 +77,7 @@ func NewService(
identityStore: identityStore,
logger: logger,
tracer: tracer,
metrics: newMetrics(reg),
mapper: newMapper(),
idCache: newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
permCache: newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
@ -94,6 +98,7 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
checkReq, err := s.validateCheckRequest(ctx, req)
if err != nil {
ctxLogger.Error("invalid request", "error", err)
s.metrics.requestCount.WithLabelValues("true", "false", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return deny, err
}
ctx = request.WithNamespace(ctx, req.GetNamespace())
@ -101,14 +106,18 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
permissions, err := s.getIdentityPermissions(ctx, checkReq.Namespace, checkReq.IdentityType, checkReq.UserUID, checkReq.Action)
if err != nil {
ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return deny, err
}
allowed, err := s.checkPermission(ctx, permissions, checkReq)
if err != nil {
ctxLogger.Error("could not check permission", "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return deny, err
}
s.metrics.requestCount.WithLabelValues("false", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return &authzv1.CheckResponse{Allowed: allowed}, nil
}
@ -120,6 +129,7 @@ func (s *Service) List(ctx context.Context, req *authzv1.ListRequest) (*authzv1.
listReq, err := s.validateListRequest(ctx, req)
if err != nil {
ctxLogger.Error("invalid request", "error", err)
s.metrics.requestCount.WithLabelValues("true", "false", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return &authzv1.ListResponse{}, err
}
ctx = request.WithNamespace(ctx, req.GetNamespace())
@ -127,10 +137,13 @@ func (s *Service) List(ctx context.Context, req *authzv1.ListRequest) (*authzv1.
permissions, err := s.getIdentityPermissions(ctx, listReq.Namespace, listReq.IdentityType, listReq.UserUID, listReq.Action)
if err != nil {
ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return nil, err
}
return s.listPermission(ctx, permissions, listReq)
resp, err := s.listPermission(ctx, permissions, listReq)
s.metrics.requestCount.WithLabelValues(strconv.FormatBool(err != nil), "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return resp, err
}
func (s *Service) validateCheckRequest(ctx context.Context, req *authzv1.CheckRequest) (*CheckRequest, error) {
@ -286,8 +299,10 @@ func (s *Service) getUserPermissions(ctx context.Context, ns claims.NamespaceInf
userPermKey := userPermCacheKey(ns.Value, userIdentifiers.UID, action)
if cached, ok := s.permCache.Get(ctx, userPermKey); ok {
s.metrics.permissionCacheUsage.WithLabelValues("true", action).Inc()
return cached, nil
}
s.metrics.permissionCacheUsage.WithLabelValues("false", action).Inc()
res, err, _ := s.sf.Do(userPermKey+"_getUserPermissions", func() (interface{}, error) {
basicRoles, err := s.getUserBasicRole(ctx, ns, userIdentifiers)

@ -640,6 +640,7 @@ func setupService() *Service {
logger: logger,
mapper: newMapper(),
tracer: tracing.NewNoopTracerService(),
metrics: newMetrics(nil),
idCache: newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
permCache: newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
teamCache: newCacheWrap[[]int64](cache, logger, shortCacheTTL),

@ -12,12 +12,14 @@ import (
"github.com/grafana/grafana/pkg/services/authz/rbac/store"
"github.com/grafana/grafana/pkg/services/grpcserver"
"github.com/grafana/grafana/pkg/storage/legacysql"
"github.com/prometheus/client_golang/prometheus"
)
func RegisterRBACAuthZService(
handler grpcserver.Provider,
db legacysql.LegacyDatabaseProvider,
tracer tracing.Tracer,
reg prometheus.Registerer,
cache cache.Cache) {
server := rbac.NewService(
db,
@ -25,6 +27,7 @@ func RegisterRBACAuthZService(
store.NewSQLPermissionStore(db, tracer),
log.New("authz-grpc-server"),
tracer,
reg,
cache,
)

Loading…
Cancel
Save