Zanzana: Perform shadow requests (#103444)

* Zanzana: Execute checks in the background

* add metrics

* collect metrics

* cleanup

* shadow compile checker

* add time metrics for compiler

* run compile in parallel

* prevent deadlock
pull/103571/head^2
Alexander Zobnin 3 months ago committed by GitHub
parent 54192e3308
commit 4bc9203cf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 26
      pkg/services/authz/rbac.go
  2. 2
      pkg/services/authz/zanzana.go
  3. 6
      pkg/services/authz/zanzana/client.go
  4. 5
      pkg/services/authz/zanzana/client/client.go
  5. 54
      pkg/services/authz/zanzana/client/metrics.go
  6. 111
      pkg/services/authz/zanzana/client/shadow_client.go

@ -8,6 +8,11 @@ import (
"time"
"github.com/fullstorydev/grpchan/inprocgrpc"
authnlib "github.com/grafana/authlib/authn"
authzlib "github.com/grafana/authlib/authz"
authzv1 "github.com/grafana/authlib/authz/proto/v1"
"github.com/grafana/authlib/cache"
authlib "github.com/grafana/authlib/types"
grpcAuth "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/auth"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/trace"
@ -16,11 +21,6 @@ import (
"google.golang.org/grpc/credentials/insecure"
"k8s.io/client-go/rest"
authnlib "github.com/grafana/authlib/authn"
authzlib "github.com/grafana/authlib/authz"
authzv1 "github.com/grafana/authlib/authz/proto/v1"
"github.com/grafana/authlib/cache"
authlib "github.com/grafana/authlib/types"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
@ -29,6 +29,7 @@ import (
"github.com/grafana/grafana/pkg/services/apiserver"
"github.com/grafana/grafana/pkg/services/authz/rbac"
"github.com/grafana/grafana/pkg/services/authz/rbac/store"
"github.com/grafana/grafana/pkg/services/authz/zanzana"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/grpcserver"
"github.com/grafana/grafana/pkg/setting"
@ -47,6 +48,7 @@ func ProvideAuthZClient(
reg prometheus.Registerer,
db db.DB,
acService accesscontrol.Service,
zanzanaClient zanzana.Client,
restConfig apiserver.RestConfigProvider,
) (authlib.AccessClient, error) {
authCfg, err := readAuthzClientSettings(cfg)
@ -60,7 +62,11 @@ func ProvideAuthZClient(
switch authCfg.mode {
case clientModeCloud:
return newRemoteRBACClient(authCfg, tracer)
rbacClient, err := newRemoteRBACClient(authCfg, tracer)
if features.IsEnabledGlobally(featuremgmt.FlagZanzana) {
return zanzana.WithShadowClient(rbacClient, zanzanaClient, reg)
}
return rbacClient, err
default:
sql := legacysql.NewDatabaseProvider(db)
@ -92,7 +98,13 @@ func ProvideAuthZClient(
return ctx, nil
}))
authzv1.RegisterAuthzServiceServer(channel, server)
return newRBACClient(channel, tracer), nil
rbacClient := newRBACClient(channel, tracer)
if features.IsEnabledGlobally(featuremgmt.FlagZanzana) {
return zanzana.WithShadowClient(rbacClient, zanzanaClient, reg)
}
return rbacClient, nil
}
}

@ -34,7 +34,7 @@ import (
// ProvideZanzana is used to register the Zanzana client.
// It will also start an embedded Zanzana server if mode is set to "embedded".
func ProvideZanzana(cfg *setting.Cfg, db db.DB, tracer tracing.Tracer, features featuremgmt.FeatureToggles) (zanzana.Client, error) {
func ProvideZanzana(cfg *setting.Cfg, db db.DB, tracer tracing.Tracer, features featuremgmt.FeatureToggles, reg prometheus.Registerer) (zanzana.Client, error) {
if !features.IsEnabledGlobally(featuremgmt.FlagZanzana) {
return zanzana.NewNoopClient(), nil
}

@ -6,6 +6,8 @@ import (
"google.golang.org/grpc"
authlib "github.com/grafana/authlib/types"
"github.com/prometheus/client_golang/prometheus"
authzextv1 "github.com/grafana/grafana/pkg/services/authz/proto/v1"
"github.com/grafana/grafana/pkg/services/authz/zanzana/client"
)
@ -22,6 +24,10 @@ func NewClient(cc grpc.ClientConnInterface) (*client.Client, error) {
return client.New(cc)
}
// WithShadowClient wraps accessClient in the shadow client from the client
// package, passing zanzanaClient as the shadowed engine and reg as the
// Prometheus registerer for its metrics. The returned error is always nil.
func WithShadowClient(accessClient authlib.AccessClient, zanzanaClient authlib.AccessClient, reg prometheus.Registerer) (authlib.AccessClient, error) {
	shadowed := client.WithShadowClient(accessClient, zanzanaClient, reg)
	return shadowed, nil
}
// NewNoopClient returns the no-op client constructed by client.NewNoop.
func NewNoopClient() *client.NoopClient {
	noop := client.NewNoop()
	return noop
}

@ -3,11 +3,10 @@ package client
import (
"context"
"go.opentelemetry.io/otel"
"google.golang.org/grpc"
authzv1 "github.com/grafana/authlib/authz/proto/v1"
authlib "github.com/grafana/authlib/types"
"go.opentelemetry.io/otel"
"google.golang.org/grpc"
"github.com/grafana/grafana/pkg/apimachinery/utils"
"github.com/grafana/grafana/pkg/infra/log"

@ -0,0 +1,54 @@
package client
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Namespace and subsystem applied to every metric declared in this file.
const (
// metricsNamespace is passed as the Namespace of each metric's options.
metricsNamespace = "iam"
// metricsSubSystem is passed as the Subsystem of each metric's options.
metricsSubSystem = "authz_zanzana"
)
// metrics groups the Prometheus collectors used by the shadow client.
type metrics struct {
// evaluationsSeconds is a histogram of access evaluation time, labelled by engine ("rbac" or "zanzana").
evaluationsSeconds *prometheus.HistogramVec
// compileSeconds is a histogram of item checker compilation time, labelled by engine ("rbac" or "zanzana").
compileSeconds *prometheus.HistogramVec
// evaluationStatusTotal counts zanzana evaluation outcomes, labelled by status ("success" or "error").
evaluationStatusTotal *prometheus.CounterVec
}
// newShadowClientMetrics creates the shadow client collectors and registers
// them with reg: two per-engine latency histograms and one status counter.
func newShadowClientMetrics(reg prometheus.Registerer) *metrics {
	factory := promauto.With(reg)

	// Collectors are created in the same order as the struct fields so the
	// registration order is stable.
	evaluations := factory.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: metricsNamespace,
			Subsystem: metricsSubSystem,
			Name:      "engine_evaluations_seconds",
			Help:      "Histogram for evaluation time for the specific access control engine (RBAC and zanzana).",
			Buckets:   prometheus.ExponentialBuckets(0.00001, 4, 10),
		},
		[]string{"engine"},
	)

	compilations := factory.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: metricsNamespace,
			Subsystem: metricsSubSystem,
			Name:      "compile_seconds",
			Help:      "Histogram for item checker compilation time for the specific access control engine (RBAC and zanzana).",
			Buckets:   prometheus.ExponentialBuckets(0.00001, 4, 10),
		},
		[]string{"engine"},
	)

	statuses := factory.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: metricsSubSystem,
			Name:      "evaluation_status_total",
			Help:      "evaluation status (success or error) for zanzana",
		},
		[]string{"status"},
	)

	return &metrics{
		evaluationsSeconds:    evaluations,
		compileSeconds:        compilations,
		evaluationStatusTotal: statuses,
	}
}

@ -0,0 +1,111 @@
package client
import (
"context"
authlib "github.com/grafana/authlib/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/grafana/grafana/pkg/infra/log"
)
// ShadowClient is an access client that answers from accessClient while also
// running the same requests against zanzanaClient and comparing the results.
type ShadowClient struct {
// logger reports zanzana failures and result mismatches.
logger log.Logger
// accessClient is the primary client; its results are what callers receive.
accessClient authlib.AccessClient
// zanzanaClient is the shadowed client; when nil, the shadow work is skipped.
zanzanaClient authlib.AccessClient
// metrics holds the timing and status collectors updated by Check and Compile.
metrics *metrics
}
// WithShadowClient builds a ShadowClient around accessClient. The returned
// client answers from accessClient and runs the same requests against
// zanzanaClient for comparison; its metrics are registered with reg.
func WithShadowClient(accessClient authlib.AccessClient, zanzanaClient authlib.AccessClient, reg prometheus.Registerer) authlib.AccessClient {
	return &ShadowClient{
		logger:        log.New("zanzana-shadow-client"),
		accessClient:  accessClient,
		zanzanaClient: zanzanaClient,
		metrics:       newShadowClientMetrics(reg),
	}
}
// Check evaluates req with the primary access client and returns that result
// and error unchanged. In the background it runs the same request against the
// zanzana client (when one is configured) and compares the two decisions,
// recording the outcome in evaluationStatusTotal.
//
// The background goroutine never affects the returned values. It is detached
// from ctx cancellation so it can finish after the caller has returned.
func (c *ShadowClient) Check(ctx context.Context, id authlib.AuthInfo, req authlib.CheckRequest) (authlib.CheckResponse, error) {
	// Both channels are buffered with capacity one so the sends at the end of
	// this method never block, even if the goroutine returns without receiving.
	acResChan := make(chan authlib.CheckResponse, 1)
	acErrChan := make(chan error, 1)

	go func() {
		if c.zanzanaClient == nil {
			return
		}

		zanzanaCtx := context.WithoutCancel(ctx)

		timer := prometheus.NewTimer(c.metrics.evaluationsSeconds.WithLabelValues("zanzana"))
		res, err := c.zanzanaClient.Check(zanzanaCtx, id, req)
		// Observe right after the call. Deferring the observation would also
		// count the time spent below waiting for the primary client's result.
		timer.ObserveDuration()
		if err != nil {
			c.logger.Error("Failed to run zanzana check", "error", err)
		}

		acRes := <-acResChan
		if acErr := <-acErrChan; acErr != nil {
			// Nothing to compare against when the primary check itself failed.
			return
		}

		if err != nil {
			// A failed zanzana call has no meaningful response. Count it as an
			// error instead of comparing the zero-value result.
			c.metrics.evaluationStatusTotal.WithLabelValues("error").Inc()
			return
		}

		if res.Allowed != acRes.Allowed {
			c.metrics.evaluationStatusTotal.WithLabelValues("error").Inc()
			c.logger.Warn("Zanzana check result does not match", "expected", acRes.Allowed, "actual", res.Allowed)
			return
		}
		c.metrics.evaluationStatusTotal.WithLabelValues("success").Inc()
	}()

	timer := prometheus.NewTimer(c.metrics.evaluationsSeconds.WithLabelValues("rbac"))
	res, err := c.accessClient.Check(ctx, id, req)
	timer.ObserveDuration()

	acResChan <- res
	acErrChan <- err

	return res, err
}
// Compile builds an item checker from the primary access client and, in
// parallel, one from the zanzana client. The returned checker always answers
// with the primary result; when a zanzana checker is available it is also
// invoked per item and the agreement is recorded in evaluationStatusTotal.
//
// If the primary compilation fails, its error is returned and no checker is
// produced. A zanzana compilation failure is only logged.
func (c *ShadowClient) Compile(ctx context.Context, id authlib.AuthInfo, req authlib.ListRequest) (authlib.ItemChecker, error) {
	// Buffered so the goroutine can finish even when this method returns early
	// on a primary compile error and never reads the channel.
	shadowCh := make(chan authlib.ItemChecker, 1)

	go func() {
		if c.zanzanaClient == nil {
			shadowCh <- nil
			return
		}

		shadowTimer := prometheus.NewTimer(c.metrics.compileSeconds.WithLabelValues("zanzana"))
		checker, compileErr := c.zanzanaClient.Compile(ctx, id, req)
		shadowTimer.ObserveDuration()

		if compileErr != nil {
			c.logger.Warn("Failed to compile zanzana item checker", "error", compileErr)
		}
		shadowCh <- checker
	}()

	primaryTimer := prometheus.NewTimer(c.metrics.compileSeconds.WithLabelValues("rbac"))
	primary, err := c.accessClient.Compile(ctx, id, req)
	primaryTimer.ObserveDuration()
	if err != nil {
		return nil, err
	}

	// Wait for the shadow compilation so the returned checker can use it.
	shadow := <-shadowCh

	return func(name, folder string) bool {
		allowed := primary(name, folder)
		if shadow == nil {
			return allowed
		}

		if got := shadow(name, folder); got != allowed {
			c.metrics.evaluationStatusTotal.WithLabelValues("error").Inc()
			c.logger.Warn("Zanzana compile result does not match", "expected", allowed, "actual", got)
		} else {
			c.metrics.evaluationStatusTotal.WithLabelValues("success").Inc()
		}
		return allowed
	}, nil
}
Loading…
Cancel
Save