Add per-tenant request counter metric to index gateway server (#9797)

This commit adds a counter metric `loki_index_gateway_requests_total` with the labels `operation`, `tenant`, and `status` for gRPC requests that are served by the index gateway.

**What for?**

The per-tenant RPS on the index gateway is used to derive the per-tenant shard factor.

**Why tracking on the server?**

Unlike tracking index gateway RPS on the client side, tracking on the server side does not yield that many series, even in multi-tenant installations with a lot of tenants, because the number of index gateway instances is relatively small compared to the number of queriers and frontends.

**Special notes for your reviewer**:

The previous approach of tracking requests on the client https://github.com/grafana/loki/pull/9781 has been abandoned.


Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
pull/9809/head
Christian Haudum 2 years ago committed by GitHub
parent 8ca035ffbf
commit a65c99d9bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGELOG.md
  2. 3
      pkg/loki/loki.go
  3. 10
      pkg/loki/modules.go
  4. 45
      pkg/storage/stores/shipper/indexgateway/grpc.go

@ -6,6 +6,7 @@
##### Enhancements
* [9797](https://github.com/grafana/loki/pull/9797) **chaudum**: Add new `loki_index_gateway_requests_total` counter metric to observe per-tenant RPS
* [9710](https://github.com/grafana/loki/pull/9710) **chaudum**: Add shuffle sharding to index gateway
* [9573](https://github.com/grafana/loki/pull/9573) **CCOLLOT**: Lambda-Promtail: Add support for AWS CloudFront log ingestion.
* [9497](https://github.com/grafana/loki/pull/9497) **CCOLLOT**: Lambda-Promtail: Add support for AWS CloudTrail log ingestion.

@ -636,6 +636,7 @@ func (t *Loki) setupModuleManager() error {
mm.RegisterModule(Compactor, t.initCompactor)
mm.RegisterModule(IndexGateway, t.initIndexGateway)
mm.RegisterModule(IndexGatewayRing, t.initIndexGatewayRing, modules.UserInvisibleModule)
mm.RegisterModule(IndexGatewayInterceptors, t.initIndexGatewayInterceptors, modules.UserInvisibleModule)
mm.RegisterModule(QueryScheduler, t.initQueryScheduler)
mm.RegisterModule(QuerySchedulerRing, t.initQuerySchedulerRing, modules.UserInvisibleModule)
mm.RegisterModule(Analytics, t.initAnalytics)
@ -664,7 +665,7 @@ func (t *Loki) setupModuleManager() error {
RuleEvaluator: {Ring, Server, Store, IngesterQuerier, Overrides, TenantConfigs, Analytics},
TableManager: {Server, Analytics},
Compactor: {Server, Overrides, MemberlistKV, Analytics},
IndexGateway: {Server, Store, Overrides, Analytics, MemberlistKV, IndexGatewayRing},
IndexGateway: {Server, Store, Overrides, Analytics, MemberlistKV, IndexGatewayRing, IndexGatewayInterceptors},
IngesterQuerier: {Ring},
QuerySchedulerRing: {Overrides, Server, MemberlistKV},
IndexGatewayRing: {Overrides, Server, MemberlistKV},

@ -104,6 +104,7 @@ const (
Compactor string = "compactor"
IndexGateway string = "index-gateway"
IndexGatewayRing string = "index-gateway-ring"
IndexGatewayInterceptors string = "index-gateway-interceptors"
QueryScheduler string = "query-scheduler"
QuerySchedulerRing string = "query-scheduler-ring"
All string = "all"
@ -1242,6 +1243,15 @@ func (t *Loki) initIndexGatewayRing() (_ services.Service, err error) {
return t.indexGatewayRingManager, nil
}
// initIndexGatewayInterceptors appends the index gateway's per-tenant request
// counting interceptor to the server's gRPC middleware list. It never returns
// a service or an error; it only mutates t.Cfg.Server.GRPCMiddleware.
func (t *Loki) initIndexGatewayInterceptors() (services.Service, error) {
	// Only expose per-tenant metric if index gateway runs as standalone service
	if !t.Cfg.isModuleEnabled(IndexGateway) {
		return nil, nil
	}

	gatewayInterceptors := indexgateway.NewServerInterceptors(prometheus.DefaultRegisterer)
	t.Cfg.Server.GRPCMiddleware = append(
		t.Cfg.Server.GRPCMiddleware,
		gatewayInterceptors.PerTenantRequestCount,
	)
	return nil, nil
}
func (t *Loki) initQueryScheduler() (services.Service, error) {
s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, t.querySchedulerRingManager, prometheus.DefaultRegisterer)
if err != nil {

@ -0,0 +1,45 @@
package indexgateway
import (
"context"
"github.com/grafana/dskit/tenant"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"google.golang.org/grpc"
)
// ServerInterceptors bundles the gRPC server interceptors of the index gateway
// together with the metric they update.
type ServerInterceptors struct {
// reqCount is the counter vector that PerTenantRequestCount increments.
reqCount *prometheus.CounterVec
// PerTenantRequestCount is a unary server interceptor that counts handled
// requests by operation, status and tenant.
PerTenantRequestCount grpc.UnaryServerInterceptor
}
// NewServerInterceptors creates the index gateway's server interceptors.
//
// The request counter `loki_index_gateway_requests_total` is created through
// promauto on the given registerer r and carries the labels "operation",
// "status" and "tenant".
func NewServerInterceptors(r prometheus.Registerer) *ServerInterceptors {
	counter := promauto.With(r).NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "loki",
			Subsystem: "index_gateway",
			Name:      "requests_total",
			Help:      "Total amount of requests served by the index gateway",
		},
		[]string{"operation", "status", "tenant"},
	)

	// countPerTenant wraps the next handler and, once it has returned, records
	// one sample labelled with the full gRPC method name, the outcome and the
	// tenant ID taken from the request context.
	countPerTenant := func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, next grpc.UnaryHandler) (interface{}, error) {
		id, tenantErr := tenant.TenantID(ctx)
		if tenantErr != nil {
			// Requests without a resolvable tenant ID are passed through
			// uncounted; the lookup error itself is not propagated.
			return next(ctx, req)
		}

		resp, err := next(ctx, req)

		outcome := "error"
		if err == nil {
			outcome = "success"
		}
		counter.WithLabelValues(info.FullMethod, outcome, id).Inc()

		return resp, err
	}

	return &ServerInterceptors{
		reqCount:              counter,
		PerTenantRequestCount: countPerTenant,
	}
}
Loading…
Cancel
Save