mirror of https://github.com/grafana/grafana
Backend Plugins: Collect and expose metrics and plugin process health check (#21481)
Adds support for collecting metrics from backend plugins and exposing them through Grafana's Prometheus metrics endpoint. Enables checking the health of a backend plugin via the route `/api/plugins/<plugin id>/health`. Uses sdk v0.6.0. Closes #20984 pull/21507/head
parent
f56f54b1a3
commit
5c711bfb79
@ -0,0 +1,89 @@ |
|||||||
|
package collector |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"sync" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/log" |
||||||
|
"github.com/prometheus/client_golang/prometheus" |
||||||
|
) |
||||||
|
|
||||||
|
// Namespace is the namespace component used when building the fully
// qualified names of the scrape metrics declared in this package.
const Namespace = "grafana_plugin"
||||||
|
|
||||||
|
var ( |
||||||
|
scrapeDurationDesc = prometheus.NewDesc( |
||||||
|
prometheus.BuildFQName(Namespace, "scrape", "duration_seconds"), |
||||||
|
"grafana_plugin: Duration of a plugin collector scrape.", |
||||||
|
[]string{"plugin_id"}, |
||||||
|
nil, |
||||||
|
) |
||||||
|
scrapeSuccessDesc = prometheus.NewDesc( |
||||||
|
prometheus.BuildFQName(Namespace, "scrape", "success"), |
||||||
|
"grafana_plugin: Whether a plugin collector succeeded.", |
||||||
|
[]string{"plugin_id"}, |
||||||
|
nil, |
||||||
|
) |
||||||
|
) |
||||||
|
|
||||||
|
// Collector is the interface a plugin collector has to implement.
|
||||||
|
type Collector interface { |
||||||
|
// Get new metrics and expose them via prometheus registry.
|
||||||
|
CollectMetrics(ctx context.Context, ch chan<- prometheus.Metric) error |
||||||
|
} |
||||||
|
|
||||||
|
// PluginCollector implements the prometheus.Collector interface.
|
||||||
|
type PluginCollector struct { |
||||||
|
collectors map[string]Collector |
||||||
|
logger log.Logger |
||||||
|
} |
||||||
|
|
||||||
|
// NewPluginCollector creates a new PluginCollector..
|
||||||
|
func NewPluginCollector() PluginCollector { |
||||||
|
return PluginCollector{ |
||||||
|
collectors: make(map[string]Collector), |
||||||
|
logger: log.New("plugins.backend.collector"), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (pc PluginCollector) Register(pluginID string, c Collector) { |
||||||
|
pc.collectors[pluginID] = c |
||||||
|
} |
||||||
|
|
||||||
|
// Describe implements the prometheus.Collector interface.
|
||||||
|
func (pc PluginCollector) Describe(ch chan<- *prometheus.Desc) { |
||||||
|
ch <- scrapeDurationDesc |
||||||
|
ch <- scrapeSuccessDesc |
||||||
|
} |
||||||
|
|
||||||
|
// Collect implements the prometheus.Collector interface.
|
||||||
|
func (pc PluginCollector) Collect(ch chan<- prometheus.Metric) { |
||||||
|
ctx := context.Background() |
||||||
|
wg := sync.WaitGroup{} |
||||||
|
wg.Add(len(pc.collectors)) |
||||||
|
for name, c := range pc.collectors { |
||||||
|
go func(name string, c Collector) { |
||||||
|
execute(ctx, name, c, ch, pc.logger) |
||||||
|
wg.Done() |
||||||
|
}(name, c) |
||||||
|
} |
||||||
|
wg.Wait() |
||||||
|
} |
||||||
|
|
||||||
|
func execute(ctx context.Context, pluginID string, c Collector, ch chan<- prometheus.Metric, logger log.Logger) { |
||||||
|
begin := time.Now() |
||||||
|
err := c.CollectMetrics(ctx, ch) |
||||||
|
duration := time.Since(begin) |
||||||
|
var success float64 |
||||||
|
|
||||||
|
if err != nil { |
||||||
|
logger.Error("collector failed", "pluginId", pluginID, "took", duration, "error", err) |
||||||
|
success = 0 |
||||||
|
} else { |
||||||
|
logger.Debug("collector succeeded", "pluginId", pluginID, "took", duration) |
||||||
|
success = 1 |
||||||
|
} |
||||||
|
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), pluginID) |
||||||
|
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, pluginID) |
||||||
|
} |
||||||
@ -1,61 +0,0 @@ |
|||||||
package backend |
|
||||||
|
|
||||||
import ( |
|
||||||
"bytes" |
|
||||||
"context" |
|
||||||
|
|
||||||
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2" |
|
||||||
"github.com/prometheus/client_golang/prometheus" |
|
||||||
"github.com/prometheus/common/expfmt" |
|
||||||
) |
|
||||||
|
|
||||||
// sdkAdapter adapter between protobuf and SDK interfaces.
|
|
||||||
type sdkAdapter struct { |
|
||||||
handlers PluginHandlers |
|
||||||
} |
|
||||||
|
|
||||||
func (a *sdkAdapter) CollectMetrics(ctx context.Context, protoReq *pluginv2.CollectMetrics_Request) (*pluginv2.CollectMetrics_Response, error) { |
|
||||||
metrics, err := prometheus.DefaultGatherer.Gather() |
|
||||||
if err != nil { |
|
||||||
return nil, err |
|
||||||
} |
|
||||||
|
|
||||||
var buf bytes.Buffer |
|
||||||
for _, m := range metrics { |
|
||||||
_, err := expfmt.MetricFamilyToText(&buf, m) |
|
||||||
if err != nil { |
|
||||||
continue |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
resp := &pluginv2.CollectMetrics_Response{ |
|
||||||
Metrics: &pluginv2.CollectMetrics_Payload{ |
|
||||||
Prometheus: buf.Bytes(), |
|
||||||
}, |
|
||||||
} |
|
||||||
|
|
||||||
return resp, nil |
|
||||||
} |
|
||||||
|
|
||||||
func (a *sdkAdapter) CheckHealth(ctx context.Context, protoReq *pluginv2.CheckHealth_Request) (*pluginv2.CheckHealth_Response, error) { |
|
||||||
return &pluginv2.CheckHealth_Response{ |
|
||||||
Status: pluginv2.CheckHealth_Response_OK, |
|
||||||
}, nil |
|
||||||
} |
|
||||||
|
|
||||||
func (a *sdkAdapter) DataQuery(ctx context.Context, req *pluginv2.DataQueryRequest) (*pluginv2.DataQueryResponse, error) { |
|
||||||
resp, err := a.handlers.DataQuery(ctx, dataQueryRequestFromProto(req)) |
|
||||||
if err != nil { |
|
||||||
return nil, err |
|
||||||
} |
|
||||||
|
|
||||||
return resp.toProtobuf() |
|
||||||
} |
|
||||||
|
|
||||||
func (a *sdkAdapter) Resource(ctx context.Context, req *pluginv2.ResourceRequest) (*pluginv2.ResourceResponse, error) { |
|
||||||
res, err := a.handlers.Resource(ctx, resourceRequestFromProtobuf(req)) |
|
||||||
if err != nil { |
|
||||||
return nil, err |
|
||||||
} |
|
||||||
return res.toProtobuf(), nil |
|
||||||
} |
|
||||||
@ -0,0 +1,53 @@ |
|||||||
|
package backend |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
|
||||||
|
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2" |
||||||
|
) |
||||||
|
|
||||||
|
// DiagnosticsPlugin is the Grafana diagnostics plugin interface.
|
||||||
|
type DiagnosticsPlugin interface { |
||||||
|
CollectMetrics(ctx context.Context, req *pluginv2.CollectMetrics_Request) (*pluginv2.CollectMetrics_Response, error) |
||||||
|
CheckHealth(ctx context.Context, req *pluginv2.CheckHealth_Request) (*pluginv2.CheckHealth_Response, error) |
||||||
|
} |
||||||
|
|
||||||
|
type CheckHealthHandler interface { |
||||||
|
CheckHealth(ctx context.Context) (*CheckHealthResult, error) |
||||||
|
} |
||||||
|
|
||||||
|
// HealthStatus is the status of the plugin.
|
||||||
|
type HealthStatus int |
||||||
|
|
||||||
|
const ( |
||||||
|
// HealthStatusUnknown means the status of the plugin is unknown.
|
||||||
|
HealthStatusUnknown HealthStatus = iota |
||||||
|
// HealthStatusOk means the status of the plugin is good.
|
||||||
|
HealthStatusOk |
||||||
|
// HealthStatusError means the plugin is in an error state.
|
||||||
|
HealthStatusError |
||||||
|
) |
||||||
|
|
||||||
|
func (ps HealthStatus) toProtobuf() pluginv2.CheckHealth_Response_HealthStatus { |
||||||
|
switch ps { |
||||||
|
case HealthStatusUnknown: |
||||||
|
return pluginv2.CheckHealth_Response_UNKNOWN |
||||||
|
case HealthStatusOk: |
||||||
|
return pluginv2.CheckHealth_Response_OK |
||||||
|
case HealthStatusError: |
||||||
|
return pluginv2.CheckHealth_Response_ERROR |
||||||
|
} |
||||||
|
panic("unsupported protobuf health status type in sdk") |
||||||
|
} |
||||||
|
|
||||||
|
type CheckHealthResult struct { |
||||||
|
Status HealthStatus |
||||||
|
Info string |
||||||
|
} |
||||||
|
|
||||||
|
func (res *CheckHealthResult) toProtobuf() *pluginv2.CheckHealth_Response { |
||||||
|
return &pluginv2.CheckHealth_Response{ |
||||||
|
Status: res.Status.toProtobuf(), |
||||||
|
Info: res.Info, |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,51 @@ |
|||||||
|
package backend |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
|
||||||
|
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2" |
||||||
|
plugin "github.com/hashicorp/go-plugin" |
||||||
|
"google.golang.org/grpc" |
||||||
|
) |
||||||
|
|
||||||
|
// DiagnosticsGRPCPlugin implements the GRPCPlugin interface from github.com/hashicorp/go-plugin.
|
||||||
|
type DiagnosticsGRPCPlugin struct { |
||||||
|
plugin.NetRPCUnsupportedPlugin |
||||||
|
plugin.GRPCPlugin |
||||||
|
server pluginv2.DiagnosticsServer |
||||||
|
} |
||||||
|
|
||||||
|
func (p *DiagnosticsGRPCPlugin) GRPCServer(broker *plugin.GRPCBroker, s *grpc.Server) error { |
||||||
|
pluginv2.RegisterDiagnosticsServer(s, &diagnosticsGRPCServer{ |
||||||
|
server: p.server, |
||||||
|
}) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (p *DiagnosticsGRPCPlugin) GRPCClient(ctx context.Context, broker *plugin.GRPCBroker, c *grpc.ClientConn) (interface{}, error) { |
||||||
|
return &diagnosticsGRPCClient{client: pluginv2.NewDiagnosticsClient(c)}, nil |
||||||
|
} |
||||||
|
|
||||||
|
type diagnosticsGRPCServer struct { |
||||||
|
server pluginv2.DiagnosticsServer |
||||||
|
} |
||||||
|
|
||||||
|
func (s *diagnosticsGRPCServer) CollectMetrics(ctx context.Context, req *pluginv2.CollectMetrics_Request) (*pluginv2.CollectMetrics_Response, error) { |
||||||
|
return s.server.CollectMetrics(ctx, req) |
||||||
|
} |
||||||
|
|
||||||
|
func (s *diagnosticsGRPCServer) CheckHealth(ctx context.Context, req *pluginv2.CheckHealth_Request) (*pluginv2.CheckHealth_Response, error) { |
||||||
|
return s.server.CheckHealth(ctx, req) |
||||||
|
} |
||||||
|
|
||||||
|
type diagnosticsGRPCClient struct { |
||||||
|
client pluginv2.DiagnosticsClient |
||||||
|
} |
||||||
|
|
||||||
|
func (s *diagnosticsGRPCClient) CollectMetrics(ctx context.Context, req *pluginv2.CollectMetrics_Request) (*pluginv2.CollectMetrics_Response, error) { |
||||||
|
return s.client.CollectMetrics(ctx, req) |
||||||
|
} |
||||||
|
|
||||||
|
func (s *diagnosticsGRPCClient) CheckHealth(ctx context.Context, req *pluginv2.CheckHealth_Request) (*pluginv2.CheckHealth_Response, error) { |
||||||
|
return s.client.CheckHealth(ctx, req) |
||||||
|
} |
||||||
@ -1,4 +1,4 @@ |
|||||||
package common |
package backend |
||||||
|
|
||||||
import plugin "github.com/hashicorp/go-plugin" |
import plugin "github.com/hashicorp/go-plugin" |
||||||
|
|
||||||
@ -0,0 +1,91 @@ |
|||||||
|
package backend |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"context" |
||||||
|
|
||||||
|
"github.com/grafana/grafana-plugin-sdk-go/dataframe" |
||||||
|
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2" |
||||||
|
"github.com/prometheus/client_golang/prometheus" |
||||||
|
"github.com/prometheus/common/expfmt" |
||||||
|
) |
||||||
|
|
||||||
|
// sdkAdapter adapter between protobuf and SDK interfaces.
|
||||||
|
type sdkAdapter struct { |
||||||
|
checkHealthHandler CheckHealthHandler |
||||||
|
dataQueryHandler DataQueryHandler |
||||||
|
resourceHandler ResourceHandler |
||||||
|
transformDataHandler TransformDataHandler |
||||||
|
} |
||||||
|
|
||||||
|
func (a *sdkAdapter) CollectMetrics(ctx context.Context, protoReq *pluginv2.CollectMetrics_Request) (*pluginv2.CollectMetrics_Response, error) { |
||||||
|
mfs, err := prometheus.DefaultGatherer.Gather() |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
var buf bytes.Buffer |
||||||
|
for _, mf := range mfs { |
||||||
|
_, err := expfmt.MetricFamilyToText(&buf, mf) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return &pluginv2.CollectMetrics_Response{ |
||||||
|
Metrics: &pluginv2.CollectMetrics_Payload{ |
||||||
|
Prometheus: buf.Bytes(), |
||||||
|
}, |
||||||
|
}, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (a *sdkAdapter) CheckHealth(ctx context.Context, protoReq *pluginv2.CheckHealth_Request) (*pluginv2.CheckHealth_Response, error) { |
||||||
|
if a.checkHealthHandler != nil { |
||||||
|
res, err := a.checkHealthHandler.CheckHealth(ctx) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return res.toProtobuf(), nil |
||||||
|
} |
||||||
|
|
||||||
|
return &pluginv2.CheckHealth_Response{ |
||||||
|
Status: pluginv2.CheckHealth_Response_OK, |
||||||
|
}, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (a *sdkAdapter) DataQuery(ctx context.Context, req *pluginv2.DataQueryRequest) (*pluginv2.DataQueryResponse, error) { |
||||||
|
resp, err := a.dataQueryHandler.DataQuery(ctx, dataQueryRequestFromProto(req)) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return resp.toProtobuf() |
||||||
|
} |
||||||
|
|
||||||
|
func (a *sdkAdapter) Resource(ctx context.Context, req *pluginv2.ResourceRequest) (*pluginv2.ResourceResponse, error) { |
||||||
|
res, err := a.resourceHandler.Resource(ctx, resourceRequestFromProtobuf(req)) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return res.toProtobuf(), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (a *sdkAdapter) TransformData(ctx context.Context, req *pluginv2.DataQueryRequest, callBack TransformCallBack) (*pluginv2.DataQueryResponse, error) { |
||||||
|
resp, err := a.transformDataHandler.TransformData(ctx, dataQueryRequestFromProto(req), &transformCallBackWrapper{callBack}) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
encodedFrames := make([][]byte, len(resp.Frames)) |
||||||
|
for i, frame := range resp.Frames { |
||||||
|
encodedFrames[i], err = dataframe.MarshalArrow(frame) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return &pluginv2.DataQueryResponse{ |
||||||
|
Frames: encodedFrames, |
||||||
|
Metadata: resp.Metadata, |
||||||
|
}, nil |
||||||
|
} |
||||||
33
vendor/github.com/grafana/grafana-plugin-sdk-go/backend/transform_sdk_adapter.go
generated
vendored
33
vendor/github.com/grafana/grafana-plugin-sdk-go/backend/transform_sdk_adapter.go
generated
vendored
@ -1,33 +0,0 @@ |
|||||||
package backend |
|
||||||
|
|
||||||
import ( |
|
||||||
"context" |
|
||||||
|
|
||||||
"github.com/grafana/grafana-plugin-sdk-go/dataframe" |
|
||||||
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2" |
|
||||||
) |
|
||||||
|
|
||||||
// transformSDKAdapter adapter between protobuf and SDK interfaces.
|
|
||||||
type transformSDKAdapter struct { |
|
||||||
handlers TransformHandlers |
|
||||||
} |
|
||||||
|
|
||||||
func (a *transformSDKAdapter) TransformData(ctx context.Context, req *pluginv2.DataQueryRequest, callBack TransformCallBack) (*pluginv2.DataQueryResponse, error) { |
|
||||||
resp, err := a.handlers.DataQuery(ctx, dataQueryRequestFromProto(req), &transformCallBackWrapper{callBack}) |
|
||||||
if err != nil { |
|
||||||
return nil, err |
|
||||||
} |
|
||||||
|
|
||||||
encodedFrames := make([][]byte, len(resp.Frames)) |
|
||||||
for i, frame := range resp.Frames { |
|
||||||
encodedFrames[i], err = dataframe.MarshalArrow(frame) |
|
||||||
if err != nil { |
|
||||||
return nil, err |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
return &pluginv2.DataQueryResponse{ |
|
||||||
Frames: encodedFrames, |
|
||||||
Metadata: resp.Metadata, |
|
||||||
}, nil |
|
||||||
} |
|
||||||
982
vendor/github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2/backend.pb.go
generated
vendored
982
vendor/github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2/backend.pb.go
generated
vendored
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,201 @@ |
|||||||
|
#vendor |
||||||
|
vendor/ |
||||||
|
|
||||||
|
# Created by .ignore support plugin (hsz.mobi) |
||||||
|
coverage.txt |
||||||
|
### Go template |
||||||
|
# Compiled Object files, Static and Dynamic libs (Shared Objects) |
||||||
|
*.o |
||||||
|
*.a |
||||||
|
*.so |
||||||
|
|
||||||
|
# Folders |
||||||
|
_obj |
||||||
|
_test |
||||||
|
|
||||||
|
# Architecture specific extensions/prefixes |
||||||
|
*.[568vq] |
||||||
|
[568vq].out |
||||||
|
|
||||||
|
*.cgo1.go |
||||||
|
*.cgo2.c |
||||||
|
_cgo_defun.c |
||||||
|
_cgo_gotypes.go |
||||||
|
_cgo_export.* |
||||||
|
|
||||||
|
_testmain.go |
||||||
|
|
||||||
|
*.exe |
||||||
|
*.test |
||||||
|
*.prof |
||||||
|
### Windows template |
||||||
|
# Windows image file caches |
||||||
|
Thumbs.db |
||||||
|
ehthumbs.db |
||||||
|
|
||||||
|
# Folder config file |
||||||
|
Desktop.ini |
||||||
|
|
||||||
|
# Recycle Bin used on file shares |
||||||
|
$RECYCLE.BIN/ |
||||||
|
|
||||||
|
# Windows Installer files |
||||||
|
*.cab |
||||||
|
*.msi |
||||||
|
*.msm |
||||||
|
*.msp |
||||||
|
|
||||||
|
# Windows shortcuts |
||||||
|
*.lnk |
||||||
|
### Kate template |
||||||
|
# Swap Files # |
||||||
|
.*.kate-swp |
||||||
|
.swp.* |
||||||
|
### SublimeText template |
||||||
|
# cache files for sublime text |
||||||
|
*.tmlanguage.cache |
||||||
|
*.tmPreferences.cache |
||||||
|
*.stTheme.cache |
||||||
|
|
||||||
|
# workspace files are user-specific |
||||||
|
*.sublime-workspace |
||||||
|
|
||||||
|
# project files should be checked into the repository, unless a significant |
||||||
|
# proportion of contributors will probably not be using SublimeText |
||||||
|
# *.sublime-project |
||||||
|
|
||||||
|
# sftp configuration file |
||||||
|
sftp-config.json |
||||||
|
### Linux template |
||||||
|
*~ |
||||||
|
|
||||||
|
# temporary files which can be created if a process still has a handle open of a deleted file |
||||||
|
.fuse_hidden* |
||||||
|
|
||||||
|
# KDE directory preferences |
||||||
|
.directory |
||||||
|
|
||||||
|
# Linux trash folder which might appear on any partition or disk |
||||||
|
.Trash-* |
||||||
|
### JetBrains template |
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm |
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 |
||||||
|
|
||||||
|
# User-specific stuff: |
||||||
|
.idea |
||||||
|
.idea/tasks.xml |
||||||
|
.idea/dictionaries |
||||||
|
.idea/vcs.xml |
||||||
|
.idea/jsLibraryMappings.xml |
||||||
|
|
||||||
|
# Sensitive or high-churn files: |
||||||
|
.idea/dataSources.ids |
||||||
|
.idea/dataSources.xml |
||||||
|
.idea/dataSources.local.xml |
||||||
|
.idea/sqlDataSources.xml |
||||||
|
.idea/dynamic.xml |
||||||
|
.idea/uiDesigner.xml |
||||||
|
|
||||||
|
# Gradle: |
||||||
|
.idea/gradle.xml |
||||||
|
.idea/libraries |
||||||
|
|
||||||
|
# Mongo Explorer plugin: |
||||||
|
.idea/mongoSettings.xml |
||||||
|
|
||||||
|
## File-based project format: |
||||||
|
*.iws |
||||||
|
|
||||||
|
## Plugin-specific files: |
||||||
|
|
||||||
|
# IntelliJ |
||||||
|
/out/ |
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin |
||||||
|
.idea_modules/ |
||||||
|
|
||||||
|
# JIRA plugin |
||||||
|
atlassian-ide-plugin.xml |
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ) |
||||||
|
com_crashlytics_export_strings.xml |
||||||
|
crashlytics.properties |
||||||
|
crashlytics-build.properties |
||||||
|
fabric.properties |
||||||
|
### Xcode template |
||||||
|
# Xcode |
||||||
|
# |
||||||
|
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore |
||||||
|
|
||||||
|
## Build generated |
||||||
|
build/ |
||||||
|
DerivedData/ |
||||||
|
|
||||||
|
## Various settings |
||||||
|
*.pbxuser |
||||||
|
!default.pbxuser |
||||||
|
*.mode1v3 |
||||||
|
!default.mode1v3 |
||||||
|
*.mode2v3 |
||||||
|
!default.mode2v3 |
||||||
|
*.perspectivev3 |
||||||
|
!default.perspectivev3 |
||||||
|
xcuserdata/ |
||||||
|
|
||||||
|
## Other |
||||||
|
*.moved-aside |
||||||
|
*.xccheckout |
||||||
|
*.xcscmblueprint |
||||||
|
### Eclipse template |
||||||
|
|
||||||
|
.metadata |
||||||
|
bin/ |
||||||
|
tmp/ |
||||||
|
*.tmp |
||||||
|
*.bak |
||||||
|
*.swp |
||||||
|
*~.nib |
||||||
|
local.properties |
||||||
|
.settings/ |
||||||
|
.loadpath |
||||||
|
.recommenders |
||||||
|
|
||||||
|
# Eclipse Core |
||||||
|
.project |
||||||
|
|
||||||
|
# External tool builders |
||||||
|
.externalToolBuilders/ |
||||||
|
|
||||||
|
# Locally stored "Eclipse launch configurations" |
||||||
|
*.launch |
||||||
|
|
||||||
|
# PyDev specific (Python IDE for Eclipse) |
||||||
|
*.pydevproject |
||||||
|
|
||||||
|
# CDT-specific (C/C++ Development Tooling) |
||||||
|
.cproject |
||||||
|
|
||||||
|
# JDT-specific (Eclipse Java Development Tools) |
||||||
|
.classpath |
||||||
|
|
||||||
|
# Java annotation processor (APT) |
||||||
|
.factorypath |
||||||
|
|
||||||
|
# PDT-specific (PHP Development Tools) |
||||||
|
.buildpath |
||||||
|
|
||||||
|
# sbteclipse plugin |
||||||
|
.target |
||||||
|
|
||||||
|
# Tern plugin |
||||||
|
.tern-project |
||||||
|
|
||||||
|
# TeXlipse plugin |
||||||
|
.texlipse |
||||||
|
|
||||||
|
# STS (Spring Tool Suite) |
||||||
|
.springBeans |
||||||
|
|
||||||
|
# Code Recommenders |
||||||
|
.recommenders/ |
||||||
|
|
||||||
@ -0,0 +1,24 @@ |
|||||||
|
# Changelog |
||||||
|
All notable changes to this project will be documented in this file. |
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) |
||||||
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). |
||||||
|
|
||||||
|
## [Unreleased] |
||||||
|
|
||||||
|
## [1.2.0](https://github.com/grpc-ecosystem/go-grpc-prometheus/releases/tag/v1.2.0) - 2018-06-04 |
||||||
|
|
||||||
|
### Added |
||||||
|
|
||||||
|
* Provide metrics object as `prometheus.Collector`, for conventional metric registration. |
||||||
|
* Support non-default/global Prometheus registry. |
||||||
|
* Allow configuring counters with `prometheus.CounterOpts`. |
||||||
|
|
||||||
|
### Changed |
||||||
|
|
||||||
|
* Remove usage of deprecated `grpc.Code()`. |
||||||
|
* Remove usage of deprecated `grpc.Errorf` and replace with `status.Errorf`. |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
This changelog was started with version `v1.2.0`, for earlier versions refer to the respective [GitHub releases](https://github.com/grpc-ecosystem/go-grpc-prometheus/releases). |
||||||
@ -0,0 +1,201 @@ |
|||||||
|
Apache License |
||||||
|
Version 2.0, January 2004 |
||||||
|
http://www.apache.org/licenses/ |
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||||
|
|
||||||
|
1. Definitions. |
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction, |
||||||
|
and distribution as defined by Sections 1 through 9 of this document. |
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by |
||||||
|
the copyright owner that is granting the License. |
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all |
||||||
|
other entities that control, are controlled by, or are under common |
||||||
|
control with that entity. For the purposes of this definition, |
||||||
|
"control" means (i) the power, direct or indirect, to cause the |
||||||
|
direction or management of such entity, whether by contract or |
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity. |
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity |
||||||
|
exercising permissions granted by this License. |
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications, |
||||||
|
including but not limited to software source code, documentation |
||||||
|
source, and configuration files. |
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical |
||||||
|
transformation or translation of a Source form, including but |
||||||
|
not limited to compiled object code, generated documentation, |
||||||
|
and conversions to other media types. |
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or |
||||||
|
Object form, made available under the License, as indicated by a |
||||||
|
copyright notice that is included in or attached to the work |
||||||
|
(an example is provided in the Appendix below). |
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object |
||||||
|
form, that is based on (or derived from) the Work and for which the |
||||||
|
editorial revisions, annotations, elaborations, or other modifications |
||||||
|
represent, as a whole, an original work of authorship. For the purposes |
||||||
|
of this License, Derivative Works shall not include works that remain |
||||||
|
separable from, or merely link (or bind by name) to the interfaces of, |
||||||
|
the Work and Derivative Works thereof. |
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including |
||||||
|
the original version of the Work and any modifications or additions |
||||||
|
to that Work or Derivative Works thereof, that is intentionally |
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner |
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of |
||||||
|
the copyright owner. For the purposes of this definition, "submitted" |
||||||
|
means any form of electronic, verbal, or written communication sent |
||||||
|
to the Licensor or its representatives, including but not limited to |
||||||
|
communication on electronic mailing lists, source code control systems, |
||||||
|
and issue tracking systems that are managed by, or on behalf of, the |
||||||
|
Licensor for the purpose of discussing and improving the Work, but |
||||||
|
excluding communication that is conspicuously marked or otherwise |
||||||
|
designated in writing by the copyright owner as "Not a Contribution." |
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||||
|
on behalf of whom a Contribution has been received by Licensor and |
||||||
|
subsequently incorporated within the Work. |
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
copyright license to reproduce, prepare Derivative Works of, |
||||||
|
publicly display, publicly perform, sublicense, and distribute the |
||||||
|
Work and such Derivative Works in Source or Object form. |
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
(except as stated in this section) patent license to make, have made, |
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||||
|
where such license applies only to those patent claims licensable |
||||||
|
by such Contributor that are necessarily infringed by their |
||||||
|
Contribution(s) alone or by combination of their Contribution(s) |
||||||
|
with the Work to which such Contribution(s) was submitted. If You |
||||||
|
institute patent litigation against any entity (including a |
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||||
|
or a Contribution incorporated within the Work constitutes direct |
||||||
|
or contributory patent infringement, then any patent licenses |
||||||
|
granted to You under this License for that Work shall terminate |
||||||
|
as of the date such litigation is filed. |
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the |
||||||
|
Work or Derivative Works thereof in any medium, with or without |
||||||
|
modifications, and in Source or Object form, provided that You |
||||||
|
meet the following conditions: |
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or |
||||||
|
Derivative Works a copy of this License; and |
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices |
||||||
|
stating that You changed the files; and |
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works |
||||||
|
that You distribute, all copyright, patent, trademark, and |
||||||
|
attribution notices from the Source form of the Work, |
||||||
|
excluding those notices that do not pertain to any part of |
||||||
|
the Derivative Works; and |
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its |
||||||
|
distribution, then any Derivative Works that You distribute must |
||||||
|
include a readable copy of the attribution notices contained |
||||||
|
within such NOTICE file, excluding those notices that do not |
||||||
|
pertain to any part of the Derivative Works, in at least one |
||||||
|
of the following places: within a NOTICE text file distributed |
||||||
|
as part of the Derivative Works; within the Source form or |
||||||
|
documentation, if provided along with the Derivative Works; or, |
||||||
|
within a display generated by the Derivative Works, if and |
||||||
|
wherever such third-party notices normally appear. The contents |
||||||
|
of the NOTICE file are for informational purposes only and |
||||||
|
do not modify the License. You may add Your own attribution |
||||||
|
notices within Derivative Works that You distribute, alongside |
||||||
|
or as an addendum to the NOTICE text from the Work, provided |
||||||
|
that such additional attribution notices cannot be construed |
||||||
|
as modifying the License. |
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and |
||||||
|
may provide additional or different license terms and conditions |
||||||
|
for use, reproduction, or distribution of Your modifications, or |
||||||
|
for any such Derivative Works as a whole, provided Your use, |
||||||
|
reproduction, and distribution of the Work otherwise complies with |
||||||
|
the conditions stated in this License. |
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||||
|
any Contribution intentionally submitted for inclusion in the Work |
||||||
|
by You to the Licensor shall be under the terms and conditions of |
||||||
|
this License, without any additional terms or conditions. |
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify |
||||||
|
the terms of any separate license agreement you may have executed |
||||||
|
with Licensor regarding such Contributions. |
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade |
||||||
|
names, trademarks, service marks, or product names of the Licensor, |
||||||
|
except as required for reasonable and customary use in describing the |
||||||
|
origin of the Work and reproducing the content of the NOTICE file. |
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or |
||||||
|
agreed to in writing, Licensor provides the Work (and each |
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||||
|
implied, including, without limitation, any warranties or conditions |
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||||
|
appropriateness of using or redistributing the Work and assume any |
||||||
|
risks associated with Your exercise of permissions under this License. |
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory, |
||||||
|
whether in tort (including negligence), contract, or otherwise, |
||||||
|
unless required by applicable law (such as deliberate and grossly |
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be |
||||||
|
liable to You for damages, including any direct, indirect, special, |
||||||
|
incidental, or consequential damages of any character arising as a |
||||||
|
result of this License or out of the use or inability to use the |
||||||
|
Work (including but not limited to damages for loss of goodwill, |
||||||
|
work stoppage, computer failure or malfunction, or any and all |
||||||
|
other commercial damages or losses), even if such Contributor |
||||||
|
has been advised of the possibility of such damages. |
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing |
||||||
|
the Work or Derivative Works thereof, You may choose to offer, |
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity, |
||||||
|
or other liability obligations and/or rights consistent with this |
||||||
|
License. However, in accepting such obligations, You may act only |
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf |
||||||
|
of any other Contributor, and only if You agree to indemnify, |
||||||
|
defend, and hold each Contributor harmless for any liability |
||||||
|
incurred by, or claims asserted against, such Contributor by reason |
||||||
|
of your accepting any such warranty or additional liability. |
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS |
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work. |
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following |
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]" |
||||||
|
replaced with your own identifying information. (Don't include |
||||||
|
the brackets!) The text should be enclosed in the appropriate |
||||||
|
comment syntax for the file format. We also recommend that a |
||||||
|
file or class name and description of purpose be included on the |
||||||
|
same "printed page" as the copyright notice for easier |
||||||
|
identification within third-party archives. |
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner] |
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
you may not use this file except in compliance with the License. |
||||||
|
You may obtain a copy of the License at |
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software |
||||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
See the License for the specific language governing permissions and |
||||||
|
limitations under the License. |
||||||
@ -0,0 +1,247 @@ |
|||||||
|
# Go gRPC Interceptors for Prometheus monitoring |
||||||
|
|
||||||
|
[](https://travis-ci.org/grpc-ecosystem/go-grpc-prometheus) |
||||||
|
[](http://goreportcard.com/report/grpc-ecosystem/go-grpc-prometheus) |
||||||
|
[](https://godoc.org/github.com/grpc-ecosystem/go-grpc-prometheus) |
||||||
|
[](https://sourcegraph.com/github.com/grpc-ecosystem/go-grpc-prometheus/?badge) |
||||||
|
[](https://codecov.io/gh/grpc-ecosystem/go-grpc-prometheus) |
||||||
|
[](LICENSE) |
||||||
|
|
||||||
|
[Prometheus](https://prometheus.io/) monitoring for your [gRPC Go](https://github.com/grpc/grpc-go) servers and clients. |
||||||
|
|
||||||
|
A sister implementation for [gRPC Java](https://github.com/grpc/grpc-java) (same metrics, same semantics) is in [grpc-ecosystem/java-grpc-prometheus](https://github.com/grpc-ecosystem/java-grpc-prometheus). |
||||||
|
|
||||||
|
## Interceptors |
||||||
|
|
||||||
|
[gRPC Go](https://github.com/grpc/grpc-go) recently acquired support for Interceptors, i.e. middleware that is executed |
||||||
|
by a gRPC Server before the request is passed onto the user's application logic. It is a perfect way to implement |
||||||
|
common patterns: auth, logging and... monitoring. |
||||||
|
|
||||||
|
To use Interceptors in chains, please see [`go-grpc-middleware`](https://github.com/mwitkow/go-grpc-middleware). |
||||||
|
|
||||||
|
## Usage |
||||||
|
|
||||||
|
There are two types of interceptors: client-side and server-side. This package provides monitoring Interceptors for both. |
||||||
|
|
||||||
|
### Server-side |
||||||
|
|
||||||
|
```go |
||||||
|
import "github.com/grpc-ecosystem/go-grpc-prometheus" |
||||||
|
... |
||||||
|
// Initialize your gRPC server's interceptor. |
||||||
|
myServer := grpc.NewServer( |
||||||
|
grpc.StreamInterceptor(grpc_prometheus.StreamServerInterceptor), |
||||||
|
grpc.UnaryInterceptor(grpc_prometheus.UnaryServerInterceptor), |
||||||
|
) |
||||||
|
// Register your gRPC service implementations. |
||||||
|
myservice.RegisterMyServiceServer(s.server, &myServiceImpl{}) |
||||||
|
// After all your registrations, make sure all of the Prometheus metrics are initialized. |
||||||
|
grpc_prometheus.Register(myServer) |
||||||
|
// Register Prometheus metrics handler. |
||||||
|
http.Handle("/metrics", promhttp.Handler()) |
||||||
|
... |
||||||
|
``` |
||||||
|
|
||||||
|
### Client-side |
||||||
|
|
||||||
|
```go |
||||||
|
import "github.com/grpc-ecosystem/go-grpc-prometheus" |
||||||
|
... |
||||||
|
clientConn, err = grpc.Dial( |
||||||
|
address, |
||||||
|
grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor), |
||||||
|
grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor) |
||||||
|
) |
||||||
|
client = pb_testproto.NewTestServiceClient(clientConn) |
||||||
|
resp, err := client.PingEmpty(s.ctx, &myservice.Request{Msg: "hello"}) |
||||||
|
... |
||||||
|
``` |
||||||
|
|
||||||
|
# Metrics |
||||||
|
|
||||||
|
## Labels |
||||||
|
|
||||||
|
All server-side metrics start with `grpc_server` as Prometheus subsystem name. All client-side metrics start with `grpc_client`. Both of them have mirror-concepts. Similarly all methods |
||||||
|
contain the same rich labels: |
||||||
|
|
||||||
|
* `grpc_service` - the [gRPC service](http://www.grpc.io/docs/#defining-a-service) name, which is the combination of protobuf `package` and |
||||||
|
the `grpc_service` section name. E.g. for `package = mwitkow.testproto` and |
||||||
|
`service TestService` the label will be `grpc_service="mwitkow.testproto.TestService"` |
||||||
|
* `grpc_method` - the name of the method called on the gRPC service. E.g. |
||||||
|
`grpc_method="Ping"` |
||||||
|
* `grpc_type` - the gRPC [type of request](http://www.grpc.io/docs/guides/concepts.html#rpc-life-cycle). |
||||||
|
Differentiating between the two is important especially for latency measurements. |
||||||
|
|
||||||
|
- `unary` is single request, single response RPC |
||||||
|
- `client_stream` is a multi-request, single response RPC |
||||||
|
- `server_stream` is a single request, multi-response RPC |
||||||
|
- `bidi_stream` is a multi-request, multi-response RPC |
||||||
|
|
||||||
|
|
||||||
|
Additionally for completed RPCs, the following labels are used: |
||||||
|
|
||||||
|
* `grpc_code` - the human-readable [gRPC status code](https://github.com/grpc/grpc-go/blob/master/codes/codes.go). |
||||||
|
The list of all statuses is too long, but here are some common ones: |
||||||
|
|
||||||
|
- `OK` - means the RPC was successful |
||||||
|
- `InvalidArgument` - RPC contained bad values |
||||||
|
- `Internal` - server-side error not disclosed to the clients |
||||||
|
|
||||||
|
## Counters |
||||||
|
|
||||||
|
The counters and their up-to-date documentation are in [server_reporter.go](server_reporter.go) and [client_reporter.go](client_reporter.go); |
||||||
|
their values are exposed by the respective Prometheus handler (usually `/metrics`). |
||||||
|
|
||||||
|
For the purpose of this documentation we will only discuss `grpc_server` metrics. The `grpc_client` ones contain mirror concepts. |
||||||
|
|
||||||
|
For simplicity, let's assume we're tracking a single server-side RPC call of [`mwitkow.testproto.TestService`](examples/testproto/test.proto), |
||||||
|
calling the method `PingList`. The call succeeds and returns 20 messages in the stream. |
||||||
|
|
||||||
|
First, immediately after the server receives the call it will increment the |
||||||
|
`grpc_server_started_total` and start the handling time clock (if histograms are enabled). |
||||||
|
|
||||||
|
```jsoniq |
||||||
|
grpc_server_started_total{grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 1 |
||||||
|
``` |
||||||
|
|
||||||
|
Then the user logic gets invoked. It receives one message from the client containing the request |
||||||
|
(it's a `server_stream`): |
||||||
|
|
||||||
|
```jsoniq |
||||||
|
grpc_server_msg_received_total{grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 1 |
||||||
|
``` |
||||||
|
|
||||||
|
The user logic may return an error, or send multiple messages back to the client. In this case, on |
||||||
|
each of the 20 messages sent back, a counter will be incremented: |
||||||
|
|
||||||
|
```jsoniq |
||||||
|
grpc_server_msg_sent_total{grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 20 |
||||||
|
``` |
||||||
|
|
||||||
|
After the call completes, its status (`OK` or other [gRPC status code](https://github.com/grpc/grpc-go/blob/master/codes/codes.go)) |
||||||
|
and the relevant call labels increment the `grpc_server_handled_total` counter. |
||||||
|
|
||||||
|
```jsoniq |
||||||
|
grpc_server_handled_total{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 1 |
||||||
|
``` |
||||||
|
|
||||||
|
## Histograms |
||||||
|
|
||||||
|
[Prometheus histograms](https://prometheus.io/docs/concepts/metric_types/#histogram) are a great way |
||||||
|
to measure latency distributions of your RPCs. However, since it is bad practice to have metrics |
||||||
|
of [high cardinality](https://prometheus.io/docs/practices/instrumentation/#do-not-overuse-labels) |
||||||
|
the latency monitoring metrics are disabled by default. To enable them please call the following |
||||||
|
in your server initialization code: |
||||||
|
|
||||||
|
```go |
||||||
|
grpc_prometheus.EnableHandlingTimeHistogram() |
||||||
|
``` |
||||||
|
|
||||||
|
After the call completes, its handling time will be recorded in a [Prometheus histogram](https://prometheus.io/docs/concepts/metric_types/#histogram) |
||||||
|
variable `grpc_server_handling_seconds`. The histogram variable contains three sub-metrics: |
||||||
|
|
||||||
|
* `grpc_server_handling_seconds_count` - the count of all completed RPCs by status and method |
||||||
|
* `grpc_server_handling_seconds_sum` - cumulative time of RPCs by status and method, useful for |
||||||
|
calculating average handling times |
||||||
|
* `grpc_server_handling_seconds_bucket` - contains the counts of RPCs by status and method in respective |
||||||
|
handling-time buckets. These buckets can be used by Prometheus to estimate SLAs (see [here](https://prometheus.io/docs/practices/histograms/)) |
||||||
|
|
||||||
|
The counter values will look as follows: |
||||||
|
|
||||||
|
```jsoniq |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.005"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.01"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.025"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.05"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.1"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.25"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="0.5"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="1"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="2.5"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="5"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="10"} 1 |
||||||
|
grpc_server_handling_seconds_bucket{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream",le="+Inf"} 1 |
||||||
|
grpc_server_handling_seconds_sum{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 0.0003866430000000001 |
||||||
|
grpc_server_handling_seconds_count{grpc_code="OK",grpc_method="PingList",grpc_service="mwitkow.testproto.TestService",grpc_type="server_stream"} 1 |
||||||
|
``` |
||||||
|
|
||||||
|
|
||||||
|
## Useful query examples |
||||||
|
|
||||||
|
Prometheus philosophy is to provide raw metrics to the monitoring system, and |
||||||
|
let the aggregations be handled there. The verbosity of the above metrics makes it possible to have that |
||||||
|
flexibility. Here's a couple of useful monitoring queries: |
||||||
|
|
||||||
|
|
||||||
|
### request inbound rate |
||||||
|
```jsoniq |
||||||
|
sum(rate(grpc_server_started_total{job="foo"}[1m])) by (grpc_service) |
||||||
|
``` |
||||||
|
For `job="foo"` (common label to differentiate between Prometheus monitoring targets), calculate the |
||||||
|
rate of requests per second (1 minute window) for each gRPC `grpc_service` that the job has. Please note |
||||||
|
how the `grpc_method` is being omitted here: all methods of a given gRPC service will be summed together. |
||||||
|
|
||||||
|
### unary request error rate |
||||||
|
```jsoniq |
||||||
|
sum(rate(grpc_server_handled_total{job="foo",grpc_type="unary",grpc_code!="OK"}[1m])) by (grpc_service) |
||||||
|
``` |
||||||
|
For `job="foo"`, calculate the per-`grpc_service` rate of `unary` (1:1) RPCs that failed, i.e. the |
||||||
|
ones that didn't finish with `OK` code. |
||||||
|
|
||||||
|
### unary request error percentage |
||||||
|
```jsoniq |
||||||
|
sum(rate(grpc_server_handled_total{job="foo",grpc_type="unary",grpc_code!="OK"}[1m])) by (grpc_service) |
||||||
|
/ |
||||||
|
sum(rate(grpc_server_started_total{job="foo",grpc_type="unary"}[1m])) by (grpc_service) |
||||||
|
* 100.0 |
||||||
|
``` |
||||||
|
For `job="foo"`, calculate the percentage of failed requests by service. It's easy to notice that |
||||||
|
this is a combination of the two above examples. This is an example of a query you would like to |
||||||
|
[alert on](https://prometheus.io/docs/alerting/rules/) in your system for SLA violations, e.g. |
||||||
|
"no more than 1% requests should fail". |
||||||
|
|
||||||
|
### average response stream size |
||||||
|
```jsoniq |
||||||
|
sum(rate(grpc_server_msg_sent_total{job="foo",grpc_type="server_stream"}[10m])) by (grpc_service) |
||||||
|
/ |
||||||
|
sum(rate(grpc_server_started_total{job="foo",grpc_type="server_stream"}[10m])) by (grpc_service) |
||||||
|
``` |
||||||
|
For `job="foo"` what is the `grpc_service`-wide `10m` average of messages returned for all |
||||||
|
`server_stream` RPCs. This allows you to track the stream sizes returned by your system, e.g. allows |
||||||
|
you to track when clients started to send "wide" queries that return large numbers of messages. |
||||||
|
Note the divisor is the number of started RPCs, in order to account for in-flight requests. |
||||||
|
|
||||||
|
### 99%-tile latency of unary requests |
||||||
|
```jsoniq |
||||||
|
histogram_quantile(0.99, |
||||||
|
sum(rate(grpc_server_handling_seconds_bucket{job="foo",grpc_type="unary"}[5m])) by (grpc_service,le) |
||||||
|
) |
||||||
|
``` |
||||||
|
For `job="foo"`, returns a 99%-tile [quantile estimation](https://prometheus.io/docs/practices/histograms/#quantiles) |
||||||
|
of the handling time of RPCs per service. Please note the `5m` rate, this means that the quantile |
||||||
|
estimation will take samples in a rolling `5m` window. When combined with other quantiles |
||||||
|
(e.g. 50%, 90%), this query gives you tremendous insight into the responsiveness of your system |
||||||
|
(e.g. impact of caching). |
||||||
|
|
||||||
|
### percentage of slow unary queries (>250ms) |
||||||
|
```jsoniq |
||||||
|
100.0 - ( |
||||||
|
sum(rate(grpc_server_handling_seconds_bucket{job="foo",grpc_type="unary",le="0.25"}[5m])) by (grpc_service) |
||||||
|
/ |
||||||
|
sum(rate(grpc_server_handling_seconds_count{job="foo",grpc_type="unary"}[5m])) by (grpc_service) |
||||||
|
) * 100.0 |
||||||
|
``` |
||||||
|
For `job="foo"` calculate the by-`grpc_service` fraction of slow requests that took longer than `0.25` |
||||||
|
seconds. This query is relatively complex, since the Prometheus aggregations use `le` (less or equal) |
||||||
|
buckets, meaning that counting "fast" requests fractions is easier. However, simple maths helps. |
||||||
|
This is an example of a query you would like to alert on in your system for SLA violations, |
||||||
|
e.g. "less than 1% of requests are slower than 250ms". |
||||||
|
|
||||||
|
|
||||||
|
## Status |
||||||
|
|
||||||
|
This code has been used since August 2015 as the basis for monitoring of *production* gRPC micro services at [Improbable](https://improbable.io). |
||||||
|
|
||||||
|
## License |
||||||
|
|
||||||
|
`go-grpc-prometheus` is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details. |
||||||
@ -0,0 +1,39 @@ |
|||||||
|
// Copyright 2016 Michal Witkowski. All Rights Reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
|
||||||
|
// gRPC Prometheus monitoring interceptors for client-side gRPC.
|
||||||
|
|
||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
prom "github.com/prometheus/client_golang/prometheus" |
||||||
|
) |
||||||
|
|
||||||
|
var ( |
||||||
|
// DefaultClientMetrics is the default instance of ClientMetrics. It is
|
||||||
|
// intended to be used in conjunction the default Prometheus metrics
|
||||||
|
// registry.
|
||||||
|
DefaultClientMetrics = NewClientMetrics() |
||||||
|
|
||||||
|
// UnaryClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Unary RPCs.
|
||||||
|
UnaryClientInterceptor = DefaultClientMetrics.UnaryClientInterceptor() |
||||||
|
|
||||||
|
// StreamClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Streaming RPCs.
|
||||||
|
StreamClientInterceptor = DefaultClientMetrics.StreamClientInterceptor() |
||||||
|
) |
||||||
|
|
||||||
|
func init() { |
||||||
|
prom.MustRegister(DefaultClientMetrics.clientStartedCounter) |
||||||
|
prom.MustRegister(DefaultClientMetrics.clientHandledCounter) |
||||||
|
prom.MustRegister(DefaultClientMetrics.clientStreamMsgReceived) |
||||||
|
prom.MustRegister(DefaultClientMetrics.clientStreamMsgSent) |
||||||
|
} |
||||||
|
|
||||||
|
// EnableClientHandlingTimeHistogram turns on recording of handling time of
|
||||||
|
// RPCs. Histogram metrics can be very expensive for Prometheus to retain and
|
||||||
|
// query. This function acts on the DefaultClientMetrics variable and the
|
||||||
|
// default Prometheus metrics registry.
|
||||||
|
func EnableClientHandlingTimeHistogram(opts ...HistogramOption) { |
||||||
|
DefaultClientMetrics.EnableClientHandlingTimeHistogram(opts...) |
||||||
|
prom.Register(DefaultClientMetrics.clientHandledHistogram) |
||||||
|
} |
||||||
@ -0,0 +1,170 @@ |
|||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
"io" |
||||||
|
|
||||||
|
prom "github.com/prometheus/client_golang/prometheus" |
||||||
|
"golang.org/x/net/context" |
||||||
|
"google.golang.org/grpc" |
||||||
|
"google.golang.org/grpc/codes" |
||||||
|
"google.golang.org/grpc/status" |
||||||
|
) |
||||||
|
|
||||||
|
// ClientMetrics represents a collection of metrics to be registered on a
|
||||||
|
// Prometheus metrics registry for a gRPC client.
|
||||||
|
type ClientMetrics struct { |
||||||
|
clientStartedCounter *prom.CounterVec |
||||||
|
clientHandledCounter *prom.CounterVec |
||||||
|
clientStreamMsgReceived *prom.CounterVec |
||||||
|
clientStreamMsgSent *prom.CounterVec |
||||||
|
clientHandledHistogramEnabled bool |
||||||
|
clientHandledHistogramOpts prom.HistogramOpts |
||||||
|
clientHandledHistogram *prom.HistogramVec |
||||||
|
} |
||||||
|
|
||||||
|
// NewClientMetrics returns a ClientMetrics object. Use a new instance of
|
||||||
|
// ClientMetrics when not using the default Prometheus metrics registry, for
|
||||||
|
// example when wanting to control which metrics are added to a registry as
|
||||||
|
// opposed to automatically adding metrics via init functions.
|
||||||
|
func NewClientMetrics(counterOpts ...CounterOption) *ClientMetrics { |
||||||
|
opts := counterOptions(counterOpts) |
||||||
|
return &ClientMetrics{ |
||||||
|
clientStartedCounter: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_client_started_total", |
||||||
|
Help: "Total number of RPCs started on the client.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
|
||||||
|
clientHandledCounter: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_client_handled_total", |
||||||
|
Help: "Total number of RPCs completed by the client, regardless of success or failure.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}), |
||||||
|
|
||||||
|
clientStreamMsgReceived: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_client_msg_received_total", |
||||||
|
Help: "Total number of RPC stream messages received by the client.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
|
||||||
|
clientStreamMsgSent: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_client_msg_sent_total", |
||||||
|
Help: "Total number of gRPC stream messages sent by the client.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
|
||||||
|
clientHandledHistogramEnabled: false, |
||||||
|
clientHandledHistogramOpts: prom.HistogramOpts{ |
||||||
|
Name: "grpc_client_handling_seconds", |
||||||
|
Help: "Histogram of response latency (seconds) of the gRPC until it is finished by the application.", |
||||||
|
Buckets: prom.DefBuckets, |
||||||
|
}, |
||||||
|
clientHandledHistogram: nil, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Describe sends the super-set of all possible descriptors of metrics
|
||||||
|
// collected by this Collector to the provided channel and returns once
|
||||||
|
// the last descriptor has been sent.
|
||||||
|
func (m *ClientMetrics) Describe(ch chan<- *prom.Desc) { |
||||||
|
m.clientStartedCounter.Describe(ch) |
||||||
|
m.clientHandledCounter.Describe(ch) |
||||||
|
m.clientStreamMsgReceived.Describe(ch) |
||||||
|
m.clientStreamMsgSent.Describe(ch) |
||||||
|
if m.clientHandledHistogramEnabled { |
||||||
|
m.clientHandledHistogram.Describe(ch) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Collect is called by the Prometheus registry when collecting
|
||||||
|
// metrics. The implementation sends each collected metric via the
|
||||||
|
// provided channel and returns once the last metric has been sent.
|
||||||
|
func (m *ClientMetrics) Collect(ch chan<- prom.Metric) { |
||||||
|
m.clientStartedCounter.Collect(ch) |
||||||
|
m.clientHandledCounter.Collect(ch) |
||||||
|
m.clientStreamMsgReceived.Collect(ch) |
||||||
|
m.clientStreamMsgSent.Collect(ch) |
||||||
|
if m.clientHandledHistogramEnabled { |
||||||
|
m.clientHandledHistogram.Collect(ch) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// EnableClientHandlingTimeHistogram turns on recording of handling time of RPCs.
|
||||||
|
// Histogram metrics can be very expensive for Prometheus to retain and query.
|
||||||
|
func (m *ClientMetrics) EnableClientHandlingTimeHistogram(opts ...HistogramOption) { |
||||||
|
for _, o := range opts { |
||||||
|
o(&m.clientHandledHistogramOpts) |
||||||
|
} |
||||||
|
if !m.clientHandledHistogramEnabled { |
||||||
|
m.clientHandledHistogram = prom.NewHistogramVec( |
||||||
|
m.clientHandledHistogramOpts, |
||||||
|
[]string{"grpc_type", "grpc_service", "grpc_method"}, |
||||||
|
) |
||||||
|
} |
||||||
|
m.clientHandledHistogramEnabled = true |
||||||
|
} |
||||||
|
|
||||||
|
// UnaryClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Unary RPCs.
|
||||||
|
func (m *ClientMetrics) UnaryClientInterceptor() func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { |
||||||
|
return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { |
||||||
|
monitor := newClientReporter(m, Unary, method) |
||||||
|
monitor.SentMessage() |
||||||
|
err := invoker(ctx, method, req, reply, cc, opts...) |
||||||
|
if err != nil { |
||||||
|
monitor.ReceivedMessage() |
||||||
|
} |
||||||
|
st, _ := status.FromError(err) |
||||||
|
monitor.Handled(st.Code()) |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// StreamClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Streaming RPCs.
|
||||||
|
func (m *ClientMetrics) StreamClientInterceptor() func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { |
||||||
|
return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { |
||||||
|
monitor := newClientReporter(m, clientStreamType(desc), method) |
||||||
|
clientStream, err := streamer(ctx, desc, cc, method, opts...) |
||||||
|
if err != nil { |
||||||
|
st, _ := status.FromError(err) |
||||||
|
monitor.Handled(st.Code()) |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return &monitoredClientStream{clientStream, monitor}, nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func clientStreamType(desc *grpc.StreamDesc) grpcType { |
||||||
|
if desc.ClientStreams && !desc.ServerStreams { |
||||||
|
return ClientStream |
||||||
|
} else if !desc.ClientStreams && desc.ServerStreams { |
||||||
|
return ServerStream |
||||||
|
} |
||||||
|
return BidiStream |
||||||
|
} |
||||||
|
|
||||||
|
// monitoredClientStream wraps grpc.ClientStream allowing each Sent/Recv of message to increment counters.
|
||||||
|
type monitoredClientStream struct { |
||||||
|
grpc.ClientStream |
||||||
|
monitor *clientReporter |
||||||
|
} |
||||||
|
|
||||||
|
func (s *monitoredClientStream) SendMsg(m interface{}) error { |
||||||
|
err := s.ClientStream.SendMsg(m) |
||||||
|
if err == nil { |
||||||
|
s.monitor.SentMessage() |
||||||
|
} |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
func (s *monitoredClientStream) RecvMsg(m interface{}) error { |
||||||
|
err := s.ClientStream.RecvMsg(m) |
||||||
|
if err == nil { |
||||||
|
s.monitor.ReceivedMessage() |
||||||
|
} else if err == io.EOF { |
||||||
|
s.monitor.Handled(codes.OK) |
||||||
|
} else { |
||||||
|
st, _ := status.FromError(err) |
||||||
|
s.monitor.Handled(st.Code()) |
||||||
|
} |
||||||
|
return err |
||||||
|
} |
||||||
@ -0,0 +1,46 @@ |
|||||||
|
// Copyright 2016 Michal Witkowski. All Rights Reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
|
||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
"time" |
||||||
|
|
||||||
|
"google.golang.org/grpc/codes" |
||||||
|
) |
||||||
|
|
||||||
|
type clientReporter struct { |
||||||
|
metrics *ClientMetrics |
||||||
|
rpcType grpcType |
||||||
|
serviceName string |
||||||
|
methodName string |
||||||
|
startTime time.Time |
||||||
|
} |
||||||
|
|
||||||
|
func newClientReporter(m *ClientMetrics, rpcType grpcType, fullMethod string) *clientReporter { |
||||||
|
r := &clientReporter{ |
||||||
|
metrics: m, |
||||||
|
rpcType: rpcType, |
||||||
|
} |
||||||
|
if r.metrics.clientHandledHistogramEnabled { |
||||||
|
r.startTime = time.Now() |
||||||
|
} |
||||||
|
r.serviceName, r.methodName = splitMethodName(fullMethod) |
||||||
|
r.metrics.clientStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
return r |
||||||
|
} |
||||||
|
|
||||||
|
func (r *clientReporter) ReceivedMessage() { |
||||||
|
r.metrics.clientStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
} |
||||||
|
|
||||||
|
func (r *clientReporter) SentMessage() { |
||||||
|
r.metrics.clientStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
} |
||||||
|
|
||||||
|
func (r *clientReporter) Handled(code codes.Code) { |
||||||
|
r.metrics.clientHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() |
||||||
|
if r.metrics.clientHandledHistogramEnabled { |
||||||
|
r.metrics.clientHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,16 @@ |
|||||||
|
SHELL="/bin/bash"

# Every Go package of the module except those under /vendor/.
GOFILES_NOVENDOR = $(shell go list ./... | grep -v /vendor/)

# Default target: run the static checks, format the code, then run the tests.
all: vet fmt test

# Format all non-vendored packages.
fmt:
	go fmt $(GOFILES_NOVENDOR)

# Run go vet on all non-vendored packages.
vet:
	go vet $(GOFILES_NOVENDOR)

# Run the test script; vet runs first.
test: vet
	./scripts/test_all.sh

# None of these targets produce a file of the same name. fmt was previously
# missing here, so a file or directory called "fmt" would have caused the
# target to be skipped.
.PHONY: all vet fmt test
||||||
@ -0,0 +1,41 @@ |
|||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
prom "github.com/prometheus/client_golang/prometheus" |
||||||
|
) |
||||||
|
|
||||||
|
// A CounterOption lets you add options to Counter metrics using With* funcs.
|
||||||
|
type CounterOption func(*prom.CounterOpts) |
||||||
|
|
||||||
|
type counterOptions []CounterOption |
||||||
|
|
||||||
|
func (co counterOptions) apply(o prom.CounterOpts) prom.CounterOpts { |
||||||
|
for _, f := range co { |
||||||
|
f(&o) |
||||||
|
} |
||||||
|
return o |
||||||
|
} |
||||||
|
|
||||||
|
// WithConstLabels allows you to add ConstLabels to Counter metrics.
|
||||||
|
func WithConstLabels(labels prom.Labels) CounterOption { |
||||||
|
return func(o *prom.CounterOpts) { |
||||||
|
o.ConstLabels = labels |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// A HistogramOption lets you add options to Histogram metrics using With*
|
||||||
|
// funcs.
|
||||||
|
type HistogramOption func(*prom.HistogramOpts) |
||||||
|
|
||||||
|
// WithHistogramBuckets allows you to specify custom bucket ranges for histograms if EnableHandlingTimeHistogram is on.
|
||||||
|
func WithHistogramBuckets(buckets []float64) HistogramOption { |
||||||
|
return func(o *prom.HistogramOpts) { o.Buckets = buckets } |
||||||
|
} |
||||||
|
|
||||||
|
// WithHistogramConstLabels allows you to add custom ConstLabels to
|
||||||
|
// histograms metrics.
|
||||||
|
func WithHistogramConstLabels(labels prom.Labels) HistogramOption { |
||||||
|
return func(o *prom.HistogramOpts) { |
||||||
|
o.ConstLabels = labels |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,48 @@ |
|||||||
|
// Copyright 2016 Michal Witkowski. All Rights Reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
|
||||||
|
// gRPC Prometheus monitoring interceptors for server-side gRPC.
|
||||||
|
|
||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
prom "github.com/prometheus/client_golang/prometheus" |
||||||
|
"google.golang.org/grpc" |
||||||
|
) |
||||||
|
|
||||||
|
var ( |
||||||
|
// DefaultServerMetrics is the default instance of ServerMetrics. It is
|
||||||
|
// intended to be used in conjunction the default Prometheus metrics
|
||||||
|
// registry.
|
||||||
|
DefaultServerMetrics = NewServerMetrics() |
||||||
|
|
||||||
|
// UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs.
|
||||||
|
UnaryServerInterceptor = DefaultServerMetrics.UnaryServerInterceptor() |
||||||
|
|
||||||
|
// StreamServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Streaming RPCs.
|
||||||
|
StreamServerInterceptor = DefaultServerMetrics.StreamServerInterceptor() |
||||||
|
) |
||||||
|
|
||||||
|
func init() { |
||||||
|
prom.MustRegister(DefaultServerMetrics.serverStartedCounter) |
||||||
|
prom.MustRegister(DefaultServerMetrics.serverHandledCounter) |
||||||
|
prom.MustRegister(DefaultServerMetrics.serverStreamMsgReceived) |
||||||
|
prom.MustRegister(DefaultServerMetrics.serverStreamMsgSent) |
||||||
|
} |
||||||
|
|
||||||
|
// Register takes a gRPC server and pre-initializes all counters to 0. This
|
||||||
|
// allows for easier monitoring in Prometheus (no missing metrics), and should
|
||||||
|
// be called *after* all services have been registered with the server. This
|
||||||
|
// function acts on the DefaultServerMetrics variable.
|
||||||
|
func Register(server *grpc.Server) { |
||||||
|
DefaultServerMetrics.InitializeMetrics(server) |
||||||
|
} |
||||||
|
|
||||||
|
// EnableHandlingTimeHistogram turns on recording of handling time
|
||||||
|
// of RPCs. Histogram metrics can be very expensive for Prometheus
|
||||||
|
// to retain and query. This function acts on the DefaultServerMetrics
|
||||||
|
// variable and the default Prometheus metrics registry.
|
||||||
|
func EnableHandlingTimeHistogram(opts ...HistogramOption) { |
||||||
|
DefaultServerMetrics.EnableHandlingTimeHistogram(opts...) |
||||||
|
prom.Register(DefaultServerMetrics.serverHandledHistogram) |
||||||
|
} |
||||||
@ -0,0 +1,185 @@ |
|||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
prom "github.com/prometheus/client_golang/prometheus" |
||||||
|
"golang.org/x/net/context" |
||||||
|
"google.golang.org/grpc" |
||||||
|
"google.golang.org/grpc/status" |
||||||
|
) |
||||||
|
|
||||||
|
// ServerMetrics represents a collection of metrics to be registered on a
|
||||||
|
// Prometheus metrics registry for a gRPC server.
|
||||||
|
type ServerMetrics struct { |
||||||
|
serverStartedCounter *prom.CounterVec |
||||||
|
serverHandledCounter *prom.CounterVec |
||||||
|
serverStreamMsgReceived *prom.CounterVec |
||||||
|
serverStreamMsgSent *prom.CounterVec |
||||||
|
serverHandledHistogramEnabled bool |
||||||
|
serverHandledHistogramOpts prom.HistogramOpts |
||||||
|
serverHandledHistogram *prom.HistogramVec |
||||||
|
} |
||||||
|
|
||||||
|
// NewServerMetrics returns a ServerMetrics object. Use a new instance of
|
||||||
|
// ServerMetrics when not using the default Prometheus metrics registry, for
|
||||||
|
// example when wanting to control which metrics are added to a registry as
|
||||||
|
// opposed to automatically adding metrics via init functions.
|
||||||
|
func NewServerMetrics(counterOpts ...CounterOption) *ServerMetrics { |
||||||
|
opts := counterOptions(counterOpts) |
||||||
|
return &ServerMetrics{ |
||||||
|
serverStartedCounter: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_server_started_total", |
||||||
|
Help: "Total number of RPCs started on the server.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
serverHandledCounter: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_server_handled_total", |
||||||
|
Help: "Total number of RPCs completed on the server, regardless of success or failure.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}), |
||||||
|
serverStreamMsgReceived: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_server_msg_received_total", |
||||||
|
Help: "Total number of RPC stream messages received on the server.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
serverStreamMsgSent: prom.NewCounterVec( |
||||||
|
opts.apply(prom.CounterOpts{ |
||||||
|
Name: "grpc_server_msg_sent_total", |
||||||
|
Help: "Total number of gRPC stream messages sent by the server.", |
||||||
|
}), []string{"grpc_type", "grpc_service", "grpc_method"}), |
||||||
|
serverHandledHistogramEnabled: false, |
||||||
|
serverHandledHistogramOpts: prom.HistogramOpts{ |
||||||
|
Name: "grpc_server_handling_seconds", |
||||||
|
Help: "Histogram of response latency (seconds) of gRPC that had been application-level handled by the server.", |
||||||
|
Buckets: prom.DefBuckets, |
||||||
|
}, |
||||||
|
serverHandledHistogram: nil, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// EnableHandlingTimeHistogram enables histograms being registered when
|
||||||
|
// registering the ServerMetrics on a Prometheus registry. Histograms can be
|
||||||
|
// expensive on Prometheus servers. It takes options to configure histogram
|
||||||
|
// options such as the defined buckets.
|
||||||
|
func (m *ServerMetrics) EnableHandlingTimeHistogram(opts ...HistogramOption) { |
||||||
|
for _, o := range opts { |
||||||
|
o(&m.serverHandledHistogramOpts) |
||||||
|
} |
||||||
|
if !m.serverHandledHistogramEnabled { |
||||||
|
m.serverHandledHistogram = prom.NewHistogramVec( |
||||||
|
m.serverHandledHistogramOpts, |
||||||
|
[]string{"grpc_type", "grpc_service", "grpc_method"}, |
||||||
|
) |
||||||
|
} |
||||||
|
m.serverHandledHistogramEnabled = true |
||||||
|
} |
||||||
|
|
||||||
|
// Describe sends the super-set of all possible descriptors of metrics
|
||||||
|
// collected by this Collector to the provided channel and returns once
|
||||||
|
// the last descriptor has been sent.
|
||||||
|
func (m *ServerMetrics) Describe(ch chan<- *prom.Desc) { |
||||||
|
m.serverStartedCounter.Describe(ch) |
||||||
|
m.serverHandledCounter.Describe(ch) |
||||||
|
m.serverStreamMsgReceived.Describe(ch) |
||||||
|
m.serverStreamMsgSent.Describe(ch) |
||||||
|
if m.serverHandledHistogramEnabled { |
||||||
|
m.serverHandledHistogram.Describe(ch) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Collect is called by the Prometheus registry when collecting
|
||||||
|
// metrics. The implementation sends each collected metric via the
|
||||||
|
// provided channel and returns once the last metric has been sent.
|
||||||
|
func (m *ServerMetrics) Collect(ch chan<- prom.Metric) { |
||||||
|
m.serverStartedCounter.Collect(ch) |
||||||
|
m.serverHandledCounter.Collect(ch) |
||||||
|
m.serverStreamMsgReceived.Collect(ch) |
||||||
|
m.serverStreamMsgSent.Collect(ch) |
||||||
|
if m.serverHandledHistogramEnabled { |
||||||
|
m.serverHandledHistogram.Collect(ch) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs.
|
||||||
|
func (m *ServerMetrics) UnaryServerInterceptor() func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { |
||||||
|
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { |
||||||
|
monitor := newServerReporter(m, Unary, info.FullMethod) |
||||||
|
monitor.ReceivedMessage() |
||||||
|
resp, err := handler(ctx, req) |
||||||
|
st, _ := status.FromError(err) |
||||||
|
monitor.Handled(st.Code()) |
||||||
|
if err == nil { |
||||||
|
monitor.SentMessage() |
||||||
|
} |
||||||
|
return resp, err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// StreamServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Streaming RPCs.
|
||||||
|
func (m *ServerMetrics) StreamServerInterceptor() func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { |
||||||
|
return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { |
||||||
|
monitor := newServerReporter(m, streamRPCType(info), info.FullMethod) |
||||||
|
err := handler(srv, &monitoredServerStream{ss, monitor}) |
||||||
|
st, _ := status.FromError(err) |
||||||
|
monitor.Handled(st.Code()) |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// InitializeMetrics initializes all metrics, with their appropriate null
|
||||||
|
// value, for all gRPC methods registered on a gRPC server. This is useful, to
|
||||||
|
// ensure that all metrics exist when collecting and querying.
|
||||||
|
func (m *ServerMetrics) InitializeMetrics(server *grpc.Server) { |
||||||
|
serviceInfo := server.GetServiceInfo() |
||||||
|
for serviceName, info := range serviceInfo { |
||||||
|
for _, mInfo := range info.Methods { |
||||||
|
preRegisterMethod(m, serviceName, &mInfo) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func streamRPCType(info *grpc.StreamServerInfo) grpcType { |
||||||
|
if info.IsClientStream && !info.IsServerStream { |
||||||
|
return ClientStream |
||||||
|
} else if !info.IsClientStream && info.IsServerStream { |
||||||
|
return ServerStream |
||||||
|
} |
||||||
|
return BidiStream |
||||||
|
} |
||||||
|
|
||||||
|
// monitoredStream wraps grpc.ServerStream allowing each Sent/Recv of message to increment counters.
|
||||||
|
type monitoredServerStream struct { |
||||||
|
grpc.ServerStream |
||||||
|
monitor *serverReporter |
||||||
|
} |
||||||
|
|
||||||
|
func (s *monitoredServerStream) SendMsg(m interface{}) error { |
||||||
|
err := s.ServerStream.SendMsg(m) |
||||||
|
if err == nil { |
||||||
|
s.monitor.SentMessage() |
||||||
|
} |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
func (s *monitoredServerStream) RecvMsg(m interface{}) error { |
||||||
|
err := s.ServerStream.RecvMsg(m) |
||||||
|
if err == nil { |
||||||
|
s.monitor.ReceivedMessage() |
||||||
|
} |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// preRegisterMethod is invoked on Register of a Server, allowing all gRPC services labels to be pre-populated.
|
||||||
|
func preRegisterMethod(metrics *ServerMetrics, serviceName string, mInfo *grpc.MethodInfo) { |
||||||
|
methodName := mInfo.Name |
||||||
|
methodType := string(typeFromMethodInfo(mInfo)) |
||||||
|
// These are just references (no increments), as just referencing will create the labels but not set values.
|
||||||
|
metrics.serverStartedCounter.GetMetricWithLabelValues(methodType, serviceName, methodName) |
||||||
|
metrics.serverStreamMsgReceived.GetMetricWithLabelValues(methodType, serviceName, methodName) |
||||||
|
metrics.serverStreamMsgSent.GetMetricWithLabelValues(methodType, serviceName, methodName) |
||||||
|
if metrics.serverHandledHistogramEnabled { |
||||||
|
metrics.serverHandledHistogram.GetMetricWithLabelValues(methodType, serviceName, methodName) |
||||||
|
} |
||||||
|
for _, code := range allCodes { |
||||||
|
metrics.serverHandledCounter.GetMetricWithLabelValues(methodType, serviceName, methodName, code.String()) |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,46 @@ |
|||||||
|
// Copyright 2016 Michal Witkowski. All Rights Reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
|
||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
"time" |
||||||
|
|
||||||
|
"google.golang.org/grpc/codes" |
||||||
|
) |
||||||
|
|
||||||
|
type serverReporter struct { |
||||||
|
metrics *ServerMetrics |
||||||
|
rpcType grpcType |
||||||
|
serviceName string |
||||||
|
methodName string |
||||||
|
startTime time.Time |
||||||
|
} |
||||||
|
|
||||||
|
func newServerReporter(m *ServerMetrics, rpcType grpcType, fullMethod string) *serverReporter { |
||||||
|
r := &serverReporter{ |
||||||
|
metrics: m, |
||||||
|
rpcType: rpcType, |
||||||
|
} |
||||||
|
if r.metrics.serverHandledHistogramEnabled { |
||||||
|
r.startTime = time.Now() |
||||||
|
} |
||||||
|
r.serviceName, r.methodName = splitMethodName(fullMethod) |
||||||
|
r.metrics.serverStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
return r |
||||||
|
} |
||||||
|
|
||||||
|
func (r *serverReporter) ReceivedMessage() { |
||||||
|
r.metrics.serverStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
} |
||||||
|
|
||||||
|
func (r *serverReporter) SentMessage() { |
||||||
|
r.metrics.serverStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() |
||||||
|
} |
||||||
|
|
||||||
|
func (r *serverReporter) Handled(code codes.Code) { |
||||||
|
r.metrics.serverHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() |
||||||
|
if r.metrics.serverHandledHistogramEnabled { |
||||||
|
r.metrics.serverHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,50 @@ |
|||||||
|
// Copyright 2016 Michal Witkowski. All Rights Reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
|
||||||
|
package grpc_prometheus |
||||||
|
|
||||||
|
import ( |
||||||
|
"strings" |
||||||
|
|
||||||
|
"google.golang.org/grpc" |
||||||
|
"google.golang.org/grpc/codes" |
||||||
|
) |
||||||
|
|
||||||
|
type grpcType string |
||||||
|
|
||||||
|
const ( |
||||||
|
Unary grpcType = "unary" |
||||||
|
ClientStream grpcType = "client_stream" |
||||||
|
ServerStream grpcType = "server_stream" |
||||||
|
BidiStream grpcType = "bidi_stream" |
||||||
|
) |
||||||
|
|
||||||
|
var ( |
||||||
|
allCodes = []codes.Code{ |
||||||
|
codes.OK, codes.Canceled, codes.Unknown, codes.InvalidArgument, codes.DeadlineExceeded, codes.NotFound, |
||||||
|
codes.AlreadyExists, codes.PermissionDenied, codes.Unauthenticated, codes.ResourceExhausted, |
||||||
|
codes.FailedPrecondition, codes.Aborted, codes.OutOfRange, codes.Unimplemented, codes.Internal, |
||||||
|
codes.Unavailable, codes.DataLoss, |
||||||
|
} |
||||||
|
) |
||||||
|
|
||||||
|
// splitMethodName splits a full gRPC method name of the form
// "/service/method" into its service and method parts. One leading slash is
// dropped and the split happens at the first remaining slash, so the method
// part may itself contain slashes. When no slash remains, both results are
// "unknown".
func splitMethodName(fullMethodName string) (string, string) {
	trimmed := strings.TrimPrefix(fullMethodName, "/")
	parts := strings.SplitN(trimmed, "/", 2)
	if len(parts) != 2 {
		return "unknown", "unknown"
	}
	return parts[0], parts[1]
}
||||||
|
|
||||||
|
func typeFromMethodInfo(mInfo *grpc.MethodInfo) grpcType { |
||||||
|
if !mInfo.IsClientStream && !mInfo.IsServerStream { |
||||||
|
return Unary |
||||||
|
} |
||||||
|
if mInfo.IsClientStream && !mInfo.IsServerStream { |
||||||
|
return ClientStream |
||||||
|
} |
||||||
|
if !mInfo.IsClientStream && mInfo.IsServerStream { |
||||||
|
return ServerStream |
||||||
|
} |
||||||
|
return BidiStream |
||||||
|
} |
||||||
Loading…
Reference in new issue