From 6269f7502a80558da5c6ea2f40ef0c0261664321 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Mon, 5 Dec 2016 11:37:35 +0100 Subject: [PATCH 1/7] Add a collector for DRBD. This collector exposes most of the useful information that can be found in /proc/drbd. Sizes are normalised to be in bytes, as /proc/drbd uses kibibytes. --- README.md | 1 + collector/drbd_linux.go | 211 ++++++++++++++++++++++++++++++ collector/fixtures/e2e-output.txt | 47 +++++++ collector/fixtures/proc/drbd | 5 + end-to-end-test.sh | 1 + 5 files changed, 265 insertions(+) create mode 100644 collector/drbd_linux.go create mode 100644 collector/fixtures/proc/drbd diff --git a/README.md b/README.md index 507851c1..69b5d8f4 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ Name | Description | OS ---------|-------------|---- bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux devstat | Exposes device statistics | Dragonfly, FreeBSD +drbd | Exposes Distributed Replicated Block Device statistics | Linux gmond | Exposes statistics from Ganglia. | _any_ interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD ipvs | Exposes IPVS status from `/proc/net/ip_vs` and stats from `/proc/net/ip_vs_stats`. | Linux diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go new file mode 100644 index 00000000..37cbe9cd --- /dev/null +++ b/collector/drbd_linux.go @@ -0,0 +1,211 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "bufio" + "fmt" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +// Numerical metric provided by /proc/drbd. +type drbdNumericalMetric struct { + desc *prometheus.Desc + valueType prometheus.ValueType + multiplier float64 +} + +func newDrbdNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric { + return drbdNumericalMetric{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", name), + desc, + []string{"device"}, nil), + valueType: valueType, + multiplier: multiplier, + } +} + +// String pair metric provided by /proc/drbd. +type drbdStringPairMetric struct { + desc *prometheus.Desc + valueOkay string +} + +func (metric *drbdStringPairMetric) isOkay(value string) float64 { + if value == metric.valueOkay { + return 1 + } + return 0 +} + +func newDrbdStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric { + return drbdStringPairMetric{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", name), + desc, + []string{"device", "node"}, nil), + valueOkay: valueOkay, + } +} + +var ( + drbdNumericalMetrics = map[string]drbdNumericalMetric{ + "ns": newDrbdNumericalMetric( + "network_sent_bytes", + "Volume of net data sent to the partner via the network connection.", + prometheus.CounterValue, + 1024), + "nr": newDrbdNumericalMetric( + "network_received_bytes", + "Volume of net data received by the partner via the network connection.", + prometheus.CounterValue, + 1), + "dw": newDrbdNumericalMetric( + "disk_written_bytes", + "Net data written on local hard disk.", + prometheus.CounterValue, + 1024), + "dr": newDrbdNumericalMetric( + "disk_read_bytes", + "Net data read from local hard disk.", + prometheus.CounterValue, + 1024), + "al": newDrbdNumericalMetric( + "activitylog_writes", + "Number of updates of the activity log area of the meta data.", + prometheus.CounterValue, + 1), + "bm": newDrbdNumericalMetric( + "bitmap_writes", + "Number of updates of the bitmap area of the meta data.", + prometheus.CounterValue, + 1), + "lo": newDrbdNumericalMetric( + "local_pending", + "Number of open requests to the local I/O sub-system.", + prometheus.GaugeValue, + 1), + "pe": newDrbdNumericalMetric( + "remote_pending", + "Number of requests sent to the partner, but that have not yet been answered by the latter.", + prometheus.GaugeValue, + 1), + "ua": newDrbdNumericalMetric( + "remote_unacknowledged", + "Number of requests received by the partner via the network connection, but that have not yet been answered.", + prometheus.GaugeValue, + 1), + "ap": newDrbdNumericalMetric( + "application_pending", + "Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.", + prometheus.GaugeValue, + 1), + "ep": newDrbdNumericalMetric( + "epochs", + "Number of Epochs currently on the fly.", + prometheus.GaugeValue, + 1), + "oos": newDrbdNumericalMetric( + "out_of_sync_bytes", + "Amount of data known to be out of sync.", + prometheus.GaugeValue, + 1024), + } + drbdStringPairMetrics = map[string]drbdStringPairMetric{ + "ro": newDrbdStringPairMetric( + "node_role_is_primary", + "Whether the role of the node is in the primary state.", + "Primary"), + "ds": newDrbdStringPairMetric( + "disk_state_is_up_to_date", + "Whether the disk of the node is up to date.", + "UpToDate"), + } + + drbdConnected = prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", "connected"), + "Whether DRBD is connected to the partner.", + []string{"device"}, nil) +) + +type drbdCollector struct{} + +func init() { + Factories["drbd"] = NewDrbdCollector +} + +func NewDrbdCollector() (Collector, error) { + return &drbdCollector{}, nil +} + +func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { + statsFile := procFilePath("drbd") + f, err := os.Open(statsFile) + if err != nil { + if os.IsNotExist(err) { + log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile) + return nil + } + return err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + scanner.Split(bufio.ScanWords) + device := "unknown" + for scanner.Scan() { + field := scanner.Text() + if kv := strings.Split(field, ":"); len(kv) == 2 { + if id, err := strconv.ParseUint(kv[0], 10, 64); err == nil && kv[1] == "" { + device = fmt.Sprintf("drbd%d", id) + } else if metric, ok := drbdNumericalMetrics[kv[0]]; ok { + // Numerical value. + value, _ := strconv.ParseFloat(kv[1], 64) + ch <- prometheus.MustNewConstMetric( + metric.desc, metric.valueType, + value*metric.multiplier, device) + } else if metric, ok := drbdStringPairMetrics[kv[0]]; ok { + // String pair value. + values := strings.Split(kv[1], "/") + ch <- prometheus.MustNewConstMetric( + metric.desc, prometheus.GaugeValue, + metric.isOkay(values[0]), device, "local") + ch <- prometheus.MustNewConstMetric( + metric.desc, prometheus.GaugeValue, + metric.isOkay(values[1]), device, "remote") + } else if kv[0] == "cs" { + // Connection state. + var connected float64 = 0 + if kv[1] == "Connected" { + connected = 1 + } + ch <- prometheus.MustNewConstMetric( + drbdConnected, prometheus.GaugeValue, + connected, device) + + } else { + log.Infof("Don't know how to process key-value pair [%s: %s]", kv[0], kv[1]) + } + } else { + log.Infof("Don't know how to process string %s", field) + } + } + return nil +} diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index baf8f972..b7bf9641 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -375,6 +375,53 @@ node_disk_writes_merged{device="nvme0n1"} 43950 node_disk_writes_merged{device="sda"} 1.1134226e+07 node_disk_writes_merged{device="sr0"} 0 node_disk_writes_merged{device="vda"} 2.0711856e+07 +# HELP node_drbd_activitylog_writes Number of updates of the activity log area of the meta data. +# TYPE node_drbd_activitylog_writes counter +node_drbd_activitylog_writes{device="drbd1"} 1100 +# HELP node_drbd_application_pending Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD. +# TYPE node_drbd_application_pending gauge +node_drbd_application_pending{device="drbd1"} 12348 +# HELP node_drbd_bitmap_writes Number of updates of the bitmap area of the meta data. +# TYPE node_drbd_bitmap_writes counter +node_drbd_bitmap_writes{device="drbd1"} 221 +# HELP node_drbd_connected Whether DRBD is connected to the partner. +# TYPE node_drbd_connected gauge +node_drbd_connected{device="drbd1"} 1 +# HELP node_drbd_disk_read_bytes Net data read from local hard disk. +# TYPE node_drbd_disk_read_bytes counter +node_drbd_disk_read_bytes{device="drbd1"} 1.2154539008e+11 +# HELP node_drbd_disk_state_is_up_to_date Whether the disk of the node is up to date. +# TYPE node_drbd_disk_state_is_up_to_date gauge +node_drbd_disk_state_is_up_to_date{device="drbd1",node="local"} 1 +node_drbd_disk_state_is_up_to_date{device="drbd1",node="remote"} 1 +# HELP node_drbd_disk_written_bytes Net data written on local hard disk. +# TYPE node_drbd_disk_written_bytes counter +node_drbd_disk_written_bytes{device="drbd1"} 2.8941845504e+10 +# HELP node_drbd_epochs Number of Epochs currently on the fly. +# TYPE node_drbd_epochs gauge +node_drbd_epochs{device="drbd1"} 1 +# HELP node_drbd_local_pending Number of open requests to the local I/O sub-system. +# TYPE node_drbd_local_pending gauge +node_drbd_local_pending{device="drbd1"} 12345 +# HELP node_drbd_network_received_bytes Volume of net data received by the partner via the network connection. +# TYPE node_drbd_network_received_bytes counter +node_drbd_network_received_bytes{device="drbd1"} 1.0961011e+07 +# HELP node_drbd_network_sent_bytes Volume of net data sent to the partner via the network connection. +# TYPE node_drbd_network_sent_bytes counter +node_drbd_network_sent_bytes{device="drbd1"} 1.7740228608e+10 +# HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state. +# TYPE node_drbd_node_role_is_primary gauge +node_drbd_node_role_is_primary{device="drbd1",node="local"} 1 +node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1 +# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync. +# TYPE node_drbd_out_of_sync_bytes gauge +node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07 +# HELP node_drbd_remote_pending Number of requests sent to the partner, but that have not yet been answered by the latter. +# TYPE node_drbd_remote_pending gauge +node_drbd_remote_pending{device="drbd1"} 12346 +# HELP node_drbd_remote_unacknowledged Number of requests received by the partner via the network connection, but that have not yet been answered. +# TYPE node_drbd_remote_unacknowledged gauge +node_drbd_remote_unacknowledged{device="drbd1"} 12347 # HELP node_entropy_available_bits Bits of available entropy. # TYPE node_entropy_available_bits gauge node_entropy_available_bits 1337 diff --git a/collector/fixtures/proc/drbd b/collector/fixtures/proc/drbd new file mode 100644 index 00000000..77d16440 --- /dev/null +++ b/collector/fixtures/proc/drbd @@ -0,0 +1,5 @@ +version: 8.4.3 (api:1/proto:86-101) +srcversion: 1A9F77B1CA5FF92235C2213 + + 1: cs:Connected ro:Primary/Primary ds:UpToDate/UpToDate C r----- + ns:17324442 nr:10961011 dw:28263521 dr:118696670 al:1100 bm:221 lo:12345 pe:12346 ua:12347 ap:12348 ep:1 wo:d oos:12349 diff --git a/end-to-end-test.sh b/end-to-end-test.sh index ce6351e9..ecdab863 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -5,6 +5,7 @@ set -euf -o pipefail collectors=$(cat << COLLECTORS conntrack diskstats + drbd entropy filefd hwmon From 8c720baa58f55a3335089b1dc51af4cd5841dcf8 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Sun, 11 Dec 2016 11:59:13 +0100 Subject: [PATCH 2/7] Add myself to the AUTHORS file. --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 06e6bb39..d3c923ec 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -16,6 +16,7 @@ The following individuals have contributed code to this repository * Björn Rabenstein * Brian Brazil * Daniel Speichert +* Ed Schouten * Eric Ripa * Fabian Reinartz * Franklin Wise From 6ff620e387527363048c98e59b9db488c15b8d80 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Fri, 16 Dec 2016 11:36:36 +0100 Subject: [PATCH 3/7] Properly propagate parse errors. --- collector/drbd_linux.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index 37cbe9cd..a0a2684f 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -177,7 +177,10 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { device = fmt.Sprintf("drbd%d", id) } else if metric, ok := drbdNumericalMetrics[kv[0]]; ok { // Numerical value. - value, _ := strconv.ParseFloat(kv[1], 64) + value, err := strconv.ParseFloat(kv[1], 64) + if err != nil { + return err + } ch <- prometheus.MustNewConstMetric( metric.desc, metric.valueType, value*metric.multiplier, device) @@ -199,7 +202,6 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { ch <- prometheus.MustNewConstMetric( drbdConnected, prometheus.GaugeValue, connected, device) - } else { log.Infof("Don't know how to process key-value pair [%s: %s]", kv[0], kv[1]) } @@ -207,5 +209,5 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { log.Infof("Don't know how to process string %s", field) } } - return nil + return scanner.Err() } From d1fa279105a638b0f0f9b41f94e012fe109d8498 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Fri, 16 Dec 2016 11:45:14 +0100 Subject: [PATCH 4/7] Use a descriptive name for the file descriptor. --- collector/drbd_linux.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index a0a2684f..f51186fc 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -157,7 +157,7 @@ func NewDrbdCollector() (Collector, error) { func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { statsFile := procFilePath("drbd") - f, err := os.Open(statsFile) + file, err := os.Open(statsFile) if err != nil { if os.IsNotExist(err) { log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile) @@ -165,9 +165,9 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { } return err } - defer f.Close() + defer file.Close() - scanner := bufio.NewScanner(f) + scanner := bufio.NewScanner(file) scanner.Split(bufio.ScanWords) device := "unknown" for scanner.Scan() { From b7daf276789ad75a9dbb17b36e8e6370868ab237 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Thu, 22 Dec 2016 13:57:19 +0100 Subject: [PATCH 5/7] Process feedback from the code review. - Use the right number of printf() arguments. Use %q where it makes sense. - Use "DRBD" instead of "Drbd", per Go's style guide. - Add _total suffixes to counter metrics. - Mention the unit (bytes) in documentation strings once more. --- collector/drbd_linux.go | 66 +++++++++++++++---------------- collector/fixtures/e2e-output.txt | 38 +++++++++--------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index f51186fc..d51bcec9 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -31,7 +31,7 @@ type drbdNumericalMetric struct { multiplier float64 } -func newDrbdNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric { +func newDRBDNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric { return drbdNumericalMetric{ desc: prometheus.NewDesc( prometheus.BuildFQName(Namespace, "drbd", name), @@ -55,7 +55,7 @@ func (metric *drbdStringPairMetric) isOkay(value string) float64 { return 0 } -func newDrbdStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric { +func newDRBDStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric { return drbdStringPairMetric{ desc: prometheus.NewDesc( prometheus.BuildFQName(Namespace, "drbd", name), @@ -67,73 +67,73 @@ func newDrbdStringPairMetric(name string, desc string, valueOkay string) drbdStr var ( drbdNumericalMetrics = map[string]drbdNumericalMetric{ - "ns": newDrbdNumericalMetric( - "network_sent_bytes", - "Volume of net data sent to the partner via the network connection.", + "ns": newDRBDNumericalMetric( + "network_sent_bytes_total", + "Volume of net data sent to the partner via the network connection; in bytes.", prometheus.CounterValue, 1024), - "nr": newDrbdNumericalMetric( - "network_received_bytes", - "Volume of net data received by the partner via the network connection.", + "nr": newDRBDNumericalMetric( + "network_received_bytes_total", + "Volume of net data received by the partner via the network connection; in bytes.", prometheus.CounterValue, 1), - "dw": newDrbdNumericalMetric( - "disk_written_bytes", - "Net data written on local hard disk.", + "dw": newDRBDNumericalMetric( + "disk_written_bytes_total", + "Net data written on local hard disk; in bytes.", prometheus.CounterValue, 1024), - "dr": newDrbdNumericalMetric( - "disk_read_bytes", - "Net data read from local hard disk.", + "dr": newDRBDNumericalMetric( + "disk_read_bytes_total", + "Net data read from local hard disk; in bytes.", prometheus.CounterValue, 1024), - "al": newDrbdNumericalMetric( - "activitylog_writes", + "al": newDRBDNumericalMetric( + "activitylog_writes_total", "Number of updates of the activity log area of the meta data.", prometheus.CounterValue, 1), - "bm": newDrbdNumericalMetric( - "bitmap_writes", + "bm": newDRBDNumericalMetric( + "bitmap_writes_total", "Number of updates of the bitmap area of the meta data.", prometheus.CounterValue, 1), - "lo": newDrbdNumericalMetric( + "lo": newDRBDNumericalMetric( "local_pending", "Number of open requests to the local I/O sub-system.", prometheus.GaugeValue, 1), - "pe": newDrbdNumericalMetric( + "pe": newDRBDNumericalMetric( "remote_pending", "Number of requests sent to the partner, but that have not yet been answered by the latter.", prometheus.GaugeValue, 1), - "ua": newDrbdNumericalMetric( + "ua": newDRBDNumericalMetric( "remote_unacknowledged", "Number of requests received by the partner via the network connection, but that have not yet been answered.", prometheus.GaugeValue, 1), - "ap": newDrbdNumericalMetric( + "ap": newDRBDNumericalMetric( "application_pending", "Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.", prometheus.GaugeValue, 1), - "ep": newDrbdNumericalMetric( + "ep": newDRBDNumericalMetric( "epochs", "Number of Epochs currently on the fly.", prometheus.GaugeValue, 1), - "oos": newDrbdNumericalMetric( + "oos": newDRBDNumericalMetric( "out_of_sync_bytes", - "Amount of data known to be out of sync.", + "Amount of data known to be out of sync; in bytes.", prometheus.GaugeValue, 1024), } drbdStringPairMetrics = map[string]drbdStringPairMetric{ - "ro": newDrbdStringPairMetric( + "ro": newDRBDStringPairMetric( "node_role_is_primary", "Whether the role of the node is in the primary state.", "Primary"), - "ds": newDrbdStringPairMetric( + "ds": newDRBDStringPairMetric( "disk_state_is_up_to_date", "Whether the disk of the node is up to date.", "UpToDate"), @@ -148,10 +148,10 @@ var ( type drbdCollector struct{} func init() { - Factories["drbd"] = NewDrbdCollector + Factories["drbd"] = newDRBDCollector } -func NewDrbdCollector() (Collector, error) { +func newDRBDCollector() (Collector, error) { return &drbdCollector{}, nil } @@ -160,7 +160,7 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { file, err := os.Open(statsFile) if err != nil { if os.IsNotExist(err) { - log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile) + log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile, err) return nil } return err @@ -195,7 +195,7 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { metric.isOkay(values[1]), device, "remote") } else if kv[0] == "cs" { // Connection state. - var connected float64 = 0 + var connected float64 if kv[1] == "Connected" { connected = 1 } @@ -203,10 +203,10 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { drbdConnected, prometheus.GaugeValue, connected, device) } else { - log.Infof("Don't know how to process key-value pair [%s: %s]", kv[0], kv[1]) + log.Infof("Don't know how to process key-value pair [%s: %q]", kv[0], kv[1]) } } else { - log.Infof("Don't know how to process string %s", field) + log.Infof("Don't know how to process string %q", field) } } return scanner.Err() diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index b7bf9641..7ba713b3 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -375,45 +375,45 @@ node_disk_writes_merged{device="nvme0n1"} 43950 node_disk_writes_merged{device="sda"} 1.1134226e+07 node_disk_writes_merged{device="sr0"} 0 node_disk_writes_merged{device="vda"} 2.0711856e+07 -# HELP node_drbd_activitylog_writes Number of updates of the activity log area of the meta data. -# TYPE node_drbd_activitylog_writes counter -node_drbd_activitylog_writes{device="drbd1"} 1100 +# HELP node_drbd_activitylog_writes_total Number of updates of the activity log area of the meta data. +# TYPE node_drbd_activitylog_writes_total counter +node_drbd_activitylog_writes_total{device="drbd1"} 1100 # HELP node_drbd_application_pending Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD. # TYPE node_drbd_application_pending gauge node_drbd_application_pending{device="drbd1"} 12348 -# HELP node_drbd_bitmap_writes Number of updates of the bitmap area of the meta data. -# TYPE node_drbd_bitmap_writes counter -node_drbd_bitmap_writes{device="drbd1"} 221 +# HELP node_drbd_bitmap_writes_total Number of updates of the bitmap area of the meta data. +# TYPE node_drbd_bitmap_writes_total counter +node_drbd_bitmap_writes_total{device="drbd1"} 221 # HELP node_drbd_connected Whether DRBD is connected to the partner. # TYPE node_drbd_connected gauge node_drbd_connected{device="drbd1"} 1 -# HELP node_drbd_disk_read_bytes Net data read from local hard disk. -# TYPE node_drbd_disk_read_bytes counter -node_drbd_disk_read_bytes{device="drbd1"} 1.2154539008e+11 +# HELP node_drbd_disk_read_bytes_total Net data read from local hard disk; in bytes. +# TYPE node_drbd_disk_read_bytes_total counter +node_drbd_disk_read_bytes_total{device="drbd1"} 1.2154539008e+11 # HELP node_drbd_disk_state_is_up_to_date Whether the disk of the node is up to date. # TYPE node_drbd_disk_state_is_up_to_date gauge node_drbd_disk_state_is_up_to_date{device="drbd1",node="local"} 1 node_drbd_disk_state_is_up_to_date{device="drbd1",node="remote"} 1 -# HELP node_drbd_disk_written_bytes Net data written on local hard disk. -# TYPE node_drbd_disk_written_bytes counter -node_drbd_disk_written_bytes{device="drbd1"} 2.8941845504e+10 +# HELP node_drbd_disk_written_bytes_total Net data written on local hard disk; in bytes. +# TYPE node_drbd_disk_written_bytes_total counter +node_drbd_disk_written_bytes_total{device="drbd1"} 2.8941845504e+10 # HELP node_drbd_epochs Number of Epochs currently on the fly. # TYPE node_drbd_epochs gauge node_drbd_epochs{device="drbd1"} 1 # HELP node_drbd_local_pending Number of open requests to the local I/O sub-system. # TYPE node_drbd_local_pending gauge node_drbd_local_pending{device="drbd1"} 12345 -# HELP node_drbd_network_received_bytes Volume of net data received by the partner via the network connection. -# TYPE node_drbd_network_received_bytes counter -node_drbd_network_received_bytes{device="drbd1"} 1.0961011e+07 -# HELP node_drbd_network_sent_bytes Volume of net data sent to the partner via the network connection. -# TYPE node_drbd_network_sent_bytes counter -node_drbd_network_sent_bytes{device="drbd1"} 1.7740228608e+10 +# HELP node_drbd_network_received_bytes_total Volume of net data received by the partner via the network connection; in bytes. +# TYPE node_drbd_network_received_bytes_total counter +node_drbd_network_received_bytes_total{device="drbd1"} 1.0961011e+07 +# HELP node_drbd_network_sent_bytes_total Volume of net data sent to the partner via the network connection; in bytes. +# TYPE node_drbd_network_sent_bytes_total counter +node_drbd_network_sent_bytes_total{device="drbd1"} 1.7740228608e+10 # HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state. # TYPE node_drbd_node_role_is_primary gauge node_drbd_node_role_is_primary{device="drbd1",node="local"} 1 node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1 -# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync. +# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync; in bytes. # TYPE node_drbd_out_of_sync_bytes gauge node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07 # HELP node_drbd_remote_pending Number of requests sent to the partner, but that have not yet been answered by the latter. From 4adf7fa96cee17741fdda415f29d19b3f4690f35 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Fri, 23 Dec 2016 15:55:49 +0100 Subject: [PATCH 6/7] Improve the help strings, as proposed in the code review. --- collector/drbd_linux.go | 10 +++++----- collector/fixtures/e2e-output.txt | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index d51bcec9..3c2e948e 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -69,12 +69,12 @@ var ( drbdNumericalMetrics = map[string]drbdNumericalMetric{ "ns": newDRBDNumericalMetric( "network_sent_bytes_total", - "Volume of net data sent to the partner via the network connection; in bytes.", + "Total number of bytes sent via the network.", prometheus.CounterValue, 1024), "nr": newDRBDNumericalMetric( "network_received_bytes_total", - "Volume of net data received by the partner via the network connection; in bytes.", + "Total number of bytes received via the network.", prometheus.CounterValue, 1), "dw": newDRBDNumericalMetric( @@ -104,12 +104,12 @@ var ( 1), "pe": newDRBDNumericalMetric( "remote_pending", - "Number of requests sent to the partner, but that have not yet been answered by the latter.", + "Number of requests sent to the peer, but that have not yet been answered by the latter.", prometheus.GaugeValue, 1), "ua": newDRBDNumericalMetric( "remote_unacknowledged", - "Number of requests received by the partner via the network connection, but that have not yet been answered.", + "Number of requests received by the peer via the network connection, but that have not yet been answered.", prometheus.GaugeValue, 1), "ap": newDRBDNumericalMetric( @@ -141,7 +141,7 @@ var ( drbdConnected = prometheus.NewDesc( prometheus.BuildFQName(Namespace, "drbd", "connected"), - "Whether DRBD is connected to the partner.", + "Whether DRBD is connected to the peer.", []string{"device"}, nil) ) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 7ba713b3..dfd24de9 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -384,7 +384,7 @@ node_drbd_application_pending{device="drbd1"} 12348 # HELP node_drbd_bitmap_writes_total Number of updates of the bitmap area of the meta data. # TYPE node_drbd_bitmap_writes_total counter node_drbd_bitmap_writes_total{device="drbd1"} 221 -# HELP node_drbd_connected Whether DRBD is connected to the partner. +# HELP node_drbd_connected Whether DRBD is connected to the peer. # TYPE node_drbd_connected gauge node_drbd_connected{device="drbd1"} 1 # HELP node_drbd_disk_read_bytes_total Net data read from local hard disk; in bytes. @@ -403,10 +403,10 @@ node_drbd_epochs{device="drbd1"} 1 # HELP node_drbd_local_pending Number of open requests to the local I/O sub-system. # TYPE node_drbd_local_pending gauge node_drbd_local_pending{device="drbd1"} 12345 -# HELP node_drbd_network_received_bytes_total Volume of net data received by the partner via the network connection; in bytes. +# HELP node_drbd_network_received_bytes_total Total number of bytes received via the network. # TYPE node_drbd_network_received_bytes_total counter node_drbd_network_received_bytes_total{device="drbd1"} 1.0961011e+07 -# HELP node_drbd_network_sent_bytes_total Volume of net data sent to the partner via the network connection; in bytes. +# HELP node_drbd_network_sent_bytes_total Total number of bytes sent via the network. # TYPE node_drbd_network_sent_bytes_total counter node_drbd_network_sent_bytes_total{device="drbd1"} 1.7740228608e+10 # HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state. @@ -416,10 +416,10 @@ node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1 # HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync; in bytes. # TYPE node_drbd_out_of_sync_bytes gauge node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07 -# HELP node_drbd_remote_pending Number of requests sent to the partner, but that have not yet been answered by the latter. +# HELP node_drbd_remote_pending Number of requests sent to the peer, but that have not yet been answered by the latter. # TYPE node_drbd_remote_pending gauge node_drbd_remote_pending{device="drbd1"} 12346 -# HELP node_drbd_remote_unacknowledged Number of requests received by the partner via the network connection, but that have not yet been answered. +# HELP node_drbd_remote_unacknowledged Number of requests received by the peer via the network connection, but that have not yet been answered. # TYPE node_drbd_remote_unacknowledged gauge node_drbd_remote_unacknowledged{device="drbd1"} 12347 # HELP node_entropy_available_bits Bits of available entropy. From b0d15eaac689e4922202cc6919c3a301384f535d Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Fri, 23 Dec 2016 15:57:46 +0100 Subject: [PATCH 7/7] Reduce the severity of these messages. They get printed all the time, as there are some tokens in the /proc file that we simply don't support. It's better to keep these as debugging messages, which may come in useful if new tags start to appear. --- collector/drbd_linux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index 3c2e948e..d5f4cd5e 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -203,10 +203,10 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { drbdConnected, prometheus.GaugeValue, connected, device) } else { - log.Infof("Don't know how to process key-value pair [%s: %q]", kv[0], kv[1]) + log.Debugf("Don't know how to process key-value pair [%s: %q]", kv[0], kv[1]) } } else { - log.Infof("Don't know how to process string %q", field) + log.Debugf("Don't know how to process string %q", field) } } return scanner.Err()