pull/3581/merge
Shirly Radco 2 weeks ago committed by GitHub
commit 107afb5e41
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 27
      README.md
  2. 143
      collector/dmmultipath_linux.go
  3. 151
      collector/dmmultipath_linux_test.go
  4. 168
      collector/fixtures/sys.ttar

@ -192,6 +192,7 @@ buddyinfo | Exposes statistics of memory fragments as reported by /proc/buddyinf
cgroups | A summary of the number of active and enabled cgroups | Linux
cpu\_vulnerabilities | Exposes CPU vulnerability information from sysfs. | Linux
devstat | Exposes device statistics | Dragonfly, FreeBSD
dmmultipath | Exposes DM-multipath device and path metrics from `/sys/block/dm-*`. | Linux
drm | Expose GPU metrics using sysfs / DRM, `amdgpu` is the only driver which exposes this information through DRM | Linux
drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4) | Linux
ethtool | Exposes network interface information and network driver statistics equivalent to `ethtool`, `ethtool -S`, and `ethtool -i`. | Linux
@ -339,6 +340,32 @@ echo 'role{role="application_server"} 1' > /path/to/directory/role.prom.$$
mv /path/to/directory/role.prom.$$ /path/to/directory/role.prom
```
### DM-Multipath Collector
The `dmmultipath` collector reads `/sys/block/dm-*` to discover Device Mapper
multipath devices and expose path health metrics. It identifies multipath
devices by checking that `dm/uuid` starts with `mpath-`, which distinguishes
them from LVM or other DM device types.
No special permissions are required — the collector reads only world-readable
sysfs attributes.
Enable it with `--collector.dmmultipath`.
#### Exposed metrics
| Metric | Type | Description |
|--------|------|-------------|
| `node_dmmultipath_device_info` | Gauge | Info metric with `device`, `sysfs_name`, and `uuid` (contains WWID for PV correlation). |
| `node_dmmultipath_device_active` | Gauge | Whether the DM device is active (1) or suspended (0). Labels: `device`, `sysfs_name`. |
| `node_dmmultipath_device_size_bytes` | Gauge | Size of the DM device in bytes. Labels: `device`, `sysfs_name`. |
| `node_dmmultipath_device_paths` | Gauge | Total number of paths for the multipath device, regardless of state. Labels: `device`, `sysfs_name`. |
| `node_dmmultipath_device_paths_active` | Gauge | Number of paths in active state (SCSI `running` or NVMe `live`). Labels: `device`, `sysfs_name`. |
| `node_dmmultipath_device_paths_failed` | Gauge | Number of paths not in active state. Labels: `device`, `sysfs_name`. |
| `node_dmmultipath_path_state` | Gauge | Reports the underlying device state for each path. The value is always 1; the state itself is carried in the `state` label. Labels: `device`, `path`, `state`. |
The `sysfs_name` label (e.g. `dm-0`) matches the `device` label in `node_disk_*` metrics, enabling direct correlation between multipath health and I/O statistics without recording rules.
### Filtering enabled collectors
The `node_exporter` will expose all metrics from enabled collectors by default. This is the recommended way to collect metrics to avoid errors when comparing metrics of different families.

@ -0,0 +1,143 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !nodmmultipath
package collector
import (
"errors"
"fmt"
"log/slog"
"os"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/blockdevice"
)
// isPathActive reports whether state names a healthy, usable path.
// Only the exact strings "running" (SCSI) and "live" (NVMe) qualify;
// every other value, including the empty string, is treated as not active.
func isPathActive(state string) bool {
	switch state {
	case "running", "live":
		return true
	default:
		return false
	}
}
// dmMultipathCollector exposes Device Mapper multipath metrics. It holds the
// blockdevice filesystem handle used to enumerate devices, a logger, and one
// metric descriptor per exported metric family.
type dmMultipathCollector struct {
// fs is the handle queried for DM-multipath devices on every Update.
fs blockdevice.FS
// logger receives debug output when devices cannot be read.
logger *slog.Logger
// deviceInfo describes the info metric (labels: device, sysfs_name, uuid).
deviceInfo *prometheus.Desc
// deviceActive describes the active (1) / suspended (0) gauge.
deviceActive *prometheus.Desc
// deviceSizeBytes describes the device size gauge.
deviceSizeBytes *prometheus.Desc
// devicePaths describes the total path count gauge.
devicePaths *prometheus.Desc
// devicePathsActive describes the count of paths for which isPathActive is true.
devicePathsActive *prometheus.Desc
// devicePathsFailed describes the count of paths for which isPathActive is false.
devicePathsFailed *prometheus.Desc
// pathState describes the per-path state metric (labels: device, path, state).
pathState *prometheus.Desc
}
// init registers the collector under the name "dmmultipath" with the
// defaultDisabled setting and NewDMMultipathCollector as its constructor.
func init() {
registerCollector("dmmultipath", defaultDisabled, NewDMMultipathCollector)
}
// NewDMMultipathCollector builds the Collector that exposes Device Mapper
// multipath device metrics from /sys/block/dm-*.
//
// It opens the blockdevice filesystem rooted at the configured proc and sys
// paths and prepares one descriptor per metric family. An error is returned
// only when the filesystem handle cannot be created.
func NewDMMultipathCollector(logger *slog.Logger) (Collector, error) {
	const subsystem = "dmmultipath"

	fs, err := blockdevice.NewFS(*procPath, *sysPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", err)
	}

	// newDesc builds a descriptor in this collector's namespace/subsystem
	// with the given variable labels and no constant labels.
	newDesc := func(name, help string, labels ...string) *prometheus.Desc {
		return prometheus.NewDesc(
			prometheus.BuildFQName(namespace, subsystem, name),
			help,
			labels, nil,
		)
	}
	// perDevice is newDesc with the label set shared by all device-level gauges.
	perDevice := func(name, help string) *prometheus.Desc {
		return newDesc(name, help, "device", "sysfs_name")
	}

	collector := &dmMultipathCollector{
		fs:     fs,
		logger: logger,
	}
	collector.deviceInfo = newDesc(
		"device_info",
		"Non-numeric information about a DM-multipath device.",
		"device", "sysfs_name", "uuid",
	)
	collector.deviceActive = perDevice(
		"device_active",
		"Whether the multipath device-mapper device is active (1) or suspended (0).",
	)
	collector.deviceSizeBytes = perDevice(
		"device_size_bytes",
		"Size of the multipath device in bytes, read from /sys/block/<dm>/size.",
	)
	collector.devicePaths = perDevice(
		"device_paths",
		"Number of paths for a multipath device.",
	)
	collector.devicePathsActive = perDevice(
		"device_paths_active",
		"Number of paths in active state (SCSI running or NVMe live) for a multipath device.",
	)
	collector.devicePathsFailed = perDevice(
		"device_paths_failed",
		"Number of paths not in active state for a multipath device.",
	)
	collector.pathState = newDesc(
		"path_state",
		"Reports the underlying device state for a multipath path, as read from /sys/block/<dev>/device/state.",
		"device", "path", "state",
	)
	return collector, nil
}
// Update enumerates the DM-multipath devices and sends their metrics on ch.
//
// For each device it emits the info metric, the active/suspended gauge and
// the size gauge, then one path_state sample per path, and finally the
// total, active and failed path counts. A path counts as active when
// isPathActive accepts its state; every other path counts as failed.
//
// ErrNoData is returned when the device scan fails with a not-exist or
// permission error; any other scan error is wrapped and returned.
func (c *dmMultipathCollector) Update(ch chan<- prometheus.Metric) error {
	mpaths, err := c.fs.DMMultipathDevices()
	switch {
	case err == nil:
		// Scan succeeded; fall through to metric emission.
	case errors.Is(err, os.ErrNotExist), errors.Is(err, os.ErrPermission):
		c.logger.Debug("Could not read DM-multipath devices", "err", err)
		return ErrNoData
	default:
		return fmt.Errorf("failed to scan DM-multipath devices: %w", err)
	}

	// gauge sends a single constant gauge sample on ch.
	gauge := func(desc *prometheus.Desc, value float64, labelValues ...string) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, value, labelValues...)
	}

	for _, mp := range mpaths {
		gauge(c.deviceInfo, 1, mp.Name, mp.SysfsName, mp.UUID)

		isActive := 1.0
		if mp.Suspended {
			isActive = 0.0
		}
		gauge(c.deviceActive, isActive, mp.Name, mp.SysfsName)
		gauge(c.deviceSizeBytes, float64(mp.SizeBytes), mp.Name, mp.SysfsName)

		healthy := 0
		for _, path := range mp.Paths {
			if isPathActive(path.State) {
				healthy++
			}
			gauge(c.pathState, 1, mp.Name, path.Device, path.State)
		}

		// Every path is either active or failed, so failed = total - active.
		total := len(mp.Paths)
		gauge(c.devicePaths, float64(total), mp.Name, mp.SysfsName)
		gauge(c.devicePathsActive, float64(healthy), mp.Name, mp.SysfsName)
		gauge(c.devicePathsFailed, float64(total-healthy), mp.Name, mp.SysfsName)
	}
	return nil
}

@ -0,0 +1,151 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !nodmmultipath
package collector
import (
"io"
"log/slog"
"strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
// TestDMMultipathMetrics runs the collector against the sysfs fixtures and
// checks the device-level gauges and a sample of per-path state metrics.
func TestDMMultipathMetrics(t *testing.T) {
	// procPath and sysPath are package-level flags shared by every test in
	// the package; restore them so this test does not leak its settings.
	origProc, origSys := *procPath, *sysPath
	t.Cleanup(func() {
		*procPath, *sysPath = origProc, origSys
	})
	*procPath = "fixtures/proc"
	*sysPath = "fixtures/sys"

	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
	coll, err := NewDMMultipathCollector(logger)
	if err != nil {
		t.Fatal(err)
	}
	c := coll.(*dmMultipathCollector)

	// Update writes synchronously, so the buffer must be large enough to
	// hold every metric produced from the fixtures.
	ch := make(chan prometheus.Metric, 200)
	if err := c.Update(ch); err != nil {
		t.Fatal(err)
	}
	close(ch)

	// Group the collected samples by their descriptor string.
	metrics := make(map[string][]*dto.Metric)
	for m := range ch {
		d := &dto.Metric{}
		if err := m.Write(d); err != nil {
			t.Fatal(err)
		}
		desc := m.Desc().String()
		metrics[desc] = append(metrics[desc], d)
	}

	mpathA := labelMap{"device": "mpathA", "sysfs_name": "dm-5"}
	mpathB := labelMap{"device": "mpathB", "sysfs_name": "dm-6"}

	checks := []struct {
		metric string
		labels labelMap
		want   float64
	}{
		{"device_active", mpathA, 1},
		{"device_active", mpathB, 1},
		{"device_size_bytes", mpathA, 53687091200},
		// The trailing quote keeps the substring match from also hitting
		// device_paths_active / device_paths_failed (presumably the
		// descriptor string quotes the metric name).
		{`device_paths"`, mpathA, 4},
		{`device_paths"`, mpathB, 2},
		// mpathA: sdi, sdj, sdk are running; sdl is offline → 3 active, 1 failed.
		{"device_paths_active", mpathA, 3},
		{"device_paths_failed", mpathA, 1},
		// mpathB: sdm, sdn are both running → 2 active, 0 failed.
		{"device_paths_active", mpathB, 2},
		{"device_paths_failed", mpathB, 0},
		{"path_state", labelMap{"device": "mpathA", "path": "sdi", "state": "running"}, 1},
		{"path_state", labelMap{"device": "mpathA", "path": "sdl", "state": "offline"}, 1},
	}
	for _, tc := range checks {
		assertGaugeValue(t, metrics, tc.metric, tc.labels, tc.want)
	}
}
// TestDMMultipathNoDevices points the collector at an empty sysfs root and
// expects Update to report ErrNoData.
func TestDMMultipathNoDevices(t *testing.T) {
	// procPath and sysPath are package-level flags. Without restoring them,
	// sysPath would keep pointing at a temp directory that is removed when
	// this test finishes, affecting any later test in the package.
	origProc, origSys := *procPath, *sysPath
	t.Cleanup(func() {
		*procPath, *sysPath = origProc, origSys
	})
	*procPath = "fixtures/proc"
	*sysPath = t.TempDir()

	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
	coll, err := NewDMMultipathCollector(logger)
	if err != nil {
		t.Fatal(err)
	}
	c := coll.(*dmMultipathCollector)

	ch := make(chan prometheus.Metric, 200)
	err = c.Update(ch)
	close(ch)
	if err != ErrNoData {
		t.Fatalf("expected ErrNoData, got %v", err)
	}
}
// TestIsPathActive checks that only the "running" and "live" states are
// classified as active.
func TestIsPathActive(t *testing.T) {
	expectations := map[string]bool{
		"running":           true,
		"live":              true,
		"offline":           false,
		"blocked":           false,
		"transport-offline": false,
		"dead":              false,
		"unknown":           false,
		"":                  false,
	}
	for state, want := range expectations {
		if got := isPathActive(state); got != want {
			t.Errorf("isPathActive(%q) = %v, want %v", state, got, want)
		}
	}
}
// labelMap maps label names to the label values a test expects on a metric.
type labelMap map[string]string
// assertGaugeValue looks for a gauge whose descriptor string contains
// metricSubstring and whose labels satisfy matchLabels for labels, then
// compares its value with expected. Only the first matching sample is
// checked. An error is reported on t when the value differs or when no
// sample matches.
func assertGaugeValue(t *testing.T, metrics map[string][]*dto.Metric, metricSubstring string, labels labelMap, expected float64) {
	t.Helper()
	for desc, samples := range metrics {
		if strings.Contains(desc, metricSubstring) {
			for _, sample := range samples {
				if !matchLabels(sample.GetLabel(), labels) {
					continue
				}
				if got := sample.GetGauge().GetValue(); got != expected {
					t.Errorf("%s%v: got %v, want %v", metricSubstring, labels, got, expected)
				}
				return
			}
		}
	}
	t.Errorf("metric %s%v not found", metricSubstring, labels)
}
// matchLabels reports whether pairs carries every name/value entry of want.
// Labels in pairs that are absent from want are ignored. A nil want is
// special-cased: it matches only a metric with no labels at all.
func matchLabels(pairs []*dto.LabelPair, want labelMap) bool {
	if want == nil {
		return len(pairs) == 0
	}
	// Count down from the number of expected labels; zero means all matched.
	remaining := len(want)
	for _, pair := range pairs {
		expected, wanted := want[pair.GetName()]
		if wanted && expected == pair.GetValue() {
			remaining--
		}
	}
	return remaining == 0
}

@ -826,6 +826,174 @@ Lines: 1
none
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/dm
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-5/dm/name
Lines: 1
mpathAEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-5/dm/suspended
Lines: 1
0EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-5/dm/uuid
Lines: 1
mpath-3600508b1001c1234567890abcdef1234EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-5/size
Lines: 1
104857600EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/slaves
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/slaves/sdi
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/slaves/sdj
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/slaves/sdk
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-5/slaves/sdl
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-6
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-6/dm
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-6/dm/name
Lines: 1
mpathBEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-6/dm/suspended
Lines: 1
0EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-6/dm/uuid
Lines: 1
mpath-3600508b1001cabcdef4567890123456EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-6/size
Lines: 1
209715200EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-6/slaves
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-6/slaves/sdm
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-6/slaves/sdn
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-7
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/dm-7/dm
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-7/dm/name
Lines: 1
vg0-rootEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-7/dm/suspended
Lines: 1
0EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-7/dm/uuid
Lines: 1
LVM-abcdef1234567890abcdef1234567890EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/dm-7/size
Lines: 1
41943040EOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdi
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdi/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdi/device/state
Lines: 1
runningEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdj
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdj/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdj/device/state
Lines: 1
runningEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdk
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdk/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdk/device/state
Lines: 1
runningEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdl
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdl/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdl/device/state
Lines: 1
offlineEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdm
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdm/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdm/device/state
Lines: 1
runningEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdn
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sdn/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sdn/device/state
Lines: 1
runningEOF
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Loading…
Cancel
Save