|
|
|
|
@ -17,6 +17,7 @@ |
|
|
|
|
package collector |
|
|
|
|
|
|
|
|
|
import ( |
|
|
|
|
"errors" |
|
|
|
|
"fmt" |
|
|
|
|
"log/slog" |
|
|
|
|
"os" |
|
|
|
|
@ -26,15 +27,17 @@ import ( |
|
|
|
|
"strconv" |
|
|
|
|
"sync" |
|
|
|
|
|
|
|
|
|
"golang.org/x/exp/maps" |
|
|
|
|
|
|
|
|
|
"github.com/alecthomas/kingpin/v2" |
|
|
|
|
"github.com/prometheus/client_golang/prometheus" |
|
|
|
|
"github.com/prometheus/procfs" |
|
|
|
|
"github.com/prometheus/procfs/sysfs" |
|
|
|
|
"golang.org/x/exp/maps" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
type cpuCollector struct { |
|
|
|
|
fs procfs.FS |
|
|
|
|
procfs procfs.FS |
|
|
|
|
sysfs sysfs.FS |
|
|
|
|
cpu *prometheus.Desc |
|
|
|
|
cpuInfo *prometheus.Desc |
|
|
|
|
cpuFrequencyHz *prometheus.Desc |
|
|
|
|
@ -45,6 +48,7 @@ type cpuCollector struct { |
|
|
|
|
cpuPackageThrottle *prometheus.Desc |
|
|
|
|
cpuIsolated *prometheus.Desc |
|
|
|
|
logger *slog.Logger |
|
|
|
|
cpuOnline *prometheus.Desc |
|
|
|
|
cpuStats map[int64]procfs.CPUStat |
|
|
|
|
cpuStatsMutex sync.Mutex |
|
|
|
|
isolatedCpus []uint16 |
|
|
|
|
@ -70,17 +74,17 @@ func init() { |
|
|
|
|
|
|
|
|
|
// NewCPUCollector returns a new Collector exposing kernel/system statistics.
|
|
|
|
|
func NewCPUCollector(logger *slog.Logger) (Collector, error) { |
|
|
|
|
fs, err := procfs.NewFS(*procPath) |
|
|
|
|
pfs, err := procfs.NewFS(*procPath) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("failed to open procfs: %w", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
sysfs, err := sysfs.NewFS(*sysPath) |
|
|
|
|
sfs, err := sysfs.NewFS(*sysPath) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("failed to open sysfs: %w", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
isolcpus, err := sysfs.IsolatedCPUs() |
|
|
|
|
isolcpus, err := sfs.IsolatedCPUs() |
|
|
|
|
if err != nil { |
|
|
|
|
if !os.IsNotExist(err) { |
|
|
|
|
return nil, fmt.Errorf("Unable to get isolated cpus: %w", err) |
|
|
|
|
@ -89,8 +93,9 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
c := &cpuCollector{ |
|
|
|
|
fs: fs, |
|
|
|
|
cpu: nodeCPUSecondsDesc, |
|
|
|
|
procfs: pfs, |
|
|
|
|
sysfs: sfs, |
|
|
|
|
cpu: nodeCPUSecondsDesc, |
|
|
|
|
cpuInfo: prometheus.NewDesc( |
|
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), |
|
|
|
|
"CPU information from /proc/cpuinfo.", |
|
|
|
|
@ -131,6 +136,11 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { |
|
|
|
|
"Whether each core is isolated, information from /sys/devices/system/cpu/isolated.", |
|
|
|
|
[]string{"cpu"}, nil, |
|
|
|
|
), |
|
|
|
|
cpuOnline: prometheus.NewDesc( |
|
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "online"), |
|
|
|
|
"CPUs that are online and being scheduled.", |
|
|
|
|
[]string{"cpu"}, nil, |
|
|
|
|
), |
|
|
|
|
logger: logger, |
|
|
|
|
isolatedCpus: isolcpus, |
|
|
|
|
cpuStats: make(map[int64]procfs.CPUStat), |
|
|
|
|
@ -177,12 +187,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { |
|
|
|
|
if c.isolatedCpus != nil { |
|
|
|
|
c.updateIsolated(ch) |
|
|
|
|
} |
|
|
|
|
return c.updateThermalThrottle(ch) |
|
|
|
|
err := c.updateThermalThrottle(ch) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
err = c.updateOnline(ch) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// updateInfo reads /proc/cpuinfo
|
|
|
|
|
func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error { |
|
|
|
|
info, err := c.fs.CPUInfo() |
|
|
|
|
info, err := c.procfs.CPUInfo() |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
@ -333,9 +352,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics.
|
|
|
|
|
func (c *cpuCollector) updateOnline(ch chan<- prometheus.Metric) error { |
|
|
|
|
cpus, err := c.sysfs.CPUs() |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
// No-op if the system does not support CPU online stats.
|
|
|
|
|
cpu0 := cpus[0] |
|
|
|
|
if _, err := cpu0.Online(); err != nil && errors.Is(err, os.ErrNotExist) { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
for _, cpu := range cpus { |
|
|
|
|
setOnline := float64(0) |
|
|
|
|
if online, _ := cpu.Online(); online { |
|
|
|
|
setOnline = 1 |
|
|
|
|
} |
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuOnline, prometheus.GaugeValue, setOnline, cpu.Number()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// updateStat reads /proc/stat through procfs and exports CPU-related metrics.
|
|
|
|
|
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { |
|
|
|
|
stats, err := c.fs.Stat() |
|
|
|
|
stats, err := c.procfs.Stat() |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|