loki/clients/pkg/logentry/metric/metricvec.go

package metric
import (
"strings"
"sync"
"time"
"github.com/grafana/loki/v3/pkg/util"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
)
// Expirable allows checking if something has exceeded the provided maxAge based on the provided currentTime
type Expirable interface {
	HasExpired(currentTimeSec int64, maxAgeSec int64) bool
}
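
// The concrete metric types built on top of this vector are expected to satisfy
// Expirable by remembering when they were last written. A minimal sketch of such a
// type (illustrative only, not part of this file; the embedded prometheus.Counter and
// the lastModSec bookkeeping are assumptions):
//
//	type expiringCounter struct {
//		prometheus.Counter
//		lastModSec int64 // unix time of the last write, updated by the wrapper
//	}
//
//	// HasExpired reports whether the counter has been idle for at least maxAgeSec.
//	func (e *expiringCounter) HasExpired(currentTimeSec int64, maxAgeSec int64) bool {
//		return currentTimeSec-e.lastModSec >= maxAgeSec
//	}
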
type metricVec struct {
	factory   func(labels map[string]string) prometheus.Metric
	mtx       sync.Mutex
	metrics   map[model.Fingerprint]prometheus.Metric
	maxAgeSec int64
}
func newMetricVec(factory func(labels map[string]string) prometheus.Metric, maxAgeSec int64) *metricVec {
	return &metricVec{
		metrics:   map[model.Fingerprint]prometheus.Metric{},
		factory:   factory,
		maxAgeSec: maxAgeSec,
	}
}
// Describe implements prometheus.Collector and doesn't declare any metrics on purpose to bypass prometheus validation.
// see https://godoc.org/github.com/prometheus/client_golang/prometheus#hdr-Custom_Collectors_and_constant_Metrics search for "unchecked"
func (c *metricVec) Describe(_ chan<- *prometheus.Desc) {}
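
// Because Describe sends no descriptors, a metricVec is treated by the prometheus
// registry as an "unchecked" collector: registration performs no consistency checks,
// which is what lets the set of labelled series vary from scrape to scrape. A
// registration sketch (factory and the 300s max age are hypothetical, not part of
// this file):
//
//	vec := newMetricVec(factory, 300)
//	prometheus.MustRegister(vec) // accepted even though no Desc was declared up front
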
// Collect implements prometheus.Collector
func (c *metricVec) Collect(ch chan<- prometheus.Metric) {
	c.mtx.Lock()
	defer c.mtx.Unlock()
	for _, m := range c.metrics {
		ch <- m
	}
	c.prune()
}
// With returns the metric associated with the labelset.
func (c *metricVec) With(labels model.LabelSet) prometheus.Metric {
	c.mtx.Lock()
	defer c.mtx.Unlock()
	fp := labels.Fingerprint()
	var ok bool
	var metric prometheus.Metric
	if metric, ok = c.metrics[fp]; !ok {
		metric = c.factory(util.ModelLabelSetToMap(cleanLabels(labels)))
		c.metrics[fp] = metric
	}
	return metric
}
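
// A usage sketch for the vector as a whole (the metric name and labels are example
// values taken from the #7216 pipeline; in practice the factory comes from the
// concrete expirable metric types in this package):
//
//	vec := newMetricVec(func(labels map[string]string) prometheus.Metric {
//		return prometheus.NewCounter(prometheus.CounterOpts{
//			Name:        "promtail_custom_loki_count",
//			Help:        "should count all entries",
//			ConstLabels: labels,
//		})
//	}, 300)
//	vec.With(model.LabelSet{"good_label": "1"}).(prometheus.Counter).Inc()
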
// cleanLabels removes labels whose name is not a valid prometheus label name, or that carry the reserved `__` prefix.
// Dropping them before the metric is created keeps the promtail /metrics endpoint from failing when an internal
// label such as __tenant_id__ (injected by the tenant pipeline stage) reaches a metrics stage (see PR #7216).
func cleanLabels(set model.LabelSet) model.LabelSet {
	out := make(model.LabelSet, len(set))
	for k, v := range set {
		// Performing the same label validity check the prometheus go client library does.
		// https://github.com/prometheus/client_golang/blob/618194de6ad3db637313666104533639011b470d/prometheus/labels.go#L85
		if !k.IsValid() || strings.HasPrefix(string(k), "__") {
			continue
		}
		out[k] = v
	}
	return out
}
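
// For example, in the pipeline from PR #7216 the tenant stage injects the internal
// __tenant_id__ label before the metrics stage runs; cleanLabels drops it so the
// extracted metric only carries valid, user-facing labels (a sketch, not part of
// this file):
//
//	in := model.LabelSet{"good_label": "1", "__tenant_id__": "2"}
//	out := cleanLabels(in) // model.LabelSet{"good_label": "1"}
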
// Delete removes the metric associated with the labelset, reporting whether it was present.
func (c *metricVec) Delete(labels model.LabelSet) bool {
	c.mtx.Lock()
	defer c.mtx.Unlock()
	fp := labels.Fingerprint()
	_, ok := c.metrics[fp]
	if ok {
		delete(c.metrics, fp)
	}
	return ok
}
// prune removes all metrics that implement the Expirable interface and have expired.
// It does not take the lock on the metrics map, so the caller must hold it.
func (c *metricVec) prune() {
	currentTimeSec := time.Now().Unix()
	for fp, m := range c.metrics {
		if em, ok := m.(Expirable); ok {
			if em.HasExpired(currentTimeSec, c.maxAgeSec) {
				delete(c.metrics, fp)
			}
		}
	}
}
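
// Note that expiry is scrape-driven: nothing prunes in the background, so an idle
// series is only removed by the next Collect (i.e. the next /metrics scrape) after it
// expires, and that scrape still emits the stale sample once before pruning it. A
// rough lifecycle sketch (newExpiringCounter is a hypothetical factory matching the
// expiringCounter sketch near the top of this file):
//
//	vec := newMetricVec(newExpiringCounter, 300)
//	vec.With(model.LabelSet{"job": "a"}) // series created on first use
//	// ...no writes for more than 300 seconds...
//	// the next scrape's Collect emits the stale sample, then prunes it from the map.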