Add metrics for gcplog scrape. (#4235)

* Add metrics for gcplog scrape.

Also fix the Ready() method of target

* Fix typo with help message
pull/4223/head^2
Kaviraj 4 years ago committed by GitHub
parent b0646e7156
commit b36bc5ab32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      clients/pkg/promtail/targets/gcplog/metrics.go
  2. 10
      clients/pkg/promtail/targets/gcplog/target.go

@ -7,8 +7,9 @@ type Metrics struct {
// reg is the Registerer used to create this set of metrics.
reg prometheus.Registerer
gcplogEntries *prometheus.CounterVec
gcplogErrors *prometheus.CounterVec
gcplogEntries *prometheus.CounterVec
gcplogErrors *prometheus.CounterVec
gcplogTargetLastSuccessScrape *prometheus.GaugeVec
}
// NewMetrics creates a new set of metrics. Metrics will be registered to reg.
@ -28,6 +29,12 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
Help: "Total number of parsing errors while receiving gcplog messages",
}, []string{"project"})
m.gcplogTargetLastSuccessScrape = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "promtail",
Name: "gcplog_target_last_success_scrape",
Help: "Timestamp of the specific target's last successful poll",
}, []string{"project", "target"})
reg.MustRegister(m.gcplogEntries, m.gcplogErrors)
return &m
}

@ -108,9 +108,9 @@ func (t *GcplogTarget) run() error {
t.msgs <- m
})
if err != nil {
// TODO(kavi): Add proper error propagation maybe?
level.Error(t.logger).Log("error", err)
level.Error(t.logger).Log("msg", "failed to receive pubsub messages", "error", err)
t.metrics.gcplogErrors.WithLabelValues(t.config.ProjectID).Inc()
t.metrics.gcplogTargetLastSuccessScrape.WithLabelValues(t.config.ProjectID, t.config.Subscription).SetToCurrentTime()
}
}()
@ -138,7 +138,11 @@ func (t *GcplogTarget) Type() target.TargetType {
}
func (t *GcplogTarget) Ready() bool {
return t.ctx.Err() == nil
// Return true just like all other targets.
// The rationale is that gcplog scraping shouldn't stop because of some transient timeout errors.
// Such a transient failure can cause the promtail readiness probe to fail, which may prevent the pod from starting.
// We have metrics now to track if scraping failed (`gcplog_target_last_success_scrape`).
return true
}
func (t *GcplogTarget) DiscoveredLabels() model.LabelSet {

Loading…
Cancel
Save