diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1f6f22e67..992b9c8220 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,7 @@ jobs: - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... + - run: go test -race ./cmd/prometheus - run: GOARCH=386 go test ./... - uses: ./.github/promci/actions/check_proto with: diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 0d0ab56eb4..3083be841c 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "math" + "net/http" "os" "os/exec" "path/filepath" @@ -33,6 +34,7 @@ import ( "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" @@ -41,6 +43,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/rules" + "github.com/prometheus/prometheus/util/testutil" ) func init() { @@ -646,3 +649,106 @@ func TestRwProtoMsgFlagParser(t *testing.T) { }) } } + +func getMetricValue(t *testing.T, body io.Reader, metricType model.MetricType, metricName string) (float64, error) { + t.Helper() + + p := expfmt.TextParser{} + metricFamilies, err := p.TextToMetricFamilies(body) + if err != nil { + return 0, err + } + metricFamily, ok := metricFamilies[metricName] + if !ok { + return 0, errors.New("metric family not found") + } + metric := metricFamily.GetMetric() + if len(metric) != 1 { + return 0, errors.New("metric not found") + } + switch metricType { + case model.MetricTypeGauge: + return metric[0].GetGauge().GetValue(), nil + case model.MetricTypeCounter: + return metric[0].GetCounter().GetValue(), nil + default: + t.Fatalf("metric type %s not supported", metricType) + } + + return 0, errors.New("cannot get value") +} + +// TestHeadCompactionWhileScraping verifies that running a head compaction +// concurrently with a scrape does not trigger the data race described in +// https://github.com/prometheus/prometheus/issues/16490. +func TestHeadCompactionWhileScraping(t *testing.T) { + t.Parallel() + + // To increase the chance of reproducing the data race + for i := range 5 { + t.Run(strconv.Itoa(i), func(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + configFile := filepath.Join(tmpDir, "prometheus.yml") + + port := testutil.RandomUnprivilegedPort(t) + config := fmt.Sprintf(` +scrape_configs: + - job_name: 'self1' + scrape_interval: 61ms + static_configs: + - targets: ['localhost:%d'] + - job_name: 'self2' + scrape_interval: 67ms + static_configs: + - targets: ['localhost:%d'] +`, port, port) + os.WriteFile(configFile, []byte(config), 0o777) + + prom := prometheusCommandWithLogging( + t, + configFile, + port, + fmt.Sprintf("--storage.tsdb.path=%s", tmpDir), + "--storage.tsdb.min-block-duration=100ms", + ) + require.NoError(t, prom.Start()) + + require.Eventually(t, func() bool { + r, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/metrics", port)) + if err != nil { + return false + } + defer r.Body.Close() + if r.StatusCode != http.StatusOK { + return false + } + metrics, err := io.ReadAll(r.Body) + if err != nil { + return false + } + + // Wait for some compactions to run + compactions, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_compactions_total") + if err != nil { + return false + } + if compactions < 3 { + return false + } + + // Sanity check: Some actual scraping was done. + series, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_head_series_created_total") + require.NoError(t, err) + require.NotZero(t, series) + + // No compaction must have failed + failures, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_compactions_failed_total") + require.NoError(t, err) + require.Zero(t, failures) + return true + }, 15*time.Second, 500*time.Millisecond) + }) + } +} diff --git a/cmd/prometheus/reload_test.go b/cmd/prometheus/reload_test.go index 18a7ff2ad1..c59e51b316 100644 --- a/cmd/prometheus/reload_test.go +++ b/cmd/prometheus/reload_test.go @@ -119,7 +119,8 @@ func runTestSteps(t *testing.T, steps []struct { require.NoError(t, os.WriteFile(configFilePath, []byte(steps[0].configText), 0o644), "Failed to write initial config file") port := testutil.RandomUnprivilegedPort(t) - runPrometheusWithLogging(t, configFilePath, port) + prom := prometheusCommandWithLogging(t, configFilePath, port, "--enable-feature=auto-reload-config", "--config.auto-reload-interval=1s") + require.NoError(t, prom.Start()) baseURL := "http://localhost:" + strconv.Itoa(port) require.Eventually(t, func() bool { @@ -197,14 +198,20 @@ func captureLogsToTLog(t *testing.T, r io.Reader) { } } -func runPrometheusWithLogging(t *testing.T, configFilePath string, port int) { +func prometheusCommandWithLogging(t *testing.T, configFilePath string, port int, extraArgs ...string) *exec.Cmd { stdoutPipe, stdoutWriter := io.Pipe() stderrPipe, stderrWriter := io.Pipe() var wg sync.WaitGroup wg.Add(2) - prom := exec.Command(promPath, "-test.main", "--enable-feature=auto-reload-config", "--config.file="+configFilePath, "--config.auto-reload-interval=1s", "--web.listen-address=0.0.0.0:"+strconv.Itoa(port)) + args := []string{ + "-test.main", + "--config.file=" + configFilePath, + "--web.listen-address=0.0.0.0:" + strconv.Itoa(port), + } + args = append(args, extraArgs...) + prom := exec.Command(promPath, args...) prom.Stdout = stdoutWriter prom.Stderr = stderrWriter @@ -224,6 +231,5 @@ func runPrometheusWithLogging(t *testing.T, configFilePath string, port int) { stderrWriter.Close() wg.Wait() }) - - require.NoError(t, prom.Start()) + return prom } diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 602fe5b5ae..be3ec67cac 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -199,7 +199,9 @@ func (p *OpenMetricsParser) Comment() []byte { // Labels writes the labels of the current sample into the passed labels. func (p *OpenMetricsParser) Labels(l *labels.Labels) { - s := yoloString(p.series) + // Defensive copy in case the following keeps a reference. + // See https://github.com/prometheus/prometheus/issues/16490 + s := string(p.series) p.builder.Reset() metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 4ecd93c37b..c4dcb4aee3 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -225,8 +225,9 @@ func (p *PromParser) Comment() []byte { // Labels writes the labels of the current sample into the passed labels. func (p *PromParser) Labels(l *labels.Labels) { - s := yoloString(p.series) - + // Defensive copy in case the following keeps a reference. + // See https://github.com/prometheus/prometheus/issues/16490 + s := string(p.series) p.builder.Reset() metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) p.builder.Add(labels.MetricName, metricName)