Fix issue where queries can fail or omit OOO samples if OOO head compaction occurs between creating a querier and reading chunks (#13115)
* Add failing test. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Don't run OOO head garbage collection while reads are running. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Add further test cases for different order of operations. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Ensure all queriers are closed if `DB.blockChunkQuerierForRange()` fails. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Ensure all queriers are closed if `DB.Querier()` fails. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Invert error handling in `DB.Querier()` and `DB.blockChunkQuerierForRange()` to make it clearer Signed-off-by: Charles Korn <charles.korn@grafana.com> * Ensure that queries that touch OOO data can't block OOO head garbage collection forever. Signed-off-by: Charles Korn <charles.korn@grafana.com> * Address PR feedback: fix parameter name in comment Co-authored-by: Jesus Vazquez <jesusvazquez@users.noreply.github.com> Signed-off-by: Charles Korn <charleskorn@users.noreply.github.com> * Address PR feedback: use `lastGarbageCollectedMmapRef` Signed-off-by: Charles Korn <charles.korn@grafana.com> * Address PR feedback: ensure pending reads are cleaned up if creating an OOO querier fails Signed-off-by: Charles Korn <charles.korn@grafana.com> --------- Signed-off-by: Charles Korn <charles.korn@grafana.com> Signed-off-by: Charles Korn <charleskorn@users.noreply.github.com> Co-authored-by: Jesus Vazquez <jesusvazquez@users.noreply.github.com>pull/13184/head
parent
35a15e8f04
commit
59844498f7
@ -0,0 +1,79 @@ |
||||
// Copyright 2023 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package tsdb |
||||
|
||||
import ( |
||||
"container/list" |
||||
"sync" |
||||
|
||||
"github.com/prometheus/prometheus/tsdb/chunks" |
||||
) |
||||
|
||||
type oooIsolation struct { |
||||
mtx sync.RWMutex |
||||
openReads *list.List |
||||
} |
||||
|
||||
type oooIsolationState struct { |
||||
i *oooIsolation |
||||
e *list.Element |
||||
|
||||
minRef chunks.ChunkDiskMapperRef |
||||
} |
||||
|
||||
func newOOOIsolation() *oooIsolation { |
||||
return &oooIsolation{ |
||||
openReads: list.New(), |
||||
} |
||||
} |
||||
|
||||
// HasOpenReadsAtOrBefore returns true if this oooIsolation is aware of any reads that use
|
||||
// chunks with reference at or before ref.
|
||||
func (i *oooIsolation) HasOpenReadsAtOrBefore(ref chunks.ChunkDiskMapperRef) bool { |
||||
i.mtx.RLock() |
||||
defer i.mtx.RUnlock() |
||||
|
||||
for e := i.openReads.Front(); e != nil; e = e.Next() { |
||||
s := e.Value.(*oooIsolationState) |
||||
|
||||
if ref.GreaterThan(s.minRef) { |
||||
return true |
||||
} |
||||
} |
||||
|
||||
return false |
||||
} |
||||
|
||||
// TrackReadAfter records a read that uses chunks with reference after minRef.
|
||||
//
|
||||
// The caller must ensure that the returned oooIsolationState is eventually closed when
|
||||
// the read is complete.
|
||||
func (i *oooIsolation) TrackReadAfter(minRef chunks.ChunkDiskMapperRef) *oooIsolationState { |
||||
s := &oooIsolationState{ |
||||
i: i, |
||||
minRef: minRef, |
||||
} |
||||
|
||||
i.mtx.Lock() |
||||
s.e = i.openReads.PushBack(s) |
||||
i.mtx.Unlock() |
||||
|
||||
return s |
||||
} |
||||
|
||||
func (s oooIsolationState) Close() { |
||||
s.i.mtx.Lock() |
||||
s.i.openReads.Remove(s.e) |
||||
s.i.mtx.Unlock() |
||||
} |
||||
@ -0,0 +1,60 @@ |
||||
// Copyright 2023 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package tsdb |
||||
|
||||
import ( |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/require" |
||||
) |
||||
|
||||
func TestOOOIsolation(t *testing.T) { |
||||
i := newOOOIsolation() |
||||
|
||||
// Empty state shouldn't have any open reads.
|
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(2)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(3)) |
||||
|
||||
// Add a read.
|
||||
read1 := i.TrackReadAfter(1) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.True(t, i.HasOpenReadsAtOrBefore(2)) |
||||
|
||||
// Add another overlapping read.
|
||||
read2 := i.TrackReadAfter(0) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.True(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.True(t, i.HasOpenReadsAtOrBefore(2)) |
||||
|
||||
// Close the second read, should now only report open reads for the first read's ref.
|
||||
read2.Close() |
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.True(t, i.HasOpenReadsAtOrBefore(2)) |
||||
|
||||
// Close the second read again: this should do nothing and ensures we can safely call Close() multiple times.
|
||||
read2.Close() |
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.True(t, i.HasOpenReadsAtOrBefore(2)) |
||||
|
||||
// Closing the first read should indicate no further open reads.
|
||||
read1.Close() |
||||
require.False(t, i.HasOpenReadsAtOrBefore(0)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(1)) |
||||
require.False(t, i.HasOpenReadsAtOrBefore(2)) |
||||
} |
||||
Loading…
Reference in new issue