Update heap iterator to allow for entries with duplicate timestamps.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
pull/299/head
Tom Wilkie 7 years ago committed by Tom Wilkie
parent 26f3b52096
commit db36b3c74e
  1. 69
      pkg/iter/iterator.go
  2. 35
      pkg/iter/iterator_test.go

@ -5,6 +5,7 @@ import (
"fmt"
"io"
"regexp"
"sort"
"time"
"github.com/grafana/loki/pkg/helpers"
@ -95,8 +96,9 @@ type heapIterator struct {
heap.Interface
Peek() EntryIterator
}
curr EntryIterator
errs []error
currEntry logproto.Entry
currLabels string
errs []error
}
// NewHeapIterator returns a new iterator which uses a heap to merge together
@ -114,14 +116,14 @@ func NewHeapIterator(is []EntryIterator, direction logproto.Direction) EntryIter
// pre-next each iterator, drop empty.
for _, i := range is {
result.requeue(i)
result.requeue(i, false)
}
return result
}
func (i *heapIterator) requeue(ei EntryIterator) {
if ei.Next() {
func (i *heapIterator) requeue(ei EntryIterator, advanced bool) {
if advanced || ei.Next() {
heap.Push(i.heap, ei)
return
}
@ -133,38 +135,67 @@ func (i *heapIterator) requeue(ei EntryIterator) {
}
func (i *heapIterator) Next() bool {
if i.curr != nil {
i.requeue(i.curr)
}
if i.heap.Len() == 0 {
return false
}
i.curr = heap.Pop(i.heap).(EntryIterator)
currEntry := i.curr.Entry()
// keep popping entries off if they match, to dedupe
// We support multiple entries with the same timestamp, and we want to
// preserve their original order. We look at all the top entries in the
// heap with the same timestamp, and pop the ones whose common value
// occurs most often.
type tuple struct {
logproto.Entry
EntryIterator
}
tuples := make([]tuple, 0, i.heap.Len())
for i.heap.Len() > 0 {
next := i.heap.Peek()
nextEntry := next.Entry()
if !currEntry.Equal(nextEntry) {
entry := next.Entry()
if len(tuples) > 0 && !tuples[0].Timestamp.Equal(entry.Timestamp) {
break
}
next = heap.Pop(i.heap).(EntryIterator)
i.requeue(next)
heap.Pop(i.heap)
tuples = append(tuples, tuple{
Entry: entry,
EntryIterator: next,
})
}
// Find the entry which occurs most often; due to quorum-based
// replication, it is guaranteed to be the correct next entry.
sort.Slice(tuples, func(i, j int) bool {
return tuples[i].Line < tuples[j].Line
})
i.currEntry = tuples[0].Entry
count, max := 1, 1
for j := 1; j < len(tuples); j++ {
if tuples[j].Equal(tuples[j-1]) {
count++
continue
}
if count > max {
i.currEntry = tuples[j-1].Entry
max = count
}
count++
}
// Requeue the iterators, advancing only those whose entry was the
// one picked; the others are pushed back without calling Next.
for j := range tuples {
i.requeue(tuples[j].EntryIterator, tuples[j].Line != i.currEntry.Line)
}
return true
}
func (i *heapIterator) Entry() logproto.Entry {
return i.curr.Entry()
return i.currEntry
}
func (i *heapIterator) Labels() string {
return i.curr.Labels()
return i.currLabels
}
func (i *heapIterator) Error() error {

@ -34,9 +34,9 @@ func TestIterator(t *testing.T) {
// Test dedupe of overlapping iterators with the heap iterator.
{
iterator: NewHeapIterator([]EntryIterator{
mkStreamIterator(testSize, offset(0)),
mkStreamIterator(testSize, offset(testSize/2)),
mkStreamIterator(testSize, offset(testSize)),
mkStreamIterator(testSize, offset(0, identity)),
mkStreamIterator(testSize, offset(testSize/2, identity)),
mkStreamIterator(testSize, offset(testSize, identity)),
}, logproto.FORWARD),
generator: identity,
length: 2 * testSize,
@ -45,13 +45,24 @@ func TestIterator(t *testing.T) {
// Test dedupe of overlapping iterators with the heap iterator (backward).
{
iterator: NewHeapIterator([]EntryIterator{
mkStreamIterator(testSize, inverse(offset(0))),
mkStreamIterator(testSize, inverse(offset(-testSize/2))),
mkStreamIterator(testSize, inverse(offset(-testSize))),
mkStreamIterator(testSize, inverse(offset(0, identity))),
mkStreamIterator(testSize, inverse(offset(-testSize/2, identity))),
mkStreamIterator(testSize, inverse(offset(-testSize, identity))),
}, logproto.BACKWARD),
generator: inverse(identity),
length: 2 * testSize,
},
// Test dedupe of entries with the same timestamp but different lines.
{
iterator: NewHeapIterator([]EntryIterator{
mkStreamIterator(testSize, offset(0, constant(0))),
mkStreamIterator(testSize, offset(0, constant(0))),
mkStreamIterator(testSize, offset(testSize, constant(0))),
}, logproto.FORWARD),
generator: constant(0),
length: 2 * testSize,
},
} {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
for i := int64(0); i < tc.length; i++ {
@ -85,11 +96,17 @@ func identity(i int64) logproto.Entry {
}
}
func offset(j int64) generator {
func offset(j int64, g generator) generator {
return func(i int64) logproto.Entry {
return g(i + j)
}
}
func constant(t int64) generator {
return func(i int64) logproto.Entry {
return logproto.Entry{
Timestamp: time.Unix(i+j, 0),
Line: fmt.Sprintf("%d", i+j),
Timestamp: time.Unix(t, 0),
Line: fmt.Sprintf("%d", i),
}
}
}

Loading…
Cancel
Save