Fix `increment_duplicate_timestamps` for multiple dupped timestamps (#6924)

pull/6938/head
Dylan Guedes 3 years ago committed by GitHub
parent 72d8627a55
commit 2d7ef07494
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 14
      pkg/distributor/distributor.go
  2. 27
      pkg/distributor/distributor_test.go
  3. 2
      pkg/distributor/validator.go

@ -205,6 +205,7 @@ type pushTracker struct {
}
// Push a set of streams.
// The returned error is the last one seen.
func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*logproto.PushResponse, error) {
userID, err := tenant.TenantID(ctx)
if err != nil {
@ -260,12 +261,12 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
// If configured for this tenant, increment duplicate timestamps. Note that this is imperfect:
// because Loki accepts out-of-order writes, it does not account for separate
// pushes with overlapping time ranges that contain entries with duplicate timestamps.
if validationContext.incrementDuplicateTimestamps && n != 0 && stream.Entries[n-1].Timestamp.Equal(entry.Timestamp) {
if validationContext.incrementDuplicateTimestamps && n != 0 {
// Traditional logic for Loki is that 2 lines with the same timestamp and
// exact same content will be de-duplicated, (i.e. only one will be stored, others dropped)
// To maintain this behavior, only increment the timestamp if the log content is different
if stream.Entries[n-1].Line != entry.Line {
stream.Entries[n].Timestamp = entry.Timestamp.Add(1 * time.Nanosecond)
stream.Entries[n].Timestamp = maxT(entry.Timestamp, stream.Entries[n-1].Timestamp.Add(1*time.Nanosecond))
}
}
@ -338,6 +339,15 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
}
}
// maxT picks the later of the two timestamps t1 and t2.
// When t2 is not strictly after t1 (including when both denote the same
// instant), t1 is returned.
func maxT(t1, t2 time.Time) time.Time {
	later := t1
	if t2.After(t1) {
		later = t2
	}
	return later
}
func (d *Distributor) truncateLines(vContext validationContext, stream *logproto.Stream) {
if !vContext.maxLineSizeTruncate {
return

@ -265,6 +265,33 @@ func Test_IncrementTimestamp(t *testing.T) {
},
},
},
"incrementing enabled, multiple repeated-timestamps": {
limits: incrementingEnabled,
push: &logproto.PushRequest{
Streams: []logproto.Stream{
{
Labels: "{job=\"foo\"}",
Entries: []logproto.Entry{
{Timestamp: time.Unix(123456, 0), Line: "heyooooooo"},
{Timestamp: time.Unix(123456, 0), Line: "hi"},
{Timestamp: time.Unix(123456, 0), Line: "hey there"},
},
},
},
},
expectedPush: &logproto.PushRequest{
Streams: []logproto.Stream{
{
Labels: "{job=\"foo\"}",
Entries: []logproto.Entry{
{Timestamp: time.Unix(123456, 0), Line: "heyooooooo"},
{Timestamp: time.Unix(123456, 1), Line: "hi"},
{Timestamp: time.Unix(123456, 2), Line: "hey there"},
},
},
},
},
},
"incrementing enabled, multiple subsequent increments": {
limits: incrementingEnabled,
push: &logproto.PushRequest{

@ -60,7 +60,7 @@ func (v Validator) getValidationContextForTime(now time.Time, userID string) val
}
}
// ValidateEntry returns an error if the entry is invalid
// ValidateEntry returns an error if the entry is invalid and reports metrics for invalid entries accordingly.
func (v Validator) ValidateEntry(ctx validationContext, labels string, entry logproto.Entry) error {
ts := entry.Timestamp.UnixNano()
validation.LineLengthHist.Observe(float64(len(entry.Line)))

Loading…
Cancel
Save