mirror of https://github.com/grafana/loki
feat: Compactor deletion manifest builder (#17474)
parent
3ad1a64bc1
commit
47961f802d
@ -0,0 +1,156 @@ |
||||
package deletion |
||||
|
||||
import ( |
||||
"time" |
||||
|
||||
"github.com/go-kit/log/level" |
||||
"github.com/prometheus/common/model" |
||||
"github.com/prometheus/prometheus/model/labels" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/compactor/retention" |
||||
"github.com/grafana/loki/v3/pkg/util/filter" |
||||
util_log "github.com/grafana/loki/v3/pkg/util/log" |
||||
) |
||||
|
||||
// deleteRequestBatch holds a batch of requests loaded for processing
type deleteRequestBatch struct {
	// deleteRequestsToProcess groups the loaded requests by user ID; each entry
	// also tracks the union interval covered by that user's requests.
	deleteRequestsToProcess map[string]*userDeleteRequests
	// duplicateRequests collects requests found by checkDuplicate to duplicate
	// one of the requests already loaded for processing.
	duplicateRequests []DeleteRequest
	// count is the total number of requests added via addDeleteRequest.
	count   int
	metrics *deleteRequestsManagerMetrics
}
||||
|
||||
func newDeleteRequestBatch(metrics *deleteRequestsManagerMetrics) *deleteRequestBatch { |
||||
return &deleteRequestBatch{ |
||||
deleteRequestsToProcess: map[string]*userDeleteRequests{}, |
||||
metrics: metrics, |
||||
} |
||||
} |
||||
|
||||
func (b *deleteRequestBatch) reset() { |
||||
b.deleteRequestsToProcess = map[string]*userDeleteRequests{} |
||||
b.duplicateRequests = []DeleteRequest{} |
||||
b.count = 0 |
||||
} |
||||
|
||||
// requestCount returns the total number of delete requests added to the batch.
func (b *deleteRequestBatch) requestCount() int {
	return b.count
}
||||
|
||||
// addDeleteRequest add a requests to the batch
|
||||
func (b *deleteRequestBatch) addDeleteRequest(dr *DeleteRequest) { |
||||
dr.Metrics = b.metrics |
||||
ur, ok := b.deleteRequestsToProcess[dr.UserID] |
||||
if !ok { |
||||
ur = &userDeleteRequests{ |
||||
requestsInterval: model.Interval{ |
||||
Start: dr.StartTime, |
||||
End: dr.EndTime, |
||||
}, |
||||
} |
||||
b.deleteRequestsToProcess[dr.UserID] = ur |
||||
} |
||||
|
||||
ur.requests = append(ur.requests, dr) |
||||
if dr.StartTime < ur.requestsInterval.Start { |
||||
ur.requestsInterval.Start = dr.StartTime |
||||
} |
||||
if dr.EndTime > ur.requestsInterval.End { |
||||
ur.requestsInterval.End = dr.EndTime |
||||
} |
||||
b.count++ |
||||
} |
||||
|
||||
func (b *deleteRequestBatch) checkDuplicate(deleteRequest DeleteRequest) error { |
||||
ur, ok := b.deleteRequestsToProcess[deleteRequest.UserID] |
||||
if !ok { |
||||
return nil |
||||
} |
||||
for _, requestLoadedForProcessing := range ur.requests { |
||||
isDuplicate, err := requestLoadedForProcessing.IsDuplicate(&deleteRequest) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
if isDuplicate { |
||||
level.Info(util_log.Logger).Log( |
||||
"msg", "found duplicate request of one of the requests loaded for processing", |
||||
"loaded_request_id", requestLoadedForProcessing.RequestID, |
||||
"duplicate_request_id", deleteRequest.RequestID, |
||||
"user", deleteRequest.UserID, |
||||
) |
||||
b.duplicateRequests = append(b.duplicateRequests, deleteRequest) |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (b *deleteRequestBatch) expired(userID []byte, chk retention.Chunk, lbls labels.Labels, skipRequest func(*DeleteRequest) bool) (bool, filter.Func) { |
||||
userIDStr := unsafeGetString(userID) |
||||
if b.deleteRequestsToProcess[userIDStr] == nil || !intervalsOverlap(b.deleteRequestsToProcess[userIDStr].requestsInterval, model.Interval{ |
||||
Start: chk.From, |
||||
End: chk.Through, |
||||
}) { |
||||
return false, nil |
||||
} |
||||
|
||||
var filterFuncs []filter.Func |
||||
|
||||
for _, deleteRequest := range b.deleteRequestsToProcess[userIDStr].requests { |
||||
if skipRequest(deleteRequest) { |
||||
continue |
||||
} |
||||
isDeleted, ff := deleteRequest.GetChunkFilter(userID, lbls, chk) |
||||
if !isDeleted { |
||||
continue |
||||
} |
||||
|
||||
if ff == nil { |
||||
level.Info(util_log.Logger).Log( |
||||
"msg", "no chunks to retain: the whole chunk is deleted", |
||||
"delete_request_id", deleteRequest.RequestID, |
||||
"sequence_num", deleteRequest.SequenceNum, |
||||
"user", deleteRequest.UserID, |
||||
"chunkID", string(chk.ChunkID), |
||||
) |
||||
b.metrics.deleteRequestsChunksSelectedTotal.WithLabelValues(string(userID)).Inc() |
||||
return true, nil |
||||
} |
||||
filterFuncs = append(filterFuncs, ff) |
||||
} |
||||
|
||||
if len(filterFuncs) == 0 { |
||||
return false, nil |
||||
} |
||||
|
||||
b.metrics.deleteRequestsChunksSelectedTotal.WithLabelValues(string(userID)).Inc() |
||||
return true, func(ts time.Time, s string, structuredMetadata labels.Labels) bool { |
||||
for _, ff := range filterFuncs { |
||||
if ff(ts, s, structuredMetadata) { |
||||
return true |
||||
} |
||||
} |
||||
|
||||
return false |
||||
} |
||||
} |
||||
|
||||
func (b *deleteRequestBatch) intervalMayHaveExpiredChunks(userID string) bool { |
||||
// We can't do the overlap check between the passed interval and delete requests interval from a user because
|
||||
// if a request is issued just for today and there are chunks spanning today and yesterday then
|
||||
// the overlap check would skip processing yesterday's index which would result in the index pointing to deleted chunks.
|
||||
if userID != "" { |
||||
return b.deleteRequestsToProcess[userID] != nil |
||||
} |
||||
|
||||
return len(b.deleteRequestsToProcess) != 0 |
||||
} |
||||
|
||||
func (b *deleteRequestBatch) getAllRequestsForUser(userID string) []*DeleteRequest { |
||||
userRequests, ok := b.deleteRequestsToProcess[userID] |
||||
if !ok { |
||||
return nil |
||||
} |
||||
|
||||
return userRequests.requests |
||||
} |
@ -0,0 +1,719 @@ |
||||
package deletion |
||||
|
||||
import ( |
||||
"strings" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/prometheus/common/model" |
||||
"github.com/prometheus/prometheus/model/labels" |
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/compactor/retention" |
||||
"github.com/grafana/loki/v3/pkg/logql/syntax" |
||||
"github.com/grafana/loki/v3/pkg/util/filter" |
||||
) |
||||
|
||||
func TestDeleteRequestBatch_Expired(t *testing.T) { |
||||
type resp struct { |
||||
isExpired bool |
||||
expectedFilter filter.Func |
||||
} |
||||
|
||||
now := model.Now() |
||||
lblFoo, err := syntax.ParseLabels(`{foo="bar"}`) |
||||
require.NoError(t, err) |
||||
streamSelectorWithLineFilters := lblFoo.String() + `|="fizz"` |
||||
streamSelectorWithStructuredMetadataFilters := lblFoo.String() + `| ping="pong"` |
||||
streamSelectorWithLineAndStructuredMetadataFilters := lblFoo.String() + `| ping="pong" |= "fizz"` |
||||
|
||||
chunkEntry := retention.Chunk{ |
||||
From: now.Add(-12 * time.Hour), |
||||
Through: now.Add(-time.Hour), |
||||
} |
||||
|
||||
for _, tc := range []struct { |
||||
name string |
||||
deleteRequests []DeleteRequest |
||||
expectedResp resp |
||||
expectedDeletionRangeByUser map[string]model.Interval |
||||
}{ |
||||
{ |
||||
name: "no delete requests", |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "no relevant delete requests", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: "different-user", |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
"different-user": { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "no relevant delete requests", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: "different-user", |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
"different-user": { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "delete request not matching labels", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: `{fizz="buzz"}`, |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "whole chunk deleted by single request", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "whole chunk deleted by single request with line filters", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, s string, _ labels.Labels) bool { |
||||
return strings.Contains(s, "fizz") |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "whole chunk deleted by single request with structured metadata filters", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool { |
||||
return structuredMetadata.Get(lblPing) == lblPong |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "whole chunk deleted by single request with line and structured metadata filters", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineAndStructuredMetadataFilters, |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, s string, structuredMetadata labels.Labels) bool { |
||||
return structuredMetadata.Get(lblPing) == lblPong && strings.Contains(s, "fizz") |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "deleted interval out of range", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-48 * time.Hour), |
||||
EndTime: now.Add(-24 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-48 * time.Hour), |
||||
End: now.Add(-24 * time.Hour), |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "deleted interval out of range(with multiple user requests)", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-48 * time.Hour), |
||||
EndTime: now.Add(-24 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: "different-user", |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-24 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: false, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-48 * time.Hour), |
||||
End: now.Add(-24 * time.Hour), |
||||
}, |
||||
"different-user": { |
||||
Start: now.Add(-24 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple delete requests with one deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-48 * time.Hour), |
||||
EndTime: now.Add(-24 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-48 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple delete requests with line filters and one deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-48 * time.Hour), |
||||
EndTime: now.Add(-24 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, s string, _ labels.Labels) bool { |
||||
return strings.Contains(s, "fizz") |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-48 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple delete requests with structured metadata filters and one deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-48 * time.Hour), |
||||
EndTime: now.Add(-24 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool { |
||||
return structuredMetadata.Get(lblPing) == lblPong |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-48 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple delete requests causing multiple holes", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-13 * time.Hour), |
||||
EndTime: now.Add(-11 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-10 * time.Hour), |
||||
EndTime: now.Add(-8 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-6 * time.Hour), |
||||
EndTime: now.Add(-5 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-2 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(ts time.Time, _ string, _ labels.Labels) bool { |
||||
tsUnixNano := ts.UnixNano() |
||||
if (now.Add(-13*time.Hour).UnixNano() <= tsUnixNano && tsUnixNano <= now.Add(-11*time.Hour).UnixNano()) || |
||||
(now.Add(-10*time.Hour).UnixNano() <= tsUnixNano && tsUnixNano <= now.Add(-8*time.Hour).UnixNano()) || |
||||
(now.Add(-6*time.Hour).UnixNano() <= tsUnixNano && tsUnixNano <= now.Add(-5*time.Hour).UnixNano()) || |
||||
(now.Add(-2*time.Hour).UnixNano() <= tsUnixNano && tsUnixNano <= now.UnixNano()) { |
||||
return true |
||||
} |
||||
return false |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-13 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple overlapping requests deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-13 * time.Hour), |
||||
EndTime: now.Add(-6 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-8 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, _ labels.Labels) bool { |
||||
return true |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-13 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple overlapping requests with line filters deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-13 * time.Hour), |
||||
EndTime: now.Add(-6 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-8 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, s string, _ labels.Labels) bool { |
||||
return strings.Contains(s, "fizz") |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-13 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple overlapping requests with structured metadata filters deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-13 * time.Hour), |
||||
EndTime: now.Add(-6 * time.Hour), |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-8 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool { |
||||
return structuredMetadata.Get(lblPing) == lblPong |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-13 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple non-overlapping requests deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now.Add(-6*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-6 * time.Hour), |
||||
EndTime: now.Add(-4*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: lblFoo.String(), |
||||
StartTime: now.Add(-4 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, _ labels.Labels) bool { |
||||
return true |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-12 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple non-overlapping requests with line filter deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now.Add(-6*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-6 * time.Hour), |
||||
EndTime: now.Add(-4*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithLineFilters, |
||||
StartTime: now.Add(-4 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, s string, _ labels.Labels) bool { |
||||
return strings.Contains(s, "fizz") |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-12 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "multiple non-overlapping requests with structured metadata filter deleting the whole chunk", |
||||
deleteRequests: []DeleteRequest{ |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-12 * time.Hour), |
||||
EndTime: now.Add(-6*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-6 * time.Hour), |
||||
EndTime: now.Add(-4*time.Hour) - 1, |
||||
Status: StatusReceived, |
||||
}, |
||||
{ |
||||
UserID: testUserID, |
||||
Query: streamSelectorWithStructuredMetadataFilters, |
||||
StartTime: now.Add(-4 * time.Hour), |
||||
EndTime: now, |
||||
Status: StatusReceived, |
||||
}, |
||||
}, |
||||
expectedResp: resp{ |
||||
isExpired: true, |
||||
expectedFilter: func(_ time.Time, _ string, structuredMetadata labels.Labels) bool { |
||||
return structuredMetadata.Get(lblPing) == lblPong |
||||
}, |
||||
}, |
||||
expectedDeletionRangeByUser: map[string]model.Interval{ |
||||
testUserID: { |
||||
Start: now.Add(-12 * time.Hour), |
||||
End: now, |
||||
}, |
||||
}, |
||||
}, |
||||
} { |
||||
t.Run(tc.name, func(t *testing.T) { |
||||
batch := newDeleteRequestBatch(newDeleteRequestsManagerMetrics(nil)) |
||||
for _, req := range tc.deleteRequests { |
||||
batch.addDeleteRequest(&req) |
||||
} |
||||
|
||||
for _, deleteRequests := range batch.deleteRequestsToProcess { |
||||
for _, dr := range deleteRequests.requests { |
||||
require.EqualValues(t, 0, dr.DeletedLines) |
||||
} |
||||
} |
||||
|
||||
isExpired, filterFunc := batch.expired([]byte(testUserID), chunkEntry, lblFoo, func(_ *DeleteRequest) bool { |
||||
return false |
||||
}) |
||||
require.Equal(t, tc.expectedResp.isExpired, isExpired) |
||||
if tc.expectedResp.expectedFilter == nil { |
||||
require.Nil(t, filterFunc) |
||||
} else { |
||||
require.NotNil(t, filterFunc) |
||||
|
||||
for start := chunkEntry.From; start <= chunkEntry.Through; start = start.Add(time.Minute) { |
||||
line := "foo bar" |
||||
if start.Time().Minute()%2 == 1 { |
||||
line = "fizz buzz" |
||||
} |
||||
// mix of empty, ding=dong and ping=pong as structured metadata
|
||||
var structuredMetadata labels.Labels |
||||
if start.Time().Minute()%3 == 0 { |
||||
structuredMetadata = labels.FromStrings(lblPing, lblPong) |
||||
} else if start.Time().Minute()%2 == 0 { |
||||
structuredMetadata = labels.FromStrings("ting", "tong") |
||||
} |
||||
require.Equal(t, tc.expectedResp.expectedFilter(start.Time(), line, structuredMetadata), filterFunc(start.Time(), line, structuredMetadata), "line", line, "time", start.Time(), "now", now.Time()) |
||||
} |
||||
|
||||
require.Equal(t, len(tc.expectedDeletionRangeByUser), len(batch.deleteRequestsToProcess)) |
||||
for userID, dr := range tc.expectedDeletionRangeByUser { |
||||
require.Equal(t, dr, batch.deleteRequestsToProcess[userID].requestsInterval) |
||||
} |
||||
} |
||||
}) |
||||
} |
||||
} |
||||
|
||||
func TestDeleteRequestBatch_IntervalMayHaveExpiredChunks(t *testing.T) { |
||||
tests := []struct { |
||||
name string |
||||
deleteRequests map[string]*userDeleteRequests |
||||
userID string |
||||
expected bool |
||||
}{ |
||||
{ |
||||
name: "no delete requests", |
||||
deleteRequests: map[string]*userDeleteRequests{}, |
||||
userID: "test-user", |
||||
expected: false, |
||||
}, |
||||
{ |
||||
name: "has delete requests for user", |
||||
deleteRequests: map[string]*userDeleteRequests{ |
||||
"test-user": { |
||||
requests: []*DeleteRequest{ |
||||
{ |
||||
UserID: "test-user", |
||||
}, |
||||
}, |
||||
}, |
||||
}, |
||||
userID: "test-user", |
||||
expected: true, |
||||
}, |
||||
{ |
||||
name: "has delete requests but not for user", |
||||
deleteRequests: map[string]*userDeleteRequests{ |
||||
"other-user": { |
||||
requests: []*DeleteRequest{ |
||||
{ |
||||
UserID: "other-user", |
||||
}, |
||||
}, |
||||
}, |
||||
}, |
||||
userID: "test-user", |
||||
expected: false, |
||||
}, |
||||
{ |
||||
name: "check for all users", |
||||
deleteRequests: map[string]*userDeleteRequests{ |
||||
"test-user": { |
||||
requests: []*DeleteRequest{ |
||||
{ |
||||
UserID: "test-user", |
||||
}, |
||||
}, |
||||
}, |
||||
}, |
||||
userID: "", |
||||
expected: true, |
||||
}, |
||||
} |
||||
|
||||
for _, tc := range tests { |
||||
t.Run(tc.name, func(t *testing.T) { |
||||
batch := &deleteRequestBatch{ |
||||
deleteRequestsToProcess: tc.deleteRequests, |
||||
metrics: &deleteRequestsManagerMetrics{}, |
||||
} |
||||
|
||||
result := batch.intervalMayHaveExpiredChunks(tc.userID) |
||||
require.Equal(t, tc.expected, result) |
||||
}) |
||||
} |
||||
} |
@ -0,0 +1,226 @@ |
||||
package deletion |
||||
|
||||
import ( |
||||
"context" |
||||
"encoding/json" |
||||
"fmt" |
||||
"path" |
||||
"strings" |
||||
"time" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/compactor/retention" |
||||
"github.com/grafana/loki/v3/pkg/storage/chunk/client" |
||||
) |
||||
|
||||
// ErrNoChunksSelectedForDeletion is returned by deletionManifestBuilder.Finish
// when none of the chunks seen by the builder matched any delete request.
var ErrNoChunksSelectedForDeletion = fmt.Errorf("no chunks selected for deletion")

const (
	// maxChunksPerSegment caps how many chunks a single segment may hold before
	// the builder flushes it and starts collecting a fresh one.
	maxChunksPerSegment = 100000
	// manifestFileName is the object name under which the manifest is stored.
	manifestFileName = "manifest.json"
)
||||
|
||||
// ChunksGroup holds a group of chunks selected by the same set of requests
type ChunksGroup struct {
	// Requests is the exact set of delete requests that selected every chunk in Chunks.
	Requests []DeleteRequest `json:"requests"`
	Chunks   []retention.Chunk `json:"chunks"`
}
||||
|
||||
// segment holds limited chunks(upto maxChunksPerSegment) that needs to be processed.
// It also helps segregate chunks belonging to different users/tables.
type segment struct {
	UserID       string        `json:"user_id"`
	TableName    string        `json:"table_name"`
	ChunksGroups []ChunksGroup `json:"chunk_groups"`
	// ChunksCount is the total number of chunks across all ChunksGroups.
	ChunksCount int `json:"chunks_count"`
}
||||
|
||||
// manifest represents the completion state and summary of discovering chunks
// which need processing for the loaded deleteRequestBatch.
// It serves two purposes:
// 1. Acts as a completion marker indicating all chunks for the given delete requests have been found
// 2. Stores a summary of data stored in segments:
//   - Original and duplicate deletion requests
//   - Total number of segments and chunks to be processed
//
// Once all the segments are processed, Requests and DuplicateRequests in the manifest could be marked as processed.
type manifest struct {
	Requests          []DeleteRequest `json:"requests"`
	DuplicateRequests []DeleteRequest `json:"duplicate_requests"`
	SegmentsCount     int             `json:"segments_count"`
	ChunksCount       int             `json:"chunks_count"`
}
||||
|
||||
// deletionManifestBuilder helps with building the manifest for listing out which chunks to process for a batch of delete requests.
// It is not meant to be used concurrently.
type deletionManifestBuilder struct {
	deleteStoreClient  client.ObjectClient
	deleteRequestBatch deleteRequestBatch

	// currentSegment groups the chunks collected so far, keyed by a uint64 bit
	// field identifying which of the current user's delete requests selected them.
	currentSegment            map[uint64]ChunksGroup
	currentSegmentChunksCount int
	currentUserID             string
	currentTableName          string

	// allUserRequests caches the requests loaded for currentUserID; the index of
	// a request in this slice is its bit position in the group identifiers.
	allUserRequests    []*DeleteRequest
	creationTime       time.Time
	segmentsCount      int
	overallChunksCount int
}
||||
|
||||
func newDeletionManifestBuilder(deleteStoreClient client.ObjectClient, deleteRequestBatch deleteRequestBatch) (*deletionManifestBuilder, error) { |
||||
requestCount := 0 |
||||
for _, userRequests := range deleteRequestBatch.deleteRequestsToProcess { |
||||
requestCount += len(userRequests.requests) |
||||
} |
||||
|
||||
// We use a uint64 as a bit field to track which delete requests apply to each chunk.
|
||||
// Since uint64 has 64 bits, we can only handle up to 64 delete requests at a time.
|
||||
if requestCount > 64 { |
||||
return nil, fmt.Errorf("only upto 64 delete requests allowed, current count: %d", requestCount) |
||||
} |
||||
|
||||
builder := &deletionManifestBuilder{ |
||||
deleteStoreClient: deleteStoreClient, |
||||
deleteRequestBatch: deleteRequestBatch, |
||||
currentSegment: make(map[uint64]ChunksGroup), |
||||
creationTime: time.Now(), |
||||
} |
||||
|
||||
return builder, nil |
||||
} |
||||
|
||||
// AddSeries adds a series and its chunks to the current segment.
// It flushes the current segment if the user ID or table name changes.
// It also ensures that the current segment does not exceed the maximum number of chunks.
//
// Within a segment, chunks are grouped by the exact set of delete requests that
// select them. That set is encoded as a bit field over d.allUserRequests:
// bit i set means the i-th request matched the chunk.
func (d *deletionManifestBuilder) AddSeries(ctx context.Context, tableName string, series retention.Series) error {
	userIDStr := unsafeGetString(series.UserID())
	// A change of user or table means subsequent chunks belong to a new
	// segment: flush what we have and reload the new user's requests.
	if userIDStr != d.currentUserID || tableName != d.currentTableName {
		if err := d.flushCurrentBatch(ctx); err != nil {
			return err
		}
		d.currentSegmentChunksCount = 0
		d.currentSegment = make(map[uint64]ChunksGroup)

		// string(series.UserID()) makes an owned copy; userIDStr (built via
		// unsafeGetString) presumably aliases the series' buffer, so it is not
		// stored directly — TODO confirm unsafeGetString semantics.
		d.currentUserID = string(series.UserID())
		d.currentTableName = tableName
		d.allUserRequests = d.deleteRequestBatch.getAllRequestsForUser(userIDStr)
		if len(d.allUserRequests) == 0 {
			// Callers are expected to feed only series owned by users that
			// have delete requests in the batch.
			return fmt.Errorf("no requests loaded for user: %s", userIDStr)
		}
	}

	var chunksGroupIdentifier uint64
	for _, chk := range series.Chunks() {
		if d.currentSegmentChunksCount >= maxChunksPerSegment {
			// Segment is full: persist it, then reuse the existing groups with
			// their chunk lists truncated so the request groupings carry over
			// into the next segment without being rebuilt.
			if err := d.flushCurrentBatch(ctx); err != nil {
				return err
			}
			d.currentSegmentChunksCount = 0
			for chunksGroupIdentifier := range d.currentSegment {
				group := d.currentSegment[chunksGroupIdentifier]
				group.Chunks = group.Chunks[:0]
				d.currentSegment[chunksGroupIdentifier] = group
			}
		}

		// We use a uint64 as a bit field to track which delete requests apply to each chunk.
		// NOTE(review): this caps matching at 64 requests per user — confirm an
		// upstream limit keeps len(d.allUserRequests) <= 64.
		chunksGroupIdentifier = 0
		for i, deleteRequest := range d.allUserRequests {
			if !deleteRequest.IsDeleted(series.UserID(), series.Labels(), chk) {
				continue
			}

			chunksGroupIdentifier |= 1 << i
		}

		// No request selects this chunk; it contributes nothing to the segment.
		if chunksGroupIdentifier == 0 {
			continue
		}
		d.currentSegmentChunksCount++

		if _, ok := d.currentSegment[chunksGroupIdentifier]; !ok {
			// Iterate through d.allUserRequests and find which bits are turned on in chunksGroupIdentifier
			var deleteRequests []DeleteRequest
			for i := range d.allUserRequests {
				if chunksGroupIdentifier&(1<<i) != 0 { // Check if the i-th bit is turned on
					deleteRequest := d.allUserRequests[i]
					deleteRequests = append(deleteRequests, DeleteRequest{
						RequestID: deleteRequest.RequestID,
						Query:     deleteRequest.Query,
						StartTime: deleteRequest.StartTime,
						EndTime:   deleteRequest.EndTime,
					})
				}
			}

			d.currentSegment[chunksGroupIdentifier] = ChunksGroup{
				Requests: deleteRequests,
			}
		}

		group := d.currentSegment[chunksGroupIdentifier]
		group.Chunks = append(group.Chunks, chk)
		d.currentSegment[chunksGroupIdentifier] = group
	}

	return nil
}
||||
|
||||
// Finish flushes the current segment and builds the manifest.
|
||||
func (d *deletionManifestBuilder) Finish(ctx context.Context) error { |
||||
if err := d.flushCurrentBatch(ctx); err != nil { |
||||
return err |
||||
} |
||||
|
||||
if d.overallChunksCount == 0 { |
||||
return ErrNoChunksSelectedForDeletion |
||||
} |
||||
|
||||
var requests []DeleteRequest |
||||
for userID := range d.deleteRequestBatch.deleteRequestsToProcess { |
||||
for i := range d.deleteRequestBatch.deleteRequestsToProcess[userID].requests { |
||||
requests = append(requests, *d.deleteRequestBatch.deleteRequestsToProcess[userID].requests[i]) |
||||
} |
||||
} |
||||
|
||||
manifestJSON, err := json.Marshal(manifest{ |
||||
Requests: requests, |
||||
DuplicateRequests: d.deleteRequestBatch.duplicateRequests, |
||||
SegmentsCount: d.segmentsCount, |
||||
ChunksCount: d.overallChunksCount, |
||||
}) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return d.deleteStoreClient.PutObject(ctx, d.buildObjectKey(manifestFileName), strings.NewReader(unsafeGetString(manifestJSON))) |
||||
} |
||||
|
||||
func (d *deletionManifestBuilder) flushCurrentBatch(ctx context.Context) error { |
||||
b := segment{ |
||||
UserID: d.currentUserID, |
||||
TableName: d.currentTableName, |
||||
ChunksCount: d.currentSegmentChunksCount, |
||||
} |
||||
for _, group := range d.currentSegment { |
||||
if len(group.Chunks) == 0 { |
||||
continue |
||||
} |
||||
b.ChunksGroups = append(b.ChunksGroups, group) |
||||
} |
||||
if len(b.ChunksGroups) == 0 { |
||||
return nil |
||||
} |
||||
batchJSON, err := json.Marshal(b) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
d.segmentsCount++ |
||||
d.overallChunksCount += d.currentSegmentChunksCount |
||||
d.currentSegmentChunksCount = 0 |
||||
return d.deleteStoreClient.PutObject(ctx, d.buildObjectKey(fmt.Sprintf("%d.json", d.segmentsCount)), strings.NewReader(unsafeGetString(batchJSON))) |
||||
} |
||||
|
||||
func (d *deletionManifestBuilder) buildObjectKey(filename string) string { |
||||
return path.Join(fmt.Sprint(d.creationTime.UnixNano()), filename) |
||||
} |
@ -0,0 +1,637 @@ |
||||
package deletion |
||||
|
||||
import ( |
||||
"context" |
||||
"encoding/json" |
||||
"fmt" |
||||
"io" |
||||
"slices" |
||||
"strings" |
||||
"testing" |
||||
|
||||
"github.com/prometheus/common/model" |
||||
"github.com/prometheus/prometheus/model/labels" |
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/grafana/loki/v3/pkg/compactor/retention" |
||||
"github.com/grafana/loki/v3/pkg/storage/chunk/client/local" |
||||
) |
||||
|
||||
// Shared fixtures for the deletion-manifest-builder tests: request IDs,
// table names, and LogQL label selectors used as delete-request queries.
const (
	req1   = "req1"
	req2   = "req2"
	table1 = "table1"
	table2 = "table2"

	lblFizzBuzz          = `{fizz="buzz"}`
	lblFooBarAndFizzBuzz = `{foo="bar", fizz="buzz"}`
)
||||
|
||||
func buildChunks(start model.Time, count int) []retention.Chunk { |
||||
chunks := make([]retention.Chunk, 0, count) |
||||
chunks = append(chunks, retention.Chunk{ |
||||
ChunkID: []byte(fmt.Sprintf("%d", start)), |
||||
From: start, |
||||
Through: start + 1, |
||||
}) |
||||
|
||||
for i := 1; i < count; i++ { |
||||
from := chunks[i-1].From + 1 |
||||
chunks = append(chunks, retention.Chunk{ |
||||
ChunkID: []byte(fmt.Sprintf("%d", from)), |
||||
From: from, |
||||
Through: from + 1, |
||||
}) |
||||
} |
||||
|
||||
return chunks |
||||
} |
||||
|
||||
// mockSeries is a test double for the series type consumed by
// deletionManifestBuilder.AddSeries, holding static values for the
// user, labels, and chunks it reports.
type mockSeries struct {
	seriesID []byte
	userID   string
	labels   labels.Labels
	chunks   []retention.Chunk
}

// SeriesID returns the stored series identifier.
func (m *mockSeries) SeriesID() []byte {
	return m.seriesID
}

// Reset replaces all stored fields and drops any accumulated chunks.
func (m *mockSeries) Reset(seriesID, userID []byte, labels labels.Labels) {
	m.seriesID = seriesID
	m.userID = string(userID)
	m.labels = labels
	m.chunks = nil
}

// AppendChunks appends the given chunks to the stored list.
func (m *mockSeries) AppendChunks(ref ...retention.Chunk) {
	m.chunks = append(m.chunks, ref...)
}

// UserID returns the stored user ID as bytes.
func (m *mockSeries) UserID() []byte {
	return []byte(m.userID)
}

// Labels returns the stored label set.
func (m *mockSeries) Labels() labels.Labels {
	return m.labels
}

// Chunks returns the stored chunks.
func (m *mockSeries) Chunks() []retention.Chunk {
	return m.chunks
}
||||
|
||||
// TestDeletionManifestBuilder drives the builder through table-driven
// scenarios and verifies both the manifest file and every numbered segment
// file written to the object store.
func TestDeletionManifestBuilder(t *testing.T) {
	tests := []struct {
		name           string
		deleteRequests []DeleteRequest
		series         []struct {
			tableName string
			series    *mockSeries
		}
		expectedManifest manifest
		expectedSegments []segment
		// validateFunc is not set by any of the cases below; presumably kept
		// for future cases — TODO confirm or drop.
		validateFunc func(t *testing.T, builder *deletionManifestBuilder)
	}{
		{
			name: "single user with single segment",
			deleteRequests: []DeleteRequest{
				{
					UserID:    user1,
					RequestID: req1,
					Query:     lblFooBar,
					StartTime: 0,
					EndTime:   100,
				},
			},
			series: []struct {
				tableName string
				series    *mockSeries
			}{
				{
					tableName: table1,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBar),
						chunks: buildChunks(10, 100),
					},
				},
			},
			expectedManifest: manifest{
				Requests: []DeleteRequest{
					{
						RequestID: req1,
						Query:     lblFooBar,
						StartTime: 0,
						EndTime:   100,
					},
				},
				SegmentsCount: 1,
				// Only chunks starting within [0, 100] match; chunks 10..100 → 91.
				ChunksCount: 91,
			},
			expectedSegments: []segment{
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   100,
								},
							},
							Chunks: buildChunks(10, 91),
						},
					},
					ChunksCount: 91,
				},
			},
		},
		{
			name: "single user with multiple segments due to chunks count",
			deleteRequests: []DeleteRequest{
				{
					UserID:    user1,
					RequestID: req1,
					Query:     lblFooBar,
					StartTime: 0,
					EndTime:   maxChunksPerSegment + 1,
				},
			},
			series: []struct {
				tableName string
				series    *mockSeries
			}{
				{
					tableName: table1,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBar),
						// One chunk more than fits in a segment forces a second segment.
						chunks: buildChunks(0, maxChunksPerSegment+1),
					},
				},
			},
			expectedManifest: manifest{
				Requests: []DeleteRequest{
					{
						RequestID: req1,
						Query:     lblFooBar,
						StartTime: 0,
						EndTime:   maxChunksPerSegment + 1,
					},
				},
				SegmentsCount: 2,
				ChunksCount:   maxChunksPerSegment + 1,
			},
			expectedSegments: []segment{
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(0, maxChunksPerSegment),
						},
					},
					ChunksCount: maxChunksPerSegment,
				},
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(maxChunksPerSegment, 1),
						},
					},
					ChunksCount: 1,
				},
			},
		},
		{
			name: "single user with multiple segments due to multiple tables having chunks to delete",
			deleteRequests: []DeleteRequest{
				{
					UserID:    user1,
					RequestID: req1,
					Query:     lblFooBar,
					StartTime: 0,
					EndTime:   100,
				},
			},
			series: []struct {
				tableName string
				series    *mockSeries
			}{
				{
					tableName: table1,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBar),
						chunks: buildChunks(0, 50),
					},
				},
				{
					// A table change triggers a flush, so each table gets its own segment.
					tableName: table2,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBar),
						chunks: buildChunks(50, 50),
					},
				},
			},
			expectedManifest: manifest{
				Requests: []DeleteRequest{
					{
						RequestID: req1,
						Query:     lblFooBar,
						StartTime: 0,
						EndTime:   100,
					},
				},
				SegmentsCount: 2,
				ChunksCount:   100,
			},
			expectedSegments: []segment{
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   100,
								},
							},
							Chunks: buildChunks(0, 50),
						},
					},
					ChunksCount: 50,
				},
				{
					UserID:    user1,
					TableName: table2,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   100,
								},
							},
							Chunks: buildChunks(50, 50),
						},
					},
					ChunksCount: 50,
				},
			},
		},
		{
			name: "multiple users with multiple segments",
			deleteRequests: []DeleteRequest{
				{
					UserID:    user1,
					RequestID: req1,
					Query:     lblFooBar,
					StartTime: 0,
					EndTime:   maxChunksPerSegment + 1,
				},
				{
					UserID:    user2,
					RequestID: req2,
					Query:     lblFizzBuzz,
					StartTime: 10,
					EndTime:   10 + maxChunksPerSegment + 1,
				},
			},
			series: []struct {
				tableName string
				series    *mockSeries
			}{
				{
					tableName: table1,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBar),
						chunks: buildChunks(0, maxChunksPerSegment+1),
					},
				},
				{
					tableName: table1,
					series: &mockSeries{
						userID: user2,
						labels: mustParseLabel(lblFizzBuzz),
						chunks: buildChunks(10, maxChunksPerSegment+1),
					},
				},
			},
			expectedManifest: manifest{
				Requests: []DeleteRequest{
					{
						RequestID: req1,
						Query:     lblFooBar,
						StartTime: 0,
						EndTime:   maxChunksPerSegment + 1,
					},
					{
						RequestID: req2,
						Query:     lblFizzBuzz,
						StartTime: 10,
						EndTime:   10 + maxChunksPerSegment + 1,
					},
				},
				// Two segments per user: one full, one for the overflow chunk.
				SegmentsCount: 4,
				ChunksCount:   (maxChunksPerSegment + 1) * 2,
			},
			expectedSegments: []segment{
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(0, maxChunksPerSegment),
						},
					},
					ChunksCount: maxChunksPerSegment,
				},
				{
					UserID:    user1,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(maxChunksPerSegment, 1),
						},
					},
					ChunksCount: 1,
				},
				{
					UserID:    user2,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req2,
									Query:     lblFizzBuzz,
									StartTime: 10,
									EndTime:   10 + maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(10, maxChunksPerSegment),
						},
					},
					ChunksCount: maxChunksPerSegment,
				},
				{
					UserID:    user2,
					TableName: table1,
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req2,
									Query:     lblFizzBuzz,
									StartTime: 10,
									EndTime:   10 + maxChunksPerSegment + 1,
								},
							},
							Chunks: buildChunks(10+maxChunksPerSegment, 1),
						},
					},
					ChunksCount: 1,
				},
			},
		},
		{
			name: "multiple delete requests covering same chunks",
			deleteRequests: []DeleteRequest{
				{
					UserID:    user1,
					RequestID: req1,
					Query:     lblFooBar,
					StartTime: 0,
					EndTime:   100,
				},
				{
					UserID:    user1,
					RequestID: req2,
					Query:     lblFizzBuzz,
					StartTime: 51,
					EndTime:   100,
				},
			},
			series: []struct {
				tableName string
				series    *mockSeries
			}{
				{
					tableName: table1,
					series: &mockSeries{
						userID: user1,
						labels: mustParseLabel(lblFooBarAndFizzBuzz),
						chunks: buildChunks(25, 50),
					},
				},
			},
			expectedManifest: manifest{
				Requests: []DeleteRequest{
					{
						RequestID: req1,
						Query:     lblFooBar,
						StartTime: 0,
						EndTime:   100,
					},
					{
						RequestID: req2,
						Query:     lblFizzBuzz,
						StartTime: 51,
						EndTime:   100,
					},
				},
				SegmentsCount: 1,
				ChunksCount:   50,
			},
			expectedSegments: []segment{
				{
					UserID:    user1,
					TableName: table1,
					// Chunks 25..49 match only req1; chunks 50..74 fall inside
					// both request ranges and land in the two-request group.
					ChunksGroups: []ChunksGroup{
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   100,
								},
							},
							Chunks: buildChunks(25, 25),
						},
						{
							Requests: []DeleteRequest{
								{
									RequestID: req1,
									Query:     lblFooBar,
									StartTime: 0,
									EndTime:   100,
								},
								{
									RequestID: req2,
									Query:     lblFizzBuzz,
									StartTime: 51,
									EndTime:   100,
								},
							},
							Chunks: buildChunks(50, 25),
						},
					},

					ChunksCount: 50,
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			tempDir := t.TempDir()
			ctx := context.Background()
			objectClient, err := local.NewFSObjectClient(local.FSConfig{
				Directory: tempDir,
			})
			require.NoError(t, err)

			// Create delete request batch
			batch := newDeleteRequestBatch(nil)
			for _, req := range tc.deleteRequests {
				// NOTE(review): taking &req relies on Go 1.22+ per-iteration
				// loop variables; on older toolchains all pointers would alias
				// the last request — confirm go.mod requires >= 1.22.
				batch.addDeleteRequest(&req)
			}

			// Create builder
			builder, err := newDeletionManifestBuilder(objectClient, *batch)
			require.NoError(t, err)

			// Process series
			for _, s := range tc.series {
				err := builder.AddSeries(ctx, s.tableName, s.series)
				require.NoError(t, err)
			}

			// Finish and validate
			err = builder.Finish(ctx)
			require.NoError(t, err)

			require.Equal(t, tc.expectedManifest.SegmentsCount, builder.segmentsCount)
			require.Equal(t, tc.expectedManifest.ChunksCount, builder.overallChunksCount)

			// Read back the manifest file and compare against expectations.
			reader, _, err := builder.deleteStoreClient.GetObject(context.Background(), builder.buildObjectKey(manifestFileName))
			require.NoError(t, err)

			manifestJSON, err := io.ReadAll(reader)
			require.NoError(t, err)
			require.NoError(t, reader.Close())

			var manifest manifest
			require.NoError(t, json.Unmarshal(manifestJSON, &manifest))
			// Request order is map-iteration dependent; sort for a stable compare.
			slices.SortFunc(manifest.Requests, func(a, b DeleteRequest) int {
				return strings.Compare(a.RequestID, b.RequestID)
			})

			require.Equal(t, tc.expectedManifest, manifest)

			// Segments are numbered from 1; read each back and compare.
			for i := 0; i < tc.expectedManifest.SegmentsCount; i++ {
				reader, _, err := builder.deleteStoreClient.GetObject(context.Background(), builder.buildObjectKey(fmt.Sprintf("%d.json", i+1)))
				require.NoError(t, err)

				segmentJSON, err := io.ReadAll(reader)
				require.NoError(t, err)
				require.NoError(t, reader.Close())

				var segment segment
				require.NoError(t, json.Unmarshal(segmentJSON, &segment))

				// Group order within a segment is map-iteration dependent;
				// order by request count. NOTE(review): ties would be left in
				// nondeterministic order — current cases have distinct counts.
				slices.SortFunc(segment.ChunksGroups, func(a, b ChunksGroup) int {
					switch {
					case len(a.Requests) < len(b.Requests):
						return -1
					case len(a.Requests) > len(b.Requests):
						return 1
					default:
						return 0
					}
				})
				require.Equal(t, tc.expectedSegments[i], segment)
			}
		})
	}
}
||||
|
||||
// TestDeletionManifestBuilder_Errors exercises the builder's error paths:
// adding a series for a user without delete requests, and finishing when no
// chunks were selected for deletion.
func TestDeletionManifestBuilder_Errors(t *testing.T) {
	tempDir := t.TempDir()
	ctx := context.Background()
	objectClient, err := local.NewFSObjectClient(local.FSConfig{
		Directory: tempDir,
	})
	require.NoError(t, err)

	// Create delete request batch holding a request only for user1.
	batch := newDeleteRequestBatch(nil)
	batch.addDeleteRequest(&DeleteRequest{
		UserID:    user1,
		RequestID: req1,
		Query:     lblFooBar,
		StartTime: 0,
		EndTime:   100,
	})

	// Create builder
	builder, err := newDeletionManifestBuilder(objectClient, *batch)
	require.NoError(t, err)

	// A series owned by user2 must be rejected: no requests were loaded for it.
	err = builder.AddSeries(ctx, table1, &mockSeries{
		userID: user2,
		labels: mustParseLabel(lblFooBar),
		chunks: buildChunks(0, 25),
	})
	require.EqualError(t, err, fmt.Sprintf("no requests loaded for user: %s", user2))

	// Nothing was selected for deletion, so Finish must report that.
	err = builder.Finish(ctx)
	require.EqualError(t, err, ErrNoChunksSelectedForDeletion.Error())
}
Loading…
Reference in new issue