Invalidate caches on delete (#5661)

* Generate cache invalidation numbers in the delete store

* Get cache generation numbers from the store on request

* changelog

* rename tombstones to something more meaningful

* User invisible module

* query frontend relies on a compactor to get the cache generation number

* fix serialization

* source -> name

* lint errors

* lint errors

* log non-200 responses

* add jsonnet changes

* lint

* review feedback

* review feedback

* client rename
pull/5719/head^2
Travis Patterson 4 years ago committed by GitHub
parent 3fa6cc9fde
commit 9cef86b162
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGELOG.md
  2. 9
      docs/sources/configuration/_index.md
  3. 42
      pkg/loki/modules.go
  4. 3
      pkg/lokifrontend/config.go
  5. 4
      pkg/querier/queryrange/limits_test.go
  6. 6
      pkg/querier/queryrange/roundtrip.go
  7. 20
      pkg/querier/queryrange/roundtrip_test.go
  8. 2
      pkg/storage/stores/shipper/compactor/compactor.go
  9. 49
      pkg/storage/stores/shipper/compactor/deletion/delete_requests_manager_test.go
  10. 46
      pkg/storage/stores/shipper/compactor/deletion/delete_requests_store.go
  11. 24
      pkg/storage/stores/shipper/compactor/deletion/delete_requests_store_test.go
  12. 8
      pkg/storage/stores/shipper/compactor/deletion/noop_delete_requests_store.go
  13. 22
      pkg/storage/stores/shipper/compactor/deletion/request_handler.go
  14. 81
      pkg/storage/stores/shipper/compactor/generationnumber/gennumber_client.go
  15. 40
      pkg/storage/stores/shipper/compactor/generationnumber/gennumber_client_test.go
  16. 153
      pkg/storage/stores/shipper/compactor/generationnumber/gennumber_loader.go
  17. 54
      pkg/storage/stores/shipper/compactor/generationnumber/gennumber_loader_test.go
  18. 36
      pkg/storage/stores/shipper/compactor/generationnumber/metrics.go
  19. 4
      production/ksonnet/loki/boltdb_shipper.libsonnet
  20. 1
      production/ksonnet/loki/config.libsonnet

@ -178,6 +178,7 @@ to include only the most relevant.
* [5543](https://github.com/grafana/loki/pull/5543) **cyriltovena**: update loki go version to 1.17.8
* [5450](https://github.com/grafana/loki/pull/5450) **BenoitKnecht**: pkg/ruler/base: Add external_labels option
* [5484](https://github.com/grafana/loki/pull/5484) **sandeepsukhani**: Add support for per user index query readiness with limits overrides
* [5661](https://github.com/grafana/loki/pull/5661) **masslessparticle**: Invalidate caches on deletes
* [5358](https://github.com/grafana/loki/pull/5358) **DylanGuedes**: Add `RingMode` support to `IndexGateway`
* [5435](https://github.com/grafana/loki/pull/5435) **slim-bean**: set match_max_concurrent true by default
* [5361](https://github.com/grafana/loki/pull/5361) **cyriltovena**: Add usage report into Loki.

@ -370,6 +370,10 @@ The `frontend` block configures the Loki query-frontend.
# CLI flag: -frontend.downstream-url
[downstream_url: <string> | default = ""]
# Address, including port, where the compactor api is served
# CLI flag: -frontend.compactor-address
[compactor_address: <string> | default = ""]
# Log queries that are slower than the specified duration. Set to 0 to disable.
# Set to < 0 to enable on all queries.
# CLI flag: -frontend.log-queries-longer-than
@ -2041,6 +2045,11 @@ compacts index shards to more performant forms.
# CLI flag: -boltdb.shipper.compactor.delete-request-cancel-period
[delete_request_cancel_period: <duration> | default = 24h]
# Which deletion mode to use. Supported values are: disabled,
# whole-stream-deletion, filter-only, filter-and-delete
# CLI flag: -boltdb.shipper.compactor.deletion-mode
[deletion_mode: <string> | default = "whole-stream-deletion"]
# Maximum number of tables to compact in parallel.
# While increasing this value, please make sure compactor has enough disk space
# allocated to be able to store and compact as many tables.

@ -10,18 +10,17 @@ import (
"os"
"time"
"github.com/grafana/dskit/kv"
"github.com/grafana/dskit/tenant"
gerrors "github.com/pkg/errors"
"github.com/NYTimes/gziphandler"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/kv"
"github.com/grafana/dskit/kv/codec"
"github.com/grafana/dskit/kv/memberlist"
"github.com/grafana/dskit/ring"
"github.com/grafana/dskit/runtimeconfig"
"github.com/grafana/dskit/services"
"github.com/grafana/dskit/tenant"
gerrors "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -53,6 +52,7 @@ import (
"github.com/grafana/loki/pkg/storage/stores/shipper"
"github.com/grafana/loki/pkg/storage/stores/shipper/compactor"
"github.com/grafana/loki/pkg/storage/stores/shipper/compactor/deletion"
"github.com/grafana/loki/pkg/storage/stores/shipper/compactor/generationnumber"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexgateway"
"github.com/grafana/loki/pkg/storage/stores/shipper/indexgateway/indexgatewaypb"
"github.com/grafana/loki/pkg/storage/stores/shipper/uploads"
@ -494,11 +494,17 @@ func (disabledShuffleShardingLimits) MaxQueriersPerUser(userID string) int { ret
func (t *Loki) initQueryFrontendTripperware() (_ services.Service, err error) {
level.Debug(util_log.Logger).Log("msg", "initializing query frontend tripperware")
cacheGenClient, err := t.cacheGenClient()
if err != nil {
return nil, err
}
tripperware, stopper, err := queryrange.NewTripperware(
t.Cfg.QueryRange,
util_log.Logger,
t.overrides,
t.Cfg.SchemaConfig,
generationnumber.NewGenNumberLoader(cacheGenClient, prometheus.DefaultRegisterer),
prometheus.DefaultRegisterer,
)
if err != nil {
@ -510,6 +516,29 @@ func (t *Loki) initQueryFrontendTripperware() (_ services.Service, err error) {
return services.NewIdleService(nil, nil), nil
}
// cacheGenClient returns the client the query frontend uses to fetch
// per-tenant cache generation numbers from the compactor.
//
// When delete-based filtering is disabled, a no-op store (which also
// satisfies CacheGenClient) is returned so no HTTP requests are ever made.
// Otherwise an HTTP client is pointed at the configured compactor address.
func (t *Loki) cacheGenClient() (generationnumber.CacheGenClient, error) {
filteringEnabled, err := deletion.FilteringEnabled(t.Cfg.CompactorConfig.DeletionMode)
if err != nil {
return nil, err
}
// No filtering means caches never need invalidating; skip address checks.
if !filteringEnabled {
return deletion.NewNoOpDeleteRequestsStore(), nil
}
compactorAddress := t.Cfg.Frontend.CompactorAddress
if t.Cfg.isModuleEnabled(All) || t.Cfg.isModuleEnabled(Read) {
// In single binary or read modes, this module depends on Server
compactorAddress = fmt.Sprintf("http://127.0.0.1:%d", t.Cfg.Server.HTTPListenPort)
}
if compactorAddress == "" {
return nil, errors.New("query filtering for deletes requires 'compactor_address' to be configured")
}
// Short timeout: gen-number lookups are small GETs; fail fast rather than
// stalling frontend initialization.
return generationnumber.NewGenNumberClient(compactorAddress, &http.Client{Timeout: 5 * time.Second})
}
func (t *Loki) initQueryFrontend() (_ services.Service, err error) {
level.Debug(util_log.Logger).Log("msg", "initializing query frontend", "config", fmt.Sprintf("%+v", t.Cfg.Frontend))
@ -766,11 +795,12 @@ func (t *Loki) initCompactor() (services.Service, error) {
t.Server.HTTP.Path("/compactor/ring").Methods("GET", "POST").Handler(t.compactor)
// TODO: update this when the other deletion modes are available
if t.Cfg.CompactorConfig.RetentionEnabled && t.compactor.DeleteMode() == deletion.WholeStreamDeletion {
if t.Cfg.CompactorConfig.RetentionEnabled && t.compactor.DeleteMode() != deletion.Disabled {
t.Server.HTTP.Path("/loki/api/v1/delete").Methods("PUT", "POST").Handler(t.HTTPAuthMiddleware.Wrap(http.HandlerFunc(t.compactor.DeleteRequestsHandler.AddDeleteRequestHandler)))
t.Server.HTTP.Path("/loki/api/v1/delete").Methods("GET").Handler(t.HTTPAuthMiddleware.Wrap(http.HandlerFunc(t.compactor.DeleteRequestsHandler.GetAllDeleteRequestsHandler)))
t.Server.HTTP.Path("/loki/api/v1/delete").Methods("DELETE").Handler(t.HTTPAuthMiddleware.Wrap(http.HandlerFunc(t.compactor.DeleteRequestsHandler.CancelDeleteRequestHandler)))
t.Server.HTTP.Path("/loki/api/v1/cache/generation_numbers").Methods("GET").Handler(t.HTTPAuthMiddleware.Wrap(http.HandlerFunc(t.compactor.DeleteRequestsHandler.GetCacheGenerationNumberHandler)))
}
return t.compactor, nil

@ -15,6 +15,7 @@ type Config struct {
CompressResponses bool `yaml:"compress_responses"`
DownstreamURL string `yaml:"downstream_url"`
CompactorAddress string `yaml:"compactor_address"`
TailProxyURL string `yaml:"tail_proxy_url"`
}
@ -27,6 +28,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.BoolVar(&cfg.CompressResponses, "querier.compress-http-responses", false, "Compress HTTP responses.")
f.StringVar(&cfg.DownstreamURL, "frontend.downstream-url", "", "URL of downstream Prometheus.")
f.StringVar(&cfg.CompactorAddress, "frontend.compactor-address", "", "host and port where the compactor API is listening")
f.StringVar(&cfg.TailProxyURL, "frontend.tail-proxy-url", "", "URL of querier for tail proxy.")
}

@ -52,7 +52,7 @@ func Test_seriesLimiter(t *testing.T) {
cfg.CacheResults = false
// split in 7 with 2 in // max.
l := WithSplitByLimits(fakeLimits{maxSeries: 1, maxQueryParallelism: 2}, time.Hour)
tpw, stopper, err := NewTripperware(cfg, util_log.Logger, l, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(cfg, util_log.Logger, l, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -237,7 +237,7 @@ func Test_MaxQueryLookBack(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{
maxQueryLookback: 1 * time.Hour,
maxQueryParallelism: 1,
}, config.SchemaConfig{}, nil)
}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}

@ -42,6 +42,7 @@ func NewTripperware(
log log.Logger,
limits Limits,
schema config.SchemaConfig,
cacheGenNumLoader queryrangebase.CacheGenNumberLoader,
registerer prometheus.Registerer,
) (queryrangebase.Tripperware, Stopper, error) {
metrics := NewMetrics(registerer)
@ -61,7 +62,7 @@ func NewTripperware(
}
metricsTripperware, err := NewMetricTripperware(cfg, log, limits, schema, LokiCodec, c,
PrometheusExtractor{}, metrics, registerer)
cacheGenNumLoader, PrometheusExtractor{}, metrics, registerer)
if err != nil {
return nil, nil, err
}
@ -387,6 +388,7 @@ func NewMetricTripperware(
schema config.SchemaConfig,
codec queryrangebase.Codec,
c cache.Cache,
cacheGenNumLoader queryrangebase.CacheGenNumberLoader,
extractor queryrangebase.Extractor,
metrics *Metrics,
registerer prometheus.Registerer,
@ -414,7 +416,7 @@ func NewMetricTripperware(
limits,
codec,
extractor,
nil,
cacheGenNumLoader,
func(r queryrangebase.Request) bool {
return !r.GetCachingOptions().Disabled
},

@ -109,7 +109,7 @@ var (
// those tests are mostly for testing the glue between all component and make sure they activate correctly.
func TestMetricsTripperware(t *testing.T) {
l := WithSplitByLimits(fakeLimits{maxSeries: math.MaxInt32, maxQueryParallelism: 1}, 4*time.Hour)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, l, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, l, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -172,7 +172,7 @@ func TestMetricsTripperware(t *testing.T) {
}
func TestLogFilterTripperware(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -221,7 +221,7 @@ func TestLogFilterTripperware(t *testing.T) {
func TestInstantQueryTripperware(t *testing.T) {
testShardingConfig := testConfig
testShardingConfig.ShardedQueries = true
tpw, stopper, err := NewTripperware(testShardingConfig, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testShardingConfig, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -257,7 +257,7 @@ func TestInstantQueryTripperware(t *testing.T) {
}
func TestSeriesTripperware(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -298,7 +298,7 @@ func TestSeriesTripperware(t *testing.T) {
}
func TestLabelsTripperware(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -344,7 +344,7 @@ func TestLabelsTripperware(t *testing.T) {
}
func TestLogNoRegex(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -378,7 +378,7 @@ func TestLogNoRegex(t *testing.T) {
}
func TestUnhandledPath(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -403,7 +403,7 @@ func TestUnhandledPath(t *testing.T) {
func TestRegexpParamsSupport(t *testing.T) {
l := WithSplitByLimits(fakeLimits{maxSeries: 1, maxQueryParallelism: 2}, 4*time.Hour)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, l, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, l, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -486,7 +486,7 @@ func TestPostQueries(t *testing.T) {
}
func TestEntriesLimitsTripperware(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxEntriesLimitPerQuery: 5000}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{maxEntriesLimitPerQuery: 5000}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}
@ -517,7 +517,7 @@ func TestEntriesLimitsTripperware(t *testing.T) {
}
func TestEntriesLimitWithZeroTripperware(t *testing.T) {
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil)
tpw, stopper, err := NewTripperware(testConfig, util_log.Logger, fakeLimits{}, config.SchemaConfig{}, nil, nil)
if stopper != nil {
defer stopper.Stop()
}

@ -229,7 +229,7 @@ func (c *Compactor) init(storageConfig storage.Config, schemaConfig config.Schem
return err
}
if c.deleteMode == deletion.WholeStreamDeletion {
if c.deleteMode != deletion.Disabled {
deletionWorkDir := filepath.Join(c.cfg.WorkingDirectory, "deletion")
c.deleteRequestsStore, err = deletion.NewDeleteStore(deletionWorkDir, c.indexStorageClient)

@ -14,46 +14,6 @@ import (
const testUserID = "test-user"
type mockDeleteRequestsStore struct {
deleteRequests []DeleteRequest
}
func (m mockDeleteRequestsStore) GetDeleteRequestsByStatus(ctx context.Context, status DeleteRequestStatus) ([]DeleteRequest, error) {
return m.deleteRequests, nil
}
func (m mockDeleteRequestsStore) UpdateStatus(ctx context.Context, userID, requestID string, newStatus DeleteRequestStatus) error {
return nil
}
func (m mockDeleteRequestsStore) AddDeleteRequest(ctx context.Context, userID string, startTime, endTime model.Time, query string) error {
panic("implement me")
}
func (m mockDeleteRequestsStore) GetDeleteRequestsForUserByStatus(ctx context.Context, userID string, status DeleteRequestStatus) ([]DeleteRequest, error) {
panic("implement me")
}
func (m mockDeleteRequestsStore) GetAllDeleteRequestsForUser(ctx context.Context, userID string) ([]DeleteRequest, error) {
panic("implement me")
}
func (m mockDeleteRequestsStore) GetDeleteRequest(ctx context.Context, userID, requestID string) (*DeleteRequest, error) {
panic("implement me")
}
func (m mockDeleteRequestsStore) GetPendingDeleteRequestsForUser(ctx context.Context, userID string) ([]DeleteRequest, error) {
panic("implement me")
}
func (m mockDeleteRequestsStore) RemoveDeleteRequest(ctx context.Context, userID, requestID string, createdAt, startTime, endTime model.Time) error {
panic("implement me")
}
func (m mockDeleteRequestsStore) Stop() {
panic("implement me")
}
func TestDeleteRequestsManager_Expired(t *testing.T) {
type resp struct {
isExpired bool
@ -256,3 +216,12 @@ func TestDeleteRequestsManager_Expired(t *testing.T) {
})
}
}
// mockDeleteRequestsStore embeds the DeleteRequestsStore interface so the
// test only needs to implement the methods it actually exercises; calling
// any other method panics on the nil embedded interface.
type mockDeleteRequestsStore struct {
DeleteRequestsStore
deleteRequests []DeleteRequest
}
// GetDeleteRequestsByStatus returns the canned requests regardless of status.
func (m mockDeleteRequestsStore) GetDeleteRequestsByStatus(_ context.Context, _ DeleteRequestStatus) ([]DeleteRequest, error) {
return m.deleteRequests, nil
}

@ -12,6 +12,8 @@ import (
"time"
"unsafe"
"github.com/weaveworks/common/user"
"github.com/prometheus/common/model"
"github.com/grafana/loki/pkg/storage/stores/series/index"
@ -29,6 +31,7 @@ const (
deleteRequestID indexType = "1"
deleteRequestDetails indexType = "2"
cacheGenNum indexType = "3"
tempFileSuffix = ".temp"
DeleteRequestsTableName = "delete_requests"
@ -43,7 +46,9 @@ type DeleteRequestsStore interface {
UpdateStatus(ctx context.Context, userID, requestID string, newStatus DeleteRequestStatus) error
GetDeleteRequest(ctx context.Context, userID, requestID string) (*DeleteRequest, error)
RemoveDeleteRequest(ctx context.Context, userID, requestID string, createdAt, startTime, endTime model.Time) error
GetCacheGenerationNumber(ctx context.Context, userID string) (string, error)
Stop()
Name() string
}
// deleteRequestsStore provides all the methods required to manage lifecycle of delete request and things related to it.
@ -106,6 +111,9 @@ func (ds *deleteRequestsStore) addDeleteRequest(ctx context.Context, userID stri
writeBatch.Add(DeleteRequestsTableName, fmt.Sprintf("%s:%s", deleteRequestDetails, userIDAndRequestID),
[]byte(rangeValue), []byte(query))
// create a gen number for this result
writeBatch.Add(DeleteRequestsTableName, fmt.Sprintf("%s:%s", cacheGenNum, userID), []byte{}, generateCacheGenNumber())
err := ds.indexClient.BatchWrite(ctx, writeBatch)
if err != nil {
return nil, err
@ -139,6 +147,11 @@ func (ds *deleteRequestsStore) UpdateStatus(ctx context.Context, userID, request
writeBatch := ds.indexClient.NewWriteBatch()
writeBatch.Add(DeleteRequestsTableName, string(deleteRequestID), []byte(userIDAndRequestID), []byte(newStatus))
if newStatus == StatusProcessed {
// remove runtime filtering for deleted data
writeBatch.Add(DeleteRequestsTableName, fmt.Sprintf("%s:%s", cacheGenNum, userID), []byte{}, generateCacheGenNumber())
}
return ds.indexClient.BatchWrite(ctx, writeBatch)
}
@ -162,6 +175,27 @@ func (ds *deleteRequestsStore) GetDeleteRequest(ctx context.Context, userID, req
return &deleteRequests[0], nil
}
// GetCacheGenerationNumber returns the stored cache generation number for
// userID, or the empty string when none has been written yet.
func (ds *deleteRequestsStore) GetCacheGenerationNumber(ctx context.Context, userID string) (string, error) {
query := index.Query{TableName: DeleteRequestsTableName, HashValue: fmt.Sprintf("%s:%s", cacheGenNum, userID)}
// The index client requires an org ID on the context.
ctx = user.InjectOrgID(ctx, userID)
genNumber := ""
err := ds.indexClient.QueryPages(ctx, []index.Query{query}, func(query index.Query, batch index.ReadBatchResult) (shouldContinue bool) {
itr := batch.Iterator()
for itr.Next() {
// At most one gen number is stored per user; take the first row and stop.
genNumber = string(itr.Value())
break
}
return false
})
if err != nil {
return "", err
}
return genNumber, nil
}
func (ds *deleteRequestsStore) queryDeleteRequests(ctx context.Context, deleteQuery index.Query) ([]DeleteRequest, error) {
deleteRequests := []DeleteRequest{}
// No need to lock inside the callback since we run a single index query.
@ -234,9 +268,17 @@ func (ds *deleteRequestsStore) RemoveDeleteRequest(ctx context.Context, userID,
writeBatch.Delete(DeleteRequestsTableName, fmt.Sprintf("%s:%s", deleteRequestDetails, userIDAndRequestID),
[]byte(rangeValue))
// ensure caches are invalidated
writeBatch.Add(DeleteRequestsTableName, fmt.Sprintf("%s:%s", cacheGenNum, userID),
[]byte{}, []byte(strconv.FormatInt(time.Now().UnixNano(), 10)))
return ds.indexClient.BatchWrite(ctx, writeBatch)
}
// Name identifies this store, e.g. as the "source" label on the
// cache-gen load failure metric.
func (ds *deleteRequestsStore) Name() string {
return "delete_requests_store"
}
func parseDeleteRequestTimestamps(rangeValue []byte, deleteRequest DeleteRequest) (DeleteRequest, error) {
hexParts := strings.Split(string(rangeValue), ":")
if len(hexParts) != 3 {
@ -299,3 +341,7 @@ func splitUserIDAndRequestID(rangeValue string) (userID, requestID string) {
func unsafeGetString(buf []byte) string {
return *((*string)(unsafe.Pointer(&buf)))
}
// generateCacheGenNumber produces a fresh cache generation number: the
// current wall-clock time in nanoseconds, encoded as a decimal byte string.
func generateCacheGenNumber() []byte {
	now := time.Now().UnixNano()
	return strconv.AppendInt(nil, now, 10)
}

@ -99,6 +99,14 @@ func TestDeleteRequestsStore(t *testing.T) {
require.NoError(t, err)
compareRequests(t, user2ExpectedRequests, user2Requests)
createGenNumber, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user1)
require.NoError(t, err)
require.NotEmpty(t, createGenNumber)
createGenNumber2, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user2)
require.NoError(t, err)
require.NotEmpty(t, createGenNumber2)
// get individual delete requests by id and see if they have expected values
for _, expectedRequest := range append(user1Requests, user2Requests...) {
actualRequest, err := testDeleteRequestsStore.GetDeleteRequest(context.Background(), expectedRequest.UserID, expectedRequest.RequestID)
@ -133,6 +141,14 @@ func TestDeleteRequestsStore(t *testing.T) {
require.NoError(t, err)
compareRequests(t, user2ExpectedRequests, user2Requests)
updateGenNumber, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user1)
require.NoError(t, err)
require.NotEqual(t, createGenNumber, updateGenNumber)
updateGenNumber2, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user2)
require.NoError(t, err)
require.NotEqual(t, createGenNumber2, updateGenNumber2)
// delete the requests from the store updated previously
var remainingRequests []DeleteRequest
for i := 0; i < len(user1ExpectedRequests); i++ {
@ -154,6 +170,14 @@ func TestDeleteRequestsStore(t *testing.T) {
deleteRequests, err = testDeleteRequestsStore.GetDeleteRequestsByStatus(context.Background(), StatusReceived)
require.NoError(t, err)
compareRequests(t, remainingRequests, deleteRequests)
deleteGenNumber, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user1)
require.NoError(t, err)
require.NotEqual(t, updateGenNumber, deleteGenNumber)
deleteGenNumber2, err := testDeleteRequestsStore.GetCacheGenerationNumber(context.Background(), user2)
require.NoError(t, err)
require.NotEqual(t, updateGenNumber2, deleteGenNumber2)
}
func compareRequests(t *testing.T, expected []DeleteRequest, actual []DeleteRequest) {

@ -36,4 +36,12 @@ func (d *noOpDeleteRequestsStore) RemoveDeleteRequest(ctx context.Context, userI
return nil
}
// GetCacheGenerationNumber always reports no generation number, so caches
// are never invalidated when deletion is disabled.
func (d *noOpDeleteRequestsStore) GetCacheGenerationNumber(ctx context.Context, userID string) (string, error) {
return "", nil
}
// Stop is a no-op; there is nothing to shut down.
func (d *noOpDeleteRequestsStore) Stop() {}
// Name returns the empty string for the no-op implementation.
func (d *noOpDeleteRequestsStore) Name() string {
return ""
}

@ -161,3 +161,25 @@ func (dm *DeleteRequestHandler) CancelDeleteRequestHandler(w http.ResponseWriter
w.WriteHeader(http.StatusNoContent)
}
// GetCacheGenerationNumberHandler handles requests for a user's cache generation number.
// It resolves the tenant from the request context and writes the number as a
// JSON-encoded string.
func (dm *DeleteRequestHandler) GetCacheGenerationNumberHandler(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	userID, err := tenant.TenantID(ctx)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	cacheGenNumber, err := dm.deleteRequestsStore.GetCacheGenerationNumber(ctx, userID)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "error getting cache generation number", "err", err)
		http.Error(w, fmt.Sprintf("error getting cache generation number %v", err), http.StatusInternalServerError)
		return
	}

	// Declare the payload type so clients decode the body as JSON rather
	// than relying on net/http content sniffing.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(cacheGenNumber); err != nil {
		level.Error(util_log.Logger).Log("msg", "error marshalling response", "err", err)
		http.Error(w, fmt.Sprintf("Error marshalling response: %v", err), http.StatusInternalServerError)
	}
}

@ -0,0 +1,81 @@
package generationnumber
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"github.com/go-kit/log/level"
"github.com/grafana/loki/pkg/util/log"
)
const (
// orgHeaderKey carries the tenant ID, Loki's standard multi-tenancy header.
orgHeaderKey = "X-Scope-OrgID"
// cacheGenNumPath is the compactor endpoint serving generation numbers.
cacheGenNumPath = "/loki/api/v1/cache/generation_numbers"
)
// CacheGenClient fetches per-tenant cache generation numbers.
type CacheGenClient interface {
GetCacheGenerationNumber(ctx context.Context, userID string) (string, error)
// Name identifies the implementation, e.g. for metric labels.
Name() string
}
// genNumberClient fetches generation numbers over HTTP from the compactor.
type genNumberClient struct {
url string
httpClient doer
}
// doer abstracts *http.Client so tests can substitute a mock transport.
type doer interface {
Do(*http.Request) (*http.Response, error)
}
// NewGenNumberClient builds a CacheGenClient that queries the cache
// generation number endpoint of the compactor reachable at addr, issuing
// its HTTP requests through c.
func NewGenNumberClient(addr string, c doer) (CacheGenClient, error) {
	parsed, err := url.Parse(addr)
	if err != nil {
		level.Error(log.Logger).Log("msg", "error parsing url", "err", err)
		return nil, err
	}
	parsed.Path = cacheGenNumPath

	client := &genNumberClient{
		url:        parsed.String(),
		httpClient: c,
	}
	return client, nil
}
// Name identifies this client, e.g. as the "source" label on the
// cache-gen load failure metric.
func (c *genNumberClient) Name() string {
return "gen_number_client"
}
// GetCacheGenerationNumber requests the cache generation number for userID
// from the compactor API and decodes the JSON string payload.
// All failures are logged here as well as returned to the caller.
func (c *genNumberClient) GetCacheGenerationNumber(ctx context.Context, userID string) (string, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil)
if err != nil {
level.Error(log.Logger).Log("msg", "error getting cache gen numbers from the store", "err", err)
return "", err
}
// The tenant is conveyed via the standard org ID header, not the URL.
req.Header.Set(orgHeaderKey, userID)
resp, err := c.httpClient.Do(req)
if err != nil {
level.Error(log.Logger).Log("msg", "error getting cache gen numbers from the store", "err", err)
return "", err
}
defer resp.Body.Close()
// Treat any non-2xx status as an error (logged, then returned).
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
err := fmt.Errorf("unexpected status code: %d", resp.StatusCode)
level.Error(log.Logger).Log("msg", "error getting cache gen numbers from the store", "err", err)
return "", err
}
// The handler encodes the number as a JSON string, e.g. `"42"`.
var genNumber string
if err := json.NewDecoder(resp.Body).Decode(&genNumber); err != nil {
level.Error(log.Logger).Log("msg", "error marshalling response", "err", err)
return "", err
}
// err is guaranteed nil at this point.
return genNumber, err
}

@ -0,0 +1,40 @@
package generationnumber
import (
"context"
"io"
"net/http"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestGetCacheGenNumberForUser checks the request the client builds (URL,
// method, org ID header) and that the JSON string body is decoded.
func TestGetCacheGenNumberForUser(t *testing.T) {
httpClient := &mockHTTPClient{ret: `"42"`}
client, err := NewGenNumberClient("http://test-server", httpClient)
require.Nil(t, err)
cacheGenNumber, err := client.GetCacheGenerationNumber(context.Background(), "userID")
require.Nil(t, err)
require.Equal(t, "42", cacheGenNumber)
require.Equal(t, "http://test-server/loki/api/v1/cache/generation_numbers", httpClient.req.URL.String())
require.Equal(t, http.MethodGet, httpClient.req.Method)
require.Equal(t, "userID", httpClient.req.Header.Get("X-Scope-OrgID"))
}
// mockHTTPClient implements doer, capturing the last request and returning
// a canned 200 response whose body is ret.
type mockHTTPClient struct {
ret string
req *http.Request
}
func (c *mockHTTPClient) Do(req *http.Request) (*http.Response, error) {
c.req = req
return &http.Response{
StatusCode: 200,
Body: io.NopCloser(strings.NewReader(c.ret)),
}, nil
}

@ -0,0 +1,153 @@
package generationnumber
import (
"context"
"fmt"
"strconv"
"sync"
"time"
"github.com/go-kit/log/level"
"github.com/grafana/loki/pkg/util/log"
"github.com/prometheus/client_golang/prometheus"
)
// reloadDuration is how often cached generation numbers are refreshed.
const reloadDuration = 5 * time.Minute
// GenNumberLoader caches cache generation numbers per tenant, lazily loading
// them through numberGetter and refreshing known tenants in the background.
type GenNumberLoader struct {
numberGetter CacheGenClient
// numbers maps tenant ID -> last seen generation number; guarded by lock.
numbers map[string]string
// quit signals the background loop to exit when closed.
quit chan struct{}
lock sync.RWMutex
metrics *genLoaderMetrics
}
// NewGenNumberLoader returns a loader backed by g (a no-op getter when g is
// nil) and starts the background refresh loop. Call Stop to terminate it.
func NewGenNumberLoader(g CacheGenClient, registerer prometheus.Registerer) *GenNumberLoader {
	if g == nil {
		g = &noopNumberGetter{}
	}

	l := &GenNumberLoader{
		numberGetter: g,
		numbers:      make(map[string]string),
		// quit must be initialized here: Stop closes it, and closing a nil
		// channel panics.
		quit:    make(chan struct{}),
		metrics: newGenLoaderMetrics(registerer),
	}
	go l.loop()

	return l
}
// loop periodically refreshes the generation numbers of all known tenants
// until the quit channel is closed by Stop.
func (l *GenNumberLoader) loop() {
	timer := time.NewTicker(reloadDuration)
	// Release the ticker's resources when the loop exits.
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			if err := l.reload(); err != nil {
				level.Error(log.Logger).Log("msg", "error reloading generation numbers", "err", err)
			}
		case <-l.quit:
			return
		}
	}
}
// reload fetches fresh generation numbers for all known tenants and merges
// any changed values into the cache.
func (l *GenNumberLoader) reload() error {
updatedGenNumbers, err := l.getUpdatedGenNumbers()
if err != nil {
return err
}
l.lock.Lock()
defer l.lock.Unlock()
// Only changed entries are returned, so this overwrites just stale values.
for userID, genNumber := range updatedGenNumbers {
l.numbers[userID] = genNumber
}
return nil
}
// getUpdatedGenNumbers re-fetches the generation number of every known
// tenant and returns only the entries whose value changed.
func (l *GenNumberLoader) getUpdatedGenNumbers() (map[string]string, error) {
	// Snapshot the current numbers under the read lock so the potentially
	// slow per-tenant fetches below do not block writers for the whole
	// refresh cycle.
	l.lock.RLock()
	current := make(map[string]string, len(l.numbers))
	for userID, genNumber := range l.numbers {
		current[userID] = genNumber
	}
	l.lock.RUnlock()

	updatedGenNumbers := make(map[string]string)
	for userID, oldGenNumber := range current {
		genNumber, err := l.numberGetter.GetCacheGenerationNumber(context.Background(), userID)
		if err != nil {
			return nil, err
		}

		if oldGenNumber != genNumber {
			updatedGenNumbers[userID] = genNumber
		}
	}
	return updatedGenNumbers, nil
}
// GetResultsCacheGenNumber returns the results-cache generation number
// covering all given tenants; used by the query frontend's caching layer.
func (l *GenNumberLoader) GetResultsCacheGenNumber(tenantIDs []string) string {
return l.getCacheGenNumbersPerTenants(tenantIDs)
}
// getCacheGenNumbersPerTenants returns the largest generation number among
// the given tenants as a decimal string, or "" when no tenant has one.
// NOTE(review): non-numeric values are logged and then treated as 0 — confirm
// that is the intended behavior rather than propagating an error.
func (l *GenNumberLoader) getCacheGenNumbersPerTenants(tenantIDs []string) string {
var max int
for _, tenantID := range tenantIDs {
genNumber := l.getCacheGenNumber(tenantID)
if genNumber == "" {
continue
}
number, err := strconv.Atoi(genNumber)
if err != nil {
level.Error(log.Logger).Log("msg", "error parsing resultsCacheGenNumber", "user", tenantID, "err", err)
}
if number > max {
max = number
}
}
// No tenant had a (positive) number: report no generation number at all.
if max == 0 {
return ""
}
return fmt.Sprint(max)
}
// getCacheGenNumber returns the cached generation number for userID, lazily
// fetching and caching it on first use. On fetch failure it logs, bumps the
// failure metric, and returns "" so the query proceeds with its cache.
func (l *GenNumberLoader) getCacheGenNumber(userID string) string {
	l.lock.RLock()
	if genNumber, ok := l.numbers[userID]; ok {
		l.lock.RUnlock()
		return genNumber
	}
	l.lock.RUnlock()

	genNumber, err := l.numberGetter.GetCacheGenerationNumber(context.Background(), userID)
	if err != nil {
		level.Error(log.Logger).Log("msg", "error loading cache generation numbers", "err", err)
		// l.metrics may be nil when the loader was built without a
		// registerer; guard to avoid a nil-pointer panic on this error path.
		if l.metrics != nil {
			l.metrics.cacheGenLoadFailures.WithLabelValues(l.numberGetter.Name()).Inc()
		}
		return ""
	}

	l.lock.Lock()
	defer l.lock.Unlock()
	l.numbers[userID] = genNumber
	return genNumber
}
// Stop terminates the background reload loop.
// NOTE(review): NewGenNumberLoader never initializes l.quit, and close() on a
// nil channel panics — confirm the constructor sets quit before relying on Stop.
func (l *GenNumberLoader) Stop() {
close(l.quit)
}
// noopNumberGetter is the fallback used when no client is supplied; it
// always reports an empty generation number and an empty name.
type noopNumberGetter struct{}
func (g *noopNumberGetter) GetCacheGenerationNumber(_ context.Context, _ string) (string, error) {
return "", nil
}
func (g *noopNumberGetter) Name() string {
return ""
}

@ -0,0 +1,54 @@
package generationnumber
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
// TestGetCacheGenNumber verifies the loader returns the maximum generation
// number across the requested tenants, and "" when no tenants are given.
func TestGetCacheGenNumber(t *testing.T) {
s := &mockGenNumberClient{
genNumbers: map[string]string{
"tenant-a": "1000",
"tenant-b": "1050",
},
}
loader := NewGenNumberLoader(s, nil)
for _, tc := range []struct {
name string
expectedResultsCacheGenNumber string
tenantIDs []string
}{
{
name: "single tenant with numeric values",
tenantIDs: []string{"tenant-a"},
expectedResultsCacheGenNumber: "1000",
},
{
name: "multiple tenants with numeric values",
tenantIDs: []string{"tenant-a", "tenant-b"},
expectedResultsCacheGenNumber: "1050",
},
{
name: "no tenants", // not really an expected call, edge case check to avoid any panics
},
} {
t.Run(tc.name, func(t *testing.T) {
assert.Equal(t, tc.expectedResultsCacheGenNumber, loader.GetResultsCacheGenNumber(tc.tenantIDs))
})
}
}
// mockGenNumberClient serves generation numbers from an in-memory map;
// unknown tenants get the zero value "".
type mockGenNumberClient struct {
genNumbers map[string]string
}
func (g *mockGenNumberClient) GetCacheGenerationNumber(ctx context.Context, userID string) (string, error) {
return g.genNumbers[userID], nil
}
func (g *mockGenNumberClient) Name() string {
return ""
}

@ -0,0 +1,36 @@
package generationnumber
import (
"github.com/prometheus/client_golang/prometheus"
)
// Make this package level because we want several instances of a loader to be able to report metrics
var metrics *genLoaderMetrics

type genLoaderMetrics struct {
	cacheGenLoadFailures *prometheus.CounterVec
}

// newGenLoaderMetrics returns the shared loader metrics, creating and
// registering them on first use with a non-nil registerer.
// NOTE(review): the package-level singleton is not synchronized — confirm
// loaders are only constructed from a single goroutine.
func newGenLoaderMetrics(r prometheus.Registerer) *genLoaderMetrics {
	if metrics != nil {
		return metrics
	}

	cacheGenLoadFailures := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "delete_cache_gen_load_failures_total",
		Help:      "Total number of failures while loading cache generation number using gen number loader",
	}, []string{"source"})

	if r == nil {
		// No registerer (e.g. tests): hand back usable, unregistered metrics
		// instead of nil so callers never dereference a nil *genLoaderMetrics.
		// Deliberately not cached, so a later call with a real registerer
		// still registers.
		return &genLoaderMetrics{cacheGenLoadFailures: cacheGenLoadFailures}
	}
	r.MustRegister(cacheGenLoadFailures)

	metrics = &genLoaderMetrics{
		cacheGenLoadFailures: cacheGenLoadFailures,
	}
	return metrics
}

@ -74,4 +74,8 @@
statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
statefulSet.mixin.spec.template.spec.securityContext.withFsGroup(10001) // 10001 is the group ID assigned to Loki in the Dockerfile
else {},
compactor_service: if $._config.using_boltdb_shipper then
k.util.serviceFor($.compactor_statefulset)
else {},
}

@ -156,6 +156,7 @@
frontend: {
compress_responses: true,
log_queries_longer_than: '5s',
compactor_address: 'http://compactor.%s.svc.cluster.local:%d' % [$._config.namespace, $._config.http_listen_port],
},
frontend_worker: {
match_max_concurrent: true,

Loading…
Cancel
Save