Encryption: Make DEKs cache TTL & cleanup interval configurable (#46042)

* Make DEKs cache TTL & cleanup interval configurable

* Improve 'data_keys_cache_ttl' setting description

* Fix test
pull/46667/head
Joan López de la Franca Beltran 3 years ago committed by GitHub
parent fb06804450
commit 2081f37e95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 9
      conf/defaults.ini
  2. 9
      conf/sample.ini
  3. 71
      pkg/services/secrets/manager/cache.go
  4. 6
      pkg/services/secrets/manager/helpers.go
  5. 61
      pkg/services/secrets/manager/manager.go
  6. 34
      pkg/services/secrets/manager/manager_test.go

@ -295,6 +295,15 @@ content_security_policy_template = """script-src 'self' 'unsafe-eval' 'unsafe-in
# Controls if old angular plugins are supported or not. This will be disabled by default in Grafana v9.
angular_support_enabled = true
[security.encryption]
# Defines the time-to-live (TTL) for decrypted data encryption keys stored in memory (cache).
# Please note that small values may cause performance issues due to a high frequency of decryption operations.
data_keys_cache_ttl = 15m
# Defines the interval at which the data encryption keys cache is cleaned up.
# On every interval, decrypted data encryption keys that reached the TTL are removed from the cache.
data_keys_cache_cleanup_interval = 1m
#################################### Snapshots ###########################
[snapshots]
# snapshot sharing options

@ -295,6 +295,15 @@
# Controls if old angular plugins are supported or not. This will be disabled by default in Grafana v9.
;angular_support_enabled = true
[security.encryption]
# Defines the time-to-live (TTL) for decrypted data encryption keys stored in memory (cache).
# Please note that small values may cause performance issues due to a high frequency of decryption operations.
;data_keys_cache_ttl = 15m
# Defines the interval at which the data encryption keys cache is cleaned up.
# On every interval, decrypted data encryption keys that reached the TTL are removed from the cache.
;data_keys_cache_cleanup_interval = 1m
#################################### Snapshots ###########################
[snapshots]
# snapshot sharing options

@ -0,0 +1,71 @@
package manager
import (
"sync"
"time"
)
// now is the clock consulted for every expiration decision in this file.
// It is a package-level variable so that tests can swap in a fake clock.
var now = time.Now

// dataKeyCacheEntry pairs a data key with the instant after which the cache
// must stop serving it.
type dataKeyCacheEntry struct {
	dataKey    []byte
	expiration time.Time
}

// expired reports whether the current time, as returned by now, is strictly
// later than the entry's expiration. An entry whose expiration equals the
// current time is still considered live.
func (e dataKeyCacheEntry) expired() bool {
	return now().After(e.expiration)
}

// dataKeyCache is an in-memory map of data keys indexed by id, where every
// entry lives for cacheTTL after it was last added. Each method acquires the
// embedded RWMutex before touching entries.
type dataKeyCache struct {
	sync.RWMutex
	entries  map[string]dataKeyCacheEntry
	cacheTTL time.Duration
}

// newDataKeyCache returns an empty cache whose entries expire ttl after being
// added.
func newDataKeyCache(ttl time.Duration) *dataKeyCache {
	cache := new(dataKeyCache)
	cache.cacheTTL = ttl
	cache.entries = map[string]dataKeyCacheEntry{}
	return cache
}

// get returns the data key stored under id and true when a live entry exists.
// A missing entry, or one that has expired but has not been swept yet, yields
// (nil, false). Looking up a key neither evicts it nor extends its lifetime.
// The returned slice is the stored slice itself, not a copy.
func (c *dataKeyCache) get(id string) ([]byte, bool) {
	c.RLock()
	defer c.RUnlock()

	if cached, found := c.entries[id]; found && !cached.expired() {
		return cached.dataKey, true
	}
	return nil, false
}

// add stores dataKey under id, replacing any previous entry for that id, and
// sets its expiration to cacheTTL from the current time. The slice is stored
// as given, without copying.
func (c *dataKeyCache) add(id string, dataKey []byte) {
	c.Lock()
	defer c.Unlock()

	deadline := now().Add(c.cacheTTL)
	c.entries[id] = dataKeyCacheEntry{dataKey: dataKey, expiration: deadline}
}

// removeExpired deletes every entry whose expiration has passed.
func (c *dataKeyCache) removeExpired() {
	c.Lock()
	defer c.Unlock()

	for key, cached := range c.entries {
		if !cached.expired() {
			continue
		}
		delete(c.entries, key)
	}
}

// flush discards all entries by replacing the underlying map with a new,
// empty one.
func (c *dataKeyCache) flush() {
	c.Lock()
	defer c.Unlock()

	c.entries = make(map[string]dataKeyCacheEntry)
}

@ -22,7 +22,11 @@ func SetupTestService(tb testing.TB, store secrets.Store) *SecretsService {
}
raw, err := ini.Load([]byte(`
[security]
secret_key = ` + defaultKey))
secret_key = ` + defaultKey + `
[security.encryption]
data_keys_cache_ttl = 5m
data_keys_cache_cleanup_interval = 1ns`))
require.NoError(tb, err)
features := featuremgmt.WithFeatures(featuremgmt.FlagEnvelopeEncryption)

@ -29,7 +29,7 @@ type SecretsService struct {
currentProviderID secrets.ProviderID
providers map[secrets.ProviderID]secrets.Provider
dataKeyCache map[string]dataKeyCacheItem
dataKeyCache *dataKeyCache
log log.Logger
}
@ -62,6 +62,9 @@ func ProvideSecretsService(
logger.Debug("Envelope encryption state", "enabled", enabled, "current provider", currentProviderID)
ttl := settings.KeyValue("security.encryption", "data_keys_cache_ttl").MustDuration(15 * time.Minute)
cache := newDataKeyCache(ttl)
s := &SecretsService{
store: store,
enc: enc,
@ -69,7 +72,7 @@ func ProvideSecretsService(
usageStats: usageStats,
providers: providers,
currentProviderID: currentProviderID,
dataKeyCache: make(map[string]dataKeyCacheItem),
dataKeyCache: cache,
features: features,
log: logger,
}
@ -115,11 +118,6 @@ func (s *SecretsService) registerUsageMetrics() {
})
}
type dataKeyCacheItem struct {
expiry time.Time
dataKey []byte
}
var b64 = base64.RawStdEncoding
func (s *SecretsService) Encrypt(ctx context.Context, payload []byte, opt secrets.EncryptionOptions) ([]byte, error) {
@ -299,20 +297,15 @@ func (s *SecretsService) newDataKey(ctx context.Context, name string, scope stri
}
// 4. Cache its unencrypted value and return it
s.dataKeyCache[name] = dataKeyCacheItem{
expiry: now().Add(dekTTL),
dataKey: dataKey,
}
s.dataKeyCache.add(name, dataKey)
return dataKey, nil
}
// dataKey looks up DEK in cache or database, and decrypts it
func (s *SecretsService) dataKey(ctx context.Context, name string) ([]byte, error) {
if item, exists := s.dataKeyCache[name]; exists {
item.expiry = now().Add(dekTTL)
s.dataKeyCache[name] = item
return item.dataKey, nil
if dataKey, exists := s.dataKeyCache.get(name); exists {
return dataKey, nil
}
// 1. get encrypted data key from database
@ -333,10 +326,7 @@ func (s *SecretsService) dataKey(ctx context.Context, name string) ([]byte, erro
}
// 3. cache data key
s.dataKeyCache[name] = dataKeyCacheItem{
expiry: now().Add(dekTTL),
dataKey: decrypted,
}
s.dataKeyCache.add(name, decrypted)
return decrypted, nil
}
@ -348,25 +338,20 @@ func (s *SecretsService) GetProviders() map[secrets.ProviderID]secrets.Provider
func (s *SecretsService) ReEncryptDataKeys(ctx context.Context) error {
err := s.store.ReEncryptDataKeys(ctx, s.providers, s.currentProviderID)
if err != nil {
return nil
return err
}
// Invalidate cache
s.dataKeyCache = make(map[string]dataKeyCacheItem)
return err
}
s.dataKeyCache.flush()
// These variables are used to test the code
// responsible for periodically cleaning up
// data encryption keys cache.
var (
now = time.Now
dekTTL = 15 * time.Minute
gcInterval = time.Minute
)
return nil
}
func (s *SecretsService) Run(ctx context.Context) error {
gc := time.NewTicker(gcInterval)
gc := time.NewTicker(
s.settings.KeyValue("security.encryption", "data_keys_cache_cleanup_interval").
MustDuration(time.Minute),
)
grp, gCtx := errgroup.WithContext(ctx)
for _, p := range s.providers {
@ -381,7 +366,7 @@ func (s *SecretsService) Run(ctx context.Context) error {
select {
case <-gc.C:
s.log.Debug("removing expired data encryption keys from cache...")
s.removeExpiredItems()
s.dataKeyCache.removeExpired()
s.log.Debug("done removing expired data encryption keys from cache")
case <-gCtx.Done():
s.log.Debug("grafana is shutting down; stopping...")
@ -395,11 +380,3 @@ func (s *SecretsService) Run(ctx context.Context) error {
}
}
}
func (s *SecretsService) removeExpiredItems() {
for id, dek := range s.dataKeyCache {
if dek.expiry.Before(now()) {
delete(s.dataKeyCache, id)
}
}
}

@ -280,18 +280,12 @@ func TestSecretsService_Run(t *testing.T) {
require.NoError(t, err)
// Data encryption key cache should contain one element
require.Len(t, svc.dataKeyCache, 1)
// Execute background process after key's TTL, to force
// clean up process, during a hundred milliseconds with
// gc ticker configured on every nanosecond, to ensure
// the ticker is triggered.
gcInterval = time.Nanosecond
require.Len(t, svc.dataKeyCache.entries, 1)
t.Cleanup(func() { now = time.Now })
now = func() time.Time { return time.Now().Add(dekTTL) }
now = func() time.Time { return time.Now().Add(10 * time.Minute) }
ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
ctx, cancel := context.WithTimeout(ctx, 1*time.Second)
defer cancel()
err = svc.Run(ctx)
@ -300,23 +294,7 @@ func TestSecretsService_Run(t *testing.T) {
// Then, once the ticker has been triggered,
// the cleanup process should have happened,
// therefore the cache should be empty.
require.Len(t, svc.dataKeyCache, 0)
})
t.Run("should update data key expiry after every use", func(t *testing.T) {
// Encrypt to generate data encryption key
withoutScope := secrets.WithoutScope()
_, err := svc.Encrypt(ctx, []byte("grafana"), withoutScope)
require.NoError(t, err)
// New call to Encrypt one minute later should update cache entry's expiry
t.Cleanup(func() { now = time.Now })
now = func() time.Time { return time.Now().Add(time.Minute) }
_, err = svc.Encrypt(ctx, []byte("grafana"), withoutScope)
require.NoError(t, err)
dataKeyID := svc.keyName(withoutScope())
assert.True(t, svc.dataKeyCache[dataKeyID].expiry.After(time.Now().Add(dekTTL)))
require.Len(t, svc.dataKeyCache.entries, 0)
})
}
@ -350,11 +328,11 @@ func TestSecretsService_ReEncryptDataKeys(t *testing.T) {
// Decrypt to ensure data key is cached
_, err := svc.Decrypt(ctx, ciphertext)
require.NoError(t, err)
require.NotEmpty(t, svc.dataKeyCache)
require.NotEmpty(t, svc.dataKeyCache.entries)
err = svc.ReEncryptDataKeys(ctx)
require.NoError(t, err)
assert.Empty(t, svc.dataKeyCache)
assert.Empty(t, svc.dataKeyCache.entries)
})
}

Loading…
Cancel
Save