mirror of https://github.com/grafana/loki
reduce number of list calls on shared object store when using boltdb-shipper (#3283)
parent
1cd37a306c
commit
76e713f57d
@ -0,0 +1,111 @@ |
|||||||
|
package util |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"fmt" |
||||||
|
"strings" |
||||||
|
"sync" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/cortexproject/cortex/pkg/chunk" |
||||||
|
) |
||||||
|
|
||||||
|
const ( |
||||||
|
delimiter = "/" |
||||||
|
cacheTimeout = time.Minute |
||||||
|
) |
||||||
|
|
||||||
|
// CachedObjectClient is meant for reducing number of LIST calls on hosted object stores(S3, GCS, Azure Blob Storage and Swift).
|
||||||
|
// We as of now do a LIST call per table when we need to find its objects.
|
||||||
|
// CachedObjectClient does flat listing of objects which is only supported by hosted object stores mentioned above.
|
||||||
|
// In case of boltdb files stored by shipper, the listed objects would have keys like <table-name>/<filename>.
|
||||||
|
// For each List call without a prefix(which is actually done to get list of tables),
|
||||||
|
// CachedObjectClient would build a map of TableName -> chunk.StorageObject which would be used as a cache for subsequent List calls for getting list of objects for tables.
|
||||||
|
// Cache items are evicted after first read or a timeout. The cache is rebuilt during List call with empty prefix or we encounter a cache miss.
|
||||||
|
type CachedObjectClient struct { |
||||||
|
chunk.ObjectClient |
||||||
|
tables map[string][]chunk.StorageObject |
||||||
|
tablesMtx sync.Mutex |
||||||
|
cacheBuiltAt time.Time |
||||||
|
} |
||||||
|
|
||||||
|
func NewCachedObjectClient(downstreamClient chunk.ObjectClient) *CachedObjectClient { |
||||||
|
return &CachedObjectClient{ |
||||||
|
ObjectClient: downstreamClient, |
||||||
|
tables: map[string][]chunk.StorageObject{}, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CachedObjectClient) List(ctx context.Context, prefix, _ string) ([]chunk.StorageObject, []chunk.StorageCommonPrefix, error) { |
||||||
|
c.tablesMtx.Lock() |
||||||
|
defer c.tablesMtx.Unlock() |
||||||
|
|
||||||
|
if prefix == "" { |
||||||
|
tables, err := c.listTables(ctx) |
||||||
|
if err != nil { |
||||||
|
return nil, nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return []chunk.StorageObject{}, tables, nil |
||||||
|
} |
||||||
|
|
||||||
|
// While listing objects in a table, prefix is set to <table-name>+delimiter so trim the delimiter first.
|
||||||
|
tableName := strings.TrimSuffix(prefix, delimiter) |
||||||
|
if strings.Contains(tableName, delimiter) { |
||||||
|
return nil, nil, fmt.Errorf("invalid prefix %s for listing table objects", prefix) |
||||||
|
} |
||||||
|
tableObjects, err := c.listTableObjects(ctx, tableName) |
||||||
|
if err != nil { |
||||||
|
return nil, nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return tableObjects, []chunk.StorageCommonPrefix{}, nil |
||||||
|
} |
||||||
|
|
||||||
|
// listTables assumes that tablesMtx is already locked by the caller
|
||||||
|
func (c *CachedObjectClient) listTables(ctx context.Context) ([]chunk.StorageCommonPrefix, error) { |
||||||
|
// do a flat listing by setting delimiter to empty string
|
||||||
|
objects, _, err := c.ObjectClient.List(ctx, "", "") |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
// build the cache and response containing just table names as chunk.StorageCommonPrefix
|
||||||
|
var tableNames []chunk.StorageCommonPrefix |
||||||
|
for _, object := range objects { |
||||||
|
ss := strings.Split(object.Key, delimiter) |
||||||
|
if len(ss) != 2 { |
||||||
|
return nil, fmt.Errorf("invalid object key found %s", object.Key) |
||||||
|
} |
||||||
|
|
||||||
|
if _, ok := c.tables[ss[0]]; !ok { |
||||||
|
tableNames = append(tableNames, chunk.StorageCommonPrefix(ss[0])) |
||||||
|
} |
||||||
|
c.tables[ss[0]] = append(c.tables[ss[0]], object) |
||||||
|
} |
||||||
|
|
||||||
|
c.cacheBuiltAt = time.Now() |
||||||
|
|
||||||
|
return tableNames, nil |
||||||
|
} |
||||||
|
|
||||||
|
// listTableObjects assumes that tablesMtx is already locked by the caller
|
||||||
|
func (c *CachedObjectClient) listTableObjects(ctx context.Context, tableName string) ([]chunk.StorageObject, error) { |
||||||
|
objects, ok := c.tables[tableName] |
||||||
|
if ok && c.cacheBuiltAt.Add(cacheTimeout).After(time.Now()) { |
||||||
|
// evict the element read from cache
|
||||||
|
delete(c.tables, tableName) |
||||||
|
return objects, nil |
||||||
|
} |
||||||
|
|
||||||
|
// requested element not found in the cache, rebuild the cache.
|
||||||
|
_, err := c.listTables(ctx) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
objects = c.tables[tableName] |
||||||
|
// evict the element read from cache
|
||||||
|
delete(c.tables, tableName) |
||||||
|
return objects, nil |
||||||
|
} |
@ -0,0 +1,80 @@ |
|||||||
|
package util |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"testing" |
||||||
|
|
||||||
|
"github.com/cortexproject/cortex/pkg/chunk" |
||||||
|
"github.com/stretchr/testify/require" |
||||||
|
) |
||||||
|
|
||||||
|
type mockHostedObjectClient struct { |
||||||
|
chunk.ObjectClient |
||||||
|
objects []chunk.StorageObject |
||||||
|
} |
||||||
|
|
||||||
|
func (m mockHostedObjectClient) List(_ context.Context, _, _ string) ([]chunk.StorageObject, []chunk.StorageCommonPrefix, error) { |
||||||
|
return m.objects, []chunk.StorageCommonPrefix{}, nil |
||||||
|
} |
||||||
|
|
||||||
|
func TestCachedObjectClient_List(t *testing.T) { |
||||||
|
objectClient := mockHostedObjectClient{ |
||||||
|
objects: []chunk.StorageObject{ |
||||||
|
{ |
||||||
|
Key: "table1/obj1", |
||||||
|
}, |
||||||
|
{ |
||||||
|
Key: "table1/obj2", |
||||||
|
}, |
||||||
|
{ |
||||||
|
Key: "table2/obj1", |
||||||
|
}, |
||||||
|
{ |
||||||
|
Key: "table2/obj2", |
||||||
|
}, |
||||||
|
}, |
||||||
|
} |
||||||
|
|
||||||
|
cachedObjectClient := NewCachedObjectClient(objectClient) |
||||||
|
|
||||||
|
// list tables which should build the cache
|
||||||
|
_, tables, err := cachedObjectClient.List(context.Background(), "", "") |
||||||
|
require.NoError(t, err) |
||||||
|
require.Equal(t, []chunk.StorageCommonPrefix{"table1", "table2"}, tables) |
||||||
|
|
||||||
|
// verify whether cache has right items
|
||||||
|
require.Len(t, cachedObjectClient.tables, 2) |
||||||
|
require.Equal(t, objectClient.objects[:2], cachedObjectClient.tables["table1"]) |
||||||
|
require.Equal(t, objectClient.objects[2:], cachedObjectClient.tables["table2"]) |
||||||
|
|
||||||
|
// list table1 objects
|
||||||
|
objects, _, err := cachedObjectClient.List(context.Background(), "table1/", "") |
||||||
|
require.NoError(t, err) |
||||||
|
require.Equal(t, objectClient.objects[:2], objects) |
||||||
|
|
||||||
|
// verify whether table1 got evicted
|
||||||
|
require.Len(t, cachedObjectClient.tables, 1) |
||||||
|
require.Contains(t, cachedObjectClient.tables, "table2") |
||||||
|
|
||||||
|
// list table2 objects
|
||||||
|
objects, _, err = cachedObjectClient.List(context.Background(), "table2/", "") |
||||||
|
require.NoError(t, err) |
||||||
|
require.Equal(t, objectClient.objects[2:], objects) |
||||||
|
|
||||||
|
// verify whether table2 got evicted as well
|
||||||
|
require.Len(t, cachedObjectClient.tables, 0) |
||||||
|
|
||||||
|
// list table1 again which should rebuild the cache
|
||||||
|
objects, _, err = cachedObjectClient.List(context.Background(), "table1/", "") |
||||||
|
require.NoError(t, err) |
||||||
|
require.Equal(t, objectClient.objects[:2], objects) |
||||||
|
|
||||||
|
// verify whether cache was rebuilt and table1 got evicted already
|
||||||
|
require.Len(t, cachedObjectClient.tables, 1) |
||||||
|
require.Contains(t, cachedObjectClient.tables, "table2") |
||||||
|
|
||||||
|
// verify whether listing non-existing table should not error
|
||||||
|
objects, _, err = cachedObjectClient.List(context.Background(), "table3/", "") |
||||||
|
require.NoError(t, err) |
||||||
|
require.Len(t, objects, 0) |
||||||
|
} |
Loading…
Reference in new issue