package compactor

import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"

	"github.com/grafana/loki/pkg/chunkenc"
	"github.com/grafana/loki/pkg/compactor/retention"
	"github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/index"
	"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage"
	util_log "github.com/grafana/loki/pkg/util/log"
)

type IndexSet interface {
	// GetTableName returns the name of the table this index set belongs to.
	GetTableName() string
	// ListSourceFiles lists the source index files in this index set.
	ListSourceFiles() []storage.IndexFile
	// GetSourceFile downloads the given source index file to the working directory and returns its local path.
	GetSourceFile(indexFile storage.IndexFile) (string, error)
	// GetLogger returns the logger scoped to this index set.
	GetLogger() log.Logger
	// GetWorkingDir returns the local directory used while compacting this index set.
	GetWorkingDir() string
	// SetCompactedIndex sets the CompactedIndex for upload, for applying retention, and for making the compactor remove the source files.
	// CompactedIndex can be nil only when all the source files in the common index set were compacted away to per-tenant indexes.
	// For a user index set, it returns an error if the CompactedIndex is nil and removeSourceFiles is true, since
	// compaction should either create new files or be a no-op when there is nothing to compact.
	// There is no need to call SetCompactedIndex if no changes were made to the index for this IndexSet.
	SetCompactedIndex(compactedIndex CompactedIndex, removeSourceFiles bool) error
}
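
// A minimal sketch of how a table compactor is expected to drive an IndexSet
// (illustrative only; compactTenantIndexes is a hypothetical compaction step,
// not part of this package):
//
//	compactedIndex, err := compactTenantIndexes(idxSet.ListSourceFiles())
//	if err != nil {
//		return err
//	}
//	// upload the new index and drop the now-redundant source files.
//	if err := idxSet.SetCompactedIndex(compactedIndex, true); err != nil {
//		return err
//	}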

// CompactedIndex is built by the TableCompactor for an IndexSet after compaction.
// It is used for:
// 1. applying custom retention and processing delete requests using IndexProcessor
// 2. uploading the compacted index to storage by converting it to an index.Index using ToIndexFile
// After all the operations are done successfully, or in case of a failure, Cleanup is called to clean up the state.
type CompactedIndex interface {
	// IndexProcessor is used for applying custom retention and processing delete requests.
	retention.IndexProcessor
	// Cleanup should clean up all the state built during compaction.
	// It is typically called at the end or in case of an error.
	Cleanup()
	// ToIndexFile converts the CompactedIndex to an IndexFile for uploading to the object store.
	// Once the IndexFile is uploaded using Index.Reader, the file is closed using Index.Close and removed from disk at the path returned by Index.Path.
	ToIndexFile() (index.Index, error)
}
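
// A sketch of the CompactedIndex lifecycle as this file exercises it: retention
// runs first, the result is converted to an index file for upload, and Cleanup
// always runs at the end (see runRetention, upload and cleanup below):
//
//	defer compactedIndex.Cleanup()
//	empty, modified, err := tableMarker.MarkForDelete(ctx, tableName, userID, compactedIndex, logger)
//	if err != nil {
//		return err
//	}
//	if !empty && modified {
//		idx, err := compactedIndex.ToIndexFile()
//		// ... compress and upload idx, then close it and remove it from disk.
//	}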

// indexSet helps with doing operations on a set of index files belonging to a single user or common index files shared by users.
type indexSet struct {
	ctx               context.Context
	tableName, userID string
	workingDir        string
	baseIndexSet      storage.IndexSet

	uploadCompactedDB   bool
	removeSourceObjects bool

	compactedIndex CompactedIndex
	sourceObjects  []storage.IndexFile
	logger         log.Logger
}

// newUserIndexSet initializes a new index set for a user index.
func newUserIndexSet(ctx context.Context, tableName, userID string, baseUserIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if !baseUserIndexSet.IsUserBasedIndexSet() {
		return nil, fmt.Errorf("base index set is not for user index")
	}

	// newIndexSet scopes the logger with the user-id itself, so there is no need to add it here.
	return newIndexSet(ctx, tableName, userID, baseUserIndexSet, workingDir, logger)
}

// newCommonIndexSet initializes a new index set for the common index.
func newCommonIndexSet(ctx context.Context, tableName string, baseUserIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if baseUserIndexSet.IsUserBasedIndexSet() {
		return nil, fmt.Errorf("base index set is not for common index")
	}

	return newIndexSet(ctx, tableName, "", baseUserIndexSet, workingDir, logger)
}
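
// A sketch of how a table-level compactor might set up both kinds of index
// sets (illustrative only; tableWorkingDir, commonBaseIndexSet and
// userBaseIndexSet stand in for values owned by the caller):
//
//	commonIdxSet, err := newCommonIndexSet(ctx, tableName, commonBaseIndexSet, filepath.Join(tableWorkingDir, "common"), logger)
//	if err != nil {
//		return err
//	}
//	userIdxSet, err := newUserIndexSet(ctx, tableName, userID, userBaseIndexSet, filepath.Join(tableWorkingDir, userID), logger)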

func newIndexSet(ctx context.Context, tableName, userID string, baseIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if err := util.EnsureDirectory(workingDir); err != nil {
		return nil, err
	}

	ui := &indexSet{
		ctx:          ctx,
		tableName:    tableName,
		userID:       userID,
		workingDir:   workingDir,
		baseIndexSet: baseIndexSet,
		logger:       logger,
	}

	if userID != "" {
		ui.logger = log.With(logger, "user-id", userID)
	}

	var err error
	ui.sourceObjects, err = ui.baseIndexSet.ListFiles(ui.ctx, ui.tableName, ui.userID, false)
	if err != nil {
		return nil, err
	}

	return ui, nil
}

func (is *indexSet) GetTableName() string {
	return is.tableName
}

func (is *indexSet) GetWorkingDir() string {
	return is.workingDir
}

func (is *indexSet) ListSourceFiles() []storage.IndexFile {
	return is.sourceObjects
}

func (is *indexSet) GetSourceFile(indexFile storage.IndexFile) (string, error) {
	decompress := storage.IsCompressedFile(indexFile.Name)
	dst := filepath.Join(is.workingDir, indexFile.Name)
	if decompress {
		// drop the gzip extension from the local file name since the file gets decompressed while downloading.
		dst = strings.TrimSuffix(dst, gzipExtension)
	}

	err := storage.DownloadFileFromStorage(dst, decompress,
		false, storage.LoggerWithFilename(is.logger, indexFile.Name),
		func() (io.ReadCloser, error) {
			return is.baseIndexSet.GetFile(is.ctx, is.tableName, is.userID, indexFile.Name)
		})
	if err != nil {
		return "", err
	}

	return dst, nil
}
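
// A sketch of the typical download loop a compactor would run before
// compacting this index set (illustrative only):
//
//	for _, sourceFile := range idxSet.ListSourceFiles() {
//		localPath, err := idxSet.GetSourceFile(sourceFile)
//		if err != nil {
//			return err
//		}
//		// open the index at localPath and merge it into the compacted index
//	}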

func (is *indexSet) GetLogger() log.Logger {
	return is.logger
}

func (is *indexSet) SetCompactedIndex(compactedIndex CompactedIndex, removeSourceFiles bool) error {
	if compactedIndex == nil && removeSourceFiles && is.userID != "" {
		return errors.New("compacted index can't be nil when remove source files is true for user index set")
	}

	is.setCompactedIndex(compactedIndex, compactedIndex != nil, removeSourceFiles)
	return nil
}

func (is *indexSet) setCompactedIndex(compactedIndex CompactedIndex, uploadCompactedDB, removeSourceObjects bool) {
	is.compactedIndex = compactedIndex
	is.uploadCompactedDB = uploadCompactedDB
	is.removeSourceObjects = removeSourceObjects
}

// runRetention runs retention on the index set.
func (is *indexSet) runRetention(tableMarker retention.TableMarker) error {
	if is.compactedIndex == nil {
		return nil
	}

	empty, modified, err := tableMarker.MarkForDelete(is.ctx, is.tableName, is.userID, is.compactedIndex, is.logger)
	if err != nil {
		return err
	}

	if empty {
		// retention emptied out the index completely, so there is nothing to upload; just remove the source files.
		is.uploadCompactedDB = false
		is.removeSourceObjects = true
	} else if modified {
		// retention modified the index, so upload the new version and remove the now-stale source files.
		is.uploadCompactedDB = true
		is.removeSourceObjects = true
	}

	return nil
}
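
// The resulting state after retention, for reference:
//
//	empty    -> uploadCompactedDB=false, removeSourceObjects=true (drop the index entirely)
//	modified -> uploadCompactedDB=true,  removeSourceObjects=true (replace the index)
//	neither  -> flags keep whatever setCompactedIndex decided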

// upload uploads the compacted index in compressed format.
func (is *indexSet) upload() error {
	if is.compactedIndex == nil {
		return errors.New("can't upload nil or empty compacted index")
	}

	// ToDo(Sandeep): move index uploading to a common function and share it with generic index-shipper
	idx, err := is.compactedIndex.ToIndexFile()
	if err != nil {
		return err
	}

	// close the index file and remove it from disk once the upload attempt is done.
	defer func() {
		filePath := idx.Path()

		if err := idx.Close(); err != nil {
			level.Error(is.logger).Log("msg", "failed to close indexFile", "err", err)
			return
		}

		if err := os.Remove(filePath); err != nil {
			level.Error(is.logger).Log("msg", "failed to remove indexFile", "err", err)
			return
		}
	}()

	fileName := idx.Name()
	level.Debug(is.logger).Log("msg", fmt.Sprintf("uploading index %s", fileName))

	idxPath := idx.Path()

	// compress the index into a temp file next to it, which also gets cleaned up once the upload attempt is done.
	filePath := fmt.Sprintf("%s%s", idxPath, ".temp")
	f, err := os.Create(filePath)
	if err != nil {
		return err
	}

	defer func() {
		if err := f.Close(); err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to close temp file", "path", filePath, "err", err)
		}

		if err := os.Remove(filePath); err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to remove temp file", "path", filePath, "err", err)
		}
	}()

	compressedWriter := chunkenc.Gzip.GetWriter(f)
	defer chunkenc.Gzip.PutWriter(compressedWriter)

	idxReader, err := idx.Reader()
	if err != nil {
		return err
	}

	if _, err := idxReader.Seek(0, io.SeekStart); err != nil {
		return err
	}

	if _, err := io.Copy(compressedWriter, idxReader); err != nil {
		return err
	}

	if err := compressedWriter.Close(); err != nil {
		return err
	}

	// flush the compressed file to disk and seek back to the beginning so PutFile reads it from the start.
	if err := f.Sync(); err != nil {
		return err
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return err
	}

	return is.baseIndexSet.PutFile(is.ctx, is.tableName, is.userID, fmt.Sprintf("%s.gz", fileName), f)
}

// removeFilesFromStorage deletes source objects from storage.
func (is *indexSet) removeFilesFromStorage() error {
	level.Info(is.logger).Log("msg", "removing source db files from storage", "count", len(is.sourceObjects))

	for _, object := range is.sourceObjects {
		err := is.baseIndexSet.DeleteFile(is.ctx, is.tableName, is.userID, object.Name)
		if err != nil {
			return err
		}
	}

	return nil
}

// done takes care of the remaining file operations, which includes:
// - uploading the compacted db if required.
// - removing the source objects from storage if required.
func (is *indexSet) done() error {
	if is.uploadCompactedDB {
		if err := is.upload(); err != nil {
			return err
		}
	}

	if is.removeSourceObjects {
		return is.removeFilesFromStorage()
	}

	return nil
}
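
// A sketch of the expected end-to-end order of calls once compaction has
// produced (or skipped) a CompactedIndex (illustrative only):
//
//	defer idxSet.cleanup()
//	if err := idxSet.runRetention(tableMarker); err != nil {
//		return err
//	}
//	if err := idxSet.done(); err != nil {
//		return err
//	}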

// cleanup releases the state built by the compacted index, if there is one.
func (is *indexSet) cleanup() {
	if is.compactedIndex == nil {
		return
	}
	is.compactedIndex.Cleanup()
}