From a6f774e57ee8931ecdf13c9a63a044b7fc5c0793 Mon Sep 17 00:00:00 2001 From: Andreas Karlsson Date: Sat, 19 Apr 2025 03:10:28 +0200 Subject: [PATCH] PG-1444 Move relation key deletion to smgr_unlink() Replaces the old way we deleted keys which was built for tde_heap_basic with deleting the relation key when smgr_unlink() is called on the main fork. This function is always called after commit/abort when a relation deletion has been registered, even if no main fork would exist. This approach means we do not need to WAL log any event for deleting relation keys; the normal SMGR unlink also handles that, which fits well into the current approach of doing most of the encryption at the SMGR layer. We also remove the subtransaction test which is no longer useful since it tested things very specific to the old key deletion. --- contrib/pg_tde/Makefile | 2 - contrib/pg_tde/expected/subtransaction.out | 30 --- contrib/pg_tde/meson.build | 2 - contrib/pg_tde/sql/subtransaction.sql | 25 --- contrib/pg_tde/src/access/pg_tde_tdemap.c | 91 ++------- .../pg_tde/src/include/access/pg_tde_tdemap.h | 2 +- .../src/include/transam/pg_tde_xact_handler.h | 18 -- contrib/pg_tde/src/pg_tde.c | 2 - contrib/pg_tde/src/smgr/pg_tde_smgr.c | 24 ++- .../pg_tde/src/transam/pg_tde_xact_handler.c | 186 ------------------ 10 files changed, 40 insertions(+), 342 deletions(-) delete mode 100644 contrib/pg_tde/expected/subtransaction.out delete mode 100644 contrib/pg_tde/sql/subtransaction.sql delete mode 100644 contrib/pg_tde/src/include/transam/pg_tde_xact_handler.h delete mode 100644 contrib/pg_tde/src/transam/pg_tde_xact_handler.c diff --git a/contrib/pg_tde/Makefile b/contrib/pg_tde/Makefile index 4ec814004e9..2c3567cbd6e 100644 --- a/contrib/pg_tde/Makefile +++ b/contrib/pg_tde/Makefile @@ -21,7 +21,6 @@ partition_table \ pg_tde_is_encrypted \ recreate_storage \ relocate \ -subtransaction \ tablespace \ vault_v2_test \ version \ @@ -33,7 +32,6 @@ src/encryption/enc_aes.o \ 
src/access/pg_tde_tdemap.o \ src/access/pg_tde_xlog.o \ src/access/pg_tde_xlog_encrypt.o \ -src/transam/pg_tde_xact_handler.o \ src/keyring/keyring_curl.o \ src/keyring/keyring_file.o \ src/keyring/keyring_vault.o \ diff --git a/contrib/pg_tde/expected/subtransaction.out b/contrib/pg_tde/expected/subtransaction.out deleted file mode 100644 index ebfde89210a..00000000000 --- a/contrib/pg_tde/expected/subtransaction.out +++ /dev/null @@ -1,30 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS pg_tde; -SELECT pg_tde_add_database_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per'); - pg_tde_add_database_key_provider_file ---------------------------------------- - 1 -(1 row) - -SELECT pg_tde_set_key_using_database_key_provider('test-db-key','file-vault'); - pg_tde_set_key_using_database_key_provider --------------------------------------------- - -(1 row) - -BEGIN; -- Nesting level 1 -SAVEPOINT sp; -CREATE TABLE foo(s TEXT); -- Nesting level 2 -RELEASE SAVEPOINT sp; -SAVEPOINT sp; -CREATE TABLE bar(s TEXT); -- Nesting level 2 -ROLLBACK TO sp; -- Rollback should not affect first subtransaction -COMMIT; -BEGIN; -- Nesting level 1 -SAVEPOINT sp; -DROP TABLE foo; -- Nesting level 2 -RELEASE SAVEPOINT sp; -SAVEPOINT sp; -CREATE TABLE bar(s TEXT); -- Nesting level 2 -ROLLBACK TO sp; -- Rollback should not affect first subtransaction -COMMIT; -DROP EXTENSION pg_tde; diff --git a/contrib/pg_tde/meson.build b/contrib/pg_tde/meson.build index da12b21a0e4..9f28aef15af 100644 --- a/contrib/pg_tde/meson.build +++ b/contrib/pg_tde/meson.build @@ -21,7 +21,6 @@ pg_tde_sources = files( 'src/pg_tde_event_capture.c', 'src/pg_tde_guc.c', 'src/smgr/pg_tde_smgr.c', - 'src/transam/pg_tde_xact_handler.c', ) tde_frontend_sources = files( @@ -97,7 +96,6 @@ sql_tests = [ 'pg_tde_is_encrypted', 'relocate', 'recreate_storage', - 'subtransaction', 'tablespace', 'vault_v2_test', 'version', diff --git a/contrib/pg_tde/sql/subtransaction.sql b/contrib/pg_tde/sql/subtransaction.sql deleted file mode 
100644 index 121f1b67c3a..00000000000 --- a/contrib/pg_tde/sql/subtransaction.sql +++ /dev/null @@ -1,25 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS pg_tde; - -SELECT pg_tde_add_database_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per'); -SELECT pg_tde_set_key_using_database_key_provider('test-db-key','file-vault'); - - -BEGIN; -- Nesting level 1 -SAVEPOINT sp; -CREATE TABLE foo(s TEXT); -- Nesting level 2 -RELEASE SAVEPOINT sp; -SAVEPOINT sp; -CREATE TABLE bar(s TEXT); -- Nesting level 2 -ROLLBACK TO sp; -- Rollback should not affect first subtransaction -COMMIT; - -BEGIN; -- Nesting level 1 -SAVEPOINT sp; -DROP TABLE foo; -- Nesting level 2 -RELEASE SAVEPOINT sp; -SAVEPOINT sp; -CREATE TABLE bar(s TEXT); -- Nesting level 2 -ROLLBACK TO sp; -- Rollback should not affect first subtransaction -COMMIT; - -DROP EXTENSION pg_tde; \ No newline at end of file diff --git a/contrib/pg_tde/src/access/pg_tde_tdemap.c b/contrib/pg_tde/src/access/pg_tde_tdemap.c index b0b7fd97267..316e0b09408 100644 --- a/contrib/pg_tde/src/access/pg_tde_tdemap.c +++ b/contrib/pg_tde/src/access/pg_tde_tdemap.c @@ -13,7 +13,6 @@ #include "postgres.h" #include "access/pg_tde_tdemap.h" #include "common/file_perm.h" -#include "transam/pg_tde_xact_handler.h" #include "storage/fd.h" #include "utils/wait_event.h" #include "utils/memutils.h" @@ -129,7 +128,6 @@ static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDES static void pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const TDEPrincipalKey *principal_key); static off_t pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path); static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *rel_key_data, TDEPrincipalKey *principal_key, bool write_xlog); -static bool pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t offset); static int keyrotation_init_file(const TDESignedPrincipalKeyInfo 
*signed_key_info, char *rotated_filename, const char *filename, off_t *curr_pos); static void finalize_key_rotation(const char *path_old, const char *path_new); static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos); @@ -486,9 +484,6 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *rel_key_ /* Let's close the file. */ close(map_fd); - - /* Register the entry to be freed in case the transaction aborts */ - RegisterEntryForDeletion(rlocator, curr_pos, false); } /* @@ -548,43 +543,33 @@ pg_tde_write_key_map_entry_redo(const TDEMapEntry *write_map_entry, TDESignedPri LWLockRelease(tde_lwlock_enc_keys()); } -static bool -pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t offset) +/* + * Mark relation map entry as free and overwrite the key + * + * This function is called by the pg_tde SMGR when storage is unlinked on + * transaction commit/abort. + */ +void +pg_tde_free_key_map_entry(const RelFileLocator *rlocator) { + char db_map_path[MAXPGPATH]; File map_fd; - bool found = false; off_t curr_pos = 0; - /* Open and validate file for basic correctness. */ - map_fd = pg_tde_open_file_write(db_map_path, NULL, false, &curr_pos); + Assert(rlocator); - /* - * If we need to delete an entry, we expect an offset value to the start - * of the entry to speed up the operation. Otherwise, we'd be sequentially - * scanning the entire map file. - */ - if (offset > 0) - { - curr_pos = lseek(map_fd, offset, SEEK_SET); + pg_tde_set_db_file_path(rlocator->dbOid, db_map_path); - if (curr_pos == -1) - { - ereport(ERROR, - errcode_for_file_access(), - errmsg("could not seek in tde map file \"%s\": %m", - db_map_path)); - } - } + LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); + + /* Open and validate file for basic correctness. */ + map_fd = pg_tde_open_file_write(db_map_path, NULL, false, &curr_pos); - /* - * Read until we find an empty slot. 
Otherwise, read until end. This seems - * to be less frequent than vacuum. So let's keep this function here - * rather than overloading the vacuum process. - */ while (1) { TDEMapEntry read_map_entry; off_t prev_pos = curr_pos; + bool found; found = pg_tde_read_one_map_entry(map_fd, rlocator, MAP_ENTRY_VALID, &read_map_entry, &curr_pos); @@ -592,7 +577,6 @@ pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t if (curr_pos == prev_pos) break; - /* We found a valid entry for the relation */ if (found) { TDEMapEntry empty_map_entry = { @@ -607,52 +591,9 @@ pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t } } - /* Let's close the file. */ close(map_fd); - /* Return -1 indicating that no entry was removed */ - return found; -} - -/* - * Called when transaction is being completed; either committed or aborted. - * By default, when a transaction creates an entry, we mark it as MAP_ENTRY_VALID. - * Only during the abort phase of the transaction that we are proceed on with - * marking the entry as MAP_ENTRY_FREE. This optimistic strategy that assumes - * that transaction will commit more often then getting aborted avoids - * unnecessary locking. - * - * The offset allows us to simply seek to the desired location and mark the entry - * as MAP_ENTRY_FREE without needing any further processing. 
- */ -void -pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset) -{ - bool found; - char db_map_path[MAXPGPATH] = {0}; - - Assert(rlocator); - - /* Get the file paths */ - pg_tde_set_db_file_path(rlocator->dbOid, db_map_path); - - LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); - - /* Remove the map entry if found */ - found = pg_tde_delete_map_entry(rlocator, db_map_path, offset); - LWLockRelease(tde_lwlock_enc_keys()); - - if (!found) - { - ereport(WARNING, - errcode(ERRCODE_NO_DATA_FOUND), - errmsg("could not find the required map entry for deletion of relation %d in tablespace %d in tde map file \"%s\": %m", - rlocator->relNumber, - rlocator->spcOid, - db_map_path)); - - } } /* diff --git a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h index 20f0acd269f..da0f4669141 100644 --- a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h +++ b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h @@ -101,7 +101,7 @@ extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path extern InternalKey *pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator); extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 flags); -extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset); +extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator); extern void pg_tde_write_key_map_entry_redo(const TDEMapEntry *write_map_entry, TDESignedPrincipalKeyInfo *signed_key_info); #define PG_TDE_MAP_FILENAME "pg_tde_%d_map" diff --git a/contrib/pg_tde/src/include/transam/pg_tde_xact_handler.h b/contrib/pg_tde/src/include/transam/pg_tde_xact_handler.h deleted file mode 100644 index 7838423ae95..00000000000 --- a/contrib/pg_tde/src/include/transam/pg_tde_xact_handler.h +++ /dev/null @@ -1,18 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_tde_xact_handler.h - * TDE 
transaction handling. - * - *------------------------------------------------------------------------- - */ -#ifndef PG_TDE_XACT_HANDLER_H -#define PG_TDE_XACT_HANDLER_H - -#include "postgres.h" -#include "storage/relfilelocator.h" - -extern void RegisterTdeXactCallbacks(void); -extern void RegisterEntryForDeletion(const RelFileLocator *rlocator, off_t map_entry_offset, bool atCommit); - - -#endif /* PG_TDE_XACT_HANDLER_H */ diff --git a/contrib/pg_tde/src/pg_tde.c b/contrib/pg_tde/src/pg_tde.c index aedb8eb6994..77ee55609a8 100644 --- a/contrib/pg_tde/src/pg_tde.c +++ b/contrib/pg_tde/src/pg_tde.c @@ -13,7 +13,6 @@ #include "postgres.h" #include "funcapi.h" #include "pg_tde.h" -#include "transam/pg_tde_xact_handler.h" #include "miscadmin.h" #include "storage/ipc.h" #include "storage/lwlock.h" @@ -121,7 +120,6 @@ _PG_init(void) prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = tde_shmem_startup; - RegisterTdeXactCallbacks(); InstallFileKeyring(); InstallVaultV2Keyring(); InstallKmipKeyring(); diff --git a/contrib/pg_tde/src/smgr/pg_tde_smgr.c b/contrib/pg_tde/src/smgr/pg_tde_smgr.c index a83fb9ebda8..9b11f57aced 100644 --- a/contrib/pg_tde/src/smgr/pg_tde_smgr.c +++ b/contrib/pg_tde/src/smgr/pg_tde_smgr.c @@ -115,6 +115,28 @@ tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } } +static void +tde_mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo) +{ + mdunlink(rlocator, forknum, isRedo); + + /* + * As of PostgreSQL 17 we are called once per fork, no matter if it + * exists or not, from smgrdounlinkall() so deleting the relation key on + * attempting to delete the main fork is safe. Additionally since we + * unlink the files after commit/abort we do not need to care about + * concurrent accesses. + * + * We support InvalidForkNumber to be similar to mdunlink() but it can + * actually never happen. 
+ */ + if (forknum == MAIN_FORKNUM || forknum == InvalidForkNumber) + { + if (!RelFileLocatorBackendIsTemp(rlocator) && GetSMGRRelationKey(rlocator)) + pg_tde_free_key_map_entry(&rlocator.locator); + } +} + static void tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) @@ -274,7 +296,7 @@ static const struct f_smgr tde_smgr = { .smgr_close = mdclose, .smgr_create = tde_mdcreate, .smgr_exists = mdexists, - .smgr_unlink = mdunlink, + .smgr_unlink = tde_mdunlink, .smgr_extend = tde_mdextend, .smgr_zeroextend = mdzeroextend, .smgr_prefetch = mdprefetch, diff --git a/contrib/pg_tde/src/transam/pg_tde_xact_handler.c b/contrib/pg_tde/src/transam/pg_tde_xact_handler.c deleted file mode 100644 index 3f0a84df69f..00000000000 --- a/contrib/pg_tde/src/transam/pg_tde_xact_handler.c +++ /dev/null @@ -1,186 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_tde_xact_handler.c - * Transaction handling routines for pg_tde - * - * - * IDENTIFICATION - * src/transam/pg_tde_xact_handler.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "access/xact.h" -#include "utils/memutils.h" -#include "utils/palloc.h" -#include "utils/elog.h" -#include "storage/fd.h" -#include "transam/pg_tde_xact_handler.h" -#include "access/pg_tde_tdemap.h" - -typedef struct PendingMapEntryDelete -{ - off_t map_entry_offset; /* map entry offset */ - RelFileLocator rlocator; /* main for use as relation OID */ - bool atCommit; /* T=delete at commit; F=delete at abort */ - int nestLevel; /* xact nesting level of request */ - struct PendingMapEntryDelete *next; /* linked-list link */ -} PendingMapEntryDelete; - -static PendingMapEntryDelete *pendingDeletes = NULL; /* head of linked list */ - -static void do_pending_deletes(bool isCommit); -static void reassign_pending_deletes_to_parent_xact(void); -static void pending_delete_cleanup(void); - 
-/* Transaction Callbacks from Backend*/ -static void -pg_tde_xact_callback(XactEvent event, void *arg) -{ - if (event == XACT_EVENT_PARALLEL_ABORT || - event == XACT_EVENT_ABORT) - { - ereport(DEBUG2, errmsg("pg_tde_xact_callback: aborting transaction")); - do_pending_deletes(false); - } - else if (event == XACT_EVENT_COMMIT) - { - do_pending_deletes(true); - pending_delete_cleanup(); - } - else if (event == XACT_EVENT_PREPARE) - { - pending_delete_cleanup(); - } -} - -static void -pg_tde_subxact_callback(SubXactEvent event, SubTransactionId mySubid, - SubTransactionId parentSubid, void *arg) -{ - /* TODO: takle all possible transaction states */ - if (event == SUBXACT_EVENT_ABORT_SUB) - { - ereport(DEBUG2, - errmsg("pg_tde_subxact_callback: aborting subtransaction")); - do_pending_deletes(false); - } - else if (event == SUBXACT_EVENT_COMMIT_SUB) - { - ereport(DEBUG2, - errmsg("pg_tde_subxact_callback: committing subtransaction")); - reassign_pending_deletes_to_parent_xact(); - } -} - -void -RegisterTdeXactCallbacks(void) -{ - RegisterXactCallback(pg_tde_xact_callback, NULL); - RegisterSubXactCallback(pg_tde_subxact_callback, NULL); -} - -void -RegisterEntryForDeletion(const RelFileLocator *rlocator, off_t map_entry_offset, bool atCommit) -{ - PendingMapEntryDelete *pending; - - pending = (PendingMapEntryDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingMapEntryDelete)); - pending->map_entry_offset = map_entry_offset; - pending->rlocator = *rlocator; - pending->atCommit = atCommit; /* delete if abort */ - pending->nestLevel = GetCurrentTransactionNestLevel(); - pending->next = pendingDeletes; - pendingDeletes = pending; -} - -/* - * do_pending_deletes() -- Take care of file deletes at end of xact. - * - * This also runs when aborting a subxact; we want to clean up a failed - * subxact immediately. 
- * - */ -static void -do_pending_deletes(bool isCommit) -{ - int nestLevel = GetCurrentTransactionNestLevel(); - PendingMapEntryDelete *pending; - PendingMapEntryDelete *prev; - PendingMapEntryDelete *next; - - prev = NULL; - for (pending = pendingDeletes; pending != NULL; pending = next) - { - next = pending->next; - if (pending->nestLevel != nestLevel) - { - /* outer-level entries should not be processed yet */ - prev = pending; - continue; - } - - /* unlink list entry first, so we don't retry on failure */ - if (prev) - prev->next = next; - else - pendingDeletes = next; - /* do deletion if called for */ - if (pending->atCommit == isCommit) - { - ereport(LOG, - errmsg("pg_tde_xact_callback: deleting entry at offset %d", - (int) (pending->map_entry_offset))); - pg_tde_free_key_map_entry(&pending->rlocator, pending->map_entry_offset); - } - pfree(pending); - /* prev does not change */ - - } -} - - -/* - * reassign_pending_deletes_to_parent_xact() -- Adjust nesting level of pending deletes. - * - * There are several cases to consider: - * 1. Only top level transaction can perform on-commit deletes. - * 2. Subtransaction and top level transaction can perform on-abort deletes. - * So we have to decrement the nesting level of pending deletes to reassing them to the parent transaction - * if subtransaction was not self aborted. In other words if subtransaction state is commited all its pending - * deletes are reassigned to the parent transaction. - */ -static void -reassign_pending_deletes_to_parent_xact(void) -{ - PendingMapEntryDelete *pending; - int nestLevel = GetCurrentTransactionNestLevel(); - - for (pending = pendingDeletes; pending != NULL; pending = pending->next) - { - if (pending->nestLevel == nestLevel) - pending->nestLevel--; - } -} - -/* - * pending_delete_cleanup -- Clean up after a successful PREPARE or COMMIT - * - * What we have to do here is throw away the in-memory state about pending - * file deletes. 
It's all been recorded in the 2PC state file and - * it's no longer our job to worry about it. - */ -static void -pending_delete_cleanup(void) -{ - PendingMapEntryDelete *pending; - PendingMapEntryDelete *next; - - for (pending = pendingDeletes; pending != NULL; pending = next) - { - next = pending->next; - pendingDeletes = next; - pfree(pending); - } -}