PG-1444 Move relation key deletion to smgr_unlink()

Replaces the old way we deleted keys, which was built for tde_heap_basic,
with deleting the relation key when smgr_unlink() is called on the
main fork. This function is always called after commit/abort when a
relation deletion has been registered, even if no main fork exists.

This approach means we do not need to WAL log any event for deleting
relation keys; the normal SMGR unlink already handles that, which fits
well into the current approach of doing most of the encryption at the
SMGR layer.

We also remove the subtransaction test, which is no longer useful since
it tested things very specific to the old key deletion.
pull/220/head
Andreas Karlsson 5 months ago committed by Andreas Karlsson
parent ec51d0895a
commit a6f774e57e
  1. 2
      contrib/pg_tde/Makefile
  2. 30
      contrib/pg_tde/expected/subtransaction.out
  3. 2
      contrib/pg_tde/meson.build
  4. 25
      contrib/pg_tde/sql/subtransaction.sql
  5. 91
      contrib/pg_tde/src/access/pg_tde_tdemap.c
  6. 2
      contrib/pg_tde/src/include/access/pg_tde_tdemap.h
  7. 18
      contrib/pg_tde/src/include/transam/pg_tde_xact_handler.h
  8. 2
      contrib/pg_tde/src/pg_tde.c
  9. 24
      contrib/pg_tde/src/smgr/pg_tde_smgr.c
  10. 186
      contrib/pg_tde/src/transam/pg_tde_xact_handler.c

@ -21,7 +21,6 @@ partition_table \
pg_tde_is_encrypted \
recreate_storage \
relocate \
subtransaction \
tablespace \
vault_v2_test \
version \
@ -33,7 +32,6 @@ src/encryption/enc_aes.o \
src/access/pg_tde_tdemap.o \
src/access/pg_tde_xlog.o \
src/access/pg_tde_xlog_encrypt.o \
src/transam/pg_tde_xact_handler.o \
src/keyring/keyring_curl.o \
src/keyring/keyring_file.o \
src/keyring/keyring_vault.o \

@ -1,30 +0,0 @@
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_add_database_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per');
pg_tde_add_database_key_provider_file
---------------------------------------
1
(1 row)
SELECT pg_tde_set_key_using_database_key_provider('test-db-key','file-vault');
pg_tde_set_key_using_database_key_provider
--------------------------------------------
(1 row)
BEGIN; -- Nesting level 1
SAVEPOINT sp;
CREATE TABLE foo(s TEXT); -- Nesting level 2
RELEASE SAVEPOINT sp;
SAVEPOINT sp;
CREATE TABLE bar(s TEXT); -- Nesting level 2
ROLLBACK TO sp; -- Rollback should not affect first subtransaction
COMMIT;
BEGIN; -- Nesting level 1
SAVEPOINT sp;
DROP TABLE foo; -- Nesting level 2
RELEASE SAVEPOINT sp;
SAVEPOINT sp;
CREATE TABLE bar(s TEXT); -- Nesting level 2
ROLLBACK TO sp; -- Rollback should not affect first subtransaction
COMMIT;
DROP EXTENSION pg_tde;

@ -21,7 +21,6 @@ pg_tde_sources = files(
'src/pg_tde_event_capture.c',
'src/pg_tde_guc.c',
'src/smgr/pg_tde_smgr.c',
'src/transam/pg_tde_xact_handler.c',
)
tde_frontend_sources = files(
@ -97,7 +96,6 @@ sql_tests = [
'pg_tde_is_encrypted',
'relocate',
'recreate_storage',
'subtransaction',
'tablespace',
'vault_v2_test',
'version',

@ -1,25 +0,0 @@
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_add_database_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per');
SELECT pg_tde_set_key_using_database_key_provider('test-db-key','file-vault');
BEGIN; -- Nesting level 1
SAVEPOINT sp;
CREATE TABLE foo(s TEXT); -- Nesting level 2
RELEASE SAVEPOINT sp;
SAVEPOINT sp;
CREATE TABLE bar(s TEXT); -- Nesting level 2
ROLLBACK TO sp; -- Rollback should not affect first subtransaction
COMMIT;
BEGIN; -- Nesting level 1
SAVEPOINT sp;
DROP TABLE foo; -- Nesting level 2
RELEASE SAVEPOINT sp;
SAVEPOINT sp;
CREATE TABLE bar(s TEXT); -- Nesting level 2
ROLLBACK TO sp; -- Rollback should not affect first subtransaction
COMMIT;
DROP EXTENSION pg_tde;

@ -13,7 +13,6 @@
#include "postgres.h"
#include "access/pg_tde_tdemap.h"
#include "common/file_perm.h"
#include "transam/pg_tde_xact_handler.h"
#include "storage/fd.h"
#include "utils/wait_event.h"
#include "utils/memutils.h"
@ -129,7 +128,6 @@ static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDES
static void pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const TDEPrincipalKey *principal_key);
static off_t pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path);
static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *rel_key_data, TDEPrincipalKey *principal_key, bool write_xlog);
static bool pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t offset);
static int keyrotation_init_file(const TDESignedPrincipalKeyInfo *signed_key_info, char *rotated_filename, const char *filename, off_t *curr_pos);
static void finalize_key_rotation(const char *path_old, const char *path_new);
static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos);
@ -486,9 +484,6 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *rel_key_
/* Let's close the file. */
close(map_fd);
/* Register the entry to be freed in case the transaction aborts */
RegisterEntryForDeletion(rlocator, curr_pos, false);
}
/*
@ -548,43 +543,33 @@ pg_tde_write_key_map_entry_redo(const TDEMapEntry *write_map_entry, TDESignedPri
LWLockRelease(tde_lwlock_enc_keys());
}
static bool
pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t offset)
/*
* Mark relation map entry as free and overwrite the key
*
* This function is called by the pg_tde SMGR when storage is unlinked on
* transaction commit/abort.
*/
void
pg_tde_free_key_map_entry(const RelFileLocator *rlocator)
{
char db_map_path[MAXPGPATH];
File map_fd;
bool found = false;
off_t curr_pos = 0;
/* Open and validate file for basic correctness. */
map_fd = pg_tde_open_file_write(db_map_path, NULL, false, &curr_pos);
Assert(rlocator);
/*
* If we need to delete an entry, we expect an offset value to the start
* of the entry to speed up the operation. Otherwise, we'd be sequentially
* scanning the entire map file.
*/
if (offset > 0)
{
curr_pos = lseek(map_fd, offset, SEEK_SET);
pg_tde_set_db_file_path(rlocator->dbOid, db_map_path);
if (curr_pos == -1)
{
ereport(ERROR,
errcode_for_file_access(),
errmsg("could not seek in tde map file \"%s\": %m",
db_map_path));
}
}
LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);
/* Open and validate file for basic correctness. */
map_fd = pg_tde_open_file_write(db_map_path, NULL, false, &curr_pos);
/*
* Read until we find an empty slot. Otherwise, read until end. This seems
* to be less frequent than vacuum. So let's keep this function here
* rather than overloading the vacuum process.
*/
while (1)
{
TDEMapEntry read_map_entry;
off_t prev_pos = curr_pos;
bool found;
found = pg_tde_read_one_map_entry(map_fd, rlocator, MAP_ENTRY_VALID, &read_map_entry, &curr_pos);
@ -592,7 +577,6 @@ pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t
if (curr_pos == prev_pos)
break;
/* We found a valid entry for the relation */
if (found)
{
TDEMapEntry empty_map_entry = {
@ -607,52 +591,9 @@ pg_tde_delete_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t
}
}
/* Let's close the file. */
close(map_fd);
/* Return -1 indicating that no entry was removed */
return found;
}
/*
 * pg_tde_free_key_map_entry -- free the key map entry of a relation.
 *
 * Called when a transaction is being completed, either committed or aborted.
 * When a transaction creates an entry it is marked MAP_ENTRY_VALID right
 * away; only if the transaction aborts do we come back and mark the entry
 * MAP_ENTRY_FREE. This optimistic strategy assumes that transactions commit
 * more often than they abort, and so avoids unnecessary locking.
 *
 * rlocator identifies the relation whose entry should be freed; its dbOid
 * selects the map file.
 *
 * offset is handed on to pg_tde_delete_map_entry() and should point at the
 * start of the entry, so that we can seek directly to it instead of scanning
 * the map file from the beginning.
 *
 * The map file is modified while holding the encryption keys lock in
 * exclusive mode. If no matching entry is found, a WARNING is emitted and
 * the function returns normally.
 */
void
pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset)
{
	bool		found;
	char		db_map_path[MAXPGPATH] = {0};

	Assert(rlocator);

	/* Get the file paths */
	pg_tde_set_db_file_path(rlocator->dbOid, db_map_path);

	LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);

	/* Remove the map entry if found */
	found = pg_tde_delete_map_entry(rlocator, db_map_path, offset);

	LWLockRelease(tde_lwlock_enc_keys());

	if (!found)
	{
		/*
		 * relNumber and spcOid are unsigned (Oid-based) values, so they are
		 * printed with %u. No system call failed on this path, so errno is
		 * meaningless here and %m is deliberately not used.
		 */
		ereport(WARNING,
				errcode(ERRCODE_NO_DATA_FOUND),
				errmsg("could not find the required map entry for deletion of relation %u in tablespace %u in tde map file \"%s\"",
					   rlocator->relNumber,
					   rlocator->spcOid,
					   db_map_path));
	}
}
/*

@ -101,7 +101,7 @@ extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path
extern InternalKey *pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator);
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 flags);
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset);
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator);
extern void pg_tde_write_key_map_entry_redo(const TDEMapEntry *write_map_entry, TDESignedPrincipalKeyInfo *signed_key_info);
#define PG_TDE_MAP_FILENAME "pg_tde_%d_map"

@ -1,18 +0,0 @@
/*-------------------------------------------------------------------------
*
* pg_tde_xact_handler.h
* TDE transaction handling.
*
*-------------------------------------------------------------------------
*/
#ifndef PG_TDE_XACT_HANDLER_H
#define PG_TDE_XACT_HANDLER_H
#include "postgres.h"
#include "storage/relfilelocator.h"
extern void RegisterTdeXactCallbacks(void);
extern void RegisterEntryForDeletion(const RelFileLocator *rlocator, off_t map_entry_offset, bool atCommit);
#endif /* PG_TDE_XACT_HANDLER_H */

@ -13,7 +13,6 @@
#include "postgres.h"
#include "funcapi.h"
#include "pg_tde.h"
#include "transam/pg_tde_xact_handler.h"
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
@ -121,7 +120,6 @@ _PG_init(void)
prev_shmem_startup_hook = shmem_startup_hook;
shmem_startup_hook = tde_shmem_startup;
RegisterTdeXactCallbacks();
InstallFileKeyring();
InstallVaultV2Keyring();
InstallKmipKeyring();

@ -115,6 +115,28 @@ tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
}
}
/*
 * tde_mdunlink -- smgr_unlink implementation for the pg_tde storage manager.
 *
 * Removes the on-disk storage through mdunlink() and, when the main fork is
 * the one being unlinked, also frees the relation's key map entry.
 *
 * As of PostgreSQL 17 smgrdounlinkall() calls us once per fork, whether the
 * fork exists or not, so hooking key deletion onto the main fork is safe.
 * The files are unlinked after commit/abort, so concurrent access is not a
 * concern here.
 */
static void
tde_mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
{
	/* The regular md.c unlink always runs first. */
	mdunlink(rlocator, forknum, isRedo);

	/*
	 * Only the main fork triggers key deletion. InvalidForkNumber is
	 * accepted for symmetry with mdunlink(), although it can never actually
	 * be passed in.
	 */
	if (forknum != MAIN_FORKNUM && forknum != InvalidForkNumber)
		return;

	/* Temporary relations are skipped. */
	if (RelFileLocatorBackendIsTemp(rlocator))
		return;

	/* Nothing to free unless a relation key exists for this locator. */
	if (GetSMGRRelationKey(rlocator))
		pg_tde_free_key_map_entry(&rlocator.locator);
}
static void
tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
const void *buffer, bool skipFsync)
@ -274,7 +296,7 @@ static const struct f_smgr tde_smgr = {
.smgr_close = mdclose,
.smgr_create = tde_mdcreate,
.smgr_exists = mdexists,
.smgr_unlink = mdunlink,
.smgr_unlink = tde_mdunlink,
.smgr_extend = tde_mdextend,
.smgr_zeroextend = mdzeroextend,
.smgr_prefetch = mdprefetch,

@ -1,186 +0,0 @@
/*-------------------------------------------------------------------------
*
* pg_tde_xact_handler.c
* Transaction handling routines for pg_tde
*
*
* IDENTIFICATION
* src/transam/pg_tde_xact_handler.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/xact.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#include "storage/fd.h"
#include "transam/pg_tde_xact_handler.h"
#include "access/pg_tde_tdemap.h"
/*
 * One pending request to free a key map entry at transaction end.
 *
 * Requests are created by RegisterEntryForDeletion() and kept in a singly
 * linked list headed by pendingDeletes until do_pending_deletes() or
 * pending_delete_cleanup() removes them.
 */
typedef struct PendingMapEntryDelete
{
/* offset handed to pg_tde_free_key_map_entry() when the entry is freed */
off_t map_entry_offset;
/* copy of the relation's locator, handed to pg_tde_free_key_map_entry() */
RelFileLocator rlocator;
/* T=delete at commit; F=delete at abort (compared against isCommit) */
bool atCommit;
/*
 * xact nesting level of the request; decremented when the owning
 * subtransaction commits so the request moves to the parent
 */
int nestLevel;
/* linked-list link */
struct PendingMapEntryDelete *next;
} PendingMapEntryDelete;
/* head of linked list; new requests are pushed at the front */
static PendingMapEntryDelete *pendingDeletes = NULL;
/* Forward declarations of the list-processing helpers defined below. */
static void do_pending_deletes(bool isCommit);
static void reassign_pending_deletes_to_parent_xact(void);
static void pending_delete_cleanup(void);
/*
 * pg_tde_xact_callback -- top-level transaction event hook.
 *
 * On abort (regular or parallel) the pending on-abort deletions of the
 * current nesting level are carried out. On commit the on-commit deletions
 * are carried out and whatever is left in the list is discarded. On prepare
 * the list is simply discarded. All other events are ignored.
 *
 * The "arg" pointer is unused.
 */
static void
pg_tde_xact_callback(XactEvent event, void *arg)
{
	switch (event)
	{
		case XACT_EVENT_PARALLEL_ABORT:
		case XACT_EVENT_ABORT:
			ereport(DEBUG2, errmsg("pg_tde_xact_callback: aborting transaction"));
			do_pending_deletes(false);
			break;

		case XACT_EVENT_COMMIT:
			do_pending_deletes(true);
			pending_delete_cleanup();
			break;

		case XACT_EVENT_PREPARE:
			pending_delete_cleanup();
			break;

		default:
			/* nothing to do for the remaining events */
			break;
	}
}
/*
 * pg_tde_subxact_callback -- subtransaction event hook.
 *
 * When a subtransaction aborts, its pending on-abort deletions are carried
 * out immediately. When it commits, its pending deletions are handed over
 * to the parent transaction. mySubid, parentSubid and arg are unused.
 *
 * TODO: handle all possible subtransaction events.
 */
static void
pg_tde_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
						SubTransactionId parentSubid, void *arg)
{
	switch (event)
	{
		case SUBXACT_EVENT_ABORT_SUB:
			ereport(DEBUG2,
					errmsg("pg_tde_subxact_callback: aborting subtransaction"));
			do_pending_deletes(false);
			break;

		case SUBXACT_EVENT_COMMIT_SUB:
			ereport(DEBUG2,
					errmsg("pg_tde_subxact_callback: committing subtransaction"));
			reassign_pending_deletes_to_parent_xact();
			break;

		default:
			/* other subtransaction events are not handled */
			break;
	}
}
/*
 * RegisterTdeXactCallbacks -- install pg_tde's transaction callbacks.
 *
 * Registers pg_tde_xact_callback for top-level transaction events and
 * pg_tde_subxact_callback for subtransaction events. Both are registered
 * with a NULL callback argument.
 */
void
RegisterTdeXactCallbacks(void)
{
RegisterXactCallback(pg_tde_xact_callback, NULL);
RegisterSubXactCallback(pg_tde_subxact_callback, NULL);
}
/*
 * RegisterEntryForDeletion -- remember a key map entry to free at xact end.
 *
 * rlocator          relation whose map entry is concerned (copied)
 * map_entry_offset  offset later passed to pg_tde_free_key_map_entry()
 * atCommit          true to free the entry on commit, false to free it on
 *                   abort
 *
 * The request is tagged with the current transaction nesting level and is
 * allocated in TopMemoryContext.
 */
void
RegisterEntryForDeletion(const RelFileLocator *rlocator, off_t map_entry_offset, bool atCommit)
{
	PendingMapEntryDelete *entry = MemoryContextAlloc(TopMemoryContext, sizeof(*entry));

	entry->rlocator = *rlocator;
	entry->map_entry_offset = map_entry_offset;
	entry->atCommit = atCommit;
	entry->nestLevel = GetCurrentTransactionNestLevel();

	/* push onto the front of the pending list */
	entry->next = pendingDeletes;
	pendingDeletes = entry;
}
/*
 * do_pending_deletes() -- Take care of key map entry deletes at end of xact.
 *
 * Processes every pending request registered at the current transaction
 * nesting level: the request is removed from the list and, if its atCommit
 * flag matches isCommit, the corresponding key map entry is freed. Requests
 * belonging to other nesting levels are left in the list untouched.
 *
 * This also runs when aborting a subxact; we want to clean up a failed
 * subxact immediately.
 *
 * isCommit: true when called for a commit, false when called for an abort.
 */
static void
do_pending_deletes(bool isCommit)
{
	int			nestLevel = GetCurrentTransactionNestLevel();
	PendingMapEntryDelete *pending;
	PendingMapEntryDelete *prev;
	PendingMapEntryDelete *next;

	prev = NULL;
	for (pending = pendingDeletes; pending != NULL; pending = next)
	{
		next = pending->next;
		if (pending->nestLevel != nestLevel)
		{
			/* outer-level entries should not be processed yet */
			prev = pending;
			continue;
		}

		/* unlink list entry first, so we don't retry on failure */
		if (prev)
			prev->next = next;
		else
			pendingDeletes = next;

		/* do deletion if called for */
		if (pending->atCommit == isCommit)
		{
			/*
			 * off_t may be wider than int, so print it as long long rather
			 * than truncating it with an (int) cast.
			 */
			ereport(LOG,
					errmsg("pg_tde_xact_callback: deleting entry at offset %lld",
						   (long long) pending->map_entry_offset));
			pg_tde_free_key_map_entry(&pending->rlocator, pending->map_entry_offset);
		}

		pfree(pending);
		/* prev does not change */
	}
}
/*
 * reassign_pending_deletes_to_parent_xact() -- hand pending deletes over to
 * the parent transaction.
 *
 * Only the top-level transaction can perform on-commit deletes, whereas both
 * subtransactions and the top-level transaction can perform on-abort
 * deletes. So when a subtransaction commits rather than aborting by itself,
 * all of its pending deletes have to become the parent's: we do that by
 * decrementing the nesting level of every request registered at the current
 * level.
 */
static void
reassign_pending_deletes_to_parent_xact(void)
{
	const int	curLevel = GetCurrentTransactionNestLevel();
	PendingMapEntryDelete *entry = pendingDeletes;

	while (entry != NULL)
	{
		if (entry->nestLevel == curLevel)
			entry->nestLevel = curLevel - 1;

		entry = entry->next;
	}
}
/*
 * pending_delete_cleanup -- Clean up after a successful PREPARE or COMMIT
 *
 * Throws away the whole in-memory list of pending key map entry deletes
 * without acting on any of them; every list element is pfree'd and the list
 * head ends up NULL.
 *
 * NOTE(review): for the PREPARE case this presumes that something else takes
 * care of the entries once the prepared transaction is finished -- confirm.
 */
static void
pending_delete_cleanup(void)
{
	while (pendingDeletes != NULL)
	{
		PendingMapEntryDelete *head = pendingDeletes;

		/* advance the list head before releasing the element */
		pendingDeletes = head->next;
		pfree(head);
	}
}
Loading…
Cancel
Save