diff --git a/contrib/pg_tde/Makefile b/contrib/pg_tde/Makefile index 3eba916acc9..25726c46112 100644 --- a/contrib/pg_tde/Makefile +++ b/contrib/pg_tde/Makefile @@ -31,6 +31,7 @@ OBJS = src/encryption/enc_tde.o \ src/encryption/enc_aes.o \ src/access/pg_tde_tdemap.o \ src/access/pg_tde_xlog.o \ +src/access/pg_tde_xlog_keys.o \ src/access/pg_tde_xlog_smgr.o \ src/keyring/keyring_curl.o \ src/keyring/keyring_file.o \ diff --git a/contrib/pg_tde/Makefile.tools b/contrib/pg_tde/Makefile.tools index 4832c74c9db..6ab1ad75166 100644 --- a/contrib/pg_tde/Makefile.tools +++ b/contrib/pg_tde/Makefile.tools @@ -4,6 +4,7 @@ TDE_XLOG_OBJS = \ TDE_OBJS = \ src/access/pg_tde_tdemap.frontend \ src/catalog/tde_keyring.frontend \ + src/access/pg_tde_xlog_keys.frontend \ src/catalog/tde_keyring_parse_opts.frontend \ src/catalog/tde_principal_key.frontend \ src/common/pg_tde_utils.frontend \ diff --git a/contrib/pg_tde/meson.build b/contrib/pg_tde/meson.build index 5785f3148a2..f2c21edc8dd 100644 --- a/contrib/pg_tde/meson.build +++ b/contrib/pg_tde/meson.build @@ -3,6 +3,7 @@ curldep = dependency('libcurl') pg_tde_sources = files( 'src/access/pg_tde_tdemap.c', 'src/access/pg_tde_xlog.c', + 'src/access/pg_tde_xlog_keys.c', 'src/access/pg_tde_xlog_smgr.c', 'src/catalog/tde_keyring.c', 'src/catalog/tde_keyring_parse_opts.c', @@ -24,6 +25,7 @@ pg_tde_sources = files( tde_frontend_sources = files( 'src/access/pg_tde_tdemap.c', + 'src/access/pg_tde_xlog_keys.c', 'src/access/pg_tde_xlog_smgr.c', 'src/catalog/tde_keyring.c', 'src/catalog/tde_keyring_parse_opts.c', diff --git a/contrib/pg_tde/src/access/pg_tde_tdemap.c b/contrib/pg_tde/src/access/pg_tde_tdemap.c index 8cb182e3146..fa8d01d54ad 100644 --- a/contrib/pg_tde/src/access/pg_tde_tdemap.c +++ b/contrib/pg_tde/src/access/pg_tde_tdemap.c @@ -43,34 +43,27 @@ #define MAP_ENTRY_SIZE sizeof(TDEMapEntry) #define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader) -#define MaxXLogRecPtr (~(XLogRecPtr)0) - typedef struct TDEFileHeader { int32 
file_version; TDESignedPrincipalKeyInfo signed_key_info; } TDEFileHeader; -static WALKeyCacheRec *tde_wal_key_cache = NULL; -static WALKeyCacheRec *tde_wal_key_last_rec = NULL; - -static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data); -static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key); static bool pg_tde_find_map_entry(const RelFileLocator *rlocator, TDEMapEntryType key_type, char *db_map_path, TDEMapEntry *map_entry); static InternalKey *tde_decrypt_rel_key(TDEPrincipalKey *principal_key, TDEMapEntry *map_entry); static int pg_tde_open_file_basic(const char *tde_filename, int fileFlags, bool ignore_missing); static int pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr_pos); -static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos); static void pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader, off_t *bytes_read); -static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written); static bool pg_tde_read_one_map_entry(int fd, TDEMapEntry *map_entry, off_t *offset); -static void pg_tde_read_one_map_entry2(int keydata_fd, int32 key_index, TDEMapEntry *map_entry, Oid databaseId); -static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn); #ifndef FRONTEND static void pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path); static int keyrotation_init_file(const TDESignedPrincipalKeyInfo *signed_key_info, char *rotated_filename, const char *filename, off_t *curr_pos); static void finalize_key_rotation(const char *path_old, const char *path_new); +static int 
pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written); +static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data); +static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos); +static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key); void pg_tde_save_smgr_key(RelFileLocator rel, const InternalKey *rel_key_data) @@ -259,6 +252,10 @@ pg_tde_perform_rotate_key(TDEPrincipalKey *principal_key, TDEPrincipalKey *new_p char old_path[MAXPGPATH], new_path[MAXPGPATH]; + /* This function cannot be used to rotate the server key. */ + Assert(principal_key); + Assert(principal_key->keyInfo.databaseId != GLOBAL_DATA_TDE_OID); + pg_tde_sign_principal_key_info(&new_signed_key_info, new_principal_key); pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, old_path); @@ -363,74 +360,6 @@ pg_tde_delete_principal_key(Oid dbOid) #endif /* !FRONTEND */ -/* - * It's called by seg_write inside crit section so no pallocs, hence - * needs keyfile_path - */ -void -pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path) -{ - LWLock *lock_pk = tde_lwlock_enc_keys(); - int fd; - off_t read_pos, - write_pos, - last_key_idx; - - LWLockAcquire(lock_pk, LW_EXCLUSIVE); - - fd = pg_tde_open_file_write(keyfile_path, NULL, false, &read_pos); - - last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1; - write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * MAP_ENTRY_SIZE) + offsetof(TDEMapEntry, enc_key) + offsetof(InternalKey, start_lsn); - - if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr)) - { - ereport(ERROR, - errcode_for_file_access(), - errmsg("could not write tde key data file: 
%m")); - } - - /* - * If the last key overlaps with the previous, then invalidate the - * previous one. This may (and will) happen on replicas because it - * re-reads primary's data from the beginning of the segment on restart. - */ - if (last_key_idx > 0) - { - off_t prev_key_pos = TDE_FILE_HEADER_SIZE + ((last_key_idx - 1) * MAP_ENTRY_SIZE); - TDEMapEntry prev_map_entry; - - if (pg_pread(fd, &prev_map_entry, MAP_ENTRY_SIZE, prev_key_pos) != MAP_ENTRY_SIZE) - { - ereport(ERROR, - errcode_for_file_access(), - errmsg("could not read previous WAL key: %m")); - } - - if (prev_map_entry.enc_key.start_lsn >= lsn) - { - prev_map_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID; - - if (pg_pwrite(fd, &prev_map_entry, MAP_ENTRY_SIZE, prev_key_pos) != MAP_ENTRY_SIZE) - { - ereport(ERROR, - errcode_for_file_access(), - errmsg("could not write invalidated key: %m")); - } - } - } - - if (pg_fsync(fd) != 0) - { - ereport(data_sync_elevel(ERROR), - errcode_for_file_access(), - errmsg("could not fsync file: %m")); - } - - LWLockRelease(lock_pk); - CloseTransientFile(fd); -} - void pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const TDEPrincipalKey *principal_key) { @@ -449,6 +378,7 @@ pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const signed_key_info->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE); } +#ifndef FRONTEND static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data) { @@ -469,7 +399,9 @@ pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *princ map_entry->enc_key.key, map_entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE); } +#endif +#ifndef FRONTEND static void pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path) { @@ -492,41 +424,9 @@ pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, *offset += bytes_written; } - -/* - * 
Generates a new internal key for WAL and adds it to the key file. - * - * We have a special function for WAL as it is being called during recovery - * start so there should be no XLog records and aquired locks. The key is - * always created with start_lsn = InvalidXLogRecPtr. Which will be updated - * with the actual lsn by the first WAL write. - */ -void -pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type) -{ - TDEPrincipalKey *principal_key; - - LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); - - principal_key = GetPrincipalKey(newrlocator->dbOid, LW_EXCLUSIVE); - if (principal_key == NULL) - { - ereport(ERROR, - errmsg("principal key not configured"), - errhint("Use pg_tde_set_server_key_using_global_key_provider() to configure one.")); - } - - /* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */ - pg_tde_generate_internal_key(rel_key_data, entry_type); - - pg_tde_write_key_map_entry(newrlocator, rel_key_data, principal_key); - -#ifdef FRONTEND - free(principal_key); #endif - LWLockRelease(tde_lwlock_enc_keys()); -} +#ifndef FRONTEND /* * The caller must hold an exclusive lock on the key file to avoid * concurrent in place updates leading to data conflicts. @@ -580,6 +480,7 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *re CloseTransientFile(map_fd); } +#endif /* * Returns true if we find a valid match; e.g. type is not set to @@ -727,6 +628,7 @@ pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr return fd; } +#ifndef FRONTEND /* * Open for write and Validate File Header: * header: {Format Version, Principal Key Name} @@ -756,6 +658,7 @@ pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *curr_pos = bytes_read + bytes_written; return fd; } +#endif /* * Read TDE file header from a TDE file and fill in the fheader data structure. 
@@ -780,6 +683,7 @@ pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader } } +#ifndef FRONTEND /* * Write TDE file header to a TDE file. */ @@ -812,6 +716,7 @@ pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrinci return fd; } +#endif /* * Returns true if a map entry if found or false if we have reached the end of @@ -836,27 +741,6 @@ pg_tde_read_one_map_entry(int map_file, TDEMapEntry *map_entry, off_t *offset) return true; } -/* - * TODO: Unify with pg_tde_read_one_map_entry() - */ -static void -pg_tde_read_one_map_entry2(int fd, int32 key_index, TDEMapEntry *map_entry, Oid databaseId) -{ - off_t read_pos; - - read_pos = TDE_FILE_HEADER_SIZE + key_index * MAP_ENTRY_SIZE; - if (pg_pread(fd, map_entry, MAP_ENTRY_SIZE, read_pos) != MAP_ENTRY_SIZE) - { - char db_map_path[MAXPGPATH]; - - pg_tde_set_db_file_path(databaseId, db_map_path); - ereport(FATAL, - errcode_for_file_access(), - errmsg("could not find the required key at index %d in tde data file \"%s\": %m", - key_index, db_map_path)); - } -} - /* * Get the principal key from the key file. The caller must hold * a LW_SHARED or higher lock on files before calling this function. 
@@ -975,186 +859,3 @@ pg_tde_get_smgr_key(RelFileLocator rel) return rel_key; } - -/* - * Returns last (the most recent) key for a given relation - */ -WALKeyCacheRec * -pg_tde_get_last_wal_key(void) -{ - return tde_wal_key_last_rec; -} - -WALKeyCacheRec * -pg_tde_get_wal_cache_keys(void) -{ - return tde_wal_key_cache; -} - -InternalKey * -pg_tde_read_last_wal_key(void) -{ - RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); - char db_map_path[MAXPGPATH]; - off_t read_pos = 0; - LWLock *lock_pk = tde_lwlock_enc_keys(); - TDEPrincipalKey *principal_key; - int fd; - int file_idx; - TDEMapEntry map_entry; - InternalKey *rel_key_data; - off_t fsize; - - LWLockAcquire(lock_pk, LW_EXCLUSIVE); - principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE); - if (principal_key == NULL) - { - LWLockRelease(lock_pk); - elog(DEBUG1, "init WAL encryption: no principal key"); - return NULL; - } - pg_tde_set_db_file_path(rlocator.dbOid, db_map_path); - - fd = pg_tde_open_file_read(db_map_path, false, &read_pos); - fsize = lseek(fd, 0, SEEK_END); - /* No keys */ - if (fsize == TDE_FILE_HEADER_SIZE) - { -#ifdef FRONTEND - pfree(principal_key); -#endif - LWLockRelease(lock_pk); - CloseTransientFile(fd); - return NULL; - } - - file_idx = ((fsize - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1; - pg_tde_read_one_map_entry2(fd, file_idx, &map_entry, rlocator.dbOid); - - rel_key_data = tde_decrypt_rel_key(principal_key, &map_entry); -#ifdef FRONTEND - pfree(principal_key); -#endif - LWLockRelease(lock_pk); - CloseTransientFile(fd); - - return rel_key_data; -} - -/* Fetches WAL keys from disk and adds them to the WAL cache */ -WALKeyCacheRec * -pg_tde_fetch_wal_keys(XLogRecPtr start_lsn) -{ - RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); - char db_map_path[MAXPGPATH]; - off_t read_pos = 0; - LWLock *lock_pk = tde_lwlock_enc_keys(); - TDEPrincipalKey *principal_key; - int fd; - int keys_count; - WALKeyCacheRec *return_wal_rec = NULL; - - LWLockAcquire(lock_pk, 
LW_SHARED); - principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED); - if (principal_key == NULL) - { - LWLockRelease(lock_pk); - elog(DEBUG1, "fetch WAL keys: no principal key"); - return NULL; - } - - pg_tde_set_db_file_path(rlocator.dbOid, db_map_path); - - fd = pg_tde_open_file_read(db_map_path, false, &read_pos); - - keys_count = (lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE; - - /* - * If there is no keys, return a fake one (with the range 0-infinity) so - * the reader won't try to check the disk all the time. This for the - * walsender in case if WAL is unencrypted and never was. - */ - if (keys_count == 0) - { - WALKeyCacheRec *wal_rec; - InternalKey stub_key = { - .start_lsn = InvalidXLogRecPtr, - }; - - wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr); - -#ifdef FRONTEND - /* The backend frees it after copying to the cache. */ - pfree(principal_key); -#endif - LWLockRelease(lock_pk); - CloseTransientFile(fd); - return wal_rec; - } - - for (int file_idx = 0; file_idx < keys_count; file_idx++) - { - TDEMapEntry map_entry; - - pg_tde_read_one_map_entry2(fd, file_idx, &map_entry, rlocator.dbOid); - - /* - * Skip new (just created but not updated by write) and invalid keys - */ - if (map_entry.enc_key.start_lsn != InvalidXLogRecPtr && - (map_entry.enc_key.type == TDE_KEY_TYPE_WAL_UNENCRYPTED || - map_entry.enc_key.type == TDE_KEY_TYPE_WAL_ENCRYPTED) && - map_entry.enc_key.start_lsn >= start_lsn) - { - InternalKey *rel_key_data = tde_decrypt_rel_key(principal_key, &map_entry); - WALKeyCacheRec *wal_rec; - - wal_rec = pg_tde_add_wal_key_to_cache(rel_key_data, map_entry.enc_key.start_lsn); - - pfree(rel_key_data); - - if (!return_wal_rec) - return_wal_rec = wal_rec; - } - } -#ifdef FRONTEND - pfree(principal_key); -#endif - LWLockRelease(lock_pk); - CloseTransientFile(fd); - - return return_wal_rec; -} - -static WALKeyCacheRec * -pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn) -{ - WALKeyCacheRec 
*wal_rec; -#ifndef FRONTEND - MemoryContext oldCtx; - - oldCtx = MemoryContextSwitchTo(TopMemoryContext); -#endif - wal_rec = palloc0_object(WALKeyCacheRec); -#ifndef FRONTEND - MemoryContextSwitchTo(oldCtx); -#endif - - wal_rec->start_lsn = start_lsn; - wal_rec->end_lsn = MaxXLogRecPtr; - wal_rec->key = *key; - wal_rec->crypt_ctx = NULL; - if (!tde_wal_key_last_rec) - { - tde_wal_key_last_rec = wal_rec; - tde_wal_key_cache = tde_wal_key_last_rec; - } - else - { - tde_wal_key_last_rec->next = wal_rec; - tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn; - tde_wal_key_last_rec = wal_rec; - } - - return wal_rec; -} diff --git a/contrib/pg_tde/src/access/pg_tde_xlog.c b/contrib/pg_tde/src/access/pg_tde_xlog.c index 0081acb269d..2ffe99e7f44 100644 --- a/contrib/pg_tde/src/access/pg_tde_xlog.c +++ b/contrib/pg_tde/src/access/pg_tde_xlog.c @@ -12,7 +12,9 @@ #include "utils/guc.h" #include "utils/memutils.h" +#include "access/pg_tde_xlog_keys.h" #include "access/pg_tde_xlog.h" +#include "catalog/tde_global_space.h" #include "catalog/tde_keyring.h" #include "encryption/enc_tde.h" #include "pg_tde.h" @@ -51,7 +53,10 @@ tdeheap_rmgr_redo(XLogReaderState *record) { TDESignedPrincipalKeyInfo *mkey = (TDESignedPrincipalKeyInfo *) XLogRecGetData(record); - pg_tde_save_principal_key_redo(mkey); + if (mkey->data.databaseId == GLOBAL_DATA_TDE_OID) + pg_tde_save_server_key_redo(mkey); + else + pg_tde_save_principal_key_redo(mkey); } else if (info == XLOG_TDE_DELETE_RELATION_KEY) { diff --git a/contrib/pg_tde/src/access/pg_tde_xlog_keys.c b/contrib/pg_tde/src/access/pg_tde_xlog_keys.c new file mode 100644 index 00000000000..4187fa9768e --- /dev/null +++ b/contrib/pg_tde/src/access/pg_tde_xlog_keys.c @@ -0,0 +1,871 @@ +#include "postgres.h" + +#include <openssl/err.h> +#include <openssl/rand.h> + +#include "access/xlog_internal.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "storage/fd.h" +#include "utils/memutils.h" + +#include "access/pg_tde_tdemap.h" +#include "access/pg_tde_xlog_keys.h" 
+#include "access/pg_tde_xlog.h" +#include "catalog/tde_global_space.h" +#include "catalog/tde_principal_key.h" +#include "encryption/enc_aes.h" +#include "encryption/enc_tde.h" + +#ifdef FRONTEND +#include "pg_tde_fe.h" +#endif + +#define PG_TDE_WAL_KEY_FILE_MAGIC 0x03454454 /* version ID value = TDE 03 */ + +#define MaxXLogRecPtr (~(XLogRecPtr)0) + +static WALKeyCacheRec *tde_wal_key_cache = NULL; +static WALKeyCacheRec *tde_wal_key_last_rec = NULL; + +static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(WalEncryptionKey *cached_key, XLogRecPtr start_lsn); +static WalEncryptionKey *pg_tde_decrypt_wal_key(TDEPrincipalKey *principal_key, WalKeyFileEntry *entry); +static void pg_tde_initialize_wal_key_file_entry(WalKeyFileEntry *entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const WalEncryptionKey *rel_key_data); +static int pg_tde_open_wal_key_file_basic(const char *filename, int flags, bool ignore_missing); +static int pg_tde_open_wal_key_file_read(const char *filename, bool ignore_missing, off_t *curr_pos); +static int pg_tde_open_wal_key_file_write(const char *filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos); +static bool pg_tde_read_one_wal_key_file_entry(int fd, WalKeyFileEntry *entry, off_t *offset); +static void pg_tde_read_one_wal_key_file_entry2(int fd, int32 key_index, WalKeyFileEntry *entry, Oid databaseId); +static void pg_tde_wal_key_file_header_read(const char *filename, int fd, WalKeyFileHeader *fheader, off_t *bytes_read); +static int pg_tde_wal_key_file_header_write(const char *filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written); +static void pg_tde_write_one_wal_key_file_entry(int fd, const WalKeyFileEntry *entry, off_t *offset, const char *db_map_path); +static void pg_tde_write_wal_key_file_entry(const RelFileLocator *rlocator, const WalEncryptionKey *rel_key_data, TDEPrincipalKey *principal_key); + +/* + * It's called by seg_write inside 
crit section so no pallocs, hence + * needs keyfile_path + */ +void +pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path) +{ + LWLock *lock_pk = tde_lwlock_enc_keys(); + int fd; + off_t read_pos, + write_pos, + last_key_idx; + + LWLockAcquire(lock_pk, LW_EXCLUSIVE); + + fd = pg_tde_open_wal_key_file_write(keyfile_path, NULL, false, &read_pos); /* no key info is passed, so this call cannot write a header */ + + last_key_idx = ((lseek(fd, 0, SEEK_END) - sizeof(WalKeyFileHeader)) / sizeof(WalKeyFileEntry)) - 1; /* index of the last entry, derived from the file size; NOTE(review): nothing here checks that the file holds at least one entry - confirm callers guarantee it */ + write_pos = sizeof(WalKeyFileHeader) + + (last_key_idx * sizeof(WalKeyFileEntry)) + + offsetof(WalKeyFileEntry, enc_key) + + offsetof(WalEncryptionKey, start_lsn); /* byte offset of the last entry's enc_key.start_lsn field */ + + if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr)) /* overwrite only the start_lsn field in place */ + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not write WAL key data file: %m")); + } + + /* + * If the last key overlaps with the previous, then invalidate the + * previous one. This may (and will) happen on replicas because it + * re-reads primary's data from the beginning of the segment on restart. + * + * "Overlaps" here means the previous entry's start_lsn is greater than or + * equal to the lsn just stored; that entry is rewritten in place with its + * key type set to TDE_KEY_TYPE_WAL_INVALID. + */ + if (last_key_idx > 0) + { + off_t prev_key_pos = sizeof(WalKeyFileHeader) + ((last_key_idx - 1) * sizeof(WalKeyFileEntry)); + WalKeyFileEntry prev_entry; + + if (pg_pread(fd, &prev_entry, sizeof(WalKeyFileEntry), prev_key_pos) != sizeof(WalKeyFileEntry)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not read previous WAL key: %m")); + } + + if (prev_entry.enc_key.start_lsn >= lsn) + { + prev_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID; + + if (pg_pwrite(fd, &prev_entry, sizeof(WalKeyFileEntry), prev_key_pos) != sizeof(WalKeyFileEntry)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not write invalidated key: %m")); + } + } + } + + if (pg_fsync(fd) != 0) /* a single fsync covers both in-place writes above */ + { + ereport(data_sync_elevel(ERROR), + errcode_for_file_access(), + errmsg("could not fsync file: %m")); + } + + LWLockRelease(lock_pk); + CloseTransientFile(fd); +} + +/* + * Generates a new internal key for WAL and adds it to the key file.
+ * + * We have a special function for WAL as it is being called during recovery + * start so there should be no XLog records and aquired locks. The key is + * always created with start_lsn = InvalidXLogRecPtr. Which will be updated + * with the actual lsn by the first WAL write. + */ +void +pg_tde_create_wal_key(WalEncryptionKey *rel_key_data, + const RelFileLocator *newrlocator, + TDEMapEntryType entry_type) +{ + TDEPrincipalKey *principal_key; + + LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); + + principal_key = GetPrincipalKey(newrlocator->dbOid, LW_EXCLUSIVE); + if (principal_key == NULL) + { + ereport(ERROR, + errmsg("principal key not configured"), + errhint("Use pg_tde_set_server_key_using_global_key_provider() to configure one.")); + } + + /* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */ + rel_key_data->type = entry_type; + rel_key_data->start_lsn = InvalidXLogRecPtr; + + if (!RAND_bytes(rel_key_data->key, INTERNAL_KEY_LEN)) + ereport(ERROR, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate WAL encryption key: %s", + ERR_error_string(ERR_get_error(), NULL))); + if (!RAND_bytes(rel_key_data->base_iv, INTERNAL_KEY_IV_LEN)) + ereport(ERROR, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate IV for WAL encryption key: %s", + ERR_error_string(ERR_get_error(), NULL))); + + pg_tde_write_wal_key_file_entry(newrlocator, rel_key_data, principal_key); + +#ifdef FRONTEND + free(principal_key); +#endif + LWLockRelease(tde_lwlock_enc_keys()); +} + +/* + * Returns last (the most recent) key for a given relation + */ +WALKeyCacheRec * +pg_tde_get_last_wal_key(void) +{ + return tde_wal_key_last_rec; +} + +WALKeyCacheRec * +pg_tde_get_wal_cache_keys(void) +{ + return tde_wal_key_cache; +} + +WalEncryptionKey * +pg_tde_read_last_wal_key(void) +{ + RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + char db_map_path[MAXPGPATH]; + off_t read_pos = 0; + LWLock *lock_pk = tde_lwlock_enc_keys(); + TDEPrincipalKey 
*principal_key; + int fd; + int file_idx; + WalKeyFileEntry entry; + WalEncryptionKey *rel_key_data; + off_t fsize; + + LWLockAcquire(lock_pk, LW_EXCLUSIVE); + principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE); + if (principal_key == NULL) + { + LWLockRelease(lock_pk); + elog(DEBUG1, "init WAL encryption: no principal key"); + return NULL; + } + pg_tde_set_db_file_path(rlocator.dbOid, db_map_path); + + fd = pg_tde_open_wal_key_file_read(db_map_path, false, &read_pos); + fsize = lseek(fd, 0, SEEK_END); + /* No keys */ + if (fsize == sizeof(WalKeyFileHeader)) + { +#ifdef FRONTEND + pfree(principal_key); +#endif + LWLockRelease(lock_pk); + CloseTransientFile(fd); + return NULL; + } + + file_idx = ((fsize - sizeof(WalKeyFileHeader)) / sizeof(WalKeyFileEntry)) - 1; + pg_tde_read_one_wal_key_file_entry2(fd, file_idx, &entry, rlocator.dbOid); + + rel_key_data = pg_tde_decrypt_wal_key(principal_key, &entry); +#ifdef FRONTEND + pfree(principal_key); +#endif + LWLockRelease(lock_pk); + CloseTransientFile(fd); + + return rel_key_data; +} + +/* Fetches WAL keys from disk and adds them to the WAL cache */ +WALKeyCacheRec * +pg_tde_fetch_wal_keys(XLogRecPtr start_lsn) +{ + RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + char db_map_path[MAXPGPATH]; + off_t read_pos = 0; + LWLock *lock_pk = tde_lwlock_enc_keys(); + TDEPrincipalKey *principal_key; + int fd; + int keys_count; + WALKeyCacheRec *return_wal_rec = NULL; + + LWLockAcquire(lock_pk, LW_SHARED); + principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED); + if (principal_key == NULL) + { + LWLockRelease(lock_pk); + elog(DEBUG1, "fetch WAL keys: no principal key"); + return NULL; + } + + pg_tde_set_db_file_path(rlocator.dbOid, db_map_path); + + fd = pg_tde_open_wal_key_file_read(db_map_path, false, &read_pos); + + keys_count = (lseek(fd, 0, SEEK_END) - sizeof(WalKeyFileHeader)) / sizeof(WalKeyFileEntry); + + /* + * If there is no keys, return a fake one (with the range 0-infinity) so + * the 
reader won't try to check the disk all the time. This for the + * walsender in case if WAL is unencrypted and never was. + */ + if (keys_count == 0) + { + WALKeyCacheRec *wal_rec; + WalEncryptionKey stub_key = { + .start_lsn = InvalidXLogRecPtr, + }; + + wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr); + +#ifdef FRONTEND + /* The backend frees it after copying to the cache. */ + pfree(principal_key); +#endif + LWLockRelease(lock_pk); + CloseTransientFile(fd); + return wal_rec; + } + + for (int file_idx = 0; file_idx < keys_count; file_idx++) + { + WalKeyFileEntry entry; + + pg_tde_read_one_wal_key_file_entry2(fd, file_idx, &entry, rlocator.dbOid); + + /* + * Skip new (just created but not updated by write) and invalid keys + */ + if (entry.enc_key.start_lsn != InvalidXLogRecPtr && + (entry.enc_key.type == TDE_KEY_TYPE_WAL_UNENCRYPTED || + entry.enc_key.type == TDE_KEY_TYPE_WAL_ENCRYPTED) && + entry.enc_key.start_lsn >= start_lsn) + { + WalEncryptionKey *rel_key_data = pg_tde_decrypt_wal_key(principal_key, &entry); + WALKeyCacheRec *wal_rec; + + wal_rec = pg_tde_add_wal_key_to_cache(rel_key_data, entry.enc_key.start_lsn); + + pfree(rel_key_data); + + if (!return_wal_rec) + return_wal_rec = wal_rec; + } + } +#ifdef FRONTEND + pfree(principal_key); +#endif + LWLockRelease(lock_pk); + CloseTransientFile(fd); + + return return_wal_rec; +} + +static WALKeyCacheRec * +pg_tde_add_wal_key_to_cache(WalEncryptionKey *key, XLogRecPtr start_lsn) +{ + WALKeyCacheRec *wal_rec; +#ifndef FRONTEND + MemoryContext oldCtx; + + oldCtx = MemoryContextSwitchTo(TopMemoryContext); +#endif + wal_rec = palloc0_object(WALKeyCacheRec); +#ifndef FRONTEND + MemoryContextSwitchTo(oldCtx); +#endif + + wal_rec->start_lsn = start_lsn; + wal_rec->end_lsn = MaxXLogRecPtr; + wal_rec->key = *key; + wal_rec->crypt_ctx = NULL; + if (!tde_wal_key_last_rec) + { + tde_wal_key_last_rec = wal_rec; + tde_wal_key_cache = tde_wal_key_last_rec; + } + else + { + tde_wal_key_last_rec->next = 
wal_rec; + tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn; + tde_wal_key_last_rec = wal_rec; + } + + return wal_rec; +} + +static int +pg_tde_open_wal_key_file_basic(const char *filename, + int flags, + bool ignore_missing) +{ + int fd; + + fd = OpenTransientFile(filename, flags); + if (fd < 0 && !(errno == ENOENT && ignore_missing == true)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not open WAL key file \"%s\": %m", filename)); + } + + return fd; +} + +static int +pg_tde_open_wal_key_file_read(const char *filename, + bool ignore_missing, + off_t *curr_pos) +{ + int fd; + WalKeyFileHeader fheader; + off_t bytes_read = 0; + + Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_SHARED) || + LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + + fd = pg_tde_open_wal_key_file_basic(filename, O_RDONLY | PG_BINARY, ignore_missing); + if (ignore_missing && fd < 0) + return fd; + + pg_tde_wal_key_file_header_read(filename, fd, &fheader, &bytes_read); + *curr_pos = bytes_read; + + return fd; +} + +static int +pg_tde_open_wal_key_file_write(const char *filename, + const TDESignedPrincipalKeyInfo *signed_key_info, + bool truncate, + off_t *curr_pos) +{ + int fd; + WalKeyFileHeader fheader; + off_t bytes_read = 0; + off_t bytes_written = 0; + int file_flags = O_RDWR | O_CREAT | PG_BINARY | (truncate ? O_TRUNC : 0); + + Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + + fd = pg_tde_open_wal_key_file_basic(filename, file_flags, false); + + pg_tde_wal_key_file_header_read(filename, fd, &fheader, &bytes_read); + + /* In case it's a new file, let's add the header now. 
*/ + if (bytes_read == 0 && signed_key_info) + pg_tde_wal_key_file_header_write(filename, fd, signed_key_info, &bytes_written); + + *curr_pos = bytes_read + bytes_written; + return fd; +} + +static void +pg_tde_wal_key_file_header_read(const char *filename, + int fd, + WalKeyFileHeader *fheader, + off_t *bytes_read) +{ + Assert(fheader); + + *bytes_read = pg_pread(fd, fheader, sizeof(WalKeyFileHeader), 0); + + /* File is empty */ + if (*bytes_read == 0) + return; + + if (*bytes_read != sizeof(WalKeyFileHeader) + || fheader->file_version != PG_TDE_WAL_KEY_FILE_MAGIC) + { + ereport(FATAL, + errcode_for_file_access(), + errmsg("WAL key file \"%s\" is corrupted: %m", filename)); + } +} + +static int +pg_tde_wal_key_file_header_write(const char *filename, + int fd, + const TDESignedPrincipalKeyInfo *signed_key_info, + off_t *bytes_written) +{ + WalKeyFileHeader fheader; + + Assert(signed_key_info); + + fheader.file_version = PG_TDE_WAL_KEY_FILE_MAGIC; + fheader.signed_key_info = *signed_key_info; + *bytes_written = pg_pwrite(fd, &fheader, sizeof(WalKeyFileHeader), 0); + + if (*bytes_written != sizeof(WalKeyFileHeader)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not write WAL key file \"%s\": %m", filename)); + } + + if (pg_fsync(fd) != 0) + { + ereport(data_sync_elevel(ERROR), + errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", filename)); + } + + ereport(DEBUG2, errmsg("Wrote the header to %s", filename)); + + return fd; +} + +/* + * Returns true if an entry is found or false if we have reached the end of the + * file. + */ +static bool +pg_tde_read_one_wal_key_file_entry(int fd, + WalKeyFileEntry *entry, + off_t *offset) +{ + off_t bytes_read = 0; + + Assert(entry); + Assert(offset); + + bytes_read = pg_pread(fd, entry, sizeof(WalKeyFileEntry), *offset); + + /* We've reached the end of the file. 
*/ + if (bytes_read != sizeof(WalKeyFileEntry)) + return false; + + *offset += bytes_read; + + return true; +} + +static void +pg_tde_read_one_wal_key_file_entry2(int fd, + int32 key_index, + WalKeyFileEntry *entry, + Oid databaseId) +{ + off_t read_pos; + + read_pos = sizeof(WalKeyFileHeader) + key_index * sizeof(WalKeyFileEntry); /* fixed-size entries are addressed by index right after the header */ + if (pg_pread(fd, entry, sizeof(WalKeyFileEntry), read_pos) != sizeof(WalKeyFileEntry)) + { + char db_map_path[MAXPGPATH]; + + pg_tde_set_db_file_path(databaseId, db_map_path); /* the path is rebuilt only to name the file in the error message */ + ereport(FATAL, + errcode_for_file_access(), + errmsg("could not find the required key at index %d in WAL key file \"%s\": %m", + key_index, db_map_path)); + } +} + +static void +pg_tde_write_wal_key_file_entry(const RelFileLocator *rlocator, + const WalEncryptionKey *rel_key_data, + TDEPrincipalKey *principal_key) +{ + char db_map_path[MAXPGPATH]; + int fd; + off_t curr_pos = 0; + WalKeyFileEntry write_entry; + TDESignedPrincipalKeyInfo signed_key_Info; + + Assert(rlocator); + + pg_tde_set_db_file_path(rlocator->dbOid, db_map_path); + + pg_tde_sign_principal_key_info(&signed_key_Info, principal_key); + + /* Open and validate file for basic correctness; curr_pos is set by the open call. */ + fd = pg_tde_open_wal_key_file_write(db_map_path, &signed_key_Info, false, &curr_pos); + + /* + * Read until we find an empty slot. Otherwise, read until end. This seems + * to be less frequent than vacuum. So let's keep this function here + * rather than overloading the vacuum process. + * + * The scan stops at the first entry whose type is MAP_ENTRY_EMPTY or at + * the first short read. In both cases curr_pos is rewound to the start of + * that slot, so the write below either overwrites the empty entry or + * appends at the end of the file. + * + * NOTE(review): the remark about vacuum reads as if it was inherited from + * the relation key map code - confirm it still applies to WAL keys. + */ + while (1) + { + WalKeyFileEntry read_entry; + off_t prev_pos = curr_pos; + + if (!pg_tde_read_one_wal_key_file_entry(fd, &read_entry, &curr_pos)) + { + curr_pos = prev_pos; + break; + } + + if (read_entry.type == MAP_ENTRY_EMPTY) + { + curr_pos = prev_pos; + break; + } + } + + /* Initialize WAL key file entry and encrypt key */ + pg_tde_initialize_wal_key_file_entry(&write_entry, principal_key, rlocator, rel_key_data); + + /* Write the given entry at curr_pos; i.e. the free entry.
*/ + pg_tde_write_one_wal_key_file_entry(fd, &write_entry, &curr_pos, db_map_path); + + CloseTransientFile(fd); +} +/* Return a palloc'd copy of entry->enc_key with the key bytes decrypted by principal_key; the AAD is the entry prefix before enc_key, exactly as at encryption. ERROR on failure. */ +static WalEncryptionKey * +pg_tde_decrypt_wal_key(TDEPrincipalKey *principal_key, WalKeyFileEntry *entry) +{ + WalEncryptionKey *key = palloc_object(WalEncryptionKey); + + Assert(principal_key); + + *key = entry->enc_key; + + if (!AesGcmDecrypt(principal_key->keyData, + entry->entry_iv, MAP_ENTRY_IV_SIZE, + (unsigned char *) entry, offsetof(WalKeyFileEntry, enc_key), + entry->enc_key.key, INTERNAL_KEY_LEN, + key->key, + entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE)) + ereport(ERROR, + errmsg("Failed to decrypt key, incorrect principal key or corrupted key file")); + + return key; +} +/* Write one entry at *offset, fsync the file, and advance *offset past the written entry. */ +static void +pg_tde_write_one_wal_key_file_entry(int fd, + const WalKeyFileEntry *entry, + off_t *offset, + const char *db_map_path) +{ + int bytes_written = 0; + + bytes_written = pg_pwrite(fd, entry, sizeof(WalKeyFileEntry), *offset); + + if (bytes_written != sizeof(WalKeyFileEntry)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not write WAL key file \"%s\": %m", db_map_path)); + } + if (pg_fsync(fd) != 0) + { + ereport(data_sync_elevel(ERROR), + errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", db_map_path)); + } + + *offset += bytes_written; +} +/* Fill entry from rlocator and rel_key_data, draw a random IV, and AES-GCM encrypt the key bytes with principal_key. */ +static void +pg_tde_initialize_wal_key_file_entry(WalKeyFileEntry *entry, + const TDEPrincipalKey *principal_key, + const RelFileLocator *rlocator, + const WalEncryptionKey *rel_key_data) +{ + entry->spcOid = rlocator->spcOid; + entry->relNumber = rlocator->relNumber; + entry->type = rel_key_data->type; + entry->enc_key = *rel_key_data; + + if (!RAND_bytes(entry->entry_iv, MAP_ENTRY_IV_SIZE)) + ereport(ERROR, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate iv for wal key file entry: %s", ERR_error_string(ERR_get_error(), NULL))); + + AesGcmEncrypt(principal_key->keyData, + entry->entry_iv, MAP_ENTRY_IV_SIZE, + (unsigned char *) entry, offsetof(WalKeyFileEntry, enc_key), +
rel_key_data->key, INTERNAL_KEY_LEN, + entry->enc_key.key, + entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE); +} + +#ifndef FRONTEND +/* + * Re-encrypts every WAL key under new_principal_key into a new key file that replaces the old one; WAL-logs it if write_xlog. + */ +void +pg_tde_perform_rotate_server_key(TDEPrincipalKey *principal_key, + TDEPrincipalKey *new_principal_key, + bool write_xlog) +{ + TDESignedPrincipalKeyInfo new_signed_key_info; + off_t old_curr_pos, + new_curr_pos; + int old_fd, + new_fd; + char old_path[MAXPGPATH], + new_path[MAXPGPATH]; + + Assert(principal_key); + Assert(principal_key->keyInfo.databaseId == GLOBAL_DATA_TDE_OID); + + pg_tde_sign_principal_key_info(&new_signed_key_info, new_principal_key); + + pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, old_path); + snprintf(new_path, MAXPGPATH, "%s.r", old_path); + + old_fd = pg_tde_open_wal_key_file_read(old_path, false, &old_curr_pos); + new_fd = pg_tde_open_wal_key_file_write(new_path, &new_signed_key_info, true, &new_curr_pos); + + /* Read all entries until EOF */ + while (1) + { + WalEncryptionKey *key; + WalKeyFileEntry read_map_entry; + WalKeyFileEntry write_map_entry; + RelFileLocator rloc = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + + if (!pg_tde_read_one_wal_key_file_entry(old_fd, &read_map_entry, &old_curr_pos)) + break; + + if (read_map_entry.type == MAP_ENTRY_EMPTY) + continue; + + /* Decrypt and re-encrypt key */ + key = pg_tde_decrypt_wal_key(principal_key, &read_map_entry); + pg_tde_initialize_wal_key_file_entry(&write_map_entry, new_principal_key, &rloc, key); + + pg_tde_write_one_wal_key_file_entry(new_fd, &write_map_entry, &new_curr_pos, new_path); + + pfree(key); + } + + CloseTransientFile(old_fd); + CloseTransientFile(new_fd); + + /* + * Do the final steps - replace the current WAL key file with the file + * with new data.
+ */ + durable_unlink(old_path, ERROR); + durable_rename(new_path, old_path, ERROR); + + /* + * We do WAL writes past the event ("the write behind logging") rather + * than before ("the write ahead") because we need logging here only for + * replication purposes. The rotation results in data written and fsynced + * to disk. Which in most cases would happen way before it's written to + * the WAL disk file. As WAL will be flushed at the end of the + * transaction, on its commit, hence after this function returns (there is + * also a bg writer, but the commit is what is guaranteed). And it makes + * sense to replicate the event only after its effect has been + * successfully applied to the source. + */ + if (write_xlog) + { + XLogPrincipalKeyRotate xlrec; +/* The record describes the NEW key: its name and its keyring must both come from new_principal_key. */ + xlrec.databaseId = principal_key->keyInfo.databaseId; + xlrec.keyringId = new_principal_key->keyInfo.keyringId; + memcpy(xlrec.keyName, new_principal_key->keyInfo.name, sizeof(new_principal_key->keyInfo.name)); + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(XLogPrincipalKeyRotate)); + XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ROTATE_PRINCIPAL_KEY); + } +} +#endif +/* Take the keys lock exclusively and open the key file of signed_key_info's database for writing (third argument false); the open path writes the header when the file is empty. */ +#ifndef FRONTEND +void +pg_tde_save_server_key_redo(const TDESignedPrincipalKeyInfo *signed_key_info) +{ + int fd; + off_t curr_pos; + char db_map_path[MAXPGPATH]; + + pg_tde_set_db_file_path(signed_key_info->data.databaseId, db_map_path); + + LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); + + fd = pg_tde_open_wal_key_file_write(db_map_path, signed_key_info, false, &curr_pos); + CloseTransientFile(fd); + + LWLockRelease(tde_lwlock_enc_keys()); +} +#endif + +#ifndef FRONTEND +/* + * Creates the key file and saves the principal key information. + * + * If the file pre-exist, it truncates the file before adding principal key + * information. + * + * The caller must have an EXCLUSIVE LOCK on the files before calling this function. + * + * write_xlog: if true, the function will write an XLOG record about the + * principal key addition.
We may want to skip this during server recovery/startup + * or in some other cases when WAL writes are not allowed. + */ +void +pg_tde_save_server_key(const TDEPrincipalKey *principal_key, bool write_xlog) +{ + int fd; + off_t curr_pos = 0; + char db_map_path[MAXPGPATH]; + TDESignedPrincipalKeyInfo signed_key_Info; + + pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, db_map_path); + + ereport(DEBUG2, errmsg("pg_tde_save_server_key")); + + pg_tde_sign_principal_key_info(&signed_key_Info, principal_key); + + if (write_xlog) + { + XLogBeginInsert(); + XLogRegisterData((char *) &signed_key_Info, sizeof(TDESignedPrincipalKeyInfo)); + XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_PRINCIPAL_KEY); + } + + fd = pg_tde_open_wal_key_file_write(db_map_path, &signed_key_Info, true, &curr_pos); + CloseTransientFile(fd); +} +#endif + +/* + * Get a palloc'd copy of the principal key info from the key file header, or NULL + * if the file is missing or empty. Caller must hold LW_SHARED or higher on files. + */ +TDESignedPrincipalKeyInfo * +pg_tde_get_server_key_info(Oid dbOid) +{ + char db_map_path[MAXPGPATH]; + int fd; + WalKeyFileHeader fheader; + TDESignedPrincipalKeyInfo *signed_key_info = NULL; + off_t bytes_read = 0; + + pg_tde_set_db_file_path(dbOid, db_map_path); + + /* + * Ensuring that we always open the file in binary mode. The caller must + * specify other flags for reading, writing or creating the file. + */ + fd = pg_tde_open_wal_key_file_basic(db_map_path, O_RDONLY, true); + + /* The file does not exist. */ + if (fd < 0) + return NULL; + + pg_tde_wal_key_file_header_read(db_map_path, fd, &fheader, &bytes_read); + + CloseTransientFile(fd); + + /* + * It's not a new file.
So we can copy the principal key info from the + * header + */ + if (bytes_read > 0) + { + signed_key_info = palloc_object(TDESignedPrincipalKeyInfo); + *signed_key_info = fheader.signed_key_info; + } + + return signed_key_info; +} +/* Count the non-empty entries in the key file for dbOid; returns 0 when the file could not be opened (fd < 0). Requires the keys lock. */ +int +pg_tde_count_wal_keys_in_file(Oid dbOid) +{ + char db_map_path[MAXPGPATH]; + int fd; + off_t curr_pos = 0; + WalKeyFileEntry entry; + int count = 0; + + Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_SHARED) || + LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + + pg_tde_set_db_file_path(dbOid, db_map_path); + + fd = pg_tde_open_wal_key_file_read(db_map_path, true, &curr_pos); + if (fd < 0) + return count; + + while (pg_tde_read_one_wal_key_file_entry(fd, &entry, &curr_pos)) + { + if (entry.type != MAP_ENTRY_EMPTY) + count++; + } + + CloseTransientFile(fd); + + return count; +} +/* WAL-log the deletion, then durably unlink the key file for dbOid. The file must hold no keys and the keys lock must be held exclusively. */ +#ifndef FRONTEND +void +pg_tde_delete_server_key(Oid dbOid) +{ + char path[MAXPGPATH]; + + Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + Assert(pg_tde_count_wal_keys_in_file(dbOid) == 0); + + pg_tde_set_db_file_path(dbOid, path); + + XLogBeginInsert(); + XLogRegisterData((char *) &dbOid, sizeof(Oid)); + XLogInsert(RM_TDERMGR_ID, XLOG_TDE_DELETE_PRINCIPAL_KEY); + + /* Remove the whole WAL key file */ + durable_unlink(path, ERROR); +} +#endif diff --git a/contrib/pg_tde/src/access/pg_tde_xlog_smgr.c b/contrib/pg_tde/src/access/pg_tde_xlog_smgr.c index 1c51ebffd36..7e1ecbfc383 100644 --- a/contrib/pg_tde/src/access/pg_tde_xlog_smgr.c +++ b/contrib/pg_tde/src/access/pg_tde_xlog_smgr.c @@ -13,7 +13,7 @@ #include "utils/guc.h" #include "utils/memutils.h" -#include "access/pg_tde_tdemap.h" +#include "access/pg_tde_xlog_keys.h" #include "access/pg_tde_xlog_smgr.h" #include "catalog/tde_global_space.h" #include "encryption/enc_tde.h" @@ -42,7 +42,7 @@ static const XLogSmgr tde_xlog_smgr = { static void *EncryptionCryptCtx = NULL; /* TODO: can be swapped out to the disk */ -static InternalKey EncryptionKey = +static
WalEncryptionKey EncryptionKey = { .type = MAP_ENTRY_EMPTY, .start_lsn = InvalidXLogRecPtr, @@ -200,7 +200,7 @@ TDEXLogSmgrInit() void TDEXLogSmgrInitWrite(bool encrypt_xlog) { - InternalKey *key = pg_tde_read_last_wal_key(); + WalEncryptionKey *key = pg_tde_read_last_wal_key(); /* * Always generate a new key on starting PostgreSQL to protect against @@ -232,7 +232,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog) void TDEXLogSmgrInitWriteReuseKey() { - InternalKey *key = pg_tde_read_last_wal_key(); + WalEncryptionKey *key = pg_tde_read_last_wal_key(); if (key) { @@ -252,7 +252,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset, TimeLineID tli, XLogSegNo segno) { char iv_prefix[16]; - InternalKey *key = &EncryptionKey; + WalEncryptionKey *key = &EncryptionKey; char *enc_buff = EncryptionBuf; #ifndef FRONTEND diff --git a/contrib/pg_tde/src/catalog/tde_principal_key.c b/contrib/pg_tde/src/catalog/tde_principal_key.c index 178461905a9..49915f8f416 100644 --- a/contrib/pg_tde/src/catalog/tde_principal_key.c +++ b/contrib/pg_tde/src/catalog/tde_principal_key.c @@ -22,6 +22,7 @@ #include "utils/wait_event.h" #include "access/pg_tde_tdemap.h" +#include "access/pg_tde_xlog_keys.h" #include "access/pg_tde_xlog.h" #include "catalog/tde_global_space.h" #include "catalog/tde_principal_key.h" @@ -276,13 +277,19 @@ set_principal_key_with_keyring(const char *key_name, if (!already_has_key) { /* First key created for the database */ - pg_tde_save_principal_key(new_principal_key, true); + if (dbOid == GLOBAL_DATA_TDE_OID) + pg_tde_save_server_key(new_principal_key, true); + else + pg_tde_save_principal_key(new_principal_key, true); push_principal_key_to_cache(new_principal_key); } else { /* key rotation */ - pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, true); + if (dbOid == GLOBAL_DATA_TDE_OID) + pg_tde_perform_rotate_server_key(curr_principal_key, new_principal_key, true); + else + pg_tde_perform_rotate_key(curr_principal_key, 
new_principal_key, true); clear_principal_key_cache(curr_principal_key->keyInfo.databaseId); push_principal_key_to_cache(new_principal_key); @@ -343,7 +350,10 @@ xl_tde_perform_rotate_key(XLogPrincipalKeyRotate *xlrec) memcpy(new_principal_key->keyData, keyInfo->data.data, keyInfo->data.len); - pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, false); + if (xlrec->databaseId == GLOBAL_DATA_TDE_OID) + pg_tde_perform_rotate_server_key(curr_principal_key, new_principal_key, false); + else + pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, false); clear_principal_key_cache(curr_principal_key->keyInfo.databaseId); push_principal_key_to_cache(new_principal_key); @@ -807,12 +817,14 @@ pg_tde_delete_default_key(PG_FUNCTION_ARGS) principal_key = GetPrincipalKeyNoDefault(GLOBAL_DATA_TDE_OID, LW_EXCLUSIVE); if (pg_tde_is_same_principal_key(default_principal_key, principal_key)) { - if (pg_tde_count_encryption_keys(GLOBAL_DATA_TDE_OID) != 0) + if (pg_tde_count_wal_keys_in_file(GLOBAL_DATA_TDE_OID) != 0) ereport(ERROR, errcode(ERRCODE_OBJECT_IN_USE), errmsg("cannot delete default principal key"), errhint("There are WAL encryption keys.")); - dbs = lappend_oid(dbs, GLOBAL_DATA_TDE_OID); + + pg_tde_delete_server_key(GLOBAL_DATA_TDE_OID); + clear_principal_key_cache(GLOBAL_DATA_TDE_OID); } /* @@ -950,7 +962,11 @@ get_principal_key_from_keyring(Oid dbOid) Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); - principalKeyInfo = pg_tde_get_principal_key_info(dbOid); + if (dbOid == GLOBAL_DATA_TDE_OID) + principalKeyInfo = pg_tde_get_server_key_info(dbOid); + else + principalKeyInfo = pg_tde_get_principal_key_info(dbOid); + if (principalKeyInfo == NULL) return NULL; @@ -1083,7 +1099,10 @@ GetPrincipalKey(Oid dbOid, LWLockMode lockMode) * current funcion may be invoked during server startup/recovery where * WAL writes forbidden. 
*/ - pg_tde_save_principal_key(newPrincipalKey, false); + if (dbOid == GLOBAL_DATA_TDE_OID) + pg_tde_save_server_key(newPrincipalKey, false); + else + pg_tde_save_principal_key(newPrincipalKey, false); push_principal_key_to_cache(newPrincipalKey); @@ -1204,7 +1223,7 @@ pg_tde_verify_provider_keys_in_use(GenericKeyring *modified_provider) LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE); /* Check the server key that is used for WAL encryption */ - existing_principal_key = pg_tde_get_principal_key_info(GLOBAL_DATA_TDE_OID); + existing_principal_key = pg_tde_get_server_key_info(GLOBAL_DATA_TDE_OID); if (existing_principal_key != NULL && existing_principal_key->data.keyringId == modified_provider->keyring_id) { @@ -1309,7 +1328,10 @@ pg_tde_rotate_default_key_for_database(TDEPrincipalKey *oldKey, TDEPrincipalKey *newKey = *newKeyTemplate; newKey->keyInfo.databaseId = oldKey->keyInfo.databaseId; - pg_tde_perform_rotate_key(oldKey, newKey, true); + if (oldKey->keyInfo.databaseId == GLOBAL_DATA_TDE_OID) + pg_tde_perform_rotate_server_key(oldKey, newKey, true); + else + pg_tde_perform_rotate_key(oldKey, newKey, true); clear_principal_key_cache(oldKey->keyInfo.databaseId); push_principal_key_to_cache(newKey); diff --git a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h index 121720827be..88a6249bff8 100644 --- a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h +++ b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h @@ -54,28 +54,6 @@ typedef struct XLogRelKey RelFileLocator rlocator; } XLogRelKey; -/* - * TODO: For now it's a simple linked list which is no good. So consider having - * dedicated WAL keys cache inside some proper data structure. 
- */ -typedef struct WALKeyCacheRec -{ - XLogRecPtr start_lsn; - XLogRecPtr end_lsn; - - InternalKey key; - void *crypt_ctx; - - struct WALKeyCacheRec *next; -} WALKeyCacheRec; - -extern InternalKey *pg_tde_read_last_wal_key(void); -extern WALKeyCacheRec *pg_tde_get_last_wal_key(void); -extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn); -extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void); -extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path); -extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type); - #define PG_TDE_MAP_FILENAME "%d_keys" static inline void diff --git a/contrib/pg_tde/src/include/access/pg_tde_xlog_keys.h b/contrib/pg_tde/src/include/access/pg_tde_xlog_keys.h new file mode 100644 index 00000000000..1d9b85fa2a8 --- /dev/null +++ b/contrib/pg_tde/src/include/access/pg_tde_xlog_keys.h @@ -0,0 +1,65 @@ +#ifndef PG_TDE_XLOG_KEYS_H +#define PG_TDE_XLOG_KEYS_H + +#include "access/xlog_internal.h" +#include "storage/relfilelocator.h" + +#include "access/pg_tde_tdemap.h" +#include "catalog/tde_principal_key.h" +#include "common/pg_tde_utils.h" +/* A WAL encryption key; also embedded in key file entries, where key[] holds the encrypted key bytes. */ +typedef struct WalEncryptionKey +{ + uint8 key[INTERNAL_KEY_LEN]; + uint8 base_iv[INTERNAL_KEY_IV_LEN]; + uint32 type; + + XLogRecPtr start_lsn; +} WalEncryptionKey; +/* One fixed-size record of the WAL key file; enc_key.key is AES-GCM encrypted with the principal key. */ +typedef struct WalKeyFileEntry +{ + Oid spcOid; + RelFileNumber relNumber; + uint32 type; + WalEncryptionKey enc_key; + /* IV and tag used when encrypting the key itself */ + unsigned char entry_iv[MAP_ENTRY_IV_SIZE]; + unsigned char aead_tag[MAP_ENTRY_AEAD_TAG_SIZE]; +} WalKeyFileEntry; +/* Stored at offset 0 of the WAL key file: the file magic followed by the signed principal key info. */ +typedef struct WalKeyFileHeader +{ + int32 file_version; + TDESignedPrincipalKeyInfo signed_key_info; +} WalKeyFileHeader; + +/* + * TODO: For now it's a simple linked list which is no good. So consider having + * dedicated WAL keys cache inside some proper data structure.
+ */ +typedef struct WALKeyCacheRec +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + + WalEncryptionKey key; + void *crypt_ctx; + + struct WALKeyCacheRec *next; +} WALKeyCacheRec; + +extern int pg_tde_count_wal_keys_in_file(Oid dbOid); +extern void pg_tde_create_wal_key(WalEncryptionKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type); +extern void pg_tde_delete_server_key(Oid dbOid); +extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn); +extern WALKeyCacheRec *pg_tde_get_last_wal_key(void); +extern TDESignedPrincipalKeyInfo *pg_tde_get_server_key_info(Oid dbOid); +extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void); +extern void pg_tde_perform_rotate_server_key(TDEPrincipalKey *principal_key, TDEPrincipalKey *new_principal_key, bool write_xlog); +extern WalEncryptionKey *pg_tde_read_last_wal_key(void); +extern void pg_tde_save_server_key(const TDEPrincipalKey *principal_key, bool write_xlog); +extern void pg_tde_save_server_key_redo(const TDESignedPrincipalKeyInfo *signed_key_info); +extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path); + +#endif /* PG_TDE_XLOG_KEYS_H */