PG-1813 Separate format for smgr and wal keyfiles

We want to add timeline information to the WAL keys and cannot easily do
so without affecting existing clusters' relation key files.

This commit does the bare minimum to separate the two completely and as
such contains a fair bit of duplicated code.

The file format for the WAL key file is exactly the same before and
after this commit.

There is _a lot_ of cleanup that will have to be done on both sides of
this separation, but this is a bit of "it gets worse before it gets
better".
pull/238/head
Anders Åstrand 1 month ago committed by AndersAstrand
parent a90de9898a
commit 25ba877852
  1. 1
      contrib/pg_tde/Makefile
  2. 1
      contrib/pg_tde/Makefile.tools
  3. 2
      contrib/pg_tde/meson.build
  4. 333
      contrib/pg_tde/src/access/pg_tde_tdemap.c
  5. 7
      contrib/pg_tde/src/access/pg_tde_xlog.c
  6. 871
      contrib/pg_tde/src/access/pg_tde_xlog_keys.c
  7. 10
      contrib/pg_tde/src/access/pg_tde_xlog_smgr.c
  8. 40
      contrib/pg_tde/src/catalog/tde_principal_key.c
  9. 22
      contrib/pg_tde/src/include/access/pg_tde_tdemap.h
  10. 65
      contrib/pg_tde/src/include/access/pg_tde_xlog_keys.h

@ -31,6 +31,7 @@ OBJS = src/encryption/enc_tde.o \
src/encryption/enc_aes.o \
src/access/pg_tde_tdemap.o \
src/access/pg_tde_xlog.o \
src/access/pg_tde_xlog_keys.o \
src/access/pg_tde_xlog_smgr.o \
src/keyring/keyring_curl.o \
src/keyring/keyring_file.o \

@ -4,6 +4,7 @@ TDE_XLOG_OBJS = \
TDE_OBJS = \
src/access/pg_tde_tdemap.frontend \
src/catalog/tde_keyring.frontend \
src/access/pg_tde_xlog_keys.frontend \
src/catalog/tde_keyring_parse_opts.frontend \
src/catalog/tde_principal_key.frontend \
src/common/pg_tde_utils.frontend \

@ -3,6 +3,7 @@ curldep = dependency('libcurl')
pg_tde_sources = files(
'src/access/pg_tde_tdemap.c',
'src/access/pg_tde_xlog.c',
'src/access/pg_tde_xlog_keys.c',
'src/access/pg_tde_xlog_smgr.c',
'src/catalog/tde_keyring.c',
'src/catalog/tde_keyring_parse_opts.c',
@ -24,6 +25,7 @@ pg_tde_sources = files(
tde_frontend_sources = files(
'src/access/pg_tde_tdemap.c',
'src/access/pg_tde_xlog_keys.c',
'src/access/pg_tde_xlog_smgr.c',
'src/catalog/tde_keyring.c',
'src/catalog/tde_keyring_parse_opts.c',

@ -43,34 +43,27 @@
#define MAP_ENTRY_SIZE sizeof(TDEMapEntry)
#define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader)
#define MaxXLogRecPtr (~(XLogRecPtr)0)
typedef struct TDEFileHeader
{
int32 file_version;
TDESignedPrincipalKeyInfo signed_key_info;
} TDEFileHeader;
static WALKeyCacheRec *tde_wal_key_cache = NULL;
static WALKeyCacheRec *tde_wal_key_last_rec = NULL;
static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data);
static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key);
static bool pg_tde_find_map_entry(const RelFileLocator *rlocator, TDEMapEntryType key_type, char *db_map_path, TDEMapEntry *map_entry);
static InternalKey *tde_decrypt_rel_key(TDEPrincipalKey *principal_key, TDEMapEntry *map_entry);
static int pg_tde_open_file_basic(const char *tde_filename, int fileFlags, bool ignore_missing);
static int pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr_pos);
static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos);
static void pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader, off_t *bytes_read);
static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written);
static bool pg_tde_read_one_map_entry(int fd, TDEMapEntry *map_entry, off_t *offset);
static void pg_tde_read_one_map_entry2(int keydata_fd, int32 key_index, TDEMapEntry *map_entry, Oid databaseId);
static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn);
#ifndef FRONTEND
static void pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path);
static int keyrotation_init_file(const TDESignedPrincipalKeyInfo *signed_key_info, char *rotated_filename, const char *filename, off_t *curr_pos);
static void finalize_key_rotation(const char *path_old, const char *path_new);
static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written);
static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data);
static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos);
static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key);
void
pg_tde_save_smgr_key(RelFileLocator rel, const InternalKey *rel_key_data)
@ -259,6 +252,10 @@ pg_tde_perform_rotate_key(TDEPrincipalKey *principal_key, TDEPrincipalKey *new_p
char old_path[MAXPGPATH],
new_path[MAXPGPATH];
/* This function cannot be used to rotate the server key. */
Assert(principal_key);
Assert(principal_key->keyInfo.databaseId != GLOBAL_DATA_TDE_OID);
pg_tde_sign_principal_key_info(&new_signed_key_info, new_principal_key);
pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, old_path);
@ -363,74 +360,6 @@ pg_tde_delete_principal_key(Oid dbOid)
#endif /* !FRONTEND */
/*
 * Stamp the last entry of the key file with its start LSN.
 *
 * It's called by seg_write inside crit section so no pallocs, hence
 * needs keyfile_path
 *
 * lsn          - value written into the start_lsn field of the last entry
 * keyfile_path - path of the key file, supplied by the caller
 *
 * Takes the key lock in exclusive mode for the whole update and fsyncs the
 * file before returning.
 */
void
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
{
LWLock *lock_pk = tde_lwlock_enc_keys();
int fd;
off_t read_pos,
write_pos,
last_key_idx;
LWLockAcquire(lock_pk, LW_EXCLUSIVE);
fd = pg_tde_open_file_write(keyfile_path, NULL, false, &read_pos);
/* Index of the last entry, derived from the file size. */
last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1;
/* Byte offset of the start_lsn field inside that entry. */
write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * MAP_ENTRY_SIZE) + offsetof(TDEMapEntry, enc_key) + offsetof(InternalKey, start_lsn);
if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
{
ereport(ERROR,
errcode_for_file_access(),
errmsg("could not write tde key data file: %m"));
}
/*
 * If the last key overlaps with the previous, then invalidate the
 * previous one. This may (and will) happen on replicas because it
 * re-reads primary's data from the beginning of the segment on restart.
 */
if (last_key_idx > 0)
{
off_t prev_key_pos = TDE_FILE_HEADER_SIZE + ((last_key_idx - 1) * MAP_ENTRY_SIZE);
TDEMapEntry prev_map_entry;
if (pg_pread(fd, &prev_map_entry, MAP_ENTRY_SIZE, prev_key_pos) != MAP_ENTRY_SIZE)
{
ereport(ERROR,
errcode_for_file_access(),
errmsg("could not read previous WAL key: %m"));
}
/* The previous key starts at or after the new LSN: mark it invalid. */
if (prev_map_entry.enc_key.start_lsn >= lsn)
{
prev_map_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID;
if (pg_pwrite(fd, &prev_map_entry, MAP_ENTRY_SIZE, prev_key_pos) != MAP_ENTRY_SIZE)
{
ereport(ERROR,
errcode_for_file_access(),
errmsg("could not write invalidated key: %m"));
}
}
}
if (pg_fsync(fd) != 0)
{
ereport(data_sync_elevel(ERROR),
errcode_for_file_access(),
errmsg("could not fsync file: %m"));
}
LWLockRelease(lock_pk);
CloseTransientFile(fd);
}
void
pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const TDEPrincipalKey *principal_key)
{
@ -449,6 +378,7 @@ pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const
signed_key_info->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE);
}
#ifndef FRONTEND
static void
pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data)
{
@ -469,7 +399,9 @@ pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *princ
map_entry->enc_key.key,
map_entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE);
}
#endif
#ifndef FRONTEND
static void
pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path)
{
@ -492,41 +424,9 @@ pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset,
*offset += bytes_written;
}
/*
 * Generates a new internal key for WAL and adds it to the key file.
 *
 * We have a special function for WAL as it is being called during recovery
 * start so there should be no XLog records and acquired locks. The key is
 * always created with start_lsn = InvalidXLogRecPtr. Which will be updated
 * with the actual lsn by the first WAL write.
 *
 * rel_key_data - output; filled in with the generated key
 * newrlocator  - locator whose dbOid selects the principal key
 * entry_type   - key type passed on to the key generator
 *
 * Raises ERROR if no principal key is configured.
 */
void
pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type)
{
TDEPrincipalKey *principal_key;
LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);
principal_key = GetPrincipalKey(newrlocator->dbOid, LW_EXCLUSIVE);
if (principal_key == NULL)
{
ereport(ERROR,
errmsg("principal key not configured"),
errhint("Use pg_tde_set_server_key_using_global_key_provider() to configure one."));
}
/* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */
pg_tde_generate_internal_key(rel_key_data, entry_type);
pg_tde_write_key_map_entry(newrlocator, rel_key_data, principal_key);
/* Only frontend builds free the principal key here. */
#ifdef FRONTEND
free(principal_key);
#endif
LWLockRelease(tde_lwlock_enc_keys());
}
#ifndef FRONTEND
/*
* The caller must hold an exclusive lock on the key file to avoid
* concurrent in place updates leading to data conflicts.
@ -580,6 +480,7 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *re
CloseTransientFile(map_fd);
}
#endif
/*
* Returns true if we find a valid match; e.g. type is not set to
@ -727,6 +628,7 @@ pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr
return fd;
}
#ifndef FRONTEND
/*
* Open for write and Validate File Header:
* header: {Format Version, Principal Key Name}
@ -756,6 +658,7 @@ pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo
*curr_pos = bytes_read + bytes_written;
return fd;
}
#endif
/*
* Read TDE file header from a TDE file and fill in the fheader data structure.
@ -780,6 +683,7 @@ pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader
}
}
#ifndef FRONTEND
/*
* Write TDE file header to a TDE file.
*/
@ -812,6 +716,7 @@ pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrinci
return fd;
}
#endif
/*
* Returns true if a map entry if found or false if we have reached the end of
@ -836,27 +741,6 @@ pg_tde_read_one_map_entry(int map_file, TDEMapEntry *map_entry, off_t *offset)
return true;
}
/*
 * Read the map entry at position key_index (0-based, counted after the file
 * header) into map_entry.
 *
 * A short or failed read is reported as FATAL; databaseId is used only to
 * build the file path shown in that message.
 *
 * TODO: Unify with pg_tde_read_one_map_entry()
 */
static void
pg_tde_read_one_map_entry2(int fd, int32 key_index, TDEMapEntry *map_entry, Oid databaseId)
{
off_t read_pos;
read_pos = TDE_FILE_HEADER_SIZE + key_index * MAP_ENTRY_SIZE;
if (pg_pread(fd, map_entry, MAP_ENTRY_SIZE, read_pos) != MAP_ENTRY_SIZE)
{
char db_map_path[MAXPGPATH];
pg_tde_set_db_file_path(databaseId, db_map_path);
ereport(FATAL,
errcode_for_file_access(),
errmsg("could not find the required key at index %d in tde data file \"%s\": %m",
key_index, db_map_path));
}
}
/*
* Get the principal key from the key file. The caller must hold
* a LW_SHARED or higher lock on files before calling this function.
@ -975,186 +859,3 @@ pg_tde_get_smgr_key(RelFileLocator rel)
return rel_key;
}
/*
 * Returns the last (most recently added) record of the WAL key cache, or
 * NULL if nothing has been cached yet.
 */
WALKeyCacheRec *
pg_tde_get_last_wal_key(void)
{
return tde_wal_key_last_rec;
}
/*
 * Returns the first record of the WAL key cache list, or NULL if nothing has
 * been cached yet.
 */
WALKeyCacheRec *
pg_tde_get_wal_cache_keys(void)
{
return tde_wal_key_cache;
}
/*
 * Read the last entry of the WAL key file and return its decrypted key.
 *
 * Returns NULL when no principal key is available or when the file holds
 * nothing but its header.
 */
InternalKey *
pg_tde_read_last_wal_key(void)
{
RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
char db_map_path[MAXPGPATH];
off_t read_pos = 0;
LWLock *lock_pk = tde_lwlock_enc_keys();
TDEPrincipalKey *principal_key;
int fd;
int file_idx;
TDEMapEntry map_entry;
InternalKey *rel_key_data;
off_t fsize;
LWLockAcquire(lock_pk, LW_EXCLUSIVE);
principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE);
if (principal_key == NULL)
{
LWLockRelease(lock_pk);
elog(DEBUG1, "init WAL encryption: no principal key");
return NULL;
}
pg_tde_set_db_file_path(rlocator.dbOid, db_map_path);
fd = pg_tde_open_file_read(db_map_path, false, &read_pos);
fsize = lseek(fd, 0, SEEK_END);
/* No keys */
if (fsize == TDE_FILE_HEADER_SIZE)
{
#ifdef FRONTEND
pfree(principal_key);
#endif
LWLockRelease(lock_pk);
CloseTransientFile(fd);
return NULL;
}
/* Index of the last entry, derived from the file size. */
file_idx = ((fsize - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1;
pg_tde_read_one_map_entry2(fd, file_idx, &map_entry, rlocator.dbOid);
rel_key_data = tde_decrypt_rel_key(principal_key, &map_entry);
#ifdef FRONTEND
pfree(principal_key);
#endif
LWLockRelease(lock_pk);
CloseTransientFile(fd);
return rel_key_data;
}
/*
 * Fetches WAL keys from disk and adds them to the WAL cache
 *
 * Only entries of type TDE_KEY_TYPE_WAL_UNENCRYPTED or
 * TDE_KEY_TYPE_WAL_ENCRYPTED whose start_lsn is set and not below the
 * requested start_lsn are cached.
 *
 * Returns the first record added by this call, or NULL if no principal key
 * is available or no entry qualified.
 */
WALKeyCacheRec *
pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
{
RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
char db_map_path[MAXPGPATH];
off_t read_pos = 0;
LWLock *lock_pk = tde_lwlock_enc_keys();
TDEPrincipalKey *principal_key;
int fd;
int keys_count;
WALKeyCacheRec *return_wal_rec = NULL;
LWLockAcquire(lock_pk, LW_SHARED);
principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED);
if (principal_key == NULL)
{
LWLockRelease(lock_pk);
elog(DEBUG1, "fetch WAL keys: no principal key");
return NULL;
}
pg_tde_set_db_file_path(rlocator.dbOid, db_map_path);
fd = pg_tde_open_file_read(db_map_path, false, &read_pos);
/* Number of entries stored after the header. */
keys_count = (lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE;
/*
 * If there is no keys, return a fake one (with the range 0-infinity) so
 * the reader won't try to check the disk all the time. This for the
 * walsender in case if WAL is unencrypted and never was.
 */
if (keys_count == 0)
{
WALKeyCacheRec *wal_rec;
InternalKey stub_key = {
.start_lsn = InvalidXLogRecPtr,
};
wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr);
#ifdef FRONTEND
/* The backend frees it after copying to the cache. */
pfree(principal_key);
#endif
LWLockRelease(lock_pk);
CloseTransientFile(fd);
return wal_rec;
}
for (int file_idx = 0; file_idx < keys_count; file_idx++)
{
TDEMapEntry map_entry;
pg_tde_read_one_map_entry2(fd, file_idx, &map_entry, rlocator.dbOid);
/*
 * Skip new (just created but not updated by write) and invalid keys
 */
if (map_entry.enc_key.start_lsn != InvalidXLogRecPtr &&
(map_entry.enc_key.type == TDE_KEY_TYPE_WAL_UNENCRYPTED ||
map_entry.enc_key.type == TDE_KEY_TYPE_WAL_ENCRYPTED) &&
map_entry.enc_key.start_lsn >= start_lsn)
{
InternalKey *rel_key_data = tde_decrypt_rel_key(principal_key, &map_entry);
WALKeyCacheRec *wal_rec;
wal_rec = pg_tde_add_wal_key_to_cache(rel_key_data, map_entry.enc_key.start_lsn);
/* The cache holds its own copy of the key. */
pfree(rel_key_data);
/* Remember only the first record added by this call. */
if (!return_wal_rec)
return_wal_rec = wal_rec;
}
}
#ifdef FRONTEND
pfree(principal_key);
#endif
LWLockRelease(lock_pk);
CloseTransientFile(fd);
return return_wal_rec;
}
/*
 * Append a copy of *key to the tail of the WAL key cache list.
 *
 * The new record covers [start_lsn, MaxXLogRecPtr); the end_lsn of the
 * previous tail, if any, is set to the new record's start_lsn.
 * In backend builds the record is allocated in TopMemoryContext.
 *
 * Returns the newly added record.
 */
static WALKeyCacheRec *
pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
{
WALKeyCacheRec *wal_rec;
#ifndef FRONTEND
MemoryContext oldCtx;
oldCtx = MemoryContextSwitchTo(TopMemoryContext);
#endif
wal_rec = palloc0_object(WALKeyCacheRec);
#ifndef FRONTEND
MemoryContextSwitchTo(oldCtx);
#endif
wal_rec->start_lsn = start_lsn;
wal_rec->end_lsn = MaxXLogRecPtr;
wal_rec->key = *key;
wal_rec->crypt_ctx = NULL;
/* First record: it is both head and tail of the list. */
if (!tde_wal_key_last_rec)
{
tde_wal_key_last_rec = wal_rec;
tde_wal_key_cache = tde_wal_key_last_rec;
}
else
{
tde_wal_key_last_rec->next = wal_rec;
tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn;
tde_wal_key_last_rec = wal_rec;
}
return wal_rec;
}

@ -12,7 +12,9 @@
#include "utils/guc.h"
#include "utils/memutils.h"
#include "access/pg_tde_xlog_keys.h"
#include "access/pg_tde_xlog.h"
#include "catalog/tde_global_space.h"
#include "catalog/tde_keyring.h"
#include "encryption/enc_tde.h"
#include "pg_tde.h"
@ -51,7 +53,10 @@ tdeheap_rmgr_redo(XLogReaderState *record)
{
TDESignedPrincipalKeyInfo *mkey = (TDESignedPrincipalKeyInfo *) XLogRecGetData(record);
pg_tde_save_principal_key_redo(mkey);
if (mkey->data.databaseId == GLOBAL_DATA_TDE_OID)
pg_tde_save_server_key_redo(mkey);
else
pg_tde_save_principal_key_redo(mkey);
}
else if (info == XLOG_TDE_DELETE_RELATION_KEY)
{

@ -0,0 +1,871 @@
#include "postgres.h"
#include <openssl/err.h>
#include <openssl/rand.h>
#include "access/xlog_internal.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "storage/fd.h"
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog_keys.h"
#include "access/pg_tde_xlog.h"
#include "catalog/tde_global_space.h"
#include "catalog/tde_principal_key.h"
#include "encryption/enc_aes.h"
#include "encryption/enc_tde.h"
#ifdef FRONTEND
#include "pg_tde_fe.h"
#endif
#define PG_TDE_WAL_KEY_FILE_MAGIC 0x03454454 /* version ID value = TDE 03 */
#define MaxXLogRecPtr (~(XLogRecPtr)0)
static WALKeyCacheRec *tde_wal_key_cache = NULL;
static WALKeyCacheRec *tde_wal_key_last_rec = NULL;
static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(WalEncryptionKey *cached_key, XLogRecPtr start_lsn);
static WalEncryptionKey *pg_tde_decrypt_wal_key(TDEPrincipalKey *principal_key, WalKeyFileEntry *entry);
static void pg_tde_initialize_wal_key_file_entry(WalKeyFileEntry *entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const WalEncryptionKey *rel_key_data);
static int pg_tde_open_wal_key_file_basic(const char *filename, int flags, bool ignore_missing);
static int pg_tde_open_wal_key_file_read(const char *filename, bool ignore_missing, off_t *curr_pos);
static int pg_tde_open_wal_key_file_write(const char *filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos);
static bool pg_tde_read_one_wal_key_file_entry(int fd, WalKeyFileEntry *entry, off_t *offset);
static void pg_tde_read_one_wal_key_file_entry2(int fd, int32 key_index, WalKeyFileEntry *entry, Oid databaseId);
static void pg_tde_wal_key_file_header_read(const char *filename, int fd, WalKeyFileHeader *fheader, off_t *bytes_read);
static int pg_tde_wal_key_file_header_write(const char *filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written);
static void pg_tde_write_one_wal_key_file_entry(int fd, const WalKeyFileEntry *entry, off_t *offset, const char *db_map_path);
static void pg_tde_write_wal_key_file_entry(const RelFileLocator *rlocator, const WalEncryptionKey *rel_key_data, TDEPrincipalKey *principal_key);
/*
 * Stamp the last entry of the WAL key file with its start LSN.
 *
 * It's called by seg_write inside a critical section, so no pallocs are
 * allowed; hence the caller has to supply keyfile_path.
 *
 * lsn          - value written into the start_lsn field of the last entry
 * keyfile_path - path of the WAL key file
 *
 * The key lock is held in exclusive mode for the whole update and the file
 * is fsynced before returning. Raises ERROR if the file cannot be sized,
 * holds no complete key entry, or cannot be read/written.
 */
void
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
{
	LWLock	   *lock_pk = tde_lwlock_enc_keys();
	int			fd;
	off_t		read_pos,
				write_pos,
				last_key_idx,
				file_size;

	LWLockAcquire(lock_pk, LW_EXCLUSIVE);

	fd = pg_tde_open_wal_key_file_write(keyfile_path, NULL, false, &read_pos);

	file_size = lseek(fd, 0, SEEK_END);
	if (file_size < 0)
	{
		ereport(ERROR,
				errcode_for_file_access(),
				errmsg("could not seek in WAL key file \"%s\": %m", keyfile_path));
	}

	/*
	 * Without at least one complete entry there is nothing to update.
	 * Computing the index anyway would wrap around (the sizeof operands are
	 * unsigned) and the in-place write below would land at a bogus offset,
	 * possibly inside the file header.
	 */
	if (file_size < (off_t) (sizeof(WalKeyFileHeader) + sizeof(WalKeyFileEntry)))
	{
		ereport(ERROR,
				errmsg("WAL key file \"%s\" does not contain any keys", keyfile_path));
	}

	/* Index of the last entry, derived from the file size. */
	last_key_idx = ((file_size - (off_t) sizeof(WalKeyFileHeader)) / (off_t) sizeof(WalKeyFileEntry)) - 1;

	/* Byte offset of the start_lsn field inside that entry. */
	write_pos = sizeof(WalKeyFileHeader) +
		(last_key_idx * sizeof(WalKeyFileEntry)) +
		offsetof(WalKeyFileEntry, enc_key) +
		offsetof(WalEncryptionKey, start_lsn);

	if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
	{
		ereport(ERROR,
				errcode_for_file_access(),
				errmsg("could not write WAL key data file: %m"));
	}

	/*
	 * If the last key overlaps with the previous, then invalidate the
	 * previous one. This may (and will) happen on replicas because it
	 * re-reads primary's data from the beginning of the segment on restart.
	 */
	if (last_key_idx > 0)
	{
		off_t		prev_key_pos = sizeof(WalKeyFileHeader) + ((last_key_idx - 1) * sizeof(WalKeyFileEntry));
		WalKeyFileEntry prev_entry;

		if (pg_pread(fd, &prev_entry, sizeof(WalKeyFileEntry), prev_key_pos) != sizeof(WalKeyFileEntry))
		{
			ereport(ERROR,
					errcode_for_file_access(),
					errmsg("could not read previous WAL key: %m"));
		}

		if (prev_entry.enc_key.start_lsn >= lsn)
		{
			prev_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID;

			if (pg_pwrite(fd, &prev_entry, sizeof(WalKeyFileEntry), prev_key_pos) != sizeof(WalKeyFileEntry))
			{
				ereport(ERROR,
						errcode_for_file_access(),
						errmsg("could not write invalidated key: %m"));
			}
		}
	}

	if (pg_fsync(fd) != 0)
	{
		ereport(data_sync_elevel(ERROR),
				errcode_for_file_access(),
				errmsg("could not fsync file: %m"));
	}

	LWLockRelease(lock_pk);
	CloseTransientFile(fd);
}
/*
 * Generates a new internal key for WAL and adds it to the key file.
 *
 * We have a special function for WAL as it is being called during recovery
 * start so there should be no XLog records and acquired locks. The key is
 * always created with start_lsn = InvalidXLogRecPtr. Which will be updated
 * with the actual lsn by the first WAL write.
 *
 * rel_key_data - output; receives type, start_lsn, random key and base IV
 * newrlocator  - locator whose dbOid selects the principal key and key file
 * entry_type   - stored as the type of the new key
 *
 * Raises ERROR if no principal key is configured or if random bytes cannot
 * be generated.
 */
void
pg_tde_create_wal_key(WalEncryptionKey *rel_key_data,
					  const RelFileLocator *newrlocator,
					  TDEMapEntryType entry_type)
{
	TDEPrincipalKey *principal_key;

	LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);

	principal_key = GetPrincipalKey(newrlocator->dbOid, LW_EXCLUSIVE);
	if (principal_key == NULL)
	{
		ereport(ERROR,
				errmsg("principal key not configured"),
				errhint("Use pg_tde_set_server_key_using_global_key_provider() to configure one."));
	}

	/* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */
	rel_key_data->type = entry_type;
	rel_key_data->start_lsn = InvalidXLogRecPtr;

	/*
	 * RAND_bytes() reports success only with a return value of 1; it can
	 * also return -1, which a plain "!" test would mistake for success.
	 */
	if (RAND_bytes(rel_key_data->key, INTERNAL_KEY_LEN) != 1)
		ereport(ERROR,
				errcode(ERRCODE_INTERNAL_ERROR),
				errmsg("could not generate WAL encryption key: %s",
					   ERR_error_string(ERR_get_error(), NULL)));
	if (RAND_bytes(rel_key_data->base_iv, INTERNAL_KEY_IV_LEN) != 1)
		ereport(ERROR,
				errcode(ERRCODE_INTERNAL_ERROR),
				errmsg("could not generate IV for WAL encryption key: %s",
					   ERR_error_string(ERR_get_error(), NULL)));

	pg_tde_write_wal_key_file_entry(newrlocator, rel_key_data, principal_key);

	/* Only frontend builds free the principal key here. */
#ifdef FRONTEND
	pfree(principal_key);
#endif
	LWLockRelease(tde_lwlock_enc_keys());
}
/*
 * Returns the last (most recently added) record of the WAL key cache, or
 * NULL if nothing has been cached yet.
 */
WALKeyCacheRec *
pg_tde_get_last_wal_key(void)
{
return tde_wal_key_last_rec;
}
/*
 * Returns the first record of the WAL key cache list, or NULL if nothing has
 * been cached yet.
 */
WALKeyCacheRec *
pg_tde_get_wal_cache_keys(void)
{
return tde_wal_key_cache;
}
/*
 * Read the last entry of the WAL key file and return its decrypted key.
 *
 * Returns a freshly allocated key, or NULL when no principal key is
 * available or when the file holds nothing but its header.
 */
WalEncryptionKey *
pg_tde_read_last_wal_key(void)
{
	RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	LWLock	   *lock_pk = tde_lwlock_enc_keys();
	char		db_map_path[MAXPGPATH];
	off_t		read_pos = 0;
	off_t		fsize;
	int			fd;
	TDEPrincipalKey *principal_key;
	WalEncryptionKey *last_key = NULL;

	LWLockAcquire(lock_pk, LW_EXCLUSIVE);

	principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE);
	if (principal_key == NULL)
	{
		LWLockRelease(lock_pk);
		elog(DEBUG1, "init WAL encryption: no principal key");
		return NULL;
	}

	pg_tde_set_db_file_path(rlocator.dbOid, db_map_path);
	fd = pg_tde_open_wal_key_file_read(db_map_path, false, &read_pos);
	fsize = lseek(fd, 0, SEEK_END);

	/* A file consisting of just the header has no keys; leave NULL. */
	if (fsize != sizeof(WalKeyFileHeader))
	{
		WalKeyFileEntry entry;
		int			file_idx;

		/* Index of the last entry, derived from the file size. */
		file_idx = ((fsize - sizeof(WalKeyFileHeader)) / sizeof(WalKeyFileEntry)) - 1;
		pg_tde_read_one_wal_key_file_entry2(fd, file_idx, &entry, rlocator.dbOid);
		last_key = pg_tde_decrypt_wal_key(principal_key, &entry);
	}

#ifdef FRONTEND
	pfree(principal_key);
#endif
	LWLockRelease(lock_pk);
	CloseTransientFile(fd);

	return last_key;
}
/*
 * Load WAL keys from the key file into the in-memory WAL key cache.
 *
 * Only entries of type TDE_KEY_TYPE_WAL_UNENCRYPTED or
 * TDE_KEY_TYPE_WAL_ENCRYPTED whose start_lsn is set and not below the
 * requested start_lsn are cached.
 *
 * Returns the first record added by this call, or NULL if no principal key
 * is available or no entry qualified.
 */
WALKeyCacheRec *
pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
{
	RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	LWLock	   *lock_pk = tde_lwlock_enc_keys();
	char		db_map_path[MAXPGPATH];
	off_t		read_pos = 0;
	int			fd;
	int			keys_count;
	TDEPrincipalKey *principal_key;
	WALKeyCacheRec *first_added = NULL;

	LWLockAcquire(lock_pk, LW_SHARED);

	principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED);
	if (principal_key == NULL)
	{
		LWLockRelease(lock_pk);
		elog(DEBUG1, "fetch WAL keys: no principal key");
		return NULL;
	}

	pg_tde_set_db_file_path(rlocator.dbOid, db_map_path);
	fd = pg_tde_open_wal_key_file_read(db_map_path, false, &read_pos);

	/* Number of entries stored after the header. */
	keys_count = (lseek(fd, 0, SEEK_END) - sizeof(WalKeyFileHeader)) / sizeof(WalKeyFileEntry);

	if (keys_count == 0)
	{
		/*
		 * With no keys on disk, cache a fake one (with the range
		 * 0-infinity) so the reader won't keep checking the disk. This is
		 * for the walsender when WAL is unencrypted and never was.
		 */
		WalEncryptionKey stub_key = {
			.start_lsn = InvalidXLogRecPtr,
		};

		first_added = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr);
	}
	else
	{
		for (int idx = 0; idx < keys_count; idx++)
		{
			WalKeyFileEntry entry;
			WalEncryptionKey *decrypted;
			WALKeyCacheRec *added;

			pg_tde_read_one_wal_key_file_entry2(fd, idx, &entry, rlocator.dbOid);

			/* Skip keys that were created but never stamped by a write. */
			if (entry.enc_key.start_lsn == InvalidXLogRecPtr)
				continue;

			/* Skip invalid keys. */
			if (entry.enc_key.type != TDE_KEY_TYPE_WAL_UNENCRYPTED &&
				entry.enc_key.type != TDE_KEY_TYPE_WAL_ENCRYPTED)
				continue;

			/* Skip keys that start before the requested LSN. */
			if (entry.enc_key.start_lsn < start_lsn)
				continue;

			decrypted = pg_tde_decrypt_wal_key(principal_key, &entry);
			added = pg_tde_add_wal_key_to_cache(decrypted, entry.enc_key.start_lsn);

			/* The cache holds its own copy of the key. */
			pfree(decrypted);

			if (first_added == NULL)
				first_added = added;
		}
	}

#ifdef FRONTEND
	/* The backend frees it after copying to the cache. */
	pfree(principal_key);
#endif
	LWLockRelease(lock_pk);
	CloseTransientFile(fd);

	return first_added;
}
/*
 * Append a copy of *key to the tail of the WAL key cache list.
 *
 * The new record covers [start_lsn, MaxXLogRecPtr); the end_lsn of the
 * previous tail, if any, is set to the new record's start_lsn.
 * In backend builds the record is allocated in TopMemoryContext.
 *
 * Returns the newly added record.
 */
static WALKeyCacheRec *
pg_tde_add_wal_key_to_cache(WalEncryptionKey *key, XLogRecPtr start_lsn)
{
	WALKeyCacheRec *rec;
#ifndef FRONTEND
	MemoryContext saved_ctx = MemoryContextSwitchTo(TopMemoryContext);
#endif

	rec = palloc0_object(WALKeyCacheRec);

#ifndef FRONTEND
	MemoryContextSwitchTo(saved_ctx);
#endif

	rec->start_lsn = start_lsn;
	rec->end_lsn = MaxXLogRecPtr;
	rec->key = *key;
	rec->crypt_ctx = NULL;

	if (tde_wal_key_last_rec != NULL)
	{
		/* Link behind the current tail and close the tail's LSN range. */
		tde_wal_key_last_rec->next = rec;
		tde_wal_key_last_rec->end_lsn = rec->start_lsn;
	}
	else
	{
		/* First record: it also becomes the head of the list. */
		tde_wal_key_cache = rec;
	}
	tde_wal_key_last_rec = rec;

	return rec;
}
/*
 * Open the WAL key file with the given flags.
 *
 * Returns the file descriptor. A failed open raises ERROR, except that a
 * missing file (ENOENT) is tolerated when ignore_missing is set; in that
 * case the negative descriptor is returned to the caller.
 */
static int
pg_tde_open_wal_key_file_basic(const char *filename,
							   int flags,
							   bool ignore_missing)
{
	int			fd = OpenTransientFile(filename, flags);

	if (fd < 0 && (errno != ENOENT || !ignore_missing))
		ereport(ERROR,
				errcode_for_file_access(),
				errmsg("could not open WAL key file \"%s\": %m", filename));

	return fd;
}
/*
 * Open the WAL key file read-only and validate its header.
 *
 * The caller must hold the key lock in shared or exclusive mode.
 * On success *curr_pos is set to the number of header bytes read. When
 * ignore_missing is set and the open fails, the negative descriptor is
 * returned and *curr_pos is left untouched.
 */
static int
pg_tde_open_wal_key_file_read(const char *filename,
							  bool ignore_missing,
							  off_t *curr_pos)
{
	WalKeyFileHeader header;
	off_t		header_bytes = 0;
	int			fd;

	Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_SHARED) ||
		   LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));

	fd = pg_tde_open_wal_key_file_basic(filename, O_RDONLY | PG_BINARY, ignore_missing);
	if (fd < 0 && ignore_missing)
		return fd;

	pg_tde_wal_key_file_header_read(filename, fd, &header, &header_bytes);
	*curr_pos = header_bytes;

	return fd;
}
/*
 * Open the WAL key file for reading and writing, creating it if needed.
 *
 * The caller must hold the key lock in exclusive mode. When truncate is set
 * the file is opened with O_TRUNC. If the file turns out to be empty and
 * signed_key_info is given, a new header is written. *curr_pos is set to the
 * number of header bytes read plus header bytes written.
 */
static int
pg_tde_open_wal_key_file_write(const char *filename,
							   const TDESignedPrincipalKeyInfo *signed_key_info,
							   bool truncate,
							   off_t *curr_pos)
{
	WalKeyFileHeader header;
	off_t		nread = 0;
	off_t		nwritten = 0;
	int			open_flags = O_RDWR | O_CREAT | PG_BINARY;
	int			fd;

	if (truncate)
		open_flags |= O_TRUNC;

	Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));

	fd = pg_tde_open_wal_key_file_basic(filename, open_flags, false);

	pg_tde_wal_key_file_header_read(filename, fd, &header, &nread);

	/* In case it's a new file, let's add the header now. */
	if (nread == 0 && signed_key_info != NULL)
		pg_tde_wal_key_file_header_write(filename, fd, signed_key_info, &nwritten);

	*curr_pos = nread + nwritten;

	return fd;
}
/*
 * Read the header of a WAL key file into *fheader.
 *
 * *bytes_read receives the result of the read. An empty file (zero bytes
 * read) is accepted silently. Anything other than a complete header carrying
 * PG_TDE_WAL_KEY_FILE_MAGIC is reported as FATAL.
 */
static void
pg_tde_wal_key_file_header_read(const char *filename,
								int fd,
								WalKeyFileHeader *fheader,
								off_t *bytes_read)
{
	Assert(fheader);

	*bytes_read = pg_pread(fd, fheader, sizeof(WalKeyFileHeader), 0);

	/* File is empty */
	if (*bytes_read == 0)
		return;

	/* A complete header with the expected magic is fine. */
	if (*bytes_read == sizeof(WalKeyFileHeader) &&
		fheader->file_version == PG_TDE_WAL_KEY_FILE_MAGIC)
		return;

	ereport(FATAL,
			errcode_for_file_access(),
			errmsg("WAL key file \"%s\" is corrupted: %m", filename));
}
/*
 * Write a fresh header (magic plus signed principal key info) at the start
 * of the WAL key file and fsync it.
 *
 * *bytes_written receives the result of the write. Returns fd unchanged.
 * Raises ERROR on a short write or a failed fsync.
 */
static int
pg_tde_wal_key_file_header_write(const char *filename,
								 int fd,
								 const TDESignedPrincipalKeyInfo *signed_key_info,
								 off_t *bytes_written)
{
	WalKeyFileHeader header;

	Assert(signed_key_info);

	header.file_version = PG_TDE_WAL_KEY_FILE_MAGIC;
	header.signed_key_info = *signed_key_info;

	*bytes_written = pg_pwrite(fd, &header, sizeof(WalKeyFileHeader), 0);
	if (*bytes_written != sizeof(WalKeyFileHeader))
		ereport(ERROR,
				errcode_for_file_access(),
				errmsg("could not write WAL key file \"%s\": %m", filename));

	if (pg_fsync(fd) != 0)
		ereport(data_sync_elevel(ERROR),
				errcode_for_file_access(),
				errmsg("could not fsync file \"%s\": %m", filename));

	ereport(DEBUG2, errmsg("Wrote the header to %s", filename));

	return fd;
}
/*
 * Read one entry from the WAL key file at *offset.
 *
 * Returns true and advances *offset past the entry when a complete entry was
 * read. Returns false, leaving *offset unchanged, when a complete entry
 * could not be read (treated as end of file).
 */
static bool
pg_tde_read_one_wal_key_file_entry(int fd,
								   WalKeyFileEntry *entry,
								   off_t *offset)
{
	Assert(entry);
	Assert(offset);

	/* We've reached the end of the file. */
	if (pg_pread(fd, entry, sizeof(WalKeyFileEntry), *offset) != sizeof(WalKeyFileEntry))
		return false;

	*offset += sizeof(WalKeyFileEntry);
	return true;
}
/*
 * Read the entry at position key_index (0-based, counted after the file
 * header) into *entry.
 *
 * A short or failed read is reported as FATAL; databaseId is used only to
 * build the file path shown in that message.
 */
static void
pg_tde_read_one_wal_key_file_entry2(int fd,
									int32 key_index,
									WalKeyFileEntry *entry,
									Oid databaseId)
{
	char		db_map_path[MAXPGPATH];
	off_t		entry_pos = sizeof(WalKeyFileHeader) + key_index * sizeof(WalKeyFileEntry);

	if (pg_pread(fd, entry, sizeof(WalKeyFileEntry), entry_pos) == sizeof(WalKeyFileEntry))
		return;

	pg_tde_set_db_file_path(databaseId, db_map_path);
	ereport(FATAL,
			errcode_for_file_access(),
			errmsg("could not find the required key at index %d in WAL key file \"%s\": %m",
				   key_index, db_map_path));
}
/*
 * Encrypt rel_key_data with the principal key and store it in the WAL key
 * file selected by rlocator->dbOid.
 *
 * The entry is written over the first slot whose type is MAP_ENTRY_EMPTY,
 * or appended after the last complete entry if there is no such slot. The
 * file header is created first when the file is new.
 */
static void
pg_tde_write_wal_key_file_entry(const RelFileLocator *rlocator,
								const WalEncryptionKey *rel_key_data,
								TDEPrincipalKey *principal_key)
{
	char		db_map_path[MAXPGPATH];
	TDESignedPrincipalKeyInfo signed_key_Info;
	WalKeyFileEntry new_entry;
	off_t		slot_pos = 0;
	int			fd;

	Assert(rlocator);

	pg_tde_set_db_file_path(rlocator->dbOid, db_map_path);

	pg_tde_sign_principal_key_info(&signed_key_Info, principal_key);

	/* Open and validate file for basic correctness. */
	fd = pg_tde_open_wal_key_file_write(db_map_path, &signed_key_Info, false, &slot_pos);

	/*
	 * Walk the entries; slot_pos only moves past an entry that was read in
	 * full and is in use, so it ends up on the first free slot or at the end
	 * of the entries.
	 */
	for (;;)
	{
		WalKeyFileEntry existing;
		off_t		next_pos = slot_pos;

		if (!pg_tde_read_one_wal_key_file_entry(fd, &existing, &next_pos))
			break;
		if (existing.type == MAP_ENTRY_EMPTY)
			break;

		slot_pos = next_pos;
	}

	/* Initialize WAL key file entry and encrypt key */
	pg_tde_initialize_wal_key_file_entry(&new_entry, principal_key, rlocator, rel_key_data);

	/* Write the given entry at slot_pos; i.e. the free entry. */
	pg_tde_write_one_wal_key_file_entry(fd, &new_entry, &slot_pos, db_map_path);

	CloseTransientFile(fd);
}
/*
 * Decrypt the key stored in a WAL key file entry with the principal key.
 *
 * Returns a freshly allocated copy of entry->enc_key whose key bytes have
 * been replaced by the decrypted key. Raises ERROR if decryption fails.
 */
static WalEncryptionKey *
pg_tde_decrypt_wal_key(TDEPrincipalKey *principal_key, WalKeyFileEntry *entry)
{
	WalEncryptionKey *key = palloc_object(WalEncryptionKey);

	Assert(principal_key);

	/* Start from the stored key; only the key bytes are encrypted. */
	*key = entry->enc_key;

	/*
	 * The additional authenticated data is the part of the entry that
	 * precedes enc_key. Its length must be computed from WalKeyFileEntry,
	 * exactly as pg_tde_initialize_wal_key_file_entry() does on encryption;
	 * using another struct's layout would break authentication as soon as
	 * the two layouts differ.
	 */
	if (!AesGcmDecrypt(principal_key->keyData,
					   entry->entry_iv, MAP_ENTRY_IV_SIZE,
					   (unsigned char *) entry, offsetof(WalKeyFileEntry, enc_key),
					   entry->enc_key.key, INTERNAL_KEY_LEN,
					   key->key,
					   entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE))
		ereport(ERROR,
				errmsg("Failed to decrypt key, incorrect principal key or corrupted key file"));

	return key;
}
/*
 * Write one entry to the WAL key file at *offset, fsync the file, and
 * advance *offset past the entry.
 *
 * db_map_path is used only in error messages. Raises ERROR on a short write
 * or a failed fsync.
 */
static void
pg_tde_write_one_wal_key_file_entry(int fd,
									const WalKeyFileEntry *entry,
									off_t *offset,
									const char *db_map_path)
{
	if (pg_pwrite(fd, entry, sizeof(WalKeyFileEntry), *offset) != sizeof(WalKeyFileEntry))
		ereport(ERROR,
				errcode_for_file_access(),
				errmsg("could not write WAL key file \"%s\": %m", db_map_path));

	if (pg_fsync(fd) != 0)
		ereport(data_sync_elevel(ERROR),
				errcode_for_file_access(),
				errmsg("could not fsync file \"%s\": %m", db_map_path));

	/* A successful write is always exactly one entry long. */
	*offset += sizeof(WalKeyFileEntry);
}
/*
 * Fills in a WAL key file entry for the given key and encrypts the key
 * material with the principal key.
 *
 * entry: output; its identifying fields, IV, tag and enc_key are set here.
 * principal_key: key used to wrap the WAL key.
 * rlocator: supplies the spcOid and relNumber stored in the entry.
 * rel_key_data: plaintext WAL key; copied into the entry, after which the
 *               key bytes in the entry are overwritten with the ciphertext.
 *
 * Raises an ERROR if a random IV cannot be generated.
 */
static void
pg_tde_initialize_wal_key_file_entry(WalKeyFileEntry *entry,
									 const TDEPrincipalKey *principal_key,
									 const RelFileLocator *rlocator,
									 const WalEncryptionKey *rel_key_data)
{
	entry->spcOid = rlocator->spcOid;
	entry->relNumber = rlocator->relNumber;
	entry->type = rel_key_data->type;
	entry->enc_key = *rel_key_data;

	/*
	 * RAND_bytes() returns 1 on success, 0 on failure and -1 when the
	 * operation is not supported by the current RAND method, so anything
	 * other than 1 must be treated as an error. A plain "!RAND_bytes()"
	 * check would let the -1 case through and continue with an IV that was
	 * never filled in.
	 */
	if (RAND_bytes(entry->entry_iv, MAP_ENTRY_IV_SIZE) != 1)
		ereport(ERROR,
				errcode(ERRCODE_INTERNAL_ERROR),
				errmsg("could not generate iv for wal key file entry: %s", ERR_error_string(ERR_get_error(), NULL)));

	/*
	 * The entry fields preceding enc_key are passed along with the
	 * plaintext key; the ciphertext replaces the key bytes inside the
	 * entry and the tag is stored next to it.
	 */
	AesGcmEncrypt(principal_key->keyData,
				  entry->entry_iv, MAP_ENTRY_IV_SIZE,
				  (unsigned char *) entry, offsetof(WalKeyFileEntry, enc_key),
				  rel_key_data->key, INTERNAL_KEY_LEN,
				  entry->enc_key.key,
				  entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE);
}
#ifndef FRONTEND
/*
 * Rotates the server principal key and optionally generates the WAL record
 * for the rotation.
 *
 * Every non-empty entry of the current WAL key file is decrypted with
 * principal_key, re-encrypted with new_principal_key and written to a
 * temporary "<path>.r" file, which then replaces the current WAL key file.
 *
 * principal_key: the current server key; must belong to
 *                GLOBAL_DATA_TDE_OID.
 * new_principal_key: the key to rotate to.
 * write_xlog: if true, an XLOG_TDE_ROTATE_PRINCIPAL_KEY record is written
 *             after the file has been replaced.
 */
void
pg_tde_perform_rotate_server_key(TDEPrincipalKey *principal_key,
								 TDEPrincipalKey *new_principal_key,
								 bool write_xlog)
{
	TDESignedPrincipalKeyInfo new_signed_key_info;
	RelFileLocator rloc = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	off_t		old_curr_pos,
				new_curr_pos;
	int			old_fd,
				new_fd;
	char		old_path[MAXPGPATH],
				new_path[MAXPGPATH];

	Assert(principal_key);
	Assert(new_principal_key);
	Assert(principal_key->keyInfo.databaseId == GLOBAL_DATA_TDE_OID);

	pg_tde_sign_principal_key_info(&new_signed_key_info, new_principal_key);

	pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, old_path);
	snprintf(new_path, MAXPGPATH, "%s.r", old_path);

	old_fd = pg_tde_open_wal_key_file_read(old_path, false, &old_curr_pos);
	new_fd = pg_tde_open_wal_key_file_write(new_path, &new_signed_key_info, true, &new_curr_pos);

	/* Read all entries until EOF */
	for (;;)
	{
		WalEncryptionKey *key;
		WalKeyFileEntry read_map_entry;
		WalKeyFileEntry write_map_entry;

		if (!pg_tde_read_one_wal_key_file_entry(old_fd, &read_map_entry, &old_curr_pos))
			break;

		/* Free slots are not carried over to the new file. */
		if (read_map_entry.type == MAP_ENTRY_EMPTY)
			continue;

		/* Decrypt and re-encrypt key */
		key = pg_tde_decrypt_wal_key(principal_key, &read_map_entry);
		pg_tde_initialize_wal_key_file_entry(&write_map_entry, new_principal_key, &rloc, key);
		pg_tde_write_one_wal_key_file_entry(new_fd, &write_map_entry, &new_curr_pos, new_path);

		pfree(key);
	}

	CloseTransientFile(old_fd);
	CloseTransientFile(new_fd);

	/*
	 * Do the final steps - replace the current WAL key file with the file
	 * with new data.
	 *
	 * The rename replaces the old file in a single step. The old file must
	 * not be unlinked first: a crash between the unlink and the rename would
	 * leave the cluster without any WAL key file under the expected name.
	 */
	durable_rename(new_path, old_path, ERROR);

	/*
	 * We do WAL writes past the event ("the write behind logging") rather
	 * than before ("the write ahead") because we need logging here only for
	 * replication purposes. The rotation results in data written and fsynced
	 * to disk. Which in most cases would happen way before it's written to
	 * the WAL disk file. As WAL will be flushed at the end of the
	 * transaction, on its commit, hence after this function returns (there is
	 * also a bg writer, but the commit is what is guaranteed). And it makes
	 * sense to replicate the event only after its effect has been
	 * successfully applied to the source.
	 */
	if (write_xlog)
	{
		XLogPrincipalKeyRotate xlrec;

		xlrec.databaseId = principal_key->keyInfo.databaseId;
		xlrec.keyringId = principal_key->keyInfo.keyringId;
		memcpy(xlrec.keyName, new_principal_key->keyInfo.name, sizeof(new_principal_key->keyInfo.name));

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, sizeof(XLogPrincipalKeyRotate));
		XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ROTATE_PRINCIPAL_KEY);
	}
}
#endif
#ifndef FRONTEND
/*
 * Redo-side counterpart of saving the server key: opens the WAL key file of
 * the database named in signed_key_info for writing, passing the signed key
 * info along, and closes it again.
 *
 * The exclusive encryption-keys lock is taken and released here, so the
 * caller must not already hold it.
 */
void
pg_tde_save_server_key_redo(const TDESignedPrincipalKeyInfo *signed_key_info)
{
	char		key_file_path[MAXPGPATH];
	off_t		pos;
	int			key_fd;

	pg_tde_set_db_file_path(signed_key_info->data.databaseId, key_file_path);

	LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);

	/* Opening the file is all that is needed; no entries are written. */
	key_fd = pg_tde_open_wal_key_file_write(key_file_path, signed_key_info, false, &pos);
	CloseTransientFile(key_fd);

	LWLockRelease(tde_lwlock_enc_keys());
}
#endif
#ifndef FRONTEND
/*
 * Creates the WAL key file and saves the principal key information in it.
 *
 * If the file already exists, it is truncated before the principal key
 * information is added.
 *
 * The caller must hold an EXCLUSIVE lock on the key files before calling
 * this function.
 *
 * write_xlog: if true, an XLOG_TDE_ADD_PRINCIPAL_KEY record carrying the
 * signed principal key info is written before the file is touched. Callers
 * pass false during server recovery/startup or in other situations where
 * WAL writes are not allowed.
 */
void
pg_tde_save_server_key(const TDEPrincipalKey *principal_key, bool write_xlog)
{
	TDESignedPrincipalKeyInfo signed_info;
	char		key_file_path[MAXPGPATH];
	off_t		pos = 0;
	int			key_fd;

	pg_tde_set_db_file_path(principal_key->keyInfo.databaseId, key_file_path);

	ereport(DEBUG2, errmsg("pg_tde_save_server_key"));

	pg_tde_sign_principal_key_info(&signed_info, principal_key);

	if (write_xlog)
	{
		XLogBeginInsert();
		XLogRegisterData((char *) &signed_info, sizeof(TDESignedPrincipalKeyInfo));
		XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_PRINCIPAL_KEY);
	}

	/* Only the file with the signed key info is needed; no entries yet. */
	key_fd = pg_tde_open_wal_key_file_write(key_file_path, &signed_info, true, &pos);
	CloseTransientFile(key_fd);
}
#endif
/*
 * Reads the signed principal key info from the header of the WAL key file.
 * The caller must hold a LW_SHARED or higher lock on the key files before
 * calling this function.
 *
 * Returns a palloc'd copy of the signed key info, or NULL if the file could
 * not be opened or no header bytes were read from it.
 */
TDESignedPrincipalKeyInfo *
pg_tde_get_server_key_info(Oid dbOid)
{
	TDESignedPrincipalKeyInfo *result;
	WalKeyFileHeader header;
	char		key_file_path[MAXPGPATH];
	off_t		nread = 0;
	int			key_fd;

	pg_tde_set_db_file_path(dbOid, key_file_path);

	/* Read-only open; a negative descriptor means there is no file. */
	key_fd = pg_tde_open_wal_key_file_basic(key_file_path, O_RDONLY, true);
	if (key_fd < 0)
		return NULL;

	pg_tde_wal_key_file_header_read(key_file_path, key_fd, &header, &nread);
	CloseTransientFile(key_fd);

	/* Nothing was read, so there is no principal key info to hand back. */
	if (nread <= 0)
		return NULL;

	/* It's not a new file, so copy the principal key info from the header. */
	result = palloc_object(TDESignedPrincipalKeyInfo);
	*result = header.signed_key_info;

	return result;
}
/*
 * Counts the non-empty entries in the WAL key file of the given database.
 *
 * The caller must hold the encryption-keys lock in shared or exclusive
 * mode.
 *
 * Returns 0 if the key file could not be opened.
 */
int
pg_tde_count_wal_keys_in_file(Oid dbOid)
{
	char		db_map_path[MAXPGPATH];
	int			fd;				/* plain descriptor, closed with
								 * CloseTransientFile() like the other users
								 * of the WAL key file in this file */
	off_t		curr_pos = 0;
	WalKeyFileEntry entry;
	int			count = 0;

	Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_SHARED) ||
		   LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));

	pg_tde_set_db_file_path(dbOid, db_map_path);

	fd = pg_tde_open_wal_key_file_read(db_map_path, true, &curr_pos);
	if (fd < 0)
		return count;

	/* Entries marked MAP_ENTRY_EMPTY are free slots and are not counted. */
	while (pg_tde_read_one_wal_key_file_entry(fd, &entry, &curr_pos))
	{
		if (entry.type != MAP_ENTRY_EMPTY)
			count++;
	}

	CloseTransientFile(fd);

	return count;
}
#ifndef FRONTEND
/*
 * Deletes the WAL key file of the given database and WAL-logs the deletion.
 *
 * The caller must hold the encryption-keys lock in exclusive mode, and the
 * file must no longer contain any WAL keys.
 */
void
pg_tde_delete_server_key(Oid dbOid)
{
	char		key_file_path[MAXPGPATH];

	Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));
	Assert(pg_tde_count_wal_keys_in_file(dbOid) == 0);

	pg_tde_set_db_file_path(dbOid, key_file_path);

	/* Log the deletion first, then remove the file. */
	XLogBeginInsert();
	XLogRegisterData((char *) &dbOid, sizeof(Oid));
	XLogInsert(RM_TDERMGR_ID, XLOG_TDE_DELETE_PRINCIPAL_KEY);

	/* Remove whole key map file */
	durable_unlink(key_file_path, ERROR);
}
#endif

@ -13,7 +13,7 @@
#include "utils/guc.h"
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog_keys.h"
#include "access/pg_tde_xlog_smgr.h"
#include "catalog/tde_global_space.h"
#include "encryption/enc_tde.h"
@ -42,7 +42,7 @@ static const XLogSmgr tde_xlog_smgr = {
static void *EncryptionCryptCtx = NULL;
/* TODO: can be swapped out to the disk */
static InternalKey EncryptionKey =
static WalEncryptionKey EncryptionKey =
{
.type = MAP_ENTRY_EMPTY,
.start_lsn = InvalidXLogRecPtr,
@ -200,7 +200,7 @@ TDEXLogSmgrInit()
void
TDEXLogSmgrInitWrite(bool encrypt_xlog)
{
InternalKey *key = pg_tde_read_last_wal_key();
WalEncryptionKey *key = pg_tde_read_last_wal_key();
/*
* Always generate a new key on starting PostgreSQL to protect against
@ -232,7 +232,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
void
TDEXLogSmgrInitWriteReuseKey()
{
InternalKey *key = pg_tde_read_last_wal_key();
WalEncryptionKey *key = pg_tde_read_last_wal_key();
if (key)
{
@ -252,7 +252,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno)
{
char iv_prefix[16];
InternalKey *key = &EncryptionKey;
WalEncryptionKey *key = &EncryptionKey;
char *enc_buff = EncryptionBuf;
#ifndef FRONTEND

@ -22,6 +22,7 @@
#include "utils/wait_event.h"
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog_keys.h"
#include "access/pg_tde_xlog.h"
#include "catalog/tde_global_space.h"
#include "catalog/tde_principal_key.h"
@ -276,13 +277,19 @@ set_principal_key_with_keyring(const char *key_name,
if (!already_has_key)
{
/* First key created for the database */
pg_tde_save_principal_key(new_principal_key, true);
if (dbOid == GLOBAL_DATA_TDE_OID)
pg_tde_save_server_key(new_principal_key, true);
else
pg_tde_save_principal_key(new_principal_key, true);
push_principal_key_to_cache(new_principal_key);
}
else
{
/* key rotation */
pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, true);
if (dbOid == GLOBAL_DATA_TDE_OID)
pg_tde_perform_rotate_server_key(curr_principal_key, new_principal_key, true);
else
pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, true);
clear_principal_key_cache(curr_principal_key->keyInfo.databaseId);
push_principal_key_to_cache(new_principal_key);
@ -343,7 +350,10 @@ xl_tde_perform_rotate_key(XLogPrincipalKeyRotate *xlrec)
memcpy(new_principal_key->keyData, keyInfo->data.data, keyInfo->data.len);
pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, false);
if (xlrec->databaseId == GLOBAL_DATA_TDE_OID)
pg_tde_perform_rotate_server_key(curr_principal_key, new_principal_key, false);
else
pg_tde_perform_rotate_key(curr_principal_key, new_principal_key, false);
clear_principal_key_cache(curr_principal_key->keyInfo.databaseId);
push_principal_key_to_cache(new_principal_key);
@ -807,12 +817,14 @@ pg_tde_delete_default_key(PG_FUNCTION_ARGS)
principal_key = GetPrincipalKeyNoDefault(GLOBAL_DATA_TDE_OID, LW_EXCLUSIVE);
if (pg_tde_is_same_principal_key(default_principal_key, principal_key))
{
if (pg_tde_count_encryption_keys(GLOBAL_DATA_TDE_OID) != 0)
if (pg_tde_count_wal_keys_in_file(GLOBAL_DATA_TDE_OID) != 0)
ereport(ERROR,
errcode(ERRCODE_OBJECT_IN_USE),
errmsg("cannot delete default principal key"),
errhint("There are WAL encryption keys."));
dbs = lappend_oid(dbs, GLOBAL_DATA_TDE_OID);
pg_tde_delete_server_key(GLOBAL_DATA_TDE_OID);
clear_principal_key_cache(GLOBAL_DATA_TDE_OID);
}
/*
@ -950,7 +962,11 @@ get_principal_key_from_keyring(Oid dbOid)
Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));
principalKeyInfo = pg_tde_get_principal_key_info(dbOid);
if (dbOid == GLOBAL_DATA_TDE_OID)
principalKeyInfo = pg_tde_get_server_key_info(dbOid);
else
principalKeyInfo = pg_tde_get_principal_key_info(dbOid);
if (principalKeyInfo == NULL)
return NULL;
@ -1083,7 +1099,10 @@ GetPrincipalKey(Oid dbOid, LWLockMode lockMode)
* current function may be invoked during server startup/recovery where
* WAL writes are forbidden.
*/
pg_tde_save_principal_key(newPrincipalKey, false);
if (dbOid == GLOBAL_DATA_TDE_OID)
pg_tde_save_server_key(newPrincipalKey, false);
else
pg_tde_save_principal_key(newPrincipalKey, false);
push_principal_key_to_cache(newPrincipalKey);
@ -1204,7 +1223,7 @@ pg_tde_verify_provider_keys_in_use(GenericKeyring *modified_provider)
LWLockAcquire(tde_lwlock_enc_keys(), LW_EXCLUSIVE);
/* Check the server key that is used for WAL encryption */
existing_principal_key = pg_tde_get_principal_key_info(GLOBAL_DATA_TDE_OID);
existing_principal_key = pg_tde_get_server_key_info(GLOBAL_DATA_TDE_OID);
if (existing_principal_key != NULL &&
existing_principal_key->data.keyringId == modified_provider->keyring_id)
{
@ -1309,7 +1328,10 @@ pg_tde_rotate_default_key_for_database(TDEPrincipalKey *oldKey, TDEPrincipalKey
*newKey = *newKeyTemplate;
newKey->keyInfo.databaseId = oldKey->keyInfo.databaseId;
pg_tde_perform_rotate_key(oldKey, newKey, true);
if (oldKey->keyInfo.databaseId == GLOBAL_DATA_TDE_OID)
pg_tde_perform_rotate_server_key(oldKey, newKey, true);
else
pg_tde_perform_rotate_key(oldKey, newKey, true);
clear_principal_key_cache(oldKey->keyInfo.databaseId);
push_principal_key_to_cache(newKey);

@ -54,28 +54,6 @@ typedef struct XLogRelKey
RelFileLocator rlocator;
} XLogRelKey;
/*
* TODO: For now it's a simple linked list which is no good. So consider having
* dedicated WAL keys cache inside some proper data structure.
*/
typedef struct WALKeyCacheRec
{
XLogRecPtr start_lsn;
XLogRecPtr end_lsn;
InternalKey key;
void *crypt_ctx;
struct WALKeyCacheRec *next;
} WALKeyCacheRec;
extern InternalKey *pg_tde_read_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type);
#define PG_TDE_MAP_FILENAME "%d_keys"
static inline void

@ -0,0 +1,65 @@
#ifndef PG_TDE_XLOG_KEYS_H
#define PG_TDE_XLOG_KEYS_H
#include "access/xlog_internal.h"
#include "storage/relfilelocator.h"
#include "access/pg_tde_tdemap.h"
#include "catalog/tde_principal_key.h"
#include "common/pg_tde_utils.h"
/*
 * A WAL encryption key together with its metadata.
 *
 * This struct is embedded in WalKeyFileEntry, which is written to the WAL
 * key file as raw bytes, so its layout is part of the file format.
 */
typedef struct WalEncryptionKey
{
/* Key material; stored encrypted with the principal key in the key file */
uint8 key[INTERNAL_KEY_LEN];
/* NOTE(review): presumably the base IV for WAL data encryption - confirm */
uint8 base_iv[INTERNAL_KEY_IV_LEN];
/* Entry type; MAP_ENTRY_EMPTY marks an unset key */
uint32 type;
/* NOTE(review): presumably the LSN from which this key applies - confirm */
XLogRecPtr start_lsn;
} WalEncryptionKey;
/*
 * One entry of the WAL key file.
 *
 * Entries are written to and read from the file as raw structs, so the
 * layout is part of the file format. The fields preceding enc_key are also
 * fed to AES-GCM alongside the key when it is encrypted and decrypted, so
 * changing them invalidates existing entries.
 */
typedef struct WalKeyFileEntry
{
/* Tablespace OID and relation number taken from the key's RelFileLocator */
Oid spcOid;
RelFileNumber relNumber;
/* Copy of enc_key.type; MAP_ENTRY_EMPTY marks a free slot */
uint32 type;
/* The WAL key; its key bytes are stored encrypted */
WalEncryptionKey enc_key;
/* IV and tag used when encrypting the key itself */
unsigned char entry_iv[MAP_ENTRY_IV_SIZE];
unsigned char aead_tag[MAP_ENTRY_AEAD_TAG_SIZE];
} WalKeyFileEntry;
/*
 * Header of the WAL key file. It carries the signed info of the principal
 * key associated with the file.
 */
typedef struct WalKeyFileHeader
{
/* NOTE(review): presumably the file format version - confirm */
int32 file_version;
/* Signed principal key info; returned by pg_tde_get_server_key_info() */
TDESignedPrincipalKeyInfo signed_key_info;
} WalKeyFileHeader;
/*
 * A record of the in-memory WAL key cache, kept as a singly linked list.
 *
 * TODO: For now it's a simple linked list which is no good. So consider having
 * dedicated WAL keys cache inside some proper data structure.
 */
typedef struct WALKeyCacheRec
{
/* NOTE(review): presumably the LSN range this key covers - confirm */
XLogRecPtr start_lsn;
XLogRecPtr end_lsn;
/* The cached WAL key */
WalEncryptionKey key;
/* NOTE(review): presumably a cipher context reused for this key - confirm */
void *crypt_ctx;
/* Next record in the list */
struct WALKeyCacheRec *next;
} WALKeyCacheRec;
/* Public interface of the WAL key file and WAL key cache code. */

/* Counts the non-empty entries in the WAL key file of the given database. */
extern int pg_tde_count_wal_keys_in_file(Oid dbOid);
extern void pg_tde_create_wal_key(WalEncryptionKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type);
/* Removes the WAL key file and WAL-logs the removal. */
extern void pg_tde_delete_server_key(Oid dbOid);
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
/* Returns a palloc'd copy of the signed key info from the file header, or NULL. */
extern TDESignedPrincipalKeyInfo *pg_tde_get_server_key_info(Oid dbOid);
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
/* Re-encrypts all WAL keys with new_principal_key and replaces the key file. */
extern void pg_tde_perform_rotate_server_key(TDEPrincipalKey *principal_key, TDEPrincipalKey *new_principal_key, bool write_xlog);
extern WalEncryptionKey *pg_tde_read_last_wal_key(void);
/* Creates the WAL key file holding the signed principal key info. */
extern void pg_tde_save_server_key(const TDEPrincipalKey *principal_key, bool write_xlog);
/* Redo-side variant of the above; takes the encryption-keys lock itself. */
extern void pg_tde_save_server_key_redo(const TDESignedPrincipalKeyInfo *signed_key_info);
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
#endif /* PG_TDE_XLOG_KEYS_H */
Loading…
Cancel
Save