From d352e5af41f2f5ce449434d33a2baa1e4dbe6b44 Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoi Date: Thu, 27 Feb 2025 21:15:26 +0200 Subject: [PATCH] PG-1294 WAL: encrypt segment ranges This commit changes the approach to WAL encryption. Instead of encrypting each WAL page and keeping its header unencrypted with a special encrypted flag, this change moves encrypted/unencrypted bookkeeping to WAL internal keys. Now every [WAL] internal key has an additional field `start_lsn`. The field indicates the first WAL record that was encrypted with this key. This means everything starting from that LSN is encrypted with the key until the `start_lsn` of the next key or the end of the WAL. In order to have unencrypted WAL (when the user sets `pg_tde.wal_encrypt = off`), we insert a special key with the flag `TDE_KEY_TYPE_WAL_UNENCRYPTED`. The user can turn WAL encryption on and off, which will generate a new WAL key with the respective state (`TDE_KEY_TYPE_WAL_ENCRYPTED` or `TDE_KEY_TYPE_WAL_UNENCRYPTED`). If the `pg_tde.wal_encrypt` GUC was changed, the server will generate a new WAL key with `start_lsn` set to `InvalidXLogRecPtr` on start. The WAL writer, in turn, will update `start_lsn` with the actual LSN on the first write after the key's creation. We use the current key _map and _dat files infrastructure along with the internal key cache but with some special cases. There might be multiple internal keys for WAL but only one for the SMGR (relations, indexes etc). When creating a new WAL key, we write it the same way as an SMGR key, so key rotation, for example, doesn't require any changes. But reads and `start_lsn` updates happen directly from/in the _dat file (bypassing _map). This needs revision and refactoring (along with the _map and _dat files in general). The same applies to the WAL key cache, which is currently a simple linked list referencing entries in the actual internal key cache. That allows the WAL key to be changed. 
--- contrib/pg_tde/Makefile | 2 - contrib/pg_tde/Makefile.tools | 1 - contrib/pg_tde/documentation/docs/faq.md | 8 +- contrib/pg_tde/documentation/docs/setup.md | 2 +- .../documentation/docs/wal-encryption.md | 58 +-- .../pg_tde/expected/default_principal_key.out | 15 +- contrib/pg_tde/expected/wal_key.out | 36 -- contrib/pg_tde/expected/wal_key_1.out | 36 -- contrib/pg_tde/expected/wal_key_2.out | 32 -- contrib/pg_tde/meson.build | 3 - contrib/pg_tde/pg_tde--1.0-beta2.sql | 5 - contrib/pg_tde/sql/wal_key.sql | 25 -- contrib/pg_tde/src/access/pg_tde_tdemap.c | 317 +++++++++++++++- .../pg_tde/src/access/pg_tde_xlog_encrypt.c | 340 +++++++++--------- contrib/pg_tde/src/catalog/tde_global_space.c | 54 --- .../pg_tde/src/catalog/tde_principal_key.c | 5 +- contrib/pg_tde/src/common/pg_tde_utils.c | 6 +- .../pg_tde/src/include/access/pg_tde_tdemap.h | 65 +++- .../src/include/access/pg_tde_xlog_encrypt.h | 10 +- .../src/include/catalog/tde_global_space.h | 6 - .../src/include/catalog/tde_principal_key.h | 2 + contrib/pg_tde/src/pg_tde.c | 4 +- contrib/pg_tde/t/010_wal_encrypt.pl | 3 - contrib/pg_tde/t/expected/010_wal_encrypt.out | 2 - src/backend/access/transam/xlog.c | 2 +- src/backend/access/transam/xlogreader.c | 4 +- src/backend/access/transam/xlogrecovery.c | 3 +- src/backend/replication/walreceiver.c | 4 +- .../pg_tde_alter_key_provider.c | 1 + src/bin/pg_waldump/Makefile | 4 +- src/bin/pg_waldump/pg_waldump.c | 20 +- src/bin/pg_waldump/t/003_basic_encrypted.pl | 3 +- .../t/004_save_fullpage_encrypted.pl | 1 - src/include/access/xlog_internal.h | 4 +- src/include/access/xlog_smgr.h | 24 +- 35 files changed, 645 insertions(+), 462 deletions(-) delete mode 100644 contrib/pg_tde/expected/wal_key.out delete mode 100644 contrib/pg_tde/expected/wal_key_1.out delete mode 100644 contrib/pg_tde/expected/wal_key_2.out delete mode 100644 contrib/pg_tde/sql/wal_key.sql delete mode 100644 contrib/pg_tde/src/catalog/tde_global_space.c diff --git a/contrib/pg_tde/Makefile 
b/contrib/pg_tde/Makefile index f0060fd261b..61056a3fbc7 100644 --- a/contrib/pg_tde/Makefile +++ b/contrib/pg_tde/Makefile @@ -24,7 +24,6 @@ vault_v2_test_basic \ alter_index_basic \ merge_join_basic \ tablespace_basic \ -wal_key \ relocate TAP_TESTS = 1 @@ -51,7 +50,6 @@ src/keyring/keyring_vault.o \ src/keyring/keyring_kmip.o \ src/keyring/keyring_kmip_ereport.o \ src/keyring/keyring_api.o \ -src/catalog/tde_global_space.o \ src/catalog/tde_keyring.o \ src/catalog/tde_keyring_parse_opts.o \ src/catalog/tde_principal_key.o \ diff --git a/contrib/pg_tde/Makefile.tools b/contrib/pg_tde/Makefile.tools index d9a66cfaa83..1f4a57addb7 100644 --- a/contrib/pg_tde/Makefile.tools +++ b/contrib/pg_tde/Makefile.tools @@ -3,7 +3,6 @@ TDE_XLOG_OBJS = \ TDE_OBJS = \ src/access/pg_tde_tdemap.frontend \ - src/catalog/tde_global_space.frontend \ src/catalog/tde_keyring.frontend \ src/catalog/tde_keyring_parse_opts.frontend \ src/catalog/tde_principal_key.frontend \ diff --git a/contrib/pg_tde/documentation/docs/faq.md b/contrib/pg_tde/documentation/docs/faq.md index 5a61fc5d3c6..835a90f4702 100644 --- a/contrib/pg_tde/documentation/docs/faq.md +++ b/contrib/pg_tde/documentation/docs/faq.md @@ -85,13 +85,13 @@ The principal key is used to encrypt the internal keys. The principal key is sto WAL encryption is done globally for the entire database cluster. All modifications to any database within a PostgreSQL cluster are written to the same WAL to maintain data consistency and integrity and ensure that PostgreSQL cluster can be restored to a consistent state. Therefore, WAL is encrypted globally. -When you turn on WAL encryption, `pg_tde` encrypts entire WAL pages except for the header. The header contains a marker if a page is encrypted or not. +When you turn on WAL encryption, `pg_tde` encrypts entire WAL files starting from the first WAL write after the server was started with the encryption turned on. 
The same 2-tier approach is used with WAL as with the table data: WAL pages are first encrypted with the internal key. Then the internal key is encrypted with the global principal key. -You can turn WAL encryption on and off so WAL can contain both encrypted and unencrypted pages. The WAL encryption GUC variable influences only writes. +You can turn WAL encryption on and off so WAL can contain both encrypted and unencrypted data. The WAL encryption GUC variable influences only writes. -Whenever the WAL is being read (by the recovery process or tools), the decision on what pages should be decrypted is based solely on the encryption flag of each page. +Whenever the WAL is being read (by the recovery process or tools), the decision on what should be decrypted is based solely on the metadata of WAL encryption keys. ## Should I encrypt all my data? @@ -145,7 +145,7 @@ In `pg_tde`, multi-tenancy is supported via a separate principal key per databas To control user access to the databases, you can use role-based access control (RBAC). -WAL files are encrypted globally across the entire PostgreSQL cluster using the same encryption key. Users don't interact with WAL files as these are used by the database management system to ensure data integrity and durability. +WAL files are encrypted globally across the entire PostgreSQL cluster using the same encryption keys. Users don't interact with WAL files as these are used by the database management system to ensure data integrity and durability. ## Are my backups safe? Can I restore from them? diff --git a/contrib/pg_tde/documentation/docs/setup.md b/contrib/pg_tde/documentation/docs/setup.md index 24e70064073..156a45fdeea 100644 --- a/contrib/pg_tde/documentation/docs/setup.md +++ b/contrib/pg_tde/documentation/docs/setup.md @@ -133,4 +133,4 @@ Load the `pg_tde` at startup time. 
The extension requires additional shared memo ## Next steps [WAL encryption](wal-encryption.md){.md-button} - + \ No newline at end of file diff --git a/contrib/pg_tde/documentation/docs/wal-encryption.md b/contrib/pg_tde/documentation/docs/wal-encryption.md index 1c03e6f624c..9abdddd72d6 100644 --- a/contrib/pg_tde/documentation/docs/wal-encryption.md +++ b/contrib/pg_tde/documentation/docs/wal-encryption.md @@ -1,32 +1,15 @@ # WAL encryption configuration (tech preview) -After you [enabled `pg_tde`](#enable-extension) and restarted the Percona Server for PostgreSQL, a principal key and a keyring for WAL are created. Now you need to instruct `pg_tde ` to encrypt WAL files by configuring WAL encryption. +Before turning WAL encryption on, you must first create a principal key for WAL. -Here's how to do it: +Here's what to do: -1. Enable WAL level encryption using the `ALTER SYSTEM SET` command. You need the superuser privileges to run this command: +1. Create the `pg_tde` extension if it does not exist: ```sql - ALTER SYSTEM set pg_tde.wal_encrypt = on; + CREATE EXTENSION IF NOT EXISTS pg_tde; ``` - -2. Restart the server to apply the changes. - - * On Debian and Ubuntu: - - ```sh - sudo systemctl restart postgresql-17 - ``` - - * On RHEL and derivatives - - ```sh - sudo systemctl restart postgresql-17 - ``` - -3. We highly recommend you to create your own keyring and rotate the principal key. This is because the default principal key is created from the local keyfile and is stored unencrypted. - - Set up the key provider for WAL encryption +2. 
Set up the key provider for WAL encryption === "With KMIP server" @@ -51,11 +34,11 @@ Here's how to do it: ``` SELECT pg_tde_add_key_global_provider_kmip('kmip','127.0.0.1', 5696, '/tmp/server_certificate.pem', '/tmp/client_key_jane_doe.pem'); ``` - + === "With HashiCorp Vault" ```sql - SELECT pg_tde_add_global_key_provider_vault_v2('provider-name',:'secret_token','url','mount','ca_path'); + SELECT pg_tde_add_global_key_provider_vault_v2('provider-name', 'secret_token', 'url', 'mount', 'ca_path'); ``` where: @@ -72,19 +55,36 @@ Here's how to do it: This setup is intended for development and stores the keys unencrypted in the specified data file. ```sql - SELECT pg_tde_add_key_provider_file('provider-name','/path/to/the/keyring/data.file'); + SELECT pg_tde_add_global_key_provider_file('provider-name','/path/to/the/keyring/data.file'); ``` -4. Rotate the principal key for WAL encryption. +3. Create principal key + + ```sql + SELECT pg_tde_set_server_principal_key('principal-key', 'provider-name'); + ``` + +4. Enable WAL level encryption using the `ALTER SYSTEM` command. You need the privileges of the superuser to run this command: ```sql - SELECT pg_tde_set_server_principal_key('new-principal-key', 'provider-name','ensure_new_key'); + ALTER SYSTEM SET pg_tde.wal_encrypt = on; ``` - The `ensure_new_key` parameter is set to `true` by default. It ensures that a new key must be unique. If set to `false`, an existing principal key will be reused. +5. Restart the server to apply the changes. + + * On Debian and Ubuntu: + ```sh + sudo systemctl restart postgresql + ``` + + * On RHEL and derivatives + + ```sh + sudo systemctl restart postgresql-17 + ``` -Now all WAL files are encrypted for both encrypted and unencrypted tables. +Now WAL files start to be encrypted for both encrypted and unencrypted tables. 
## Next steps diff --git a/contrib/pg_tde/expected/default_principal_key.out b/contrib/pg_tde/expected/default_principal_key.out index 346cda7f7c5..dfea64ae262 100644 --- a/contrib/pg_tde/expected/default_principal_key.out +++ b/contrib/pg_tde/expected/default_principal_key.out @@ -2,7 +2,7 @@ CREATE EXTENSION IF NOT EXISTS pg_tde; SELECT pg_tde_add_global_key_provider_file('file-provider','/tmp/pg_tde_regression_default_principal_key.per'); pg_tde_add_global_key_provider_file ------------------------------------- - -4 + -3 (1 row) SELECT pg_tde_set_default_principal_key('default-principal-key', 'file-provider', false); @@ -18,9 +18,8 @@ SELECT id, provider_name FROM pg_tde_list_all_global_key_providers(); id | provider_name ----+--------------- -2 | file-keyring2 - -3 | file-keyring - -4 | file-provider -(3 rows) + -3 | file-provider +(2 rows) -- Should fail: no principal key for the database yet SELECT key_provider_id, key_provider_name, principal_key_name @@ -40,7 +39,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name FROM pg_tde_principal_key_info(); key_provider_id | key_provider_name | principal_key_name -----------------+-------------------+----------------------- - -4 | file-provider | default-principal-key + -3 | file-provider | default-principal-key (1 row) CREATE DATABASE regress_pg_tde_other; @@ -63,7 +62,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name FROM pg_tde_principal_key_info(); key_provider_id | key_provider_name | principal_key_name -----------------+-------------------+----------------------- - -4 | file-provider | default-principal-key + -3 | file-provider | default-principal-key (1 row) \c regression_pg_tde @@ -77,7 +76,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name FROM pg_tde_principal_key_info(); key_provider_id | key_provider_name | principal_key_name -----------------+-------------------+--------------------------- - -4 | file-provider | new-default-principal-key + -3 | 
file-provider | new-default-principal-key (1 row) \c regress_pg_tde_other @@ -85,7 +84,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name FROM pg_tde_principal_key_info(); key_provider_id | key_provider_name | principal_key_name -----------------+-------------------+--------------------------- - -4 | file-provider | new-default-principal-key + -3 | file-provider | new-default-principal-key (1 row) DROP TABLE test_enc; diff --git a/contrib/pg_tde/expected/wal_key.out b/contrib/pg_tde/expected/wal_key.out deleted file mode 100644 index 8e59ae58221..00000000000 --- a/contrib/pg_tde/expected/wal_key.out +++ /dev/null @@ -1,36 +0,0 @@ --- basic tests for pg_tde_create_wal_key --- doesn't test actual wal encryption, as that requires a server restart, --- only sanity checks for the key creation -CREATE EXTENSION IF NOT EXISTS pg_tde; -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. -SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per'); - pg_tde_add_global_key_provider_file -------------------------------------- - -3 -(1 row) - -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. --- db local principal key with global provider -SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true); -ERROR: failed to create principal key: already exists -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. -SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring'); - pg_tde_set_server_principal_key ---------------------------------- - t -(1 row) - --- and now it should work! 
-SELECT pg_tde_create_wal_key(); - pg_tde_create_wal_key ------------------------ - t -(1 row) - --- and now it shouldn't create a new one! -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. -DROP EXTENSION pg_tde; diff --git a/contrib/pg_tde/expected/wal_key_1.out b/contrib/pg_tde/expected/wal_key_1.out deleted file mode 100644 index c374630388b..00000000000 --- a/contrib/pg_tde/expected/wal_key_1.out +++ /dev/null @@ -1,36 +0,0 @@ --- basic tests for pg_tde_create_wal_key --- doesn't test actual wal encryption, as that requires a server restart, --- only sanity checks for the key creation -CREATE EXTENSION IF NOT EXISTS pg_tde; -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. -SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per'); - pg_tde_add_global_key_provider_file -------------------------------------- - -1 -(1 row) - -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. --- db local principal key with global provider -SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true); -ERROR: failed to create principal key: already exists -SELECT pg_tde_create_wal_key(); -ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables. -SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring'); - pg_tde_set_server_principal_key ---------------------------------- - t -(1 row) - --- and now it should work! -SELECT pg_tde_create_wal_key(); - pg_tde_create_wal_key ------------------------ - t -(1 row) - --- and now it shouldn't create a new one! -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. 
-DROP EXTENSION pg_tde; diff --git a/contrib/pg_tde/expected/wal_key_2.out b/contrib/pg_tde/expected/wal_key_2.out deleted file mode 100644 index d380b7432b9..00000000000 --- a/contrib/pg_tde/expected/wal_key_2.out +++ /dev/null @@ -1,32 +0,0 @@ --- basic tests for pg_tde_create_wal_key --- doesn't test actual wal encryption, as that requires a server restart, --- only sanity checks for the key creation -CREATE EXTENSION IF NOT EXISTS pg_tde; -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. -SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per'); - pg_tde_add_global_key_provider_file -------------------------------------- - -2 -(1 row) - -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. --- db local principal key with global provider -SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true); -ERROR: failed to create principal key: already exists -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. -SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring'); - pg_tde_set_server_principal_key ---------------------------------- - t -(1 row) - --- and now it should work! -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. --- and now it shouldn't create a new one! -SELECT pg_tde_create_wal_key(); -ERROR: WAL key already exists. 
-DROP EXTENSION pg_tde; diff --git a/contrib/pg_tde/meson.build b/contrib/pg_tde/meson.build index 3ce638774e4..86a5ead8cff 100644 --- a/contrib/pg_tde/meson.build +++ b/contrib/pg_tde/meson.build @@ -34,7 +34,6 @@ pg_tde_sources = files( 'src/smgr/pg_tde_smgr.c', - 'src/catalog/tde_global_space.c', 'src/catalog/tde_keyring.c', 'src/catalog/tde_keyring_parse_opts.c', 'src/catalog/tde_principal_key.c', @@ -108,7 +107,6 @@ sql_tests = [ 'cache_alloc', 'update_basic', 'key_provider', - 'wal_key', 'relocate', ] @@ -172,7 +170,6 @@ tests += { tde_decrypt_sources = files( 'src/access/pg_tde_tdemap.c', 'src/access/pg_tde_xlog_encrypt.c', - 'src/catalog/tde_global_space.c', 'src/catalog/tde_keyring.c', 'src/catalog/tde_keyring_parse_opts.c', 'src/catalog/tde_principal_key.c', diff --git a/contrib/pg_tde/pg_tde--1.0-beta2.sql b/contrib/pg_tde/pg_tde--1.0-beta2.sql index e1b7e555f09..f876e364efd 100644 --- a/contrib/pg_tde/pg_tde--1.0-beta2.sql +++ b/contrib/pg_tde/pg_tde--1.0-beta2.sql @@ -481,11 +481,6 @@ RETURNS boolean LANGUAGE C AS 'MODULE_PATHNAME'; -CREATE FUNCTION pg_tde_create_wal_key() -RETURNS boolean -LANGUAGE C -AS 'MODULE_PATHNAME'; - CREATE FUNCTION pg_tde_extension_initialize() RETURNS VOID LANGUAGE C diff --git a/contrib/pg_tde/sql/wal_key.sql b/contrib/pg_tde/sql/wal_key.sql deleted file mode 100644 index 1084159ba7e..00000000000 --- a/contrib/pg_tde/sql/wal_key.sql +++ /dev/null @@ -1,25 +0,0 @@ --- basic tests for pg_tde_create_wal_key --- doesn't test actual wal encryption, as that requires a server restart, --- only sanity checks for the key creation -CREATE EXTENSION IF NOT EXISTS pg_tde; - -SELECT pg_tde_create_wal_key(); - -SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per'); - -SELECT pg_tde_create_wal_key(); - --- db local principal key with global provider -SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true); - -SELECT pg_tde_create_wal_key(); - -SELECT 
pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring'); - --- and now it should work! -SELECT pg_tde_create_wal_key(); - --- and now it shouldn't create a new one! -SELECT pg_tde_create_wal_key(); - -DROP EXTENSION pg_tde; \ No newline at end of file diff --git a/contrib/pg_tde/src/access/pg_tde_tdemap.c b/contrib/pg_tde/src/access/pg_tde_tdemap.c index e6ce7b38852..82188fe3e10 100644 --- a/contrib/pg_tde/src/access/pg_tde_tdemap.c +++ b/contrib/pg_tde/src/access/pg_tde_tdemap.c @@ -25,6 +25,7 @@ #include "access/pg_tde_tdemap.h" #include "access/pg_tde_xlog.h" +#include "catalog/tde_global_space.h" #include "catalog/tde_principal_key.h" #include "encryption/enc_aes.h" #include "encryption/enc_tde.h" @@ -61,6 +62,8 @@ #define MAP_ENTRY_SIZE sizeof(TDEMapEntry) #define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader) +#define MaxXLogRecPtr (~(XLogRecPtr)0) + typedef struct TDEFileHeader { int32 file_version; @@ -116,6 +119,10 @@ RelKeyCache tde_rel_key_cache = { .cap = 0, }; + +static WALKeyCacheRec *tde_wal_key_cache = NULL; +static WALKeyCacheRec *tde_wal_key_last_rec = NULL; + static int32 pg_tde_process_map_entry(const RelFileLocator *rlocator, uint32 key_type, char *db_map_path, off_t *offset, bool should_delete); static InternalKey *pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEPrincipalKey *principal_key); static InternalKey *tde_decrypt_rel_key(TDEPrincipalKey *principal_key, InternalKey *enc_rel_key_data, Oid dbOid); @@ -125,18 +132,7 @@ static bool pg_tde_read_one_map_entry(int fd, const RelFileLocator *rlocator, in static InternalKey *pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEPrincipalKey *principal_key); static int pg_tde_open_file(char *tde_filename, TDEPrincipalKeyInfo *principal_key_info, bool update_header, int fileFlags, bool *is_new_file, off_t *curr_pos); static InternalKey *pg_tde_get_key_from_cache(const RelFileLocator *rlocator, uint32 key_type); - -#define PG_TDE_MAP_FILENAME "pg_tde_%d_map" 
-#define PG_TDE_KEYDATA_FILENAME "pg_tde_%d_dat" - -static inline void -pg_tde_set_db_file_paths(Oid dbOid, char *map_path, char *keydata_path) -{ - if (map_path) - join_path_components(map_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_MAP_FILENAME, dbOid)); - if (keydata_path) - join_path_components(keydata_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_KEYDATA_FILENAME, dbOid)); -} +static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn); #ifndef FRONTEND @@ -161,12 +157,6 @@ pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator) return pg_tde_create_key_map_entry(&newrlocator->locator, TDE_KEY_TYPE_SMGR); } -InternalKey * -pg_tde_create_global_key(const RelFileLocator *newrlocator) -{ - return pg_tde_create_key_map_entry(newrlocator, TDE_KEY_TYPE_GLOBAL); -} - InternalKey * pg_tde_create_heap_basic_key(const RelFileLocator *newrlocator) { @@ -235,6 +225,7 @@ static void pg_tde_generate_internal_key(InternalKey *int_key, uint32 entry_type) { int_key->rel_type = entry_type; + int_key->start_lsn = InvalidXLogRecPtr; int_key->ctx = NULL; if (!RAND_bytes(int_key->key, INTERNAL_KEY_LEN)) @@ -256,6 +247,41 @@ tde_sprint_key(InternalKey *k) return buf; } +/* + * Generates a new internal key for WAL and adds it to the _dat file. It doesn't + * add unecnrypted key into cache but rather sets it in `rel_key_data`. + * + * We have a special function for WAL as it is being called during recovery + * (start) so there should be no XLog records, aquired locks, and reads from + * cache. The key is always created with start_lsn = InvalidXLogRecPtr. Which + * will be updated with the actual lsn by the first WAL write. 
+ */ +void +pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 entry_type) +{ + InternalKey *enc_rel_key_data; + TDEPrincipalKey *principal_key; + + principal_key = get_principal_key_from_keyring(newrlocator->dbOid, false); + if (principal_key == NULL) + { + ereport(ERROR, + (errmsg("failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted WAL."))); + + return; + } + + /* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */ + pg_tde_generate_internal_key(rel_key_data, TDE_KEY_TYPE_GLOBAL | entry_type); + enc_rel_key_data = tde_encrypt_rel_key(principal_key, rel_key_data, newrlocator->dbOid); + + /* + * Add the encrypted key to the key map data file structure. + */ + pg_tde_write_key_map_entry(newrlocator, enc_rel_key_data, &principal_key->keyInfo); + pfree(enc_rel_key_data); +} + /* * Encrypts a given key and returns the encrypted one. */ @@ -930,6 +956,85 @@ pg_tde_move_rel_key(const RelFileLocator *newrlocator, const RelFileLocator *old pfree(enc_key); } +/* It's called by seg_write inside crit section so no pallocs, hence + * needs keyfile_path + */ +void +pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path) +{ + LWLock *lock_pk = tde_lwlock_enc_keys(); + int fd = -1; + off_t write_pos, + last_key_idx, + prev_key_pos; + InternalKey prev_key; + + fd = BasicOpenFile(keyfile_path, O_RDWR | PG_BINARY); + if (fd < 0) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open tde file \"%s\": %m", + keyfile_path))); + } + + last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN) - 1; + write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * INTERNAL_KEY_DAT_LEN) + offsetof(InternalKey, start_lsn); + + LWLockAcquire(lock_pk, LW_EXCLUSIVE); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr)) + { + /* TODO: what now? 
File is corrupted */ + LWLockRelease(lock_pk); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write tde key data file: %m"))); + } + + /* + * If the last key overlaps with the previous, then invalidate the + * previous one. This may (and will) happen on replicas because it + * re-reads primary's data from the beginning of the segment on restart. + */ + if (last_key_idx > 0) + { + prev_key_pos = TDE_FILE_HEADER_SIZE + ((last_key_idx - 1) * INTERNAL_KEY_DAT_LEN); + + if (pg_pread(fd, &prev_key, INTERNAL_KEY_DAT_LEN, prev_key_pos) != INTERNAL_KEY_DAT_LEN) + { + LWLockRelease(lock_pk); + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not read previous WAL key: %m"))); + } + + if (prev_key.start_lsn >= lsn) + { + WALKeySetInvalid(&prev_key); + + if (pg_pwrite(fd, &prev_key, INTERNAL_KEY_DAT_LEN, prev_key_pos) != INTERNAL_KEY_DAT_LEN) + { + LWLockRelease(lock_pk); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write invalidated key: %m"))); + } + } + } + + if (pg_fsync(fd) != 0) + { + LWLockRelease(lock_pk); + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file: %m"))); + } + + LWLockRelease(lock_pk); + close(fd); +} + #endif /* !FRONTEND */ /* @@ -1431,6 +1536,182 @@ pg_tde_get_key_from_cache(const RelFileLocator *rlocator, uint32 key_type) return NULL; } +/* + * Returns last (the most recent) key for a given relation + */ +WALKeyCacheRec * +pg_tde_get_last_wal_key(void) +{ + return tde_wal_key_last_rec; +} + +WALKeyCacheRec * +pg_tde_get_wal_cache_keys(void) +{ + return tde_wal_key_cache; +} + +InternalKey * +pg_tde_read_last_wal_key(void) +{ + RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + char db_keydata_path[MAXPGPATH] = {0}; + off_t read_pos = 0; + LWLock *lock_pk = tde_lwlock_enc_keys(); + TDEPrincipalKey *principal_key; + int fd = -1; + int file_idx = 0; + bool is_new; + InternalKey *enc_rel_key_data, + *rel_key_data; + off_t fsize; + + 
LWLockAcquire(lock_pk, LW_EXCLUSIVE); + principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE); + if (principal_key == NULL) + { + LWLockRelease(lock_pk); + elog(DEBUG1, "init WAL encryption: no principal key"); + return NULL; + } + pg_tde_set_db_file_paths(rlocator.dbOid, NULL, db_keydata_path); + + fd = pg_tde_open_file(db_keydata_path, &principal_key->keyInfo, false, O_RDONLY, &is_new, &read_pos); + fsize = lseek(fd, 0, SEEK_END); + /* No keys */ + if (fsize == TDE_FILE_HEADER_SIZE) + { + LWLockRelease(lock_pk); + return NULL; + } + + file_idx = ((fsize - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN) - 1; + enc_rel_key_data = pg_tde_read_one_keydata(fd, file_idx, principal_key); + if (!enc_rel_key_data) + { + LWLockRelease(lock_pk); + return NULL; + } + + rel_key_data = tde_decrypt_rel_key(principal_key, enc_rel_key_data, rlocator.dbOid); + LWLockRelease(lock_pk); + close(fd); + + return rel_key_data; +} + +/* Fetches WAL keys from disk and adds them to the WAL cache */ +WALKeyCacheRec * +pg_tde_fetch_wal_keys(XLogRecPtr start_lsn) +{ + RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + char db_keydata_path[MAXPGPATH] = {0}; + off_t read_pos = 0; + LWLock *lock_pk = tde_lwlock_enc_keys(); + TDEPrincipalKey *principal_key; + int fd = -1; + InternalKey *enc_rel_key_data, + *rel_key_data, + *cached_key; + int keys_count; + WALKeyCacheRec *wal_rec, + *return_wal_rec = NULL; + bool new_file; + + LWLockAcquire(lock_pk, LW_SHARED); + principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED); + if (principal_key == NULL) + { + LWLockRelease(lock_pk); + elog(DEBUG1, "fetch WAL keys: no principal key"); + return NULL; + } + + pg_tde_set_db_file_paths(rlocator.dbOid, NULL, db_keydata_path); + + fd = pg_tde_open_file(db_keydata_path, &principal_key->keyInfo, false, O_RDONLY, &new_file, &read_pos); + + keys_count = (lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN; + + /* + * If there is no keys, return a fake one (with the range 
0-infinity) so + * the reader won't try to check the disk all the time. This for the + * walsender in case if WAL is unencrypted and never was. + */ + if (keys_count == 0) + { + InternalKey stub_key = { + .start_lsn = InvalidXLogRecPtr, + }; + + cached_key = pg_tde_put_key_into_cache(&rlocator, &stub_key); + wal_rec = pg_tde_add_wal_key_to_cache(cached_key, InvalidXLogRecPtr); + + LWLockRelease(lock_pk); + close(fd); + return wal_rec; + } + + for (int file_idx = 0; file_idx < keys_count; file_idx++) + { + enc_rel_key_data = pg_tde_read_one_keydata(fd, file_idx, principal_key); + + /* + * Skip new (just created but not updated by write) and invalid keys + */ + if (enc_rel_key_data->start_lsn != InvalidXLogRecPtr && + WALKeyIsValid(enc_rel_key_data) && + enc_rel_key_data->start_lsn >= start_lsn) + { + rel_key_data = tde_decrypt_rel_key(principal_key, enc_rel_key_data, rlocator.dbOid); + cached_key = pg_tde_put_key_into_cache(&rlocator, rel_key_data); + pfree(rel_key_data); + + wal_rec = pg_tde_add_wal_key_to_cache(cached_key, enc_rel_key_data->start_lsn); + if (!return_wal_rec) + return_wal_rec = wal_rec; + } + pfree(enc_rel_key_data); + } + LWLockRelease(lock_pk); + close(fd); + + return return_wal_rec; +} + +static WALKeyCacheRec * +pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn) +{ + WALKeyCacheRec *wal_rec; +#ifndef FRONTEND + MemoryContext oldCtx; + + oldCtx = MemoryContextSwitchTo(TopMemoryContext); +#endif + wal_rec = (WALKeyCacheRec *) palloc(sizeof(WALKeyCacheRec)); + memset(wal_rec, 0, sizeof(WALKeyCacheRec)); +#ifndef FRONTEND + MemoryContextSwitchTo(oldCtx); +#endif + + wal_rec->start_lsn = start_lsn; + wal_rec->end_lsn = MaxXLogRecPtr; + wal_rec->key = cached_key; + if (!tde_wal_key_last_rec) + { + tde_wal_key_last_rec = wal_rec; + tde_wal_key_cache = tde_wal_key_last_rec; + } + else + { + tde_wal_key_last_rec->next = wal_rec; + tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn - 1; + tde_wal_key_last_rec = wal_rec; + } + + 
return wal_rec; +} + /* Add key to cache. See comments on `RelKeyCache`. * * TODO: add tests. diff --git a/contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c b/contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c index a612b1e368d..be173f11f16 100644 --- a/contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c +++ b/contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c @@ -15,6 +15,7 @@ #ifdef PERCONA_EXT #include "pg_tde.h" #include "pg_tde_defines.h" +#include "pg_tde_guc.h" #include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xloginsert.h" @@ -23,53 +24,47 @@ #include "utils/guc.h" #include "utils/memutils.h" +#include "access/pg_tde_tdemap.h" #include "access/pg_tde_xlog_encrypt.h" #include "catalog/tde_global_space.h" #include "encryption/enc_tde.h" #ifdef FRONTEND #include "pg_tde_fe.h" +#else +#include "port/atomics.h" #endif -#include "pg_tde_guc.h" - static const XLogSmgr tde_xlog_smgr = { .seg_read = tdeheap_xlog_seg_read, .seg_write = tdeheap_xlog_seg_write, }; -static XLogPageHeaderData DecryptCurrentPageHrd; - static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char *iv_prefix); #ifndef FRONTEND -/* GUC */ - -static XLogPageHeaderData EncryptCurrentPageHrd; - -static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset); -static char *TDEXLogEncryptBuf = NULL; -static int XLOGChooseNumBuffers(void); +static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, + off_t offset, TimeLineID tli, + XLogSegNo segno); -Datum pg_tde_create_wal_key(PG_FUNCTION_ARGS); +typedef struct EncryptionStateData +{ + char *segBuf; + char db_keydata_path[MAXPGPATH]; + pg_atomic_uint64 enc_key_lsn; /* to sync with readers */ +} EncryptionStateData; -PG_FUNCTION_INFO_V1(pg_tde_create_wal_key); +static EncryptionStateData *EncryptionState = NULL; -Datum -pg_tde_create_wal_key(PG_FUNCTION_ARGS) +/* TODO: can be swapped out to the disk */ +static InternalKey EncryptionKey = { - InternalKey *key = 
GetRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), TDE_KEY_TYPE_GLOBAL, true); - - if (key != NULL) - { - ereport(ERROR, - (errmsg("WAL key already exists."))); - PG_RETURN_BOOL(false); - } + .rel_type = MAP_ENTRY_EMPTY, + .start_lsn = InvalidXLogRecPtr, + .ctx = NULL, +}; - pg_tde_create_global_key(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID)); - PG_RETURN_BOOL(true); -} +static int XLOGChooseNumBuffers(void); /* This can't be a GUC check hook, because that would run too soon during startup */ void @@ -82,7 +77,7 @@ TDEXlogCheckSane(void) if (key == NULL) { ereport(ERROR, - (errmsg("WAL encryption can only be enabled with a properly configured key. Disable pg_tde.wal_encrypt and create one using pg_tde_create_wal_key() before enabling it."))); + (errmsg("WAL encryption can only be enabled with a properly configured principal key. Disable pg_tde.wal_encrypt and create one using pg_tde_set_server_principal_key() or pg_tde_set_global_principal_key() before enabling it."))); } } } @@ -112,6 +107,17 @@ TDEXLogEncryptBuffSize(void) return (Size) XLOG_BLCKSZ * xbuffers; } +Size +TDEXLogEncryptStateSize(void) +{ + Size sz; + + sz = TYPEALIGN(PG_IO_ALIGN_SIZE, TDEXLogEncryptBuffSize()); + sz = add_size(sz, sizeof(EncryptionStateData)); + + return MAXALIGN(sz); +} + /* * Alloc memory for the encryption buffer. 
* @@ -126,121 +132,105 @@ void TDEXLogShmemInit(void) { bool foundBuf; + char *allocptr; - if (EncryptXLog) - { - TDEXLogEncryptBuf = (char *) - TYPEALIGN(PG_IO_ALIGN_SIZE, - ShmemInitStruct("TDE XLog Encryption Buffer", - XLOG_TDE_ENC_BUFF_ALIGNED_SIZE, - &foundBuf)); + /* + * TODO: we need enc_key_lsn all the time but encrypt buffer only when + * EncryptXLog is on + */ + EncryptionState = (EncryptionStateData *) + ShmemInitStruct("TDE XLog Encryption State", + TDEXLogEncryptStateSize(), + &foundBuf); - elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", XLOG_TDE_ENC_BUFF_ALIGNED_SIZE); - } + allocptr = ((char *) EncryptionState) + TYPEALIGN(PG_IO_ALIGN_SIZE, sizeof(EncryptionStateData)); + EncryptionState->segBuf = allocptr; + + pg_atomic_init_u64(&EncryptionState->enc_key_lsn, 0); + + elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", TDEXLogEncryptStateSize()); } /* * Encrypt XLog page(s) from the buf and write to the segment file. */ static ssize_t -TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset) +TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno) { char iv_prefix[16] = {0,}; - size_t data_size = 0; - XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd; - XLogPageHeader enc_buf_page = NULL; - InternalKey *key = GetTdeGlobaleRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID)); - off_t enc_off; - size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; - uint32 iv_ctr = 0; + InternalKey *key = &EncryptionKey; + char *enc_buff = EncryptionState->segBuf; #ifdef TDE_XLOG_DEBUG - elog(DEBUG1, "write encrypted WAL, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset); + elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X", + count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn)); #endif - /* - * Go through the buf page-by-page and encrypt them. 
We may start or - * finish writing from/in the middle of the page (walsender or - * `full_page_writes = off`). So preserve a page header for the IV init - * data. - * - * TODO: check if walsender restarts form the beggining of the page in - * case of the crash. - */ - for (enc_off = 0; enc_off < count;) - { - data_size = Min(page_size, count); - - if (page_size == XLOG_BLCKSZ) - { - memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD); - - /* - * Need to use a separate buf for the encryption so the page - * remains non-crypted in the XLog buf (XLogInsert has to have - * access to records' lsn). - */ - enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off); - memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) XLogPageHeaderSize(curr_page_hdr)); - enc_buf_page->xlp_info |= XLP_ENCRYPTED; - - enc_off += XLogPageHeaderSize(curr_page_hdr); - data_size -= XLogPageHeaderSize(curr_page_hdr); - /* it's a beginning of the page */ - iv_ctr = 0; - } - else - { - /* we're in the middle of the page */ - iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); - } - - if (data_size + enc_off > count) - { - data_size = count - enc_off; - } - - /* - * The page is zeroed (no data), no sense to encrypt. This may happen - * when base_backup or other requests XLOG SWITCH and some pages in - * XLog buffer still not used. 
- */ - if (curr_page_hdr->xlp_magic == 0) - { - /* ensure all the page is {0} */ - Assert((*((char *) buf + enc_off) == 0) && - memcmp((char *) buf + enc_off, (char *) buf + enc_off + 1, data_size - 1) == 0); - - enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off); - memcpy((char *) enc_buf_page, (char *) buf + enc_off, data_size); - } - else - { - SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); - PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size, - TDEXLogEncryptBuf + enc_off, key); - } - - page_size = XLOG_BLCKSZ; - enc_off += data_size; - } + SetXLogPageIVPrefix(tli, segno, iv_prefix); + PG_TDE_ENCRYPT_DATA(iv_prefix, offset, + (char *) buf, count, + enc_buff, key); - return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset); + return pg_pwrite(fd, enc_buff, count, offset); } + #endif /* !FRONTEND */ void TDEXLogSmgrInit(void) { +#ifndef FRONTEND + /* TODO: move to the separate func, it's not an SMGR init */ + InternalKey *key = pg_tde_read_last_wal_key(); + + /* TODO: clean-up this mess */ + if ((!key && EncryptXLog) || (key && + ((key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED && !EncryptXLog) || + (key->rel_type & TDE_KEY_TYPE_WAL_UNENCRYPTED && EncryptXLog)))) + { + pg_tde_create_wal_key( + &EncryptionKey, &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), + (EncryptXLog ? TDE_KEY_TYPE_WAL_ENCRYPTED : TDE_KEY_TYPE_WAL_UNENCRYPTED)); + } + else if (key) + { + EncryptionKey = *key; + pfree(key); + pg_atomic_write_u64(&EncryptionState->enc_key_lsn, EncryptionKey.start_lsn); + } + + pg_tde_set_db_file_paths(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID).dbOid, NULL, EncryptionState->db_keydata_path); + +#endif SetXLogSmgr(&tde_xlog_smgr); } ssize_t -tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) +tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno) { #ifndef FRONTEND + + /* + * Set the last (most recent) key's start LSN if not set.
+ * + * This function is called with WALWriteLock held, so no extra sync is needed. + */ + if (EncryptionKey.rel_type & TDE_KEY_TYPE_GLOBAL && + pg_atomic_read_u64(&EncryptionState->enc_key_lsn) == 0) + { + XLogRecPtr lsn; + + XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, lsn); + + pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_keydata_path); + EncryptionKey.start_lsn = lsn; + pg_atomic_write_u64(&EncryptionState->enc_key_lsn, lsn); + } + if (EncryptXLog) - return TDEXLogWriteEncryptedPages(fd, buf, count, offset); + return TDEXLogWriteEncryptedPages(fd, buf, count, offset, tli, segno); else #endif return pg_pwrite(fd, buf, count, offset); @@ -250,81 +240,109 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) * Read the XLog pages from the segment file and dectypt if need. */ ssize_t -tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) +tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno, int segSize) { ssize_t readsz; char iv_prefix[16] = {0,}; - size_t data_size = 0; - XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd; - InternalKey *key = NULL; - size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; - off_t dec_off; - uint32 iv_ctr = 0; + WALKeyCacheRec *keys = pg_tde_get_wal_cache_keys(); + XLogRecPtr write_key_lsn = 0; + WALKeyCacheRec *curr_key = NULL; + off_t dec_off = 0; + size_t dec_sz = 0; + XLogRecPtr data_start; + XLogRecPtr data_end; #ifdef TDE_XLOG_DEBUG - elog(DEBUG1, "read from a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset); + elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X", + count, offset, offset, LSN_FORMAT_ARGS(segno)); #endif - readsz = pg_pread(fd, buf, count, offset); - /* - * Read the buf page by page and decypt ecnrypted pages.
We may start or - * fihish reading from/in the middle of the page (walreceiver) in such a - * case we should preserve the last read page header for the IV data and - * the encryption state. - * - * TODO: check if walsender/receiver restarts form the beggining of the - * page in case of the crash. + * Read data from disk */ - for (dec_off = 0; dec_off < readsz;) + readsz = pg_pread(fd, buf, count, offset); + + if (!keys) { - data_size = Min(page_size, readsz); + /* cache is empty, try to read keys from disk */ + keys = pg_tde_fetch_wal_keys(0); + } - if (page_size == XLOG_BLCKSZ) - { - memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD); +#ifndef FRONTEND + write_key_lsn = pg_atomic_read_u64(&EncryptionState->enc_key_lsn); +#endif - /* set the flag to "not encrypted" for the walreceiver */ - ((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED; + if (write_key_lsn != 0) + { + WALKeyCacheRec *last_key = pg_tde_get_last_wal_key(); - Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0); - dec_off += XLogPageHeaderSize(curr_page_hdr); - data_size -= XLogPageHeaderSize(curr_page_hdr); - /* it's a beginning of the page */ - iv_ctr = 0; - } - else - { - /* we're in the middle of the page */ - iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); - } + Assert(last_key); - if ((data_size + dec_off) > readsz) + /* write has generated a new key, need to fetch it */ + if (last_key->start_lsn < write_key_lsn) { - data_size = readsz - dec_off; + pg_tde_fetch_wal_keys(write_key_lsn); + + /* in case cache was empty before */ + keys = pg_tde_get_wal_cache_keys(); } + } - if (curr_page_hdr->xlp_info & XLP_ENCRYPTED) + SetXLogPageIVPrefix(tli, segno, iv_prefix); + + XLogSegNoOffsetToRecPtr(segno, offset, segSize, data_start); + XLogSegNoOffsetToRecPtr(segno, offset + count, segSize, data_end); + + /* + * TODO: this is highly inefficient.
We should get rid of the linked list and + * search from the last key as this is what the walsender is using. + */ + curr_key = keys; + while (curr_key) + { +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "WAL key %X/%X-%X/%X, encrypted: %s", + LSN_FORMAT_ARGS(curr_key->start_lsn), + LSN_FORMAT_ARGS(curr_key->end_lsn), + curr_key->key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED ? "yes" : "no"); +#endif + + if (curr_key->key->start_lsn != InvalidXLogRecPtr && + (curr_key->key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED)) { - if (key == NULL) + /* + * Check if the key's range overlaps with the buffer's and decrypt + * the part that does. + */ + if (data_start <= curr_key->end_lsn && curr_key->start_lsn <= data_end) { - key = GetTdeGlobaleRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID)); + dec_off = XLogSegmentOffset(Max(data_start, curr_key->start_lsn), segSize); + dec_sz = XLogSegmentOffset(Min(data_end, curr_key->end_lsn), segSize) - dec_off; +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X", + dec_off, offset - dec_off, dec_sz, LSN_FORMAT_ARGS(curr_key->key->start_lsn)); +#endif + PG_TDE_DECRYPT_DATA(iv_prefix, dec_off, + (char *) buf + (offset - dec_off), + dec_sz, (char *) buf + (offset - dec_off), + curr_key->key); + + if (dec_off + dec_sz == offset) + { + break; + } } - SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); - PG_TDE_DECRYPT_DATA( - iv_prefix, iv_ctr, - (char *) buf + dec_off, data_size, (char *) buf + dec_off, key); } - page_size = XLOG_BLCKSZ; - dec_off += data_size; + curr_key = curr_key->next; } return readsz; } /* IV: TLI(uint32) + XLogRecPtr(uint64)*/ -static void +static inline void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char *iv_prefix) { iv_prefix[0] = (tli >> 24); diff --git a/contrib/pg_tde/src/catalog/tde_global_space.c b/contrib/pg_tde/src/catalog/tde_global_space.c deleted file mode 100644 index 8c1812f244c..00000000000 ---
a/contrib/pg_tde/src/catalog/tde_global_space.c +++ /dev/null @@ -1,54 +0,0 @@ -/*------------------------------------------------------------------------- - * - * tde_global_space.c - * Global catalog key management - * - * - * IDENTIFICATION - * src/catalog/tde_global_space.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#ifdef PERCONA_EXT - -#include "utils/memutils.h" - -#include "access/pg_tde_tdemap.h" -#include "catalog/tde_global_space.h" -#include "catalog/tde_keyring.h" -#include "common/pg_tde_utils.h" - -#ifdef FRONTEND -#include "pg_tde_fe.h" -#endif - -#include -#include -#include -#include - -void -TDEInitGlobalKeys(void) -{ - InternalKey *key; - - key = pg_tde_get_key_from_file(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), TDE_KEY_TYPE_GLOBAL, true); - - /* - * Internal Key should be in the TopMemmoryContext because of SSL - * contexts. This context is being initialized by OpenSSL with the pointer - * to the encryption context which is valid only for the current backend. - * So new backends have to inherit a cached key with NULL SSL connext and - * any changes to it have to remain local ot the backend. 
(see - * https://github.com/percona-Lab/pg_tde/pull/214#discussion_r1648998317) - */ - if (key != NULL) - { - pg_tde_put_key_into_cache(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), key); - } -} - -#endif /* PERCONA_EXT */ diff --git a/contrib/pg_tde/src/catalog/tde_principal_key.c b/contrib/pg_tde/src/catalog/tde_principal_key.c index c1280b7777f..06ad43c068e 100644 --- a/contrib/pg_tde/src/catalog/tde_principal_key.c +++ b/contrib/pg_tde/src/catalog/tde_principal_key.c @@ -87,7 +87,6 @@ static void shared_memory_shutdown(int code, Datum arg); static void principal_key_startup_cleanup(int tde_tbl_count, XLogExtensionInstall *ext_info, bool redo, void *arg); static void clear_principal_key_cache(Oid databaseId); static inline dshash_table *get_principal_key_Hash(void); -static TDEPrincipalKey *get_principal_key_from_keyring(Oid dbOid, bool pushToCache); static TDEPrincipalKey *get_principal_key_from_cache(Oid dbOid); static bool pg_tde_is_same_principal_key(TDEPrincipalKey *a, TDEPrincipalKey *b); static void pg_tde_update_global_principal_key_everywhere(TDEPrincipalKey *oldKey, TDEPrincipalKey *newKey); @@ -776,7 +775,7 @@ pg_tde_get_key_info(PG_FUNCTION_ARGS, Oid dbOid) * Gets principal key form the keyring and pops it into cache if key exists * Caller should hold an exclusive tde_lwlock_enc_keys lock */ -static TDEPrincipalKey * +TDEPrincipalKey * get_principal_key_from_keyring(Oid dbOid, bool pushToCache) { GenericKeyring *keyring; @@ -785,7 +784,7 @@ get_principal_key_from_keyring(Oid dbOid, bool pushToCache) const KeyInfo *keyInfo = NULL; KeyringReturnCodes keyring_ret; - Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + /* Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); */ principalKeyInfo = pg_tde_get_principal_key_info(dbOid); if (principalKeyInfo == NULL) diff --git a/contrib/pg_tde/src/common/pg_tde_utils.c b/contrib/pg_tde/src/common/pg_tde_utils.c index 59a4a9681e2..86be4188c95 100644 --- 
a/contrib/pg_tde/src/common/pg_tde_utils.c +++ b/contrib/pg_tde/src/common/pg_tde_utils.c @@ -134,18 +134,18 @@ get_tde_tables_count(void) #endif /* !FRONTEND */ -static char globalspace_dir[MAXPGPATH] = PG_TDE_DATA_DIR; +static char tde_data_dir[MAXPGPATH] = PG_TDE_DATA_DIR; void pg_tde_set_data_dir(const char *dir) { Assert(dir != NULL); - strlcpy(globalspace_dir, dir, sizeof(globalspace_dir)); + strlcpy(tde_data_dir, dir, sizeof(tde_data_dir)); } /* returns the palloc'd string */ char * pg_tde_get_tde_data_dir(void) { - return globalspace_dir; + return tde_data_dir; } diff --git a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h index fdb0ded32d7..8db643317bc 100644 --- a/contrib/pg_tde/src/include/access/pg_tde_tdemap.h +++ b/contrib/pg_tde/src/include/access/pg_tde_tdemap.h @@ -8,15 +8,21 @@ #ifndef PG_TDE_MAP_H #define PG_TDE_MAP_H +#include "access/xlog_internal.h" +#include "port.h" +#include "storage/relfilelocator.h" + #include "pg_tde.h" #include "catalog/tde_principal_key.h" -#include "storage/relfilelocator.h" +#include "common/pg_tde_utils.h" /* Map entry flags */ -#define MAP_ENTRY_EMPTY 0x00 -#define TDE_KEY_TYPE_HEAP_BASIC 0x01 -#define TDE_KEY_TYPE_SMGR 0x02 -#define TDE_KEY_TYPE_GLOBAL 0x04 +#define MAP_ENTRY_EMPTY 0x00 +#define TDE_KEY_TYPE_HEAP_BASIC 0x01 +#define TDE_KEY_TYPE_SMGR 0x02 +#define TDE_KEY_TYPE_GLOBAL 0x04 +#define TDE_KEY_TYPE_WAL_UNENCRYPTED 0x08 +#define TDE_KEY_TYPE_WAL_ENCRYPTED 0x10 #define MAP_ENTRY_VALID (TDE_KEY_TYPE_HEAP_BASIC | TDE_KEY_TYPE_SMGR | TDE_KEY_TYPE_GLOBAL) typedef struct InternalKey @@ -28,11 +34,19 @@ typedef struct InternalKey uint8 key[INTERNAL_KEY_LEN]; uint32 rel_type; + XLogRecPtr start_lsn; + void *ctx; } InternalKey; #define INTERNAL_KEY_DAT_LEN offsetof(InternalKey, ctx) +#define WALKeySetInvalid(key) \ + ((key)->rel_type &= ~(TDE_KEY_TYPE_WAL_ENCRYPTED | TDE_KEY_TYPE_WAL_UNENCRYPTED)) +#define WALKeyIsValid(key) \ + (((key)->rel_type & 
TDE_KEY_TYPE_WAL_UNENCRYPTED) != 0 || \ + ((key)->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED) != 0) + typedef struct XLogRelKey { RelFileLocator rlocator; @@ -40,13 +54,52 @@ typedef struct XLogRelKey TDEPrincipalKeyInfo pkInfo; } XLogRelKey; +/* + * WALKeyCacheRec is built on top of the InternalKeys cache. We still don't + * want key data to be swapped out to the disk (implemented in the InternalKeys + * cache) but we need extra information and the ability to have and reference + * a sequence of keys. + * + * TODO: For now it's a simple linked list which is no good. So consider having + * dedicated WAL keys cache inside some proper data structure. + */ +typedef struct WALKeyCacheRec +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + + InternalKey *key; + + struct WALKeyCacheRec *next; +} WALKeyCacheRec; + + +extern InternalKey *pg_tde_read_last_wal_key(void); + +extern WALKeyCacheRec *pg_tde_get_last_wal_key(void); +extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn); +extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void); +extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path); + extern InternalKey *pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator); -extern InternalKey *pg_tde_create_global_key(const RelFileLocator *newrlocator); extern InternalKey *pg_tde_create_heap_basic_key(const RelFileLocator *newrlocator); +extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 flags); extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *enc_rel_key_data, TDEPrincipalKeyInfo *principal_key_info); extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator, uint32 key_type); extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, uint32 key_type, off_t offset); +#define PG_TDE_MAP_FILENAME "pg_tde_%d_map" +#define PG_TDE_KEYDATA_FILENAME "pg_tde_%d_dat" + +static inline void +pg_tde_set_db_file_paths(Oid dbOid, char
*map_path, char *keydata_path) +{ + if (map_path) + join_path_components(map_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_MAP_FILENAME, dbOid)); + if (keydata_path) + join_path_components(keydata_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_KEYDATA_FILENAME, dbOid)); +} + extern InternalKey *GetRelationKey(RelFileLocator rel, uint32 entry_type, bool no_map_ok); extern InternalKey *GetSMGRRelationKey(RelFileLocatorBackend rel); extern InternalKey *GetHeapBaiscRelationKey(RelFileLocator rel); diff --git a/contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h b/contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h index 7ff0b474ef7..6be0f835059 100644 --- a/contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h +++ b/contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h @@ -15,12 +15,14 @@ extern Size TDEXLogEncryptBuffSize(void); -#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE) - +extern Size TDEXLogEncryptStateSize(void); extern void TDEXLogShmemInit(void); -extern ssize_t tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset); -extern ssize_t tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset); +extern ssize_t tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno, int segSize); +extern ssize_t tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, + off_t offset, TimeLineID tli, + XLogSegNo segno); extern void TDEXLogSmgrInit(void); extern void XLogInitGUC(void); diff --git a/contrib/pg_tde/src/include/catalog/tde_global_space.h b/contrib/pg_tde/src/include/catalog/tde_global_space.h index ece25bb14a5..a3f91097fa1 100644 --- a/contrib/pg_tde/src/include/catalog/tde_global_space.h +++ b/contrib/pg_tde/src/include/catalog/tde_global_space.h @@ -14,9 +14,6 @@ #include "postgres.h" #include "catalog/pg_tablespace_d.h" -#include "access/pg_tde_tdemap.h" -#include "catalog/tde_principal_key.h" - /* * Needed for global 
data (WAL etc) keys identification in caches and storage. * We take Oids of the sql operators, so there is no overlap with the "real" @@ -31,12 +28,9 @@ _obj_oid \ } - /* Needed for using the same default key for multiple databases */ #define DEFAULT_DATA_TDE_OID DEFAULTTABLESPACE_OID #define TDEisInGlobalSpace(dbOid) (dbOid == GLOBAL_DATA_TDE_OID) -extern void TDEInitGlobalKeys(void); - #endif /* TDE_GLOBAL_CATALOG_H */ diff --git a/contrib/pg_tde/src/include/catalog/tde_principal_key.h b/contrib/pg_tde/src/include/catalog/tde_principal_key.h index 014a7326405..d512e5ac7f9 100644 --- a/contrib/pg_tde/src/include/catalog/tde_principal_key.h +++ b/contrib/pg_tde/src/include/catalog/tde_principal_key.h @@ -68,4 +68,6 @@ extern bool xl_tde_perform_rotate_key(XLogPrincipalKeyRotate *xlrec); extern void PrincipalKeyGucInit(void); +extern TDEPrincipalKey *get_principal_key_from_keyring(Oid dbOid, bool pushToCache); + #endif /* PG_TDE_PRINCIPAL_KEY_H */ diff --git a/contrib/pg_tde/src/pg_tde.c b/contrib/pg_tde/src/pg_tde.c index c3f13421f32..b42876a82ca 100644 --- a/contrib/pg_tde/src/pg_tde.c +++ b/contrib/pg_tde/src/pg_tde.c @@ -79,7 +79,7 @@ tde_shmem_request(void) int required_locks = TdeRequiredLocksCount(); #ifdef PERCONA_EXT - sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE); + sz = add_size(sz, TDEXLogEncryptStateSize()); #endif if (prev_shmem_request_hook) @@ -99,8 +99,6 @@ tde_shmem_startup(void) AesInit(); #ifdef PERCONA_EXT - TDEInitGlobalKeys(); - TDEXLogShmemInit(); TDEXLogSmgrInit(); diff --git a/contrib/pg_tde/t/010_wal_encrypt.pl b/contrib/pg_tde/t/010_wal_encrypt.pl index 31ded5a0b0f..77e9633ca41 100644 --- a/contrib/pg_tde/t/010_wal_encrypt.pl +++ b/contrib/pg_tde/t/010_wal_encrypt.pl @@ -35,9 +35,6 @@ PGTDE::append_to_file($stdout); $stdout = $node->safe_psql('postgres', "SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-010');", extra_params => ['-a']); PGTDE::append_to_file($stdout); -$stdout = 
$node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();", extra_params => ['-a']); -PGTDE::append_to_file($stdout); - # Restart the server, it should work with encryption now PGTDE::append_to_file("-- server restart with wal encryption"); $node->stop(); diff --git a/contrib/pg_tde/t/expected/010_wal_encrypt.out b/contrib/pg_tde/t/expected/010_wal_encrypt.out index 7df67d02653..3b0b96725ef 100644 --- a/contrib/pg_tde/t/expected/010_wal_encrypt.out +++ b/contrib/pg_tde/t/expected/010_wal_encrypt.out @@ -3,8 +3,6 @@ SELECT pg_tde_add_global_key_provider_file('file-keyring-010','/tmp/pg_tde_test_ -1 SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-010'); t -SELECT pg_tde_create_wal_key(); -t -- server restart with wal encryption SHOW pg_tde.wal_encrypt; on diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eb86bb44206..631deea344d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2446,7 +2446,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) INSTR_TIME_SET_ZERO(start); pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); - written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset); + written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset, tli, openLogSegNo); pgstat_report_wait_end(); /* diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 8482caeb18c..d7892ae5c62 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1573,7 +1573,9 @@ WALRead(XLogReaderState *state, /* Reset errno first; eases reporting non-errno-affecting errors */ errno = 0; - readbytes = xlog_smgr->seg_read(state->seg.ws_file, p, segbytes, (off_t) startoff); + readbytes = xlog_smgr->seg_read(state->seg.ws_file, p, segbytes, + (off_t) startoff, state->seg.ws_tli, + state->seg.ws_segno, state->segcxt.ws_segsize); #ifndef FRONTEND pgstat_report_wait_end(); 
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index f84488e9c55..104687b9cd7 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -3398,7 +3398,8 @@ retry: readOff = targetPageOff; pgstat_report_wait_start(WAIT_EVENT_WAL_READ); - r = xlog_smgr->seg_read(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); + r = xlog_smgr->seg_read(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff, + curFileTLI, readSegNo, wal_segment_size); if (r != XLOG_BLCKSZ) { char fname[MAXFNAMELEN]; diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index fd347b129ff..973ef75f138 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -942,7 +942,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli) /* OK to write the logs */ errno = 0; - byteswritten = xlog_smgr->seg_write(recvFile, buf, segbytes, (off_t) startoff); + byteswritten = xlog_smgr->seg_write(recvFile, buf, segbytes, + (off_t) startoff, recvFileTLI, + recvSegNo); if (byteswritten <= 0) { char xlogfname[MAXFNAMELEN]; diff --git a/src/bin/pg_tde_alter_key_provider/pg_tde_alter_key_provider.c b/src/bin/pg_tde_alter_key_provider/pg_tde_alter_key_provider.c index d73f3eecb9b..b966c9d6cbc 100644 --- a/src/bin/pg_tde_alter_key_provider/pg_tde_alter_key_provider.c +++ b/src/bin/pg_tde_alter_key_provider/pg_tde_alter_key_provider.c @@ -1,6 +1,7 @@ #include "postgres_fe.h" +#include "pg_tde.h" #include "catalog/tde_keyring.h" #include "catalog/tde_global_space.h" #include "common/logging.h" diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile index 1a937692c74..8049faf9122 100644 --- a/src/bin/pg_waldump/Makefile +++ b/src/bin/pg_waldump/Makefile @@ -22,8 +22,8 @@ ifeq ($(enable_percona_ext),yes) OBJS += \ $(top_srcdir)/src/fe_utils/simple_list.o \ - $(top_builddir)/src/libtde/libtde.a \ - 
$(top_builddir)/src/libtde/libtdexlog.a + $(top_builddir)/src/libtde/libtdexlog.a \ + $(top_builddir)/src/libtde/libtde.a override CPPFLAGS := -I$(top_srcdir)/contrib/pg_tde/src/include -I$(top_srcdir)/contrib/pg_tde/src/libkmip/libkmip/include $(CPPFLAGS) endif diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 02753042db2..63feb1329bb 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -250,7 +250,26 @@ search_directory(const char *directory, const char *fname) PGAlignedXLogBlock buf; int r; +#ifdef PERCONA_EXT + off_t fsize; + TimeLineID tli; + XLogSegNo segno; + + /* + * WalSegSz is extracted from the first page header but it might be + * encrypted. But we need to know the segment size to decrypt it + * (it's required for encryption offset calculations). So we get the + * segment size from the file's actual size. + * XLogLongPageHeaderData->xlp_seg_size there is "just as a + * cross-check" anyway. + */ + fsize = lseek(fd, 0, SEEK_END); + XLogFromFileName(fname, &tli, &segno, fsize); + + r = xlog_smgr->seg_read(fd, buf.data, XLOG_BLCKSZ, 0, tli, segno, fsize); +#else r = read(fd, buf.data, XLOG_BLCKSZ); +#endif if (r == XLOG_BLCKSZ) { XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data; @@ -1136,7 +1155,6 @@ main(int argc, char **argv) if (kringdir != NULL) { pg_tde_fe_init(kringdir); - TDEInitGlobalKeys(); TDEXLogSmgrInit(); } #endif diff --git a/src/bin/pg_waldump/t/003_basic_encrypted.pl b/src/bin/pg_waldump/t/003_basic_encrypted.pl index e998c910fa0..957decf71ad 100644 --- a/src/bin/pg_waldump/t/003_basic_encrypted.pl +++ b/src/bin/pg_waldump/t/003_basic_encrypted.pl @@ -29,7 +29,6 @@ $node->start; $node->safe_psql('postgres', "CREATE EXTENSION IF NOT EXISTS pg_tde;"); $node->safe_psql('postgres', "SELECT pg_tde_add_global_key_provider_file('file-keyring-wal','/tmp/pg_tde_test_keyring-wal.per');");; $node->safe_psql('postgres', "SELECT
pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-wal');"); -$node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();"); $node->append_conf( 'postgresql.conf', q{ @@ -132,7 +131,7 @@ command_fails_like( command_like([ 'pg_waldump', '-k', $node->data_dir. '/pg_tde', $node->data_dir . '/pg_wal/' . $start_walfile ], qr/./, 'runs with start segment specified'); command_fails_like( - [ 'pg_waldump', $node->data_dir . '/pg_wal/' . $start_walfile, 'bar' ], + [ 'pg_waldump', '-k', $node->data_dir. '/pg_tde', $node->data_dir . '/pg_wal/' . $start_walfile, 'bar' ], qr/error: could not open file "bar"/, 'end file not found'); command_like( diff --git a/src/bin/pg_waldump/t/004_save_fullpage_encrypted.pl b/src/bin/pg_waldump/t/004_save_fullpage_encrypted.pl index 170bb55fe0b..9b88ec89a9b 100644 --- a/src/bin/pg_waldump/t/004_save_fullpage_encrypted.pl +++ b/src/bin/pg_waldump/t/004_save_fullpage_encrypted.pl @@ -43,7 +43,6 @@ $node->start; $node->safe_psql('postgres', "CREATE EXTENSION IF NOT EXISTS pg_tde;"); $node->safe_psql('postgres', "SELECT pg_tde_add_global_key_provider_file('file-keyring-wal','/tmp/pg_tde_test_keyring-wal.per');");; $node->safe_psql('postgres', "SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-wal');"); -$node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();"); $node->append_conf( 'postgresql.conf', q{ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 1b164a3b5a7..c6a91fb4560 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -78,10 +78,8 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; #define XLP_BKP_REMOVABLE 0x0004 /* Replaces a missing contrecord; see CreateOverwriteContrecordRecord */ #define XLP_FIRST_IS_OVERWRITE_CONTRECORD 0x0008 -/* The page is encrypted */ -#define XLP_ENCRYPTED 0x0010 /* All defined flag bits in xlp_info (used for validity checking of header) */ -#define XLP_ALL_FLAGS 
0x001F +#define XLP_ALL_FLAGS 0x000F #define XLogPageHeaderSize(hdr) \ (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) diff --git a/src/include/access/xlog_smgr.h b/src/include/access/xlog_smgr.h index 08875b95d5e..808a07f502f 100644 --- a/src/include/access/xlog_smgr.h +++ b/src/include/access/xlog_smgr.h @@ -8,15 +8,31 @@ /* XLog storage manager interface */ typedef struct XLogSmgr { - ssize_t (*seg_read) (int fd, void *buf, size_t count, off_t offset); + ssize_t (*seg_read) (int fd, void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno, int segSize); - ssize_t (*seg_write) (int fd, const void *buf, size_t count, off_t offset); + ssize_t (*seg_write) (int fd, const void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno); } XLogSmgr; +static inline ssize_t +default_seg_write(int fd, const void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno) +{ + return pg_pwrite(fd, buf, count, offset); +} + +static inline ssize_t +default_seg_read(int fd, void *buf, size_t count, off_t offset, + TimeLineID tli, XLogSegNo segno, int segSize) +{ + return pg_pread(fd, buf, count, offset); +} + /* Default (standard) XLog storage manager */ static const XLogSmgr xlog_smgr_standard = { - .seg_read = pg_pread, - .seg_write = pg_pwrite, + .seg_read = default_seg_read, + .seg_write = default_seg_write, }; extern const XLogSmgr *xlog_smgr;