PG-1294 WAL: encrypt segment ranges

This commit changes the approach to WAL encryption. Instead of
encrypting each WAL page and keeping its header unencrypted with a special
encrypted flag, this change moves encrypted/unencrypted bookkeeping to
WAL internal keys. Now every [WAL] internal key has an additional field
`start_lsn`. The field indicates the first WAL record that was encrypted
with this key. This means everything starting from that LSN is encrypted
with the key until the `start_lsn` of the next key or the end of the WAL. In order to have
unencrypted WAL (when the user sets `pg_tde.wal_encrypt = off`), we insert
a special key with the flag `TDE_KEY_TYPE_WAL_UNENCRYPTED`.
The user can turn WAL encryption on and off, which will generate a new WAL
key with the respective state (`TDE_KEY_TYPE_WAL_ENCRYPTED` or
`TDE_KEY_TYPE_WAL_UNENCRYPTED`). If the GUC `pg_tde.wal_encrypt` was changed,
the server will generate a new WAL key with `start_lsn` set to
`InvalidXLogRecPtr` on start. WAL writer, in turn, will update `start_lsn`
with the actual LSN on the first write since the key creation.

We use the current key _map and _dat files infrastructure along with the
Internal key cache but with some special cases. There might be multiple
internal keys for WAL but only one for the SMGR (relations, indexes etc).
When creating a new WAL key, we write it the same way as the SMGR key, so key
rotation, for example, doesn't require any changes. But reads and `start_lsn`
updates happen directly from/in the _dat file (bypassing _map). This needs revision and
refactoring (along with the _map and _dat files in general), as does the WAL keys
cache, which is currently a simple linked list referencing the actual
internal key cache. That allows changing the WAL key.
pull/209/head
Andrew Pogrebnoi 7 months ago committed by GitHub
parent 8a7a951db4
commit d352e5af41
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      contrib/pg_tde/Makefile
  2. 1
      contrib/pg_tde/Makefile.tools
  3. 8
      contrib/pg_tde/documentation/docs/faq.md
  4. 2
      contrib/pg_tde/documentation/docs/setup.md
  5. 58
      contrib/pg_tde/documentation/docs/wal-encryption.md
  6. 15
      contrib/pg_tde/expected/default_principal_key.out
  7. 36
      contrib/pg_tde/expected/wal_key.out
  8. 36
      contrib/pg_tde/expected/wal_key_1.out
  9. 32
      contrib/pg_tde/expected/wal_key_2.out
  10. 3
      contrib/pg_tde/meson.build
  11. 5
      contrib/pg_tde/pg_tde--1.0-beta2.sql
  12. 25
      contrib/pg_tde/sql/wal_key.sql
  13. 317
      contrib/pg_tde/src/access/pg_tde_tdemap.c
  14. 340
      contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c
  15. 54
      contrib/pg_tde/src/catalog/tde_global_space.c
  16. 5
      contrib/pg_tde/src/catalog/tde_principal_key.c
  17. 6
      contrib/pg_tde/src/common/pg_tde_utils.c
  18. 65
      contrib/pg_tde/src/include/access/pg_tde_tdemap.h
  19. 10
      contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h
  20. 6
      contrib/pg_tde/src/include/catalog/tde_global_space.h
  21. 2
      contrib/pg_tde/src/include/catalog/tde_principal_key.h
  22. 4
      contrib/pg_tde/src/pg_tde.c
  23. 3
      contrib/pg_tde/t/010_wal_encrypt.pl
  24. 2
      contrib/pg_tde/t/expected/010_wal_encrypt.out
  25. 2
      src/backend/access/transam/xlog.c
  26. 4
      src/backend/access/transam/xlogreader.c
  27. 3
      src/backend/access/transam/xlogrecovery.c
  28. 4
      src/backend/replication/walreceiver.c
  29. 1
      src/bin/pg_tde_alter_key_provider/pg_tde_alter_key_provider.c
  30. 4
      src/bin/pg_waldump/Makefile
  31. 20
      src/bin/pg_waldump/pg_waldump.c
  32. 3
      src/bin/pg_waldump/t/003_basic_encrypted.pl
  33. 1
      src/bin/pg_waldump/t/004_save_fullpage_encrypted.pl
  34. 4
      src/include/access/xlog_internal.h
  35. 24
      src/include/access/xlog_smgr.h

@ -24,7 +24,6 @@ vault_v2_test_basic \
alter_index_basic \
merge_join_basic \
tablespace_basic \
wal_key \
relocate
TAP_TESTS = 1
@ -51,7 +50,6 @@ src/keyring/keyring_vault.o \
src/keyring/keyring_kmip.o \
src/keyring/keyring_kmip_ereport.o \
src/keyring/keyring_api.o \
src/catalog/tde_global_space.o \
src/catalog/tde_keyring.o \
src/catalog/tde_keyring_parse_opts.o \
src/catalog/tde_principal_key.o \

@ -3,7 +3,6 @@ TDE_XLOG_OBJS = \
TDE_OBJS = \
src/access/pg_tde_tdemap.frontend \
src/catalog/tde_global_space.frontend \
src/catalog/tde_keyring.frontend \
src/catalog/tde_keyring_parse_opts.frontend \
src/catalog/tde_principal_key.frontend \

@ -85,13 +85,13 @@ The principal key is used to encrypt the internal keys. The principal key is sto
WAL encryption is done globally for the entire database cluster. All modifications to any database within a PostgreSQL cluster are written to the same WAL to maintain data consistency and integrity and ensure that PostgreSQL cluster can be restored to a consistent state. Therefore, WAL is encrypted globally.
When you turn on WAL encryption, `pg_tde` encrypts entire WAL pages except for the header. The header contains a marker if a page is encrypted or not.
When you turn on WAL encryption, `pg_tde` encrypts entire WAL files starting from the first WAL write after the server was started with the encryption turned on.
The same 2-tier approach is used with WAL as with the table data: WAL pages are first encrypted with the internal key. Then the internal key is encrypted with the global principal key.
You can turn WAL encryption on and off so WAL can contain both encrypted and unencrypted pages. The WAL encryption GUC variable influences only writes.
You can turn WAL encryption on and off so WAL can contain both encrypted and unencrypted data. The WAL encryption GUC variable influences only writes.
Whenever the WAL is being read (by the recovery process or tools), the decision on what pages should be decrypted is based solely on the encryption flag of each page.
Whenever the WAL is being read (by the recovery process or tools), the decision on what should be decrypted is based solely on the metadata of WAL encryption keys.
## Should I encrypt all my data?
@ -145,7 +145,7 @@ In `pg_tde`, multi-tenancy is supported via a separate principal key per databas
To control user access to the databases, you can use role-based access control (RBAC).
WAL files are encrypted globally across the entire PostgreSQL cluster using the same encryption key. Users don't interact with WAL files as these are used by the database management system to ensure data integrity and durability.
WAL files are encrypted globally across the entire PostgreSQL cluster using the same encryption keys. Users don't interact with WAL files as these are used by the database management system to ensure data integrity and durability.
## Are my backups safe? Can I restore from them?

@ -133,4 +133,4 @@ Load the `pg_tde` at startup time. The extension requires additional shared memo
## Next steps
[WAL encryption](wal-encryption.md){.md-button}

@ -1,32 +1,15 @@
# WAL encryption configuration (tech preview)
After you [enabled `pg_tde`](#enable-extension) and restarted the Percona Server for PostgreSQL, a principal key and a keyring for WAL are created. Now you need to instruct `pg_tde ` to encrypt WAL files by configuring WAL encryption.
Before turning WAL encryption on, you must first create a principal key for WAL.
Here's how to do it:
Here's what to do:
1. Enable WAL level encryption using the `ALTER SYSTEM SET` command. You need the superuser privileges to run this command:
1. Create the `pg_tde` extension if it does not exist yet:
```sql
ALTER SYSTEM set pg_tde.wal_encrypt = on;
CREATE EXTENSION IF NOT EXISTS pg_tde;
```
2. Restart the server to apply the changes.
* On Debian and Ubuntu:
```sh
sudo systemctl restart postgresql-17
```
* On RHEL and derivatives
```sh
sudo systemctl restart postgresql-17
```
3. We highly recommend you to create your own keyring and rotate the principal key. This is because the default principal key is created from the local keyfile and is stored unencrypted.
Set up the key provider for WAL encryption
2. Set up the key provider for WAL encryption
=== "With KMIP server"
@ -51,11 +34,11 @@ Here's how to do it:
```
SELECT pg_tde_add_key_global_provider_kmip('kmip','127.0.0.1', 5696, '/tmp/server_certificate.pem', '/tmp/client_key_jane_doe.pem');
```
=== "With HashiCorp Vault"
```sql
SELECT pg_tde_add_global_key_provider_vault_v2('provider-name',:'secret_token','url','mount','ca_path');
SELECT pg_tde_add_global_key_provider_vault_v2('provider-name', 'secret_token', 'url', 'mount', 'ca_path');
```
where:
@ -72,19 +55,36 @@ Here's how to do it:
This setup is intended for development and stores the keys unencrypted in the specified data file.
```sql
SELECT pg_tde_add_key_provider_file('provider-name','/path/to/the/keyring/data.file');
SELECT pg_tde_add_global_key_provider_file('provider-name','/path/to/the/keyring/data.file');
```
4. Rotate the principal key for WAL encryption.
3. Create principal key
```sql
SELECT pg_tde_set_server_principal_key('principal-key', 'provider-name');
```
4. Enable WAL level encryption using the `ALTER SYSTEM` command. You need the privileges of the superuser to run this command:
```sql
SELECT pg_tde_set_server_principal_key('new-principal-key', 'provider-name','ensure_new_key');
ALTER SYSTEM SET pg_tde.wal_encrypt = on;
```
The `ensure_new_key` parameter is set to `true` by default. It ensures that a new key must be unique. If set to `false`, an existing principal key will be reused.
5. Restart the server to apply the changes.
* On Debian and Ubuntu:
```sh
sudo systemctl restart postgresql
```
* On RHEL and derivatives
```sh
sudo systemctl restart postgresql-17
```
Now all WAL files are encrypted for both encrypted and unencrypted tables.
Now WAL files start to be encrypted for both encrypted and unencrypted tables.
## Next steps

@ -2,7 +2,7 @@ CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_add_global_key_provider_file('file-provider','/tmp/pg_tde_regression_default_principal_key.per');
pg_tde_add_global_key_provider_file
-------------------------------------
-4
-3
(1 row)
SELECT pg_tde_set_default_principal_key('default-principal-key', 'file-provider', false);
@ -18,9 +18,8 @@ SELECT id, provider_name FROM pg_tde_list_all_global_key_providers();
id | provider_name
----+---------------
-2 | file-keyring2
-3 | file-keyring
-4 | file-provider
(3 rows)
-3 | file-provider
(2 rows)
-- Should fail: no principal key for the database yet
SELECT key_provider_id, key_provider_name, principal_key_name
@ -40,7 +39,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name
FROM pg_tde_principal_key_info();
key_provider_id | key_provider_name | principal_key_name
-----------------+-------------------+-----------------------
-4 | file-provider | default-principal-key
-3 | file-provider | default-principal-key
(1 row)
CREATE DATABASE regress_pg_tde_other;
@ -63,7 +62,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name
FROM pg_tde_principal_key_info();
key_provider_id | key_provider_name | principal_key_name
-----------------+-------------------+-----------------------
-4 | file-provider | default-principal-key
-3 | file-provider | default-principal-key
(1 row)
\c regression_pg_tde
@ -77,7 +76,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name
FROM pg_tde_principal_key_info();
key_provider_id | key_provider_name | principal_key_name
-----------------+-------------------+---------------------------
-4 | file-provider | new-default-principal-key
-3 | file-provider | new-default-principal-key
(1 row)
\c regress_pg_tde_other
@ -85,7 +84,7 @@ SELECT key_provider_id, key_provider_name, principal_key_name
FROM pg_tde_principal_key_info();
key_provider_id | key_provider_name | principal_key_name
-----------------+-------------------+---------------------------
-4 | file-provider | new-default-principal-key
-3 | file-provider | new-default-principal-key
(1 row)
DROP TABLE test_enc;

@ -1,36 +0,0 @@
-- basic tests for pg_tde_create_wal_key
-- doesn't test actual wal encryption, as that requires a server restart,
-- only sanity checks for the key creation
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per');
pg_tde_add_global_key_provider_file
-------------------------------------
-3
(1 row)
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
-- db local principal key with global provider
SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true);
ERROR: failed to create principal key: already exists
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring');
pg_tde_set_server_principal_key
---------------------------------
t
(1 row)
-- and now it should work!
SELECT pg_tde_create_wal_key();
pg_tde_create_wal_key
-----------------------
t
(1 row)
-- and now it shouldn't create a new one!
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
DROP EXTENSION pg_tde;

@ -1,36 +0,0 @@
-- basic tests for pg_tde_create_wal_key
-- doesn't test actual wal encryption, as that requires a server restart,
-- only sanity checks for the key creation
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per');
pg_tde_add_global_key_provider_file
-------------------------------------
-1
(1 row)
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
-- db local principal key with global provider
SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true);
ERROR: failed to create principal key: already exists
SELECT pg_tde_create_wal_key();
ERROR: failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted tables.
SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring');
pg_tde_set_server_principal_key
---------------------------------
t
(1 row)
-- and now it should work!
SELECT pg_tde_create_wal_key();
pg_tde_create_wal_key
-----------------------
t
(1 row)
-- and now it shouldn't create a new one!
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
DROP EXTENSION pg_tde;

@ -1,32 +0,0 @@
-- basic tests for pg_tde_create_wal_key
-- doesn't test actual wal encryption, as that requires a server restart,
-- only sanity checks for the key creation
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per');
pg_tde_add_global_key_provider_file
-------------------------------------
-2
(1 row)
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
-- db local principal key with global provider
SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true);
ERROR: failed to create principal key: already exists
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring');
pg_tde_set_server_principal_key
---------------------------------
t
(1 row)
-- and now it should work!
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
-- and now it shouldn't create a new one!
SELECT pg_tde_create_wal_key();
ERROR: WAL key already exists.
DROP EXTENSION pg_tde;

@ -34,7 +34,6 @@ pg_tde_sources = files(
'src/smgr/pg_tde_smgr.c',
'src/catalog/tde_global_space.c',
'src/catalog/tde_keyring.c',
'src/catalog/tde_keyring_parse_opts.c',
'src/catalog/tde_principal_key.c',
@ -108,7 +107,6 @@ sql_tests = [
'cache_alloc',
'update_basic',
'key_provider',
'wal_key',
'relocate',
]
@ -172,7 +170,6 @@ tests += {
tde_decrypt_sources = files(
'src/access/pg_tde_tdemap.c',
'src/access/pg_tde_xlog_encrypt.c',
'src/catalog/tde_global_space.c',
'src/catalog/tde_keyring.c',
'src/catalog/tde_keyring_parse_opts.c',
'src/catalog/tde_principal_key.c',

@ -481,11 +481,6 @@ RETURNS boolean
LANGUAGE C
AS 'MODULE_PATHNAME';
CREATE FUNCTION pg_tde_create_wal_key()
RETURNS boolean
LANGUAGE C
AS 'MODULE_PATHNAME';
CREATE FUNCTION pg_tde_extension_initialize()
RETURNS VOID
LANGUAGE C

@ -1,25 +0,0 @@
-- basic tests for pg_tde_create_wal_key
-- doesn't test actual wal encryption, as that requires a server restart,
-- only sanity checks for the key creation
CREATE EXTENSION IF NOT EXISTS pg_tde;
SELECT pg_tde_create_wal_key();
SELECT pg_tde_add_global_key_provider_file('file-keyring','/tmp/pg_tde_test_keyring.per');
SELECT pg_tde_create_wal_key();
-- db local principal key with global provider
SELECT pg_tde_set_global_principal_key('test-db-principal-key', 'file-keyring', true);
SELECT pg_tde_create_wal_key();
SELECT pg_tde_set_server_principal_key('test-db-principal-key', 'file-keyring');
-- and now it should work!
SELECT pg_tde_create_wal_key();
-- and now it shouldn't create a new one!
SELECT pg_tde_create_wal_key();
DROP EXTENSION pg_tde;

@ -25,6 +25,7 @@
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog.h"
#include "catalog/tde_global_space.h"
#include "catalog/tde_principal_key.h"
#include "encryption/enc_aes.h"
#include "encryption/enc_tde.h"
@ -61,6 +62,8 @@
#define MAP_ENTRY_SIZE sizeof(TDEMapEntry)
#define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader)
#define MaxXLogRecPtr (~(XLogRecPtr)0)
typedef struct TDEFileHeader
{
int32 file_version;
@ -116,6 +119,10 @@ RelKeyCache tde_rel_key_cache = {
.cap = 0,
};
static WALKeyCacheRec *tde_wal_key_cache = NULL;
static WALKeyCacheRec *tde_wal_key_last_rec = NULL;
static int32 pg_tde_process_map_entry(const RelFileLocator *rlocator, uint32 key_type, char *db_map_path, off_t *offset, bool should_delete);
static InternalKey *pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEPrincipalKey *principal_key);
static InternalKey *tde_decrypt_rel_key(TDEPrincipalKey *principal_key, InternalKey *enc_rel_key_data, Oid dbOid);
@ -125,18 +132,7 @@ static bool pg_tde_read_one_map_entry(int fd, const RelFileLocator *rlocator, in
static InternalKey *pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEPrincipalKey *principal_key);
static int pg_tde_open_file(char *tde_filename, TDEPrincipalKeyInfo *principal_key_info, bool update_header, int fileFlags, bool *is_new_file, off_t *curr_pos);
static InternalKey *pg_tde_get_key_from_cache(const RelFileLocator *rlocator, uint32 key_type);
#define PG_TDE_MAP_FILENAME "pg_tde_%d_map"
#define PG_TDE_KEYDATA_FILENAME "pg_tde_%d_dat"
static inline void
pg_tde_set_db_file_paths(Oid dbOid, char *map_path, char *keydata_path)
{
if (map_path)
join_path_components(map_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_MAP_FILENAME, dbOid));
if (keydata_path)
join_path_components(keydata_path, pg_tde_get_tde_data_dir(), psprintf(PG_TDE_KEYDATA_FILENAME, dbOid));
}
static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn);
#ifndef FRONTEND
@ -161,12 +157,6 @@ pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator)
return pg_tde_create_key_map_entry(&newrlocator->locator, TDE_KEY_TYPE_SMGR);
}
InternalKey *
pg_tde_create_global_key(const RelFileLocator *newrlocator)
{
return pg_tde_create_key_map_entry(newrlocator, TDE_KEY_TYPE_GLOBAL);
}
InternalKey *
pg_tde_create_heap_basic_key(const RelFileLocator *newrlocator)
{
@ -235,6 +225,7 @@ static void
pg_tde_generate_internal_key(InternalKey *int_key, uint32 entry_type)
{
int_key->rel_type = entry_type;
int_key->start_lsn = InvalidXLogRecPtr;
int_key->ctx = NULL;
if (!RAND_bytes(int_key->key, INTERNAL_KEY_LEN))
@ -256,6 +247,41 @@ tde_sprint_key(InternalKey *k)
return buf;
}
/*
 * Generate a fresh internal key for WAL and append it to the _dat file. The
 * unencrypted key is not put into the cache; it is handed back to the caller
 * through `rel_key_data`.
 *
 * WAL gets a dedicated function because it is called during recovery
 * (start), so there should be no XLog records, acquired locks, or reads from
 * the cache. The key is always created with start_lsn = InvalidXLogRecPtr,
 * which is replaced with the actual LSN by the first WAL write.
 */
void
pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 entry_type)
{
	TDEPrincipalKey *pkey = get_principal_key_from_keyring(newrlocator->dbOid, false);
	InternalKey *encrypted_key;

	if (pkey == NULL)
	{
		ereport(ERROR,
				(errmsg("failed to retrieve principal key. Create one using pg_tde_set_principal_key before using encrypted WAL.")));
		return;
	}

	/* TODO: no need in generating key if TDE_KEY_TYPE_WAL_UNENCRYPTED */
	pg_tde_generate_internal_key(rel_key_data, TDE_KEY_TYPE_GLOBAL | entry_type);

	/* Only the encrypted form of the key goes to the key map data file. */
	encrypted_key = tde_encrypt_rel_key(pkey, rel_key_data, newrlocator->dbOid);
	pg_tde_write_key_map_entry(newrlocator, encrypted_key, &pkey->keyInfo);

	pfree(encrypted_key);
}
/*
* Encrypts a given key and returns the encrypted one.
*/
@ -930,6 +956,85 @@ pg_tde_move_rel_key(const RelFileLocator *newrlocator, const RelFileLocator *old
pfree(enc_key);
}
/* It's called by seg_write inside crit section so no pallocs, hence
* needs keyfile_path
*/
void
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
{
LWLock *lock_pk = tde_lwlock_enc_keys();
int fd = -1;
off_t write_pos,
last_key_idx,
prev_key_pos;
InternalKey prev_key;
fd = BasicOpenFile(keyfile_path, O_RDWR | PG_BINARY);
if (fd < 0)
{
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open tde file \"%s\": %m",
keyfile_path)));
}
last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN) - 1;
write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * INTERNAL_KEY_DAT_LEN) + offsetof(InternalKey, start_lsn);
LWLockAcquire(lock_pk, LW_EXCLUSIVE);
/* TODO: pgstat_report_wait_start / pgstat_report_wait_end */
if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
{
/* TODO: what now? File is corrupted */
LWLockRelease(lock_pk);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write tde key data file: %m")));
}
/*
* If the last key overlaps with the previous, then invalidate the
* previous one. This may (and will) happen on replicas because it
* re-reads primary's data from the beginning of the segment on restart.
*/
if (last_key_idx > 0)
{
prev_key_pos = TDE_FILE_HEADER_SIZE + ((last_key_idx - 1) * INTERNAL_KEY_DAT_LEN);
if (pg_pread(fd, &prev_key, INTERNAL_KEY_DAT_LEN, prev_key_pos) != INTERNAL_KEY_DAT_LEN)
{
LWLockRelease(lock_pk);
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not read previous WAL key: %m")));
}
if (prev_key.start_lsn >= lsn)
{
WALKeySetInvalid(&prev_key);
if (pg_pwrite(fd, &prev_key, INTERNAL_KEY_DAT_LEN, prev_key_pos) != INTERNAL_KEY_DAT_LEN)
{
LWLockRelease(lock_pk);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write invalidated key: %m")));
}
}
}
if (pg_fsync(fd) != 0)
{
LWLockRelease(lock_pk);
ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file: %m")));
}
LWLockRelease(lock_pk);
close(fd);
}
#endif /* !FRONTEND */
/*
@ -1431,6 +1536,182 @@ pg_tde_get_key_from_cache(const RelFileLocator *rlocator, uint32 key_type)
return NULL;
}
/*
 * Returns the last (most recently added) record of the WAL key cache list,
 * or NULL if no WAL key has been added to the cache yet.
 */
WALKeyCacheRec *
pg_tde_get_last_wal_key(void)
{
return tde_wal_key_last_rec;
}
/*
 * Returns the head (first added record) of the WAL key cache list, or NULL
 * if no WAL key has been added to the cache yet.
 */
WALKeyCacheRec *
pg_tde_get_wal_cache_keys(void)
{
return tde_wal_key_cache;
}
/*
 * Reads the last (most recent) WAL key from the key data file and returns it
 * decrypted.
 *
 * Returns NULL when there is no principal key, when the file holds no keys
 * yet, or when the key could not be read. The key data file is closed on
 * every path that opened it.
 */
InternalKey *
pg_tde_read_last_wal_key(void)
{
	RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	char		db_keydata_path[MAXPGPATH] = {0};
	off_t		read_pos = 0;
	LWLock	   *lock_pk = tde_lwlock_enc_keys();
	TDEPrincipalKey *principal_key;
	int			fd = -1;
	int			file_idx = 0;
	bool		is_new;
	InternalKey *enc_rel_key_data;
	InternalKey *rel_key_data = NULL;
	off_t		fsize;

	LWLockAcquire(lock_pk, LW_EXCLUSIVE);
	principal_key = GetPrincipalKey(rlocator.dbOid, LW_EXCLUSIVE);
	if (principal_key == NULL)
	{
		LWLockRelease(lock_pk);
		elog(DEBUG1, "init WAL encryption: no principal key");
		return NULL;
	}

	pg_tde_set_db_file_paths(rlocator.dbOid, NULL, db_keydata_path);
	fd = pg_tde_open_file(db_keydata_path, &principal_key->keyInfo, false, O_RDONLY, &is_new, &read_pos);

	fsize = lseek(fd, 0, SEEK_END);

	/* A file consisting of the header only has no keys: leave the result NULL */
	if (fsize != TDE_FILE_HEADER_SIZE)
	{
		file_idx = ((fsize - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN) - 1;
		enc_rel_key_data = pg_tde_read_one_keydata(fd, file_idx, principal_key);
		if (enc_rel_key_data)
		{
			rel_key_data = tde_decrypt_rel_key(principal_key, enc_rel_key_data, rlocator.dbOid);
			/* the encrypted copy is no longer needed once decrypted */
			pfree(enc_rel_key_data);
		}
	}

	LWLockRelease(lock_pk);
	close(fd);

	return rel_key_data;
}
/*
 * Fetches WAL keys from disk and adds them to the WAL cache.
 *
 * Only valid keys whose start_lsn has already been set and is not below
 * `start_lsn` are cached. Returns the first cache record added by this call.
 * Returns NULL if there is no principal key or no key qualified.
 */
WALKeyCacheRec *
pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
{
	RelFileLocator rlocator = GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	char		db_keydata_path[MAXPGPATH] = {0};
	off_t		read_pos = 0;
	LWLock	   *lock_pk = tde_lwlock_enc_keys();
	TDEPrincipalKey *principal_key;
	int			fd = -1;
	InternalKey *enc_rel_key_data,
			   *rel_key_data,
			   *cached_key;
	int			keys_count;
	WALKeyCacheRec *wal_rec,
			   *return_wal_rec = NULL;
	bool		new_file;

	LWLockAcquire(lock_pk, LW_SHARED);
	principal_key = GetPrincipalKey(rlocator.dbOid, LW_SHARED);
	if (principal_key == NULL)
	{
		LWLockRelease(lock_pk);
		elog(DEBUG1, "fetch WAL keys: no principal key");
		return NULL;
	}

	pg_tde_set_db_file_paths(rlocator.dbOid, NULL, db_keydata_path);

	fd = pg_tde_open_file(db_keydata_path, &principal_key->keyInfo, false, O_RDONLY, &new_file, &read_pos);

	keys_count = (lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / INTERNAL_KEY_DAT_LEN;

	/*
	 * If there are no keys, return a fake one (with the range 0-infinity) so
	 * the reader won't try to check the disk all the time. This is for the
	 * walsender in case WAL is unencrypted and never was.
	 */
	if (keys_count == 0)
	{
		InternalKey stub_key = {
			.start_lsn = InvalidXLogRecPtr,
		};

		cached_key = pg_tde_put_key_into_cache(&rlocator, &stub_key);
		wal_rec = pg_tde_add_wal_key_to_cache(cached_key, InvalidXLogRecPtr);

		LWLockRelease(lock_pk);
		close(fd);
		return wal_rec;
	}

	for (int file_idx = 0; file_idx < keys_count; file_idx++)
	{
		enc_rel_key_data = pg_tde_read_one_keydata(fd, file_idx, principal_key);

		/*
		 * pg_tde_read_last_wal_key() treats a NULL result from the reader as
		 * possible, so do the same here instead of dereferencing it. Stop
		 * scanning: an entry that can't be read leaves nothing reliable to
		 * build the following key ranges from.
		 */
		if (enc_rel_key_data == NULL)
			break;

		/*
		 * Skip new (just created but not updated by write) and invalid keys
		 */
		if (enc_rel_key_data->start_lsn != InvalidXLogRecPtr &&
			WALKeyIsValid(enc_rel_key_data) &&
			enc_rel_key_data->start_lsn >= start_lsn)
		{
			rel_key_data = tde_decrypt_rel_key(principal_key, enc_rel_key_data, rlocator.dbOid);
			cached_key = pg_tde_put_key_into_cache(&rlocator, rel_key_data);
			pfree(rel_key_data);

			wal_rec = pg_tde_add_wal_key_to_cache(cached_key, enc_rel_key_data->start_lsn);
			if (!return_wal_rec)
				return_wal_rec = wal_rec;
		}
		pfree(enc_rel_key_data);
	}

	LWLockRelease(lock_pk);
	close(fd);

	return return_wal_rec;
}
/*
 * Appends a new record for `cached_key` to the tail of the WAL key cache
 * list and returns it.
 *
 * The new record covers [start_lsn, MaxXLogRecPtr]. The range of the record
 * that used to be the tail is closed right before the new start_lsn. In the
 * backend the record is allocated in TopMemoryContext.
 */
static WALKeyCacheRec *
pg_tde_add_wal_key_to_cache(InternalKey *cached_key, XLogRecPtr start_lsn)
{
	WALKeyCacheRec *new_rec;
#ifndef FRONTEND
	MemoryContext prev_ctx = MemoryContextSwitchTo(TopMemoryContext);
#endif

	new_rec = (WALKeyCacheRec *) palloc(sizeof(WALKeyCacheRec));
	memset(new_rec, 0, sizeof(*new_rec));

#ifndef FRONTEND
	MemoryContextSwitchTo(prev_ctx);
#endif

	new_rec->key = cached_key;
	new_rec->start_lsn = start_lsn;
	new_rec->end_lsn = MaxXLogRecPtr;

	if (tde_wal_key_last_rec != NULL)
	{
		/* the previous tail ends one position before the new key starts */
		tde_wal_key_last_rec->end_lsn = start_lsn - 1;
		tde_wal_key_last_rec->next = new_rec;
	}
	else
	{
		/* first record ever: it is the head of the list as well */
		tde_wal_key_cache = new_rec;
	}

	tde_wal_key_last_rec = new_rec;

	return new_rec;
}
/* Add key to cache. See comments on `RelKeyCache`.
*
* TODO: add tests.

@ -15,6 +15,7 @@
#ifdef PERCONA_EXT
#include "pg_tde.h"
#include "pg_tde_defines.h"
#include "pg_tde_guc.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xloginsert.h"
@ -23,53 +24,47 @@
#include "utils/guc.h"
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog_encrypt.h"
#include "catalog/tde_global_space.h"
#include "encryption/enc_tde.h"
#ifdef FRONTEND
#include "pg_tde_fe.h"
#else
#include "port/atomics.h"
#endif
#include "pg_tde_guc.h"
static const XLogSmgr tde_xlog_smgr = {
.seg_read = tdeheap_xlog_seg_read,
.seg_write = tdeheap_xlog_seg_write,
};
static XLogPageHeaderData DecryptCurrentPageHrd;
static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char *iv_prefix);
#ifndef FRONTEND
/* GUC */
static XLogPageHeaderData EncryptCurrentPageHrd;
static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset);
static char *TDEXLogEncryptBuf = NULL;
static int XLOGChooseNumBuffers(void);
static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count,
off_t offset, TimeLineID tli,
XLogSegNo segno);
Datum pg_tde_create_wal_key(PG_FUNCTION_ARGS);
/*
 * State of WAL encryption; TDEXLogShmemInit() obtains it from
 * ShmemInitStruct().
 */
typedef struct EncryptionStateData
{
/* Buffer the WAL writer uses as the encryption output (enc_buff) */
char *segBuf;
/* NOTE(review): presumably the path of the WAL key data file, kept here so
 * the write path doesn't have to build it -- confirm against the writer */
char db_keydata_path[MAXPGPATH];
pg_atomic_uint64 enc_key_lsn; /* to sync with readers */
} EncryptionStateData;
PG_FUNCTION_INFO_V1(pg_tde_create_wal_key);
static EncryptionStateData *EncryptionState = NULL;
Datum
pg_tde_create_wal_key(PG_FUNCTION_ARGS)
/* TODO: can be swapped out to the disk */
static InternalKey EncryptionKey =
{
InternalKey *key = GetRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), TDE_KEY_TYPE_GLOBAL, true);
if (key != NULL)
{
ereport(ERROR,
(errmsg("WAL key already exists.")));
PG_RETURN_BOOL(false);
}
.rel_type = MAP_ENTRY_EMPTY,
.start_lsn = InvalidXLogRecPtr,
.ctx = NULL,
};
pg_tde_create_global_key(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID));
PG_RETURN_BOOL(true);
}
static int XLOGChooseNumBuffers(void);
/* This can't be a GUC check hook, because that would run too soon during startup */
void
@ -82,7 +77,7 @@ TDEXlogCheckSane(void)
if (key == NULL)
{
ereport(ERROR,
(errmsg("WAL encryption can only be enabled with a properly configured key. Disable pg_tde.wal_encrypt and create one using pg_tde_create_wal_key() before enabling it.")));
(errmsg("WAL encryption can only be enabled with a properly configured principal key. Disable pg_tde.wal_encrypt and create one using pg_tde_set_server_principal_key() or pg_tde_set_global_principal_key() before enabling it.")));
}
}
}
@ -112,6 +107,17 @@ TDEXLogEncryptBuffSize(void)
return (Size) XLOG_BLCKSZ * xbuffers;
}
/*
 * Size of the shared-memory area used for WAL encryption: the
 * EncryptionStateData header followed by the encryption buffer.
 *
 * TDEXLogShmemInit() places the buffer at an offset of
 * TYPEALIGN(PG_IO_ALIGN_SIZE, sizeof(EncryptionStateData)) from the start of
 * the area, so the header is what has to be rounded up here. Rounding the
 * buffer size instead leaves the area too small, and the end of the buffer
 * would fall past the allocation.
 */
Size
TDEXLogEncryptStateSize(void)
{
	Size		sz;

	sz = TYPEALIGN(PG_IO_ALIGN_SIZE, sizeof(EncryptionStateData));
	sz = add_size(sz, TDEXLogEncryptBuffSize());

	return MAXALIGN(sz);
}
/*
* Alloc memory for the encryption buffer.
*
@ -126,121 +132,105 @@ void
TDEXLogShmemInit(void)
{
bool foundBuf;
char *allocptr;
if (EncryptXLog)
{
TDEXLogEncryptBuf = (char *)
TYPEALIGN(PG_IO_ALIGN_SIZE,
ShmemInitStruct("TDE XLog Encryption Buffer",
XLOG_TDE_ENC_BUFF_ALIGNED_SIZE,
&foundBuf));
/*
* TODO: we need enc_key_lsn all the time but encrypt buffer only when
* EncryptXLog is on
*/
EncryptionState = (EncryptionStateData *)
ShmemInitStruct("TDE XLog Encryption State",
TDEXLogEncryptStateSize(),
&foundBuf);
elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
}
allocptr = ((char *) EncryptionState) + TYPEALIGN(PG_IO_ALIGN_SIZE, sizeof(EncryptionStateData));
EncryptionState->segBuf = allocptr;
pg_atomic_init_u64(&EncryptionState->enc_key_lsn, 0);
elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", TDEXLogEncryptStateSize());
}
/*
* Encrypt XLog page(s) from the buf and write to the segment file.
*/
static ssize_t
TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset)
TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno)
{
char iv_prefix[16] = {0,};
size_t data_size = 0;
XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd;
XLogPageHeader enc_buf_page = NULL;
InternalKey *key = GetTdeGlobaleRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID));
off_t enc_off;
size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
uint32 iv_ctr = 0;
InternalKey *key = &EncryptionKey;
char *enc_buff = EncryptionState->segBuf;
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "write encrypted WAL, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset);
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X",
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn));
#endif
/*
* Go through the buf page-by-page and encrypt them. We may start or
* finish writing from/in the middle of the page (walsender or
* `full_page_writes = off`). So preserve a page header for the IV init
* data.
*
* TODO: check if walsender restarts form the beggining of the page in
* case of the crash.
*/
for (enc_off = 0; enc_off < count;)
{
data_size = Min(page_size, count);
if (page_size == XLOG_BLCKSZ)
{
memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD);
/*
* Need to use a separate buf for the encryption so the page
* remains non-crypted in the XLog buf (XLogInsert has to have
* access to records' lsn).
*/
enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off);
memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) XLogPageHeaderSize(curr_page_hdr));
enc_buf_page->xlp_info |= XLP_ENCRYPTED;
enc_off += XLogPageHeaderSize(curr_page_hdr);
data_size -= XLogPageHeaderSize(curr_page_hdr);
/* it's a beginning of the page */
iv_ctr = 0;
}
else
{
/* we're in the middle of the page */
iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
}
if (data_size + enc_off > count)
{
data_size = count - enc_off;
}
/*
* The page is zeroed (no data), no sense to encrypt. This may happen
* when base_backup or other requests XLOG SWITCH and some pages in
* XLog buffer still not used.
*/
if (curr_page_hdr->xlp_magic == 0)
{
/* ensure all the page is {0} */
Assert((*((char *) buf + enc_off) == 0) &&
memcmp((char *) buf + enc_off, (char *) buf + enc_off + 1, data_size - 1) == 0);
enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off);
memcpy((char *) enc_buf_page, (char *) buf + enc_off, data_size);
}
else
{
SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size,
TDEXLogEncryptBuf + enc_off, key);
}
page_size = XLOG_BLCKSZ;
enc_off += data_size;
}
SetXLogPageIVPrefix(tli, segno, iv_prefix);
PG_TDE_ENCRYPT_DATA(iv_prefix, offset,
(char *) buf, count,
enc_buff, key);
return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset);
return pg_pwrite(fd, enc_buff, count, offset);
}
#endif /* !FRONTEND */
/*
 * Install the TDE XLog storage manager.
 *
 * In the backend this also prepares the WAL internal key: the last WAL key is
 * read and either reused, or a new key is created when there is no key yet
 * (and encryption is on) or when the last key's encrypted/unencrypted state
 * does not match the current EncryptXLog setting.
 */
void
TDEXLogSmgrInit(void)
{
#ifndef FRONTEND
	/* TODO: move to the separate func, it's not an SMGR init */
	InternalKey *key = pg_tde_read_last_wal_key();
	bool		need_new_key;

	/* TODO: clean-up this mess */
	if (key == NULL)
		need_new_key = EncryptXLog;
	else
		need_new_key =
			((key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED) && !EncryptXLog) ||
			((key->rel_type & TDE_KEY_TYPE_WAL_UNENCRYPTED) && EncryptXLog);

	if (need_new_key)
	{
		pg_tde_create_wal_key(
							  &EncryptionKey, &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
							  (EncryptXLog ? TDE_KEY_TYPE_WAL_ENCRYPTED : TDE_KEY_TYPE_WAL_UNENCRYPTED));
	}
	else if (key != NULL)
	{
		/* the last key still matches the configuration, keep using it */
		EncryptionKey = *key;
		pg_atomic_write_u64(&EncryptionState->enc_key_lsn, EncryptionKey.start_lsn);
	}

	/*
	 * The key read from disk was only needed for the checks and the copy
	 * above; release it on every path, not just when it is reused.
	 */
	if (key != NULL)
		pfree(key);

	pg_tde_set_db_file_paths(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID).dbOid, NULL, EncryptionState->db_keydata_path);
#endif
	SetXLogSmgr(&tde_xlog_smgr);
}
ssize_t
tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset)
tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno)
{
#ifndef FRONTEND
/*
* Set the last (most recent) key's start LSN if not set.
*
* This function is called with WALWriteLock held, so no extra sync is needed.
*/
if (EncryptionKey.rel_type & TDE_KEY_TYPE_GLOBAL &&
pg_atomic_read_u64(&EncryptionState->enc_key_lsn) == 0)
{
XLogRecPtr lsn;
XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, lsn);
pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_keydata_path);
EncryptionKey.start_lsn = lsn;
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, lsn);
}
if (EncryptXLog)
return TDEXLogWriteEncryptedPages(fd, buf, count, offset);
return TDEXLogWriteEncryptedPages(fd, buf, count, offset, tli, segno);
else
#endif
return pg_pwrite(fd, buf, count, offset);
@ -250,81 +240,109 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset)
* Read the XLog pages from the segment file and decrypt if needed.
*/
ssize_t
tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset)
tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize)
{
ssize_t readsz;
char iv_prefix[16] = {0,};
size_t data_size = 0;
XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd;
InternalKey *key = NULL;
size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
off_t dec_off;
uint32 iv_ctr = 0;
WALKeyCacheRec *keys = pg_tde_get_wal_cache_keys();
XLogRecPtr write_key_lsn = 0;
WALKeyCacheRec *curr_key = NULL;
off_t dec_off = 0;
size_t dec_sz = 0;
XLogRecPtr data_start;
XLogRecPtr data_end;
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "read from a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset);
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X",
count, offset, offset, LSN_FORMAT_ARGS(segno));
#endif
readsz = pg_pread(fd, buf, count, offset);
/*
* Read the buf page by page and decypt ecnrypted pages. We may start or
* fihish reading from/in the middle of the page (walreceiver) in such a
* case we should preserve the last read page header for the IV data and
* the encryption state.
*
* TODO: check if walsender/receiver restarts form the beggining of the
* page in case of the crash.
* Read data from disk
*/
for (dec_off = 0; dec_off < readsz;)
readsz = pg_pread(fd, buf, count, offset);
if (!keys)
{
data_size = Min(page_size, readsz);
/* cache is empty, try to read keys from disk */
keys = pg_tde_fetch_wal_keys(0);
}
if (page_size == XLOG_BLCKSZ)
{
memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD);
#ifndef FRONTEND
write_key_lsn = pg_atomic_read_u64(&EncryptionState->enc_key_lsn);
#endif
/* set the flag to "not encrypted" for the walreceiver */
((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED;
if (write_key_lsn != 0)
{
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0);
dec_off += XLogPageHeaderSize(curr_page_hdr);
data_size -= XLogPageHeaderSize(curr_page_hdr);
/* it's a beginning of the page */
iv_ctr = 0;
}
else
{
/* we're in the middle of the page */
iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
}
Assert(last_key);
if ((data_size + dec_off) > readsz)
/* write has generated a new key, need to fetch it */
if (last_key->start_lsn < write_key_lsn)
{
data_size = readsz - dec_off;
pg_tde_fetch_wal_keys(write_key_lsn);
/* in case cache was empty before */
keys = pg_tde_get_wal_cache_keys();
}
}
if (curr_page_hdr->xlp_info & XLP_ENCRYPTED)
SetXLogPageIVPrefix(tli, segno, iv_prefix);
XLogSegNoOffsetToRecPtr(segno, offset, segSize, data_start);
XLogSegNoOffsetToRecPtr(segno, offset + count, segSize, data_end);
/*
* TODO: this is highly inefficient. We should get rid of the linked list and
* search from the last key, as this is what the walsender is using.
*/
curr_key = keys;
while (curr_key)
{
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "WAL key %X/%X-%X/%X, encrypted: %s",
LSN_FORMAT_ARGS(curr_key->start_lsn),
LSN_FORMAT_ARGS(curr_key->end_lsn),
curr_key->key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED ? "yes" : "no");
#endif
if (curr_key->key->start_lsn != InvalidXLogRecPtr &&
(curr_key->key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED))
{
if (key == NULL)
/*
* Check if the key's range overlaps with the buffer's and decrypt
* the part that does.
*/
if (data_start <= curr_key->end_lsn && curr_key->start_lsn <= data_end)
{
key = GetTdeGlobaleRelationKey(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID));
dec_off = XLogSegmentOffset(Max(data_start, curr_key->start_lsn), segSize);
dec_sz = XLogSegmentOffset(Min(data_end, curr_key->end_lsn), segSize) - dec_off;
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X",
dec_off, offset - dec_off, dec_sz, LSN_FORMAT_ARGS(curr_key->key->start_lsn));
#endif
PG_TDE_DECRYPT_DATA(iv_prefix, dec_off,
(char *) buf + (offset - dec_off),
dec_sz, (char *) buf + (offset - dec_off),
curr_key->key);
if (dec_off + dec_sz == offset)
{
break;
}
}
SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
PG_TDE_DECRYPT_DATA(
iv_prefix, iv_ctr,
(char *) buf + dec_off, data_size, (char *) buf + dec_off, key);
}
page_size = XLOG_BLCKSZ;
dec_off += data_size;
curr_key = curr_key->next;
}
return readsz;
}
/* IV: TLI(uint32) + XLogRecPtr(uint64)*/
static void
static inline void
SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char *iv_prefix)
{
iv_prefix[0] = (tli >> 24);

@ -1,54 +0,0 @@
/*-------------------------------------------------------------------------
*
* tde_global_space.c
* Global catalog key management
*
*
* IDENTIFICATION
* src/catalog/tde_global_space.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef PERCONA_EXT
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h"
#include "catalog/tde_global_space.h"
#include "catalog/tde_keyring.h"
#include "common/pg_tde_utils.h"
#ifdef FRONTEND
#include "pg_tde_fe.h"
#endif
#include <unistd.h>
#include <openssl/rand.h>
#include <openssl/err.h>
#include <sys/time.h>
/*
 * Fetch the XLOG_TDE_OID global-space internal key via
 * pg_tde_get_key_from_file() and, when one is returned, put it into the
 * internal key cache.
 */
void
TDEInitGlobalKeys(void)
{
	InternalKey *xlog_key = pg_tde_get_key_from_file(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
													 TDE_KEY_TYPE_GLOBAL, true);

	/* no key available, nothing to cache */
	if (xlog_key == NULL)
		return;

	/*
	 * Internal Key should be in the TopMemoryContext because of SSL
	 * contexts. This context is being initialized by OpenSSL with the
	 * pointer to the encryption context which is valid only for the current
	 * backend. So new backends have to inherit a cached key with NULL SSL
	 * context and any changes to it have to remain local to the backend.
	 * (see
	 * https://github.com/percona-Lab/pg_tde/pull/214#discussion_r1648998317)
	 */
	pg_tde_put_key_into_cache(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), xlog_key);
}
#endif /* PERCONA_EXT */

@ -87,7 +87,6 @@ static void shared_memory_shutdown(int code, Datum arg);
static void principal_key_startup_cleanup(int tde_tbl_count, XLogExtensionInstall *ext_info, bool redo, void *arg);
static void clear_principal_key_cache(Oid databaseId);
static inline dshash_table *get_principal_key_Hash(void);
static TDEPrincipalKey *get_principal_key_from_keyring(Oid dbOid, bool pushToCache);
static TDEPrincipalKey *get_principal_key_from_cache(Oid dbOid);
static bool pg_tde_is_same_principal_key(TDEPrincipalKey *a, TDEPrincipalKey *b);
static void pg_tde_update_global_principal_key_everywhere(TDEPrincipalKey *oldKey, TDEPrincipalKey *newKey);
@ -776,7 +775,7 @@ pg_tde_get_key_info(PG_FUNCTION_ARGS, Oid dbOid)
* Gets the principal key from the keyring and puts it into the cache if the key exists
* Caller should hold an exclusive tde_lwlock_enc_keys lock
*/
static TDEPrincipalKey *
TDEPrincipalKey *
get_principal_key_from_keyring(Oid dbOid, bool pushToCache)
{
GenericKeyring *keyring;
@ -785,7 +784,7 @@ get_principal_key_from_keyring(Oid dbOid, bool pushToCache)
const KeyInfo *keyInfo = NULL;
KeyringReturnCodes keyring_ret;
Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE));
/* Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); */
principalKeyInfo = pg_tde_get_principal_key_info(dbOid);
if (principalKeyInfo == NULL)

@ -134,18 +134,18 @@ get_tde_tables_count(void)
#endif /* !FRONTEND */
static char globalspace_dir[MAXPGPATH] = PG_TDE_DATA_DIR;
static char tde_data_dir[MAXPGPATH] = PG_TDE_DATA_DIR;
void
pg_tde_set_data_dir(const char *dir)
{
Assert(dir != NULL);
strlcpy(globalspace_dir, dir, sizeof(globalspace_dir));
strlcpy(tde_data_dir, dir, sizeof(tde_data_dir));
}
/* returns a pointer to the static data directory buffer; the caller must not free it */
char *
pg_tde_get_tde_data_dir(void)
{
return globalspace_dir;
return tde_data_dir;
}

@ -8,15 +8,21 @@
#ifndef PG_TDE_MAP_H
#define PG_TDE_MAP_H
#include "access/xlog_internal.h"
#include "port.h"
#include "storage/relfilelocator.h"
#include "pg_tde.h"
#include "catalog/tde_principal_key.h"
#include "storage/relfilelocator.h"
#include "common/pg_tde_utils.h"
/* Map entry flags */
#define MAP_ENTRY_EMPTY 0x00
#define TDE_KEY_TYPE_HEAP_BASIC 0x01
#define TDE_KEY_TYPE_SMGR 0x02
#define TDE_KEY_TYPE_GLOBAL 0x04
#define MAP_ENTRY_EMPTY 0x00
#define TDE_KEY_TYPE_HEAP_BASIC 0x01
#define TDE_KEY_TYPE_SMGR 0x02
#define TDE_KEY_TYPE_GLOBAL 0x04
#define TDE_KEY_TYPE_WAL_UNENCRYPTED 0x08
#define TDE_KEY_TYPE_WAL_ENCRYPTED 0x10
#define MAP_ENTRY_VALID (TDE_KEY_TYPE_HEAP_BASIC | TDE_KEY_TYPE_SMGR | TDE_KEY_TYPE_GLOBAL)
typedef struct InternalKey
@ -28,11 +34,19 @@ typedef struct InternalKey
uint8 key[INTERNAL_KEY_LEN];
uint32 rel_type;
XLogRecPtr start_lsn;
void *ctx;
} InternalKey;
#define INTERNAL_KEY_DAT_LEN offsetof(InternalKey, ctx)
/*
 * Clear both WAL state bits (encrypted and unencrypted) of the key's
 * rel_type, which makes WALKeyIsValid() false for it.
 */
#define WALKeySetInvalid(key) \
	((key)->rel_type &= ~(TDE_KEY_TYPE_WAL_ENCRYPTED | TDE_KEY_TYPE_WAL_UNENCRYPTED))
/*
 * True when at least one of the WAL state bits is set in the key's rel_type.
 * Both bits are tested with a single mask so that the argument is evaluated
 * exactly once.
 */
#define WALKeyIsValid(key) \
	(((key)->rel_type & (TDE_KEY_TYPE_WAL_UNENCRYPTED | TDE_KEY_TYPE_WAL_ENCRYPTED)) != 0)
typedef struct XLogRelKey
{
RelFileLocator rlocator;
@ -40,13 +54,52 @@ typedef struct XLogRelKey
TDEPrincipalKeyInfo pkInfo;
} XLogRelKey;
/*
 * WALKeyCacheRec is built on top of the InternalKeys cache. We still don't
 * want key data to be swapped out to the disk (implemented in the InternalKeys
 * cache) but we need extra information and the ability to have and reference
 * a sequence of keys.
 *
 * TODO: For now it's a simple linked list which is no good. So consider having
 * a dedicated WAL keys cache inside some proper data structure.
 */
typedef struct WALKeyCacheRec
{
/* start of the LSN range this key applies to */
XLogRecPtr start_lsn;
/* end of the LSN range; readers test [start_lsn, end_lsn] for overlap with the data being read */
XLogRecPtr end_lsn;
/* the internal key itself (its rel_type tells whether the range is encrypted) */
InternalKey *key;
/* next record in the list, NULL at the end */
struct WALKeyCacheRec *next;
} WALKeyCacheRec;
extern InternalKey *pg_tde_read_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
extern InternalKey *pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator);
extern InternalKey *pg_tde_create_global_key(const RelFileLocator *newrlocator);
extern InternalKey *pg_tde_create_heap_basic_key(const RelFileLocator *newrlocator);
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 flags);
extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, InternalKey *enc_rel_key_data, TDEPrincipalKeyInfo *principal_key_info);
extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator, uint32 key_type);
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, uint32 key_type, off_t offset);
#define PG_TDE_MAP_FILENAME "pg_tde_%d_map"
#define PG_TDE_KEYDATA_FILENAME "pg_tde_%d_dat"
/*
 * Build the paths of the key map and key data files of the given database
 * inside the TDE data directory.
 *
 * dbOid        - database the files belong to
 * map_path     - output buffer for the map file path, or NULL to skip it
 * keydata_path - output buffer for the key data file path, or NULL to skip it
 *
 * The output buffers are filled by join_path_components(). The file names
 * are formatted into local buffers rather than with psprintf(), so nothing
 * is allocated (and nothing leaks) on each call.
 */
static inline void
pg_tde_set_db_file_paths(Oid dbOid, char *map_path, char *keydata_path)
{
	if (map_path)
	{
		/* template length plus room for the formatted 32-bit Oid */
		char		fname[sizeof(PG_TDE_MAP_FILENAME) + 16];

		snprintf(fname, sizeof(fname), PG_TDE_MAP_FILENAME, dbOid);
		join_path_components(map_path, pg_tde_get_tde_data_dir(), fname);
	}

	if (keydata_path)
	{
		/* template length plus room for the formatted 32-bit Oid */
		char		fname[sizeof(PG_TDE_KEYDATA_FILENAME) + 16];

		snprintf(fname, sizeof(fname), PG_TDE_KEYDATA_FILENAME, dbOid);
		join_path_components(keydata_path, pg_tde_get_tde_data_dir(), fname);
	}
}
extern InternalKey *GetRelationKey(RelFileLocator rel, uint32 entry_type, bool no_map_ok);
extern InternalKey *GetSMGRRelationKey(RelFileLocatorBackend rel);
extern InternalKey *GetHeapBaiscRelationKey(RelFileLocator rel);

@ -15,12 +15,14 @@
extern Size TDEXLogEncryptBuffSize(void);
#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE)
extern Size TDEXLogEncryptStateSize(void);
extern void TDEXLogShmemInit(void);
extern ssize_t tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset);
extern ssize_t tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset);
extern ssize_t tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize);
extern ssize_t tdeheap_xlog_seg_write(int fd, const void *buf, size_t count,
off_t offset, TimeLineID tli,
XLogSegNo segno);
extern void TDEXLogSmgrInit(void);
extern void XLogInitGUC(void);

@ -14,9 +14,6 @@
#include "postgres.h"
#include "catalog/pg_tablespace_d.h"
#include "access/pg_tde_tdemap.h"
#include "catalog/tde_principal_key.h"
/*
* Needed for global data (WAL etc) keys identification in caches and storage.
* We take Oids of the sql operators, so there is no overlap with the "real"
@ -31,12 +28,9 @@
_obj_oid \
}
/* Needed for using the same default key for multiple databases */
#define DEFAULT_DATA_TDE_OID DEFAULTTABLESPACE_OID
#define TDEisInGlobalSpace(dbOid) (dbOid == GLOBAL_DATA_TDE_OID)
extern void TDEInitGlobalKeys(void);
#endif /* TDE_GLOBAL_CATALOG_H */

@ -68,4 +68,6 @@ extern bool xl_tde_perform_rotate_key(XLogPrincipalKeyRotate *xlrec);
extern void PrincipalKeyGucInit(void);
extern TDEPrincipalKey *get_principal_key_from_keyring(Oid dbOid, bool pushToCache);
#endif /* PG_TDE_PRINCIPAL_KEY_H */

@ -79,7 +79,7 @@ tde_shmem_request(void)
int required_locks = TdeRequiredLocksCount();
#ifdef PERCONA_EXT
sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
sz = add_size(sz, TDEXLogEncryptStateSize());
#endif
if (prev_shmem_request_hook)
@ -99,8 +99,6 @@ tde_shmem_startup(void)
AesInit();
#ifdef PERCONA_EXT
TDEInitGlobalKeys();
TDEXLogShmemInit();
TDEXLogSmgrInit();

@ -35,9 +35,6 @@ PGTDE::append_to_file($stdout);
$stdout = $node->safe_psql('postgres', "SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-010');", extra_params => ['-a']);
PGTDE::append_to_file($stdout);
$stdout = $node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();", extra_params => ['-a']);
PGTDE::append_to_file($stdout);
# Restart the server, it should work with encryption now
PGTDE::append_to_file("-- server restart with wal encryption");
$node->stop();

@ -3,8 +3,6 @@ SELECT pg_tde_add_global_key_provider_file('file-keyring-010','/tmp/pg_tde_test_
-1
SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-010');
t
SELECT pg_tde_create_wal_key();
t
-- server restart with wal encryption
SHOW pg_tde.wal_encrypt;
on

@ -2446,7 +2446,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
INSTR_TIME_SET_ZERO(start);
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset);
written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset, tli, openLogSegNo);
pgstat_report_wait_end();
/*

@ -1573,7 +1573,9 @@ WALRead(XLogReaderState *state,
/* Reset errno first; eases reporting non-errno-affecting errors */
errno = 0;
readbytes = xlog_smgr->seg_read(state->seg.ws_file, p, segbytes, (off_t) startoff);
readbytes = xlog_smgr->seg_read(state->seg.ws_file, p, segbytes,
(off_t) startoff, state->seg.ws_tli,
state->seg.ws_segno, state->segcxt.ws_segsize);
#ifndef FRONTEND
pgstat_report_wait_end();

@ -3398,7 +3398,8 @@ retry:
readOff = targetPageOff;
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
r = xlog_smgr->seg_read(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
r = xlog_smgr->seg_read(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff,
curFileTLI, readSegNo, wal_segment_size);
if (r != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];

@ -942,7 +942,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
/* OK to write the logs */
errno = 0;
byteswritten = xlog_smgr->seg_write(recvFile, buf, segbytes, (off_t) startoff);
byteswritten = xlog_smgr->seg_write(recvFile, buf, segbytes,
(off_t) startoff, recvFileTLI,
recvSegNo);
if (byteswritten <= 0)
{
char xlogfname[MAXFNAMELEN];

@ -1,6 +1,7 @@
#include "postgres_fe.h"
#include "pg_tde.h"
#include "catalog/tde_keyring.h"
#include "catalog/tde_global_space.h"
#include "common/logging.h"

@ -22,8 +22,8 @@ ifeq ($(enable_percona_ext),yes)
OBJS += \
$(top_srcdir)/src/fe_utils/simple_list.o \
$(top_builddir)/src/libtde/libtde.a \
$(top_builddir)/src/libtde/libtdexlog.a
$(top_builddir)/src/libtde/libtdexlog.a \
$(top_builddir)/src/libtde/libtde.a
override CPPFLAGS := -I$(top_srcdir)/contrib/pg_tde/src/include -I$(top_srcdir)/contrib/pg_tde/src/libkmip/libkmip/include $(CPPFLAGS)
endif

@ -250,7 +250,26 @@ search_directory(const char *directory, const char *fname)
PGAlignedXLogBlock buf;
int r;
#ifdef PERCONA_EXT
off_t fsize;
TimeLineID tli;
XLogSegNo segno;
/*
* WalSegSz is extracted from the first page header, but that header
* might be encrypted, and we need to know the segment size to decrypt it
* (it's required for encryption offset calculations). So we get the
* segment size from the file's actual size.
* XLogLongPageHeaderData->xlp_seg_size is there "just as a
* cross-check" anyway.
*/
fsize = lseek(fd, 0, SEEK_END);
XLogFromFileName(fname, &tli, &segno, fsize);
r = xlog_smgr->seg_read(fd, buf.data, XLOG_BLCKSZ, 0, tli, segno, fsize);
#else
r = read(fd, buf.data, XLOG_BLCKSZ);
#endif
if (r == XLOG_BLCKSZ)
{
XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
@ -1136,7 +1155,6 @@ main(int argc, char **argv)
if (kringdir != NULL)
{
pg_tde_fe_init(kringdir);
TDEInitGlobalKeys();
TDEXLogSmgrInit();
}
#endif

@ -29,7 +29,6 @@ $node->start;
$node->safe_psql('postgres', "CREATE EXTENSION IF NOT EXISTS pg_tde;");
$node->safe_psql('postgres', "SELECT pg_tde_add_global_key_provider_file('file-keyring-wal','/tmp/pg_tde_test_keyring-wal.per');");;
$node->safe_psql('postgres', "SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-wal');");
$node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();");
$node->append_conf(
'postgresql.conf', q{
@ -132,7 +131,7 @@ command_fails_like(
command_like([ 'pg_waldump', '-k', $node->data_dir. '/pg_tde', $node->data_dir . '/pg_wal/' . $start_walfile ],
qr/./, 'runs with start segment specified');
command_fails_like(
[ 'pg_waldump', $node->data_dir . '/pg_wal/' . $start_walfile, 'bar' ],
[ 'pg_waldump', '-k', $node->data_dir. '/pg_tde', $node->data_dir . '/pg_wal/' . $start_walfile, 'bar' ],
qr/error: could not open file "bar"/,
'end file not found');
command_like(

@ -43,7 +43,6 @@ $node->start;
$node->safe_psql('postgres', "CREATE EXTENSION IF NOT EXISTS pg_tde;");
$node->safe_psql('postgres', "SELECT pg_tde_add_global_key_provider_file('file-keyring-wal','/tmp/pg_tde_test_keyring-wal.per');");;
$node->safe_psql('postgres', "SELECT pg_tde_set_server_principal_key('global-db-principal-key', 'file-keyring-wal');");
$node->safe_psql('postgres', "SELECT pg_tde_create_wal_key();");
$node->append_conf(
'postgresql.conf', q{

@ -78,10 +78,8 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
#define XLP_BKP_REMOVABLE 0x0004
/* Replaces a missing contrecord; see CreateOverwriteContrecordRecord */
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD 0x0008
/* The page is encrypted */
#define XLP_ENCRYPTED 0x0010
/* All defined flag bits in xlp_info (used for validity checking of header) */
#define XLP_ALL_FLAGS 0x001F
#define XLP_ALL_FLAGS 0x000F
#define XLogPageHeaderSize(hdr) \
(((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD)

@ -8,15 +8,31 @@
/* XLog storage manager interface */
typedef struct XLogSmgr
{
ssize_t (*seg_read) (int fd, void *buf, size_t count, off_t offset);
ssize_t (*seg_read) (int fd, void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize);
ssize_t (*seg_write) (int fd, const void *buf, size_t count, off_t offset);
ssize_t (*seg_write) (int fd, const void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno);
} XLogSmgr;
/*
 * Standard segment write callback: a plain pg_pwrite() of count bytes from
 * buf at the given offset. tli and segno are accepted only to match the
 * XLogSmgr seg_write signature and are not used.
 */
static inline ssize_t
default_seg_write(int fd, const void *buf, size_t count, off_t offset,
				  TimeLineID tli, XLogSegNo segno)
{
	ssize_t		nwritten = pg_pwrite(fd, buf, count, offset);

	return nwritten;
}
/*
 * Standard segment read callback: a plain pg_pread() of up to count bytes
 * into buf from the given offset. tli, segno and segSize are accepted only
 * to match the XLogSmgr seg_read signature and are not used.
 */
static inline ssize_t
default_seg_read(int fd, void *buf, size_t count, off_t offset,
				 TimeLineID tli, XLogSegNo segno, int segSize)
{
	ssize_t		nread = pg_pread(fd, buf, count, offset);

	return nread;
}
/* Default (standard) XLog storage manager */
static const XLogSmgr xlog_smgr_standard = {
.seg_read = pg_pread,
.seg_write = pg_pwrite,
.seg_read = default_seg_read,
.seg_write = default_seg_write,
};
extern const XLogSmgr *xlog_smgr;

Loading…
Cancel
Save