Merge pull request #227 from dutow/smgrmerge

Merge the smgr branch back to main
pull/209/head
Zsolt Parragi 1 year ago committed by GitHub
commit a21bfac9b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      Makefile.in
  2. 4
      meson.build
  3. 27
      pg_tde--1.0.sql
  4. 12
      src/access/pg_tde_prune.c
  5. 504
      src/access/pg_tde_tdemap.c
  6. 7
      src/access/pg_tde_vacuumlazy.c
  7. 294
      src/access/pg_tde_xlog.c
  8. 25
      src/access/pg_tdeam.c
  9. 10
      src/access/pg_tdeam_handler.c
  10. 248
      src/catalog/tde_global_catalog.c
  11. 3
      src/catalog/tde_keyring.c
  12. 132
      src/catalog/tde_master_key.c
  13. 8
      src/common/pg_tde_utils.c
  14. 17
      src/include/access/pg_tde_tdemap.h
  15. 29
      src/include/access/pg_tde_xlog.h
  16. 3
      src/include/access/pg_tdeam.h
  17. 41
      src/include/catalog/tde_global_catalog.h
  18. 7
      src/include/catalog/tde_keyring.h
  19. 12
      src/include/catalog/tde_master_key.h
  20. 3
      src/include/common/pg_tde_utils.h
  21. 3
      src/include/pg_tde_defines.h
  22. 33
      src/include/pg_tde_event_capture.h
  23. 4
      src/include/smgr/pg_tde_smgr.h
  24. BIN
      src/keyring/.keyring_api.c.swp
  25. 69
      src/keyring/keyring_file.c
  26. 24
      src/pg_tde.c
  27. 147
      src/pg_tde_event_capture.c
  28. 213
      src/smgr/pg_tde_smgr.c
  29. 2
      src/transam/pg_tde_xact_handler.c
  30. 13
      t/results/001_basic.out

@ -42,11 +42,14 @@ src/keyring/keyring_curl.o \
src/keyring/keyring_file.o \
src/keyring/keyring_vault.o \
src/keyring/keyring_api.o \
src/catalog/tde_global_catalog.o \
src/catalog/tde_keyring.o \
src/catalog/tde_master_key.o \
src/common/pg_tde_shmem.o \
src/common/pg_tde_utils.o \
src/smgr/pg_tde_smgr.o \
src/pg_tde_defs.o \
src/pg_tde_event_capture.o \
src/pg_tde.o
override PG_CPPFLAGS += @tde_CPPFLAGS@

@ -39,12 +39,16 @@ pg_tde_sources = files(
'src/keyring/keyring_vault.c',
'src/keyring/keyring_api.c',
'src/smgr/pg_tde_smgr.c',
'src/catalog/tde_global_catalog.c',
'src/catalog/tde_keyring.c',
'src/catalog/tde_master_key.c',
'src/common/pg_tde_shmem.c',
'src/common/pg_tde_utils.c',
'src/pg_tde_defs.c',
'src/pg_tde.c',
'src/pg_tde_event_capture.c',
)
incdir = include_directories('src/include', '.')

@ -87,6 +87,12 @@ RETURNS table_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;
-- Table access method
CREATE FUNCTION pg_tde2am_handler(internal)
RETURNS table_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE FUNCTION pgtde_is_encrypted(table_name VARCHAR)
RETURNS boolean
AS $$
@ -129,5 +135,26 @@ CREATE FUNCTION pg_tde_version() RETURNS TEXT AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE ACCESS METHOD pg_tde TYPE TABLE HANDLER pg_tdeam_handler;
COMMENT ON ACCESS METHOD pg_tde IS 'pg_tde table access method';
CREATE ACCESS METHOD pg_tde2 TYPE TABLE HANDLER pg_tde2am_handler;
COMMENT ON ACCESS METHOD pg_tde2 IS 'pg_tde2 table access method';
-- Per database extension initialization
SELECT pg_tde_extension_initialize();
CREATE OR REPLACE FUNCTION pg_tde_ddl_command_start_capture()
RETURNS event_trigger
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE OR REPLACE FUNCTION pg_tde_ddl_command_end_capture()
RETURNS event_trigger
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE EVENT TRIGGER pg_tde_trigger_create_index
ON ddl_command_start
EXECUTE FUNCTION pg_tde_ddl_command_start_capture();
CREATE EVENT TRIGGER pg_tde_trigger_create_index_2
ON ddl_command_end
EXECUTE FUNCTION pg_tde_ddl_command_end_capture();

@ -127,6 +127,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
if (RecoveryInProgress())
return;
#if PG_VERSION_NUM < 170000
/*
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They
* currently are broken, and discussion of what to do about them is
@ -135,7 +136,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
*/
if (old_snapshot_threshold == 0)
SnapshotTooOldMagicForTest();
#endif
/*
* First check whether there's any chance there's something to prune,
* determining the appropriate horizon is a waste if there's no prune_xid
@ -166,14 +167,14 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
{
if (!OldSnapshotThresholdActive())
#if PG_VERSION_NUM < 170000
if ( !OldSnapshotThresholdActive())
return;
if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
relation,
&limited_xmin, &limited_ts))
return;
#endif
if (!TransactionIdPrecedes(prune_xid, limited_xmin))
return;
}
@ -539,6 +540,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
*/
if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
res = HEAPTUPLE_DEAD;
#if PG_VERSION_NUM < 170000
else if (OldSnapshotThresholdActive())
{
/* haven't determined limited horizon yet, requests */
@ -566,7 +568,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
res = HEAPTUPLE_DEAD;
}
}
#endif
return res;
}

File diff suppressed because it is too large Load Diff

@ -2828,8 +2828,11 @@ should_attempt_truncation(LVRelState *vacrel)
{
BlockNumber possibly_freeable;
if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
old_snapshot_threshold >= 0)
if (!vacrel->do_rel_truncate || VacuumFailsafeActive
#if PG_VERSION_NUM < 170000
|| old_snapshot_threshold >= 0
#endif
)
return false;
possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;

@ -12,13 +12,34 @@
#include "postgres.h"
#include "pg_tde_defines.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xloginsert.h"
#include "catalog/pg_tablespace_d.h"
#include "storage/bufmgr.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog.h"
#include "catalog/tde_master_key.h"
#include "encryption/enc_tde.h"
#ifdef PERCONA_FORK
#include "catalog/tde_global_catalog.h"
static char *TDEXLogEncryptBuf = NULL;
/* GUC */
static bool EncryptXLog = false;
static XLogPageHeaderData EncryptCurrentPageHrd;
static XLogPageHeaderData DecryptCurrentPageHrd;
static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset);
static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix);
static int XLOGChooseNumBuffers(void);
#endif
/*
* TDE fork XLog
@ -103,3 +124,274 @@ pg_tde_rmgr_identify(uint8 info)
return NULL;
}
#ifdef PERCONA_FORK
/*
* -------------------------
* XLog Storage Manager
*/
/*
 * Register the "pg_tde.wal_encrypt" boolean GUC.
 *
 * The setting is bound to EncryptXLog, boots as false and is registered with
 * the PGC_POSTMASTER context.
 */
void
XLogInitGUC(void)
{
    DefineCustomBoolVariable("pg_tde.wal_encrypt",
                             "Enable/Disable encryption of WAL.",
                             NULL,              /* no long description */
                             &EncryptXLog,      /* variable backing the GUC */
                             false,             /* boot value */
                             PGC_POSTMASTER,    /* context */
                             0,                 /* no flags */
                             NULL,              /* no check hook */
                             NULL,              /* no assign hook */
                             NULL);             /* no show hook */
}
static int
XLOGChooseNumBuffers(void)
{
int xbuffers;
xbuffers = NBuffers / 32;
if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
xbuffers = (wal_segment_size / XLOG_BLCKSZ);
if (xbuffers < 8)
xbuffers = 8;
return xbuffers;
}
/*
* Defines the size of the XLog encryption buffer
*/
Size
TDEXLogEncryptBuffSize(void)
{
int xbuffers;
xbuffers = (XLOGbuffers == -1) ? XLOGChooseNumBuffers() : XLOGbuffers;
return (Size) XLOG_BLCKSZ * xbuffers;
}
/*
* Alloc memory for the encryption buffer.
*
* It should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). We can't
* (re)alloc this buf in pg_tde_xlog_seg_write() based on the write size as
* it's called in the CRIT section, hence no allocations are allowed.
*
* Access to this buffer happens during XLogWrite() call which should
* be called with WALWriteLock held, hence no need in extra locks.
*/
void
TDEXLogShmemInit(void)
{
bool foundBuf;
if (EncryptXLog)
{
TDEXLogEncryptBuf = (char *)
TYPEALIGN(PG_IO_ALIGN_SIZE,
ShmemInitStruct("TDE XLog Encryption Buffer",
XLOG_TDE_ENC_BUFF_ALIGNED_SIZE,
&foundBuf));
elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
}
}
/*
 * Hand tde_xlog_smgr to SetXLogSmgr().
 */
void
TDEXLogSmgrInit(void)
{
    SetXLogSmgr(&tde_xlog_smgr);
}
/*
 * Write count bytes from buf to the segment file at offset.
 *
 * With WAL encryption off this is a plain pg_pwrite(); otherwise the pages
 * go through TDEXLogWriteEncryptedPages().  Returns the callee's result.
 */
ssize_t
pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset)
{
    if (!EncryptXLog)
        return pg_pwrite(fd, buf, count, offset);

    return TDEXLogWriteEncryptedPages(fd, buf, count, offset);
}
/*
 * Encrypt XLog page(s) from the buf and write to the segment file.
 *
 * The plaintext in buf is never modified: the encrypted image is assembled in
 * TDEXLogEncryptBuf at the same relative offsets and then written with one
 * pg_pwrite() at the requested file offset.  Returns what pg_pwrite() returns.
 *
 * The header of the page being processed is kept in the static
 * EncryptCurrentPageHrd, so a call that starts in the middle of a page reuses
 * the TLI / page address captured by the call that wrote the page start.
 */
static ssize_t
TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset)
{
    char        iv_prefix[16] = {0,};
    size_t      data_size = 0;
    XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd;
    RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID);
    off_t       enc_off;
    size_t      page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
    uint32      iv_ctr = 0;

#ifdef TDE_XLOG_DEBUG
    elog(DEBUG1, "write encrypted WAL, pages amount: %zu, size: %zu offset: %lld",
         count / (Size) XLOG_BLCKSZ, count, (long long) offset);
#endif

    /*
     * Go through the buf page-by-page and encrypt them.
     * We may start or finish writing from/in the middle of the page
     * (walsender or `full_page_writes = off`). So preserve a page header
     * for the IV init data.
     *
     * TODO: check if walsender restarts from the beginning of the page
     * in case of the crash.
     */
    for (enc_off = 0; enc_off < count;)
    {
        data_size = Min(page_size, count);

        if (page_size == XLOG_BLCKSZ)
        {
            XLogPageHeader enc_buf_page;

            memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD);

            /*
             * Need to use a separate buf for the encryption so the page
             * remains non-crypted in the XLog buf (XLogInsert has to have
             * access to records' lsn).
             */
            enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off);
            memcpy((char *) enc_buf_page, (char *) buf + enc_off,
                   (Size) XLogPageHeaderSize(curr_page_hdr));

            /*
             * A zeroed (unused) page is written out as plain zeros, so its
             * header must not claim that the payload is encrypted.
             */
            if (curr_page_hdr->xlp_magic != 0)
                enc_buf_page->xlp_info |= XLP_ENCRYPTED;

            enc_off += XLogPageHeaderSize(curr_page_hdr);
            data_size -= XLogPageHeaderSize(curr_page_hdr);

            /* it's a beginning of the page */
            iv_ctr = 0;
        }
        else
        {
            /* we're in the middle of the page */
            iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
        }

        /* never run past the end of the caller's data */
        if (data_size + enc_off > count)
        {
            data_size = count - enc_off;
        }

        /*
         * The page is zeroed (no data), no sense to encrypt.
         * This may happen when base_backup or other requests XLOG SWITCH and
         * some pages in XLog buffer still not used.
         */
        if (curr_page_hdr->xlp_magic == 0)
        {
            /* ensure all the page is {0} */
            Assert((*((char *) buf + enc_off) == 0) &&
                   memcmp((char *) buf + enc_off, (char *) buf + enc_off + 1, data_size - 1) == 0);

            /*
             * Copy to the matching offset of the encryption buffer.  The
             * previous code copied to the page start through a pointer that
             * is only set when the write begins on a page boundary; that
             * left the page tail stale and used an uninitialized pointer for
             * writes starting mid-page.
             */
            memcpy(TDEXLogEncryptBuf + enc_off, (char *) buf + enc_off, data_size);
        }
        else
        {
            SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
            PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size,
                                TDEXLogEncryptBuf + enc_off, key);
        }

        /* every page after the first one is processed from its beginning */
        page_size = XLOG_BLCKSZ;
        enc_off += data_size;
    }

    return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset);
}
/*
 * Read the XLog pages from the segment file and decrypt them if needed.
 *
 * Reads up to count bytes at offset into buf with pg_pread() and returns
 * pg_pread()'s result unchanged.  Pages whose header carries XLP_ENCRYPTED
 * are decrypted in place in buf, and that flag is cleared in the page header
 * handed back to the caller.
 *
 * The header of the page being processed is kept in the static
 * DecryptCurrentPageHrd, so a read that starts in the middle of a page reuses
 * the header captured by the call that read the page start.
 *
 * NOTE(review): when a page start falls within the last few bytes of a short
 * read, the header copy and the size arithmetic below assume the whole header
 * was read -- confirm callers never issue such reads.
 */
ssize_t
pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset)
{
ssize_t readsz;
char iv_prefix[16] = {0,};
size_t data_size = 0;
XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd;
RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID);
/* bytes left in the first (possibly partial) page of this read */
size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
off_t dec_off;
uint32 iv_ctr = 0;
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "read from a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset);
#endif
readsz = pg_pread(fd, buf, count, offset);
/*
 * Read the buf page by page and decrypt encrypted pages.
 * We may start or finish reading from/in the middle of the page (walreceiver)
 * in such a case we should preserve the last read page header for
 * the IV data and the encryption state.
 *
 * TODO: check if walsender/receiver restarts from the beginning of the page
 * in case of the crash.
 *
 * A negative readsz (failed read) skips the loop entirely.
 */
for (dec_off = 0; dec_off < readsz;)
{
data_size = Min(page_size, readsz);
if (page_size == XLOG_BLCKSZ)
{
/* save the header first: the saved copy keeps the on-disk flags */
memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD);
/* set the flag to "not encrypted" for the walreceiver */
((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED;
Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0);
/* the header is not decrypted; skip over it */
dec_off += XLogPageHeaderSize(curr_page_hdr);
data_size -= XLogPageHeaderSize(curr_page_hdr);
/* it's a beginning of the page */
iv_ctr = 0;
}
else
{
/* we're in the middle of the page */
iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
}
/* never run past the end of the data actually read */
if ((data_size + dec_off) > readsz)
{
data_size = readsz - dec_off;
}
/* the decision is based on the saved header, not on the cleared one in buf */
if (curr_page_hdr->xlp_info & XLP_ENCRYPTED)
{
SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
PG_TDE_DECRYPT_DATA(
iv_prefix, iv_ctr,
(char *) buf + dec_off, data_size, (char *) buf + dec_off, key);
}
/* every page after the first one is processed from its beginning */
page_size = XLOG_BLCKSZ;
dec_off += data_size;
}
return readsz;
}
/*
 * IV prefix layout: TLI (4 bytes) followed by the XLogRecPtr (8 bytes), each
 * stored most-significant byte first.  Only iv_prefix[0..11] are written;
 * the remaining bytes of the caller's buffer are left untouched.
 */
static void
SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix)
{
    int         i;

    for (i = 0; i < 4; i++)
        iv_prefix[i] = ((tli >> (24 - 8 * i)) & 0xFF);

    for (i = 0; i < 8; i++)
        iv_prefix[4 + i] = ((lsn >> (56 - 8 * i)) & 0xFF);
}
#endif

@ -431,7 +431,9 @@ pg_tde_getpage(TableScanDesc sscan, BlockNumber block)
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page);
#endif
lines = PageGetMaxOffsetNumber(page);
ntup = 0;
@ -570,9 +572,9 @@ pg_tde_gettup_start_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
*linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1;
if (ScanDirectionIsForward(dir))
@ -603,9 +605,9 @@ pg_tde_gettup_continue_page(HeapScanDesc scan, ScanDirection dir, int *linesleft
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
if (ScanDirectionIsForward(dir))
{
*lineoff = OffsetNumberNext(scan->rs_coffset);
@ -870,8 +872,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan,
/* continue from previously returned page/tuple */
block = scan->rs_cblock; /* current page */
page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
lineindex = scan->rs_cindex + dir;
if (ScanDirectionIsForward(dir))
linesleft = scan->rs_ntuples - lineindex;
@ -890,7 +893,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan,
{
pg_tde_getpage((TableScanDesc) scan, block);
page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
linesleft = scan->rs_ntuples;
lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1;
@ -1107,10 +1112,10 @@ pg_tde_getnext(TableScanDesc sscan, ScanDirection direction)
* rather than the AM oid, is that this allows to write regression tests
* that create another AM reusing the heap handler.
*/
if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
if (unlikely(sscan->rs_rd->rd_tableam != GetPGTdeamTableAmRoutine()))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("only heap AM is supported")));
errmsg_internal("only pg_tde AM is supported")));
/*
* We don't expect direct calls to pg_tde_getnext with valid CheckXidAlive
@ -1380,8 +1385,9 @@ pg_tde_fetch(Relation relation,
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, relation, page);
#endif
/*
* We'd better check for out-of-range offnum in case of VACUUM since the
* TID was obtained.
@ -1671,8 +1677,9 @@ pg_tde_get_latest_tid(TableScanDesc sscan,
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, relation, page);
#endif
/*
* Check for bogus item number. This is not treated as an error
* condition because it can happen while following a t_ctid link. We

@ -55,6 +55,7 @@
#include "utils/rel.h"
PG_FUNCTION_INFO_V1(pg_tdeam_handler);
PG_FUNCTION_INFO_V1(pg_tde2am_handler);
static void reform_and_rewrite_tuple(HeapTuple tuple,
@ -645,7 +646,7 @@ pg_tdeam_relation_set_new_filelocator(Relation rel,
ereport(DEBUG1,
(errmsg("creating key file for relation %s", RelationGetRelationName(rel))));
pg_tde_create_key_map_entry(newrlocator, rel);
pg_tde_create_key_map_entry(newrlocator);
}
}
@ -2633,7 +2634,6 @@ static const TableAmRoutine pg_tdeam_methods = {
.scan_sample_next_tuple = pg_tdeam_scan_sample_next_tuple
};
const TableAmRoutine *
GetPGTdeamTableAmRoutine(void)
{
@ -2646,6 +2646,12 @@ pg_tdeam_handler(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(&pg_tdeam_methods);
}
Datum
pg_tde2am_handler(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(GetHeapamTableAmRoutine());
}
bool
is_pg_tde_rel(Relation rel)
{

@ -0,0 +1,248 @@
/*-------------------------------------------------------------------------
*
* tde_global_catalog.c
* Global catalog key management
*
*
* IDENTIFICATION
* src/catalog/tde_global_catalog.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef PERCONA_FORK
#include "storage/shmem.h"
#include "utils/guc.h"
#include "access/pg_tde_tdemap.h"
#include "catalog/tde_global_catalog.h"
#include "catalog/tde_keyring.h"
#include "catalog/tde_master_key.h"
#include <openssl/rand.h>
#include <openssl/err.h>
#include <sys/time.h>
#define MASTER_KEY_DEFAULT_NAME "tde-global-catalog-key"
/* TODO: not sure if we need an option of multiple master keys for the global catalog */
/*
 * Slots of the master_keys array in EncryptionStateData.
 */
typedef enum
{
/* slot read and written by the TDE*GlCatKey*Cache functions */
TDE_GCAT_XLOG_KEY,
/* must be last: used as the length of the master_keys array */
TDE_GCAT_KEYS_COUNT
} GlobalCatalogKeyTypes;
/*
 * Global catalog encryption state; the single instance is obtained from
 * ShmemInitStruct() in TDEGlCatShmemInit().
 */
typedef struct EncryptionStateData
{
/* points just past this struct, into the same shmem allocation */
GenericKeyring *keyring;
/* cached master keys, indexed by GlobalCatalogKeyTypes */
TDEMasterKey master_keys[TDE_GCAT_KEYS_COUNT];
} EncryptionStateData;
static EncryptionStateData * EncryptionState = NULL;
/* GUC */
static char *KRingProviderType = NULL;
static char *KRingProviderFilePath = NULL;
static void init_gl_catalog_keys(void);
static void init_keyring(void);
static TDEMasterKey * create_master_key(const char *key_name,
GenericKeyring * keyring, Oid dbOid, Oid spcOid,
bool ensure_new_key);
/*
 * Register the string GUCs that configure the global catalog keyring:
 * "pg_tde.global_keyring_type" and "pg_tde.global_keyring_file_path".
 * Both boot as NULL and are registered with the PGC_POSTMASTER context.
 */
void
TDEGlCatInitGUC(void)
{
    DefineCustomStringVariable("pg_tde.global_keyring_type",
                               "Keyring type for global catalog",
                               NULL,                /* no long description */
                               &KRingProviderType,
                               NULL,                /* boot value */
                               PGC_POSTMASTER,
                               0,                   /* no flags required */
                               NULL,                /* no check hook */
                               NULL,                /* no assign hook */
                               NULL);               /* no show hook */

    DefineCustomStringVariable("pg_tde.global_keyring_file_path",
                               "Keyring file options for global catalog",
                               NULL,                /* no long description */
                               &KRingProviderFilePath,
                               NULL,                /* boot value */
                               PGC_POSTMASTER,
                               0,                   /* no flags required */
                               NULL,                /* no check hook */
                               NULL,                /* no assign hook */
                               NULL);               /* no show hook */
}
/*
 * Shared memory needed for the global catalog encryption state.
 *
 * The layout is an EncryptionStateData, padded to a MAXALIGN boundary,
 * followed by a KeyringProviders area (TDEGlCatShmemInit() places the keyring
 * at MAXALIGN(sizeof(EncryptionStateData))).  The padding therefore has to be
 * counted before the keyring size is added; aligning only the sum could come
 * up short when the struct size is not itself MAXALIGN'ed.
 */
Size
TDEGlCatEncStateSize(void)
{
    Size        size;

    size = MAXALIGN(sizeof(EncryptionStateData));
    size = add_size(size, sizeof(KeyringProviders));

    return MAXALIGN(size);
}
/*
 * Create (or attach to) the shared global catalog encryption state.
 *
 * The keyring area lives in the same allocation, right after the MAXALIGN'ed
 * EncryptionStateData.  The contents are zeroed only when the segment is
 * newly created: when ShmemInitStruct() reports an existing struct, it may
 * already hold the keyring settings and the cached master key, and those
 * must not be wiped.
 */
void
TDEGlCatShmemInit(void)
{
    bool        foundBuf;
    char       *allocptr;

    EncryptionState = (EncryptionStateData *)
        ShmemInitStruct("TDE XLog Encryption State",
                        TDEGlCatEncStateSize(), &foundBuf);

    /* Already initialized by another process: just keep the pointer. */
    if (foundBuf)
        return;

    allocptr = ((char *) EncryptionState) + MAXALIGN(sizeof(EncryptionStateData));
    EncryptionState->keyring = (GenericKeyring *) allocptr;

    memset(EncryptionState->keyring, 0, sizeof(KeyringProviders));
    memset(EncryptionState->master_keys, 0, sizeof(TDEMasterKey) * TDE_GCAT_KEYS_COUNT);
}
void
TDEGlCatKeyInit(void)
{
char db_map_path[MAXPGPATH] = {0};
init_keyring();
pg_tde_set_db_file_paths(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
db_map_path, NULL);
if (access(db_map_path, F_OK) == -1)
{
init_gl_catalog_keys();
}
else
{
/* put an internal key into the cache */
GetGlCatInternalKey(XLOG_TDE_OID);
}
}
/*
 * Return the cached XLog master key, or NULL when the cache slot is empty
 * (its keyLength is zero).
 */
TDEMasterKey *
TDEGetGlCatKeyFromCache(void)
{
    TDEMasterKey *cached = &EncryptionState->master_keys[TDE_GCAT_XLOG_KEY];

    return (cached->keyLength != 0) ? cached : NULL;
}
/*
 * Copy *mkey by value into the XLog slot of the master key cache.
 */
void
TDEPutGlCatKeyInCache(TDEMasterKey * mkey)
{
    TDEMasterKey *slot = &EncryptionState->master_keys[TDE_GCAT_XLOG_KEY];

    memcpy(slot, mkey, sizeof(TDEMasterKey));
}
/*
 * Look up the internal key of a global-space object, using the global
 * catalog keyring.
 */
RelKeyData *
GetGlCatInternalKey(Oid obj_id)
{
    GenericKeyring *gl_keyring = EncryptionState->keyring;

    return GetRelationKeyWithKeyring(GLOBAL_SPACE_RLOCATOR(obj_id), gl_keyring);
}
/*
* TODO: should be aligned with the rest of the keyring_provider code after its
* refactoring
*
* TODO: add Vault
*/
static void
init_keyring(void)
{
EncryptionState->keyring->type = get_keyring_provider_from_typename(KRingProviderType);
switch (EncryptionState->keyring->type)
{
case FILE_KEY_PROVIDER:
FileKeyring * kring = (FileKeyring *) EncryptionState->keyring;
strncpy(kring->file_name, KRingProviderFilePath, sizeof(kring->file_name));
break;
}
}
/*
 * Create the global catalog master key and the internal key for XLog, write
 * the encrypted internal key to the key map and cache both keys.
 *
 * Keys are created during the cluster start only, so no locks needed here.
 */
static void
init_gl_catalog_keys(void)
{
    InternalKey int_key;
    RelKeyData *rel_key_data;
    RelKeyData *enc_rel_key_data;
    RelFileLocator *rlocator;
    TDEMasterKey *mkey;

    mkey = create_master_key(MASTER_KEY_DEFAULT_NAME,
                             EncryptionState->keyring,
                             GLOBAL_DATA_TDE_OID, GLOBALTABLESPACE_OID, false);

    memset(&int_key, 0, sizeof(InternalKey));

    /*
     * Create and store an internal key for XLog.
     *
     * RAND_bytes() returns 1 on success and 0 or -1 on failure, so anything
     * other than 1 has to be treated as an error.
     */
    if (RAND_bytes(int_key.key, INTERNAL_KEY_LEN) != 1)
    {
        ereport(FATAL,
                (errcode(ERRCODE_INTERNAL_ERROR),
                 errmsg("could not generate internal key for \"WAL\": %s",
                        ERR_error_string(ERR_get_error(), NULL))));
    }

    rlocator = &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
    rel_key_data = tde_create_rel_key(rlocator->relNumber, &int_key, &mkey->keyInfo);
    enc_rel_key_data = tde_encrypt_rel_key(mkey, rel_key_data, rlocator);
    pg_tde_write_key_map_entry(rlocator, enc_rel_key_data, &mkey->keyInfo);

    /*
     * TODO: move global catalog internal keys into own cache. This cache should
     * be in the TopMemoryContext because of SSL contexts
     * (see https://github.com/Percona-Lab/pg_tde/pull/214#discussion_r1648998317)
     */
    pg_tde_put_key_into_map(rlocator->relNumber, rel_key_data);
    TDEPutGlCatKeyInCache(mkey);
}
/*
 * Build a TDEMasterKey for the given database / tablespace.
 *
 * The key data comes from load_latest_versioned_key_name(); if that yields
 * nothing, a new key is generated and stored via
 * KeyringGenerateNewKeyAndStore().  Raises an ERROR when neither produces a
 * key.  The result is palloc'd in the current memory context.
 */
static TDEMasterKey *
create_master_key(const char *key_name, GenericKeyring * keyring,
                  Oid dbOid, Oid spcOid, bool ensure_new_key)
{
    TDEMasterKey *masterKey;
    keyInfo    *key_info = NULL;

    /*
     * Zero the whole struct: it is later copied as a block into the master
     * key cache, so no field or padding byte may be left uninitialized.
     */
    masterKey = palloc0(sizeof(TDEMasterKey));
    masterKey->keyInfo.databaseId = dbOid;
    masterKey->keyInfo.tablespaceId = spcOid;
    masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION;
    masterKey->keyInfo.keyringId = keyring->key_id;

    /* Copy one byte less than the limit so the zeroed last byte terminates. */
    strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN - 1);
    gettimeofday(&masterKey->keyInfo.creationTime, NULL);

    key_info = load_latest_versioned_key_name(&masterKey->keyInfo, keyring, ensure_new_key);

    if (key_info == NULL)
        key_info = KeyringGenerateNewKeyAndStore(keyring, masterKey->keyInfo.keyId.versioned_name, INTERNAL_KEY_LEN, false);

    if (key_info == NULL)
    {
        ereport(ERROR,
                (errmsg("failed to retrieve master key")));
    }

    masterKey->keyLength = key_info->data.len;
    memcpy(masterKey->keyData, key_info->data.data, key_info->data.len);

    return masterKey;
}
#endif /* PERCONA_FORK */

@ -50,13 +50,12 @@ PG_FUNCTION_INFO_V1(keyring_delete_dependency_check_trigger);
#define FILE_KEYRING_TYPE_KEY "type"
static FileKeyring *load_file_keyring_provider_options(Datum keyring_options);
static ProviderType get_keyring_provider_from_typename(char *provider_type);
static GenericKeyring *load_keyring_provider_options(ProviderType provider_type, Datum keyring_options);
static VaultV2Keyring *load_vaultV2_keyring_provider_options(Datum keyring_options);
static void debug_print_kerying(GenericKeyring *keyring);
static GenericKeyring *load_keyring_provider_from_tuple(HeapTuple tuple, TupleDesc tupDesc);
static ProviderType
ProviderType
get_keyring_provider_from_typename(char *provider_type)
{
if (provider_type == NULL)

@ -29,8 +29,9 @@
#include <sys/time.h>
#include "access/pg_tde_tdemap.h"
#define DEFAULT_MASTER_KEY_VERSION 1
#ifdef PERCONA_FORK
#include "catalog/tde_global_catalog.h"
#endif
typedef struct TdeMasterKeySharedState
{
@ -67,12 +68,10 @@ static Size required_shared_mem_size(void);
static int required_locks_count(void);
static void shared_memory_shutdown(int code, Datum arg);
static void master_key_startup_cleanup(int tde_tbl_count, void *arg);
static keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key);
static void clear_master_key_cache(Oid databaseId, Oid tablespaceId) ;
static void clear_master_key_cache(Oid databaseId) ;
static inline dshash_table *get_master_key_Hash(void);
static TDEMasterKey *get_master_key_from_cache(Oid dbOid);
static void push_master_key_to_cache(TDEMasterKey *masterKey);
static TDEMasterKey *set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key);
static const TDEShmemSetupRoutine master_key_info_shmem_routine = {
.init_shared_state = initialize_shared_state,
@ -214,23 +213,41 @@ save_master_key_info(TDEMasterKeyInfo *master_key_info)
* throws an error.
*/
TDEMasterKey *
GetMasterKey(void)
GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring)
{
TDEMasterKey *masterKey = NULL;
TDEMasterKeyInfo *masterKeyInfo = NULL;
GenericKeyring *keyring = NULL;
const keyInfo *keyInfo = NULL;
KeyringReturnCodes keyring_ret;
Oid dbOid = MyDatabaseId;
LWLock *lock_files = tde_lwlock_mk_files();
LWLock *lock_cache = tde_lwlock_mk_cache();
// TODO: This recursion counter is a dirty hack until the metadata is in the catalog
// As otherwise we would call GetMasterKey recursively and deadlock
static int recursion = 0;
if(recursion > 0)
{
return NULL;
}
recursion++;
LWLockAcquire(lock_cache, LW_SHARED);
masterKey = get_master_key_from_cache(dbOid);
#ifdef PERCONA_FORK
/* Global catalog has its own cache */
if (spcOid == GLOBALTABLESPACE_OID)
masterKey = TDEGetGlCatKeyFromCache();
else
#endif
masterKey = get_master_key_from_cache(dbOid);
LWLockRelease(lock_cache);
if (masterKey)
{
recursion--;
return masterKey;
}
/*
* We should hold an exclusive lock here to ensure that a valid master key, if found, is added
@ -239,38 +256,44 @@ GetMasterKey(void)
LWLockAcquire(lock_files, LW_SHARED);
LWLockAcquire(lock_cache, LW_EXCLUSIVE);
masterKey = get_master_key_from_cache(dbOid);
#ifdef PERCONA_FORK
/* Global catalog has its own cache */
if (spcOid == GLOBALTABLESPACE_OID)
masterKey = TDEGetGlCatKeyFromCache();
else
#endif
masterKey = get_master_key_from_cache(dbOid);
if (masterKey)
{
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
recursion--;
return masterKey;
}
/* Master key not present in cache. Load from the keyring */
masterKeyInfo = pg_tde_get_master_key(dbOid);
masterKeyInfo = pg_tde_get_master_key(dbOid, spcOid);
if (masterKeyInfo == NULL)
{
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
ereport(ERROR,
(errmsg("Master key does not exists for the database"),
errhint("Use set_master_key interface to set the master key")));
recursion--;
return NULL;
}
/* Load the master key from keyring and store it in cache */
keyring = GetKeyProviderByID(masterKeyInfo->keyringId);
if (keyring == NULL)
{
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
keyring = GetKeyProviderByID(masterKeyInfo->keyringId);
if (keyring == NULL)
{
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
ereport(ERROR,
(errmsg("Key provider with ID:\"%d\" does not exists", masterKeyInfo->keyringId)));
return NULL;
recursion--;
return NULL;
}
}
keyInfo = KeyringGetKey(keyring, masterKeyInfo->keyId.versioned_name, false, &keyring_ret);
@ -279,8 +302,7 @@ GetMasterKey(void)
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
ereport(ERROR,
(errmsg("failed to retrieve master key \"%s\" from keyring.", masterKeyInfo->keyId.versioned_name)));
recursion--;
return NULL;
}
@ -290,8 +312,13 @@ GetMasterKey(void)
memcpy(masterKey->keyData, keyInfo->data.data, keyInfo->data.len);
masterKey->keyLength = keyInfo->data.len;
Assert(MyDatabaseId == masterKey->keyInfo.databaseId);
push_master_key_to_cache(masterKey);
Assert(dbOid == masterKey->keyInfo.databaseId);
#ifdef PERCONA_FORK
if (spcOid == GLOBALTABLESPACE_OID)
TDEPutGlCatKeyInCache(masterKey);
else
#endif
push_master_key_to_cache(masterKey);
/* Release the exclusive locks here */
LWLockRelease(lock_cache);
@ -300,6 +327,7 @@ GetMasterKey(void)
if (masterKeyInfo)
pfree(masterKeyInfo);
recursion--;
return masterKey;
}
@ -313,12 +341,11 @@ GetMasterKey(void)
* to make sure if some other caller has not added a master key for
* same database while we were waiting for the lock.
*/
static TDEMasterKey *
set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key)
TDEMasterKey *
set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring,
Oid dbOid, Oid spcOid, bool ensure_new_key)
{
TDEMasterKey *masterKey = NULL;
Oid dbOid = MyDatabaseId;
LWLock *lock_files = tde_lwlock_mk_files();
LWLock *lock_cache = tde_lwlock_mk_cache();
bool is_dup_key = false;
@ -334,14 +361,15 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool
/* TODO: Add the key in the cache? */
if (is_dup_key == false)
is_dup_key = (pg_tde_get_master_key(dbOid) != NULL);
is_dup_key = (pg_tde_get_master_key(dbOid, spcOid) != NULL);
if (is_dup_key == false)
{
const keyInfo *keyInfo = NULL;
masterKey = palloc(sizeof(TDEMasterKey));
masterKey->keyInfo.databaseId = MyDatabaseId;
masterKey->keyInfo.databaseId = dbOid;
masterKey->keyInfo.tablespaceId = spcOid;
masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION;
masterKey->keyInfo.keyringId = keyring->key_id;
strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN);
@ -370,7 +398,7 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool
XLogBeginInsert();
XLogRegisterData((char *) &masterKey->keyInfo, sizeof(TDEMasterKeyInfo));
XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_MASTER_KEY);
push_master_key_to_cache(masterKey);
}
@ -396,7 +424,10 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool
bool
SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key)
{
TDEMasterKey *master_key = set_master_key_with_keyring(key_name, GetKeyProviderByName(provider_name), ensure_new_key);
TDEMasterKey *master_key = set_master_key_with_keyring(key_name,
GetKeyProviderByName(provider_name),
MyDatabaseId, MyDatabaseTableSpace,
ensure_new_key);
return (master_key != NULL);
}
@ -404,10 +435,11 @@ SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_ke
bool
RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key)
{
TDEMasterKey *master_key = GetMasterKey();
TDEMasterKey *master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL);
TDEMasterKey new_master_key;
const keyInfo *keyInfo = NULL;
GenericKeyring *keyring;
bool is_rotated;
/*
* Let's set everything the same as the older master key and
@ -446,8 +478,13 @@ RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool en
new_master_key.keyLength = keyInfo->data.len;
memcpy(new_master_key.keyData, keyInfo->data.data, keyInfo->data.len);
clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace);
return pg_tde_perform_rotate_key(master_key, &new_master_key);
is_rotated = pg_tde_perform_rotate_key(master_key, &new_master_key);
if (is_rotated) {
clear_master_key_cache(master_key->keyInfo.databaseId);
push_master_key_to_cache(&new_master_key);
}
return is_rotated;
}
/*
@ -459,7 +496,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec)
bool ret;
ret = pg_tde_write_map_keydata_files(xlrec->map_size, xlrec->buff, xlrec->keydata_size, &xlrec->buff[xlrec->map_size]);
clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace);
clear_master_key_cache(MyDatabaseId);
return ret;
}
@ -469,7 +506,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec)
* If ensure_new_key is true, then we will keep on incrementing the version number
* till we get a key name that is not present in the keyring
*/
static keyInfo *
keyInfo *
load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key)
{
KeyringReturnCodes kr_ret;
@ -553,7 +590,7 @@ GetMasterKeyProviderId(void)
}
{
/* Master key not present in cache. Try Loading it from the info file */
masterKeyInfo = pg_tde_get_master_key(dbOid);
masterKeyInfo = pg_tde_get_master_key(dbOid, MyDatabaseTableSpace);
if (masterKeyInfo)
{
keyringId = masterKeyInfo->keyringId;
@ -609,7 +646,7 @@ static void
push_master_key_to_cache(TDEMasterKey *masterKey)
{
TDEMasterKey *cacheEntry = NULL;
Oid databaseId = MyDatabaseId;
Oid databaseId = masterKey->keyInfo.databaseId;
bool found = false;
cacheEntry = dshash_find_or_insert(get_master_key_Hash(),
&databaseId, &found);
@ -653,18 +690,18 @@ master_key_startup_cleanup(int tde_tbl_count, void* arg)
void
cleanup_master_key_info(Oid databaseId, Oid tablespaceId)
{
clear_master_key_cache(databaseId, tablespaceId);
clear_master_key_cache(databaseId);
/*
* TODO: Although should never happen. Still verify if any table in the
* database is using tde
*/
/* Remove the tde files */
pg_tde_delete_tde_files(databaseId);
pg_tde_delete_tde_files(databaseId, tablespaceId);
}
static void
clear_master_key_cache(Oid databaseId, Oid tablespaceId)
clear_master_key_cache(Oid databaseId)
{
TDEMasterKey *cache_entry;
@ -737,9 +774,14 @@ Datum pg_tde_master_key_info(PG_FUNCTION_ARGS)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context that cannot accept type record")));
master_key = GetMasterKey();
master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL);
if (master_key == NULL)
PG_RETURN_NULL();
{
ereport(ERROR,
(errmsg("Master key does not exists for the database"),
errhint("Use set_master_key interface to set the master key")));
PG_RETURN_NULL();
}
keyring = GetKeyProviderByID(master_key->keyInfo.keyringId);

@ -33,6 +33,12 @@ get_tde_table_am_oid(void)
return get_table_am_oid("pg_tde", false);
}
/*
 * Returns the OID of the table access method named "pg_tde2".
 * The lookup goes through get_table_am_oid() with its second argument
 * set to false.
 */
Oid
get_tde2_table_am_oid(void)
{
	Oid			tde2_am_oid = get_table_am_oid("pg_tde2", false);

	return tde2_am_oid;
}
/*
* Returns the list of OIDs for all TDE tables in a database
*/
@ -208,4 +214,4 @@ extract_json_option_value(Datum top_json, const char* field_name)
elog(ERROR, "Unknown type for object %s: %s", field_name, type_cstr);
return NULL;
}
}
}

@ -10,6 +10,7 @@
#include "utils/rel.h"
#include "access/xlog_internal.h"
#include "catalog/pg_tablespace_d.h"
#include "catalog/tde_master_key.h"
#include "storage/fd.h"
#include "storage/relfilelocator.h"
@ -46,22 +47,28 @@ typedef struct XLogRelKey
RelKeyData relKey;
} XLogRelKey;
extern void pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel);
extern RelKeyData* pg_tde_create_key_map_entry(const RelFileLocator *newrlocator);
extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_key_data, TDEMasterKeyInfo *master_key_info);
extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator);
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset);
extern RelKeyData *pg_tde_get_key_from_fork(const RelFileLocator *rlocator);
extern RelKeyData *GetRelationKey(RelFileLocator rel);
extern RelKeyData *GetRelationKeyWithKeyring(RelFileLocator rel, GenericKeyring *keyring);
extern void pg_tde_cleanup_path_vars(void);
extern void pg_tde_delete_tde_files(Oid dbOid);
extern void pg_tde_delete_tde_files(Oid dbOid, Oid spcOid);
extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid);
extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid, Oid spcOid);
extern bool pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info);
extern bool pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key);
extern bool pg_tde_write_map_keydata_files(off_t map_size, char *m_file_data, off_t keydata_size, char *k_file_data);
extern RelKeyData* tde_create_rel_key(Oid rel_id, InternalKey *key, TDEMasterKeyInfo *master_key_info);
extern RelKeyData *tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const RelFileLocator *rlocator);
extern RelKeyData *tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, const RelFileLocator *rlocator);
extern void pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_path, char *keydata_path);
const char * tde_sprint_key(InternalKey *k);
extern void pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key);
#endif /*PG_TDE_MAP_H*/

@ -9,7 +9,12 @@
#ifndef PG_TDE_XLOG_H
#define PG_TDE_XLOG_H
#include "postgres.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#ifdef PERCONA_FORK
#include "access/xlog_smgr.h"
#endif
/* TDE XLOG resource manager */
#define XLOG_TDE_ADD_RELATION_KEY 0x00
@ -32,4 +37,28 @@ static const RmgrData pg_tde_rmgr = {
.rm_identify = pg_tde_rmgr_identify
};
#ifdef PERCONA_FORK
/* XLog encryption stuff */
extern Size TDEXLogEncryptBuffSize(void);
#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE)
extern void TDEXLogShmemInit(void);
extern ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset);
extern ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset);
static const XLogSmgr tde_xlog_smgr = {
.seg_read = pg_tde_xlog_seg_read,
.seg_write = pg_tde_xlog_seg_write,
};
extern void TDEXLogSmgrInit(void);
extern void XLogInitGUC(void);
#endif
#endif /* PG_TDE_XLOG_H */

@ -333,4 +333,7 @@ extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation,
/* Defined in pg_tdeam_handler.c */
extern bool is_pg_tde_rel(Relation rel);
const TableAmRoutine *
GetPGTdeamTableAmRoutine(void);
#endif /* PG_TDEAM_H */

@ -0,0 +1,41 @@
/*-------------------------------------------------------------------------
*
* tde_global_catalog.h
* Global catalog key management
*
* src/include/catalog/tde_global_catalog.h
*
*-------------------------------------------------------------------------
*/
#ifndef TDE_GLOBAL_CATALOG_H
#define TDE_GLOBAL_CATALOG_H
#include "postgres.h"
#include "catalog/tde_master_key.h"
/*
* Needed to identify the keys of global data (WAL etc.) in caches and storage.
* We take the OIDs of the oid-type operators as IDs, so no overlap with a
* "real" catalog object is possible.
*/
#define GLOBAL_DATA_TDE_OID 607 /* Global objects fake "db" */
#define XLOG_TDE_OID 608
#define GLOBAL_SPACE_RLOCATOR(_obj_oid) (RelFileLocator) { \
GLOBALTABLESPACE_OID, \
GLOBAL_DATA_TDE_OID, \
_obj_oid \
}
extern void TDEGlCatInitGUC(void);
extern Size TDEGlCatEncStateSize(void);
extern void TDEGlCatShmemInit(void);
extern void TDEGlCatKeyInit(void);
extern TDEMasterKey *TDEGetGlCatKeyFromCache(void);
extern void TDEPutGlCatKeyInCache(TDEMasterKey *mkey);
extern RelKeyData *GetGlCatInternalKey(Oid obj_id);
#endif /*TDE_GLOBAL_CATALOG_H*/

@ -54,8 +54,15 @@ typedef struct VaultV2Keyring
char vault_mount_path[MAXPGPATH];
} VaultV2Keyring;
typedef union KeyringProviders
{
FileKeyring file;
VaultV2Keyring vault;
} KeyringProviders;
extern List *GetAllKeyringProviders(void);
extern GenericKeyring *GetKeyProviderByName(const char *provider_name);
extern GenericKeyring *GetKeyProviderByID(int provider_id);
extern ProviderType get_keyring_provider_from_typename(char *provider_type);
#endif /*TDE_KEYRING_H*/

@ -17,6 +17,7 @@
#include "nodes/pg_list.h"
#include "storage/lwlock.h"
#define DEFAULT_MASTER_KEY_VERSION 1
#define MASTER_KEY_NAME_LEN TDE_KEY_NAME_LEN
#define MAX_MASTER_KEY_VERSION_NUM 100000
@ -68,9 +69,16 @@ extern LWLock *tde_lwlock_mk_cache(void);
extern bool save_master_key_info(TDEMasterKeyInfo *masterKeyInfo);
extern Oid GetMasterKeyProviderId(void);
extern TDEMasterKey* GetMasterKey(void);
extern TDEMasterKey* GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring);
extern bool SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key);
extern bool RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key);
extern bool xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec);
extern TDEMasterKey *set_master_key_with_keyring(const char *key_name,
GenericKeyring *keyring,
Oid dbOid, Oid spcOid,
bool ensure_new_key);
extern keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info,
GenericKeyring *keyring,
bool ensure_new_key);
#endif /*PG_TDE_MASTER_KEY_H*/

@ -12,10 +12,11 @@
#include "nodes/pg_list.h"
extern Oid get_tde_table_am_oid(void);
extern Oid get_tde2_table_am_oid(void);
extern List *get_all_tde_tables(void);
extern int get_tde_tables_count(void);
extern const char *extract_json_cstr(Datum json, const char* field_name);
const char *extract_json_option_value(Datum top_json, const char* field_name);
#endif /*PG_TDE_UTILS_H*/
#endif /*PG_TDE_UTILS_H*/

@ -22,6 +22,7 @@
//#define ENCRYPTION_DEBUG 1
//#define KEYRING_DEBUG 1
//#define TDE_FORK_DEBUG 1
// #define TDE_XLOG_DEBUG 1
#define pg_tde_fill_tuple heap_fill_tuple
#define pg_tde_form_tuple heap_form_tuple
@ -37,8 +38,6 @@
#define pgstat_count_pg_tde_insert pgstat_count_heap_insert
#define pg_tde_getattr heap_getattr
#define GetPGTdeamTableAmRoutine GetHeapamTableAmRoutine
#define TDE_PageAddItem(rel, oid, blkno, page, item, size, offsetNumber, overwrite, is_heap) \
PGTdePageAddItemExtended(rel, oid, blkno, page, item, size, offsetNumber, \
((overwrite) ? PAI_OVERWRITE : 0) | \

@ -0,0 +1,33 @@
/*-------------------------------------------------------------------------
*
* pg_tde_event_capture.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_TDE_EVENT_CAPTURE_H
#define PG_TDE_EVENT_CAPTURE_H
#include "postgres.h"
#include "nodes/parsenodes.h"
/*
 * Kind of CREATE statement recorded in a TdeCreateEvent.
 */
typedef enum TdeCreateEventType
{
TDE_UNKNOWN_CREATE_EVENT,	/* no CREATE TABLE / CREATE INDEX recorded (reset state) */
TDE_TABLE_CREATE_EVENT,		/* statement is a CREATE TABLE (CreateStmt) */
TDE_INDEX_CREATE_EVENT		/* statement is a CREATE INDEX (IndexStmt) */
} TdeCreateEventType;
/*
 * Description of the CREATE TABLE / CREATE INDEX statement that is currently
 * being executed, as recorded by the DDL command-start event trigger.
 */
typedef struct TdeCreateEvent
{
TdeCreateEventType eventType; /* DDL statement type */
bool encryptMode; /* true when the table uses encryption */
Oid baseTableOid; /* Oid of the table on which the index is being
* created. For a CREATE TABLE statement this
* contains InvalidOid */
RangeVar *relation; /* Reference to the parsed relation from the
* create statement */
} TdeCreateEvent;
extern TdeCreateEvent * GetCurrentTdeCreateEvent(void);
#endif

@ -0,0 +1,4 @@
#pragma once
/*
 * Registers the TDE storage manager. Declared with an explicit (void)
 * parameter list so that the compiler checks calls against a real prototype.
 */
extern void RegisterStorageMgr(void);

Binary file not shown.

@ -1,11 +1,11 @@
/*-------------------------------------------------------------------------
*
* keyring_file.c
* Implements the file provider keyring
* routines.
* Implements the file provider keyring
* routines.
*
* IDENTIFICATION
* contrib/pg_tde/src/keyring/keyring_file.c
* contrib/pg_tde/src/keyring/keyring_file.c
*
*-------------------------------------------------------------------------
*/
@ -40,21 +40,21 @@ static keyInfo*
get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, KeyringReturnCodes *return_code)
{
keyInfo* key = NULL;
File file = -1;
int fd = -1;
FileKeyring* file_keyring = (FileKeyring*)keyring;
off_t bytes_read = 0;
off_t curr_pos = 0;
*return_code = KEYRING_CODE_SUCCESS;
file = PathNameOpenFile(file_keyring->file_name, PG_BINARY);
if (file < 0)
fd = BasicOpenFile(file_keyring->file_name, PG_BINARY);
if (fd < 0)
return NULL;
key = palloc(sizeof(keyInfo));
while(true)
{
bytes_read = FileRead(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_READ);
bytes_read = pg_pread(fd, key, sizeof(keyInfo), curr_pos);
curr_pos += bytes_read;
if (bytes_read == 0 )
@ -62,13 +62,13 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error,
/*
* Empty keyring file is considered as a valid keyring file that has no keys
*/
FileClose(file);
close(fd);
pfree(key);
return NULL;
}
if (bytes_read != sizeof(keyInfo))
{
FileClose(file);
close(fd);
pfree(key);
/* Corrupt file */
*return_code = KEYRING_CODE_DATA_CORRUPTED;
@ -81,21 +81,21 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error,
}
if (strncasecmp(key->name.name, key_name, sizeof(key->name.name)) == 0)
{
FileClose(file);
close(fd);
return key;
}
}
FileClose(file);
close(fd);
pfree(key);
return NULL;
return NULL;
}
static KeyringReturnCodes
set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error)
{
off_t bytes_written = 0;
off_t bytes_written = 0;
off_t curr_pos = 0;
File file;
int fd;
FileKeyring* file_keyring = (FileKeyring*)keyring;
keyInfo *existing_key;
KeyringReturnCodes return_code = KEYRING_CODE_SUCCESS;
@ -111,26 +111,35 @@ set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error)
return KEYRING_CODE_INVALID_OPERATION;
}
file = PathNameOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY);
if (file < 0)
{
fd = BasicOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY);
if (fd < 0)
{
ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(),
errmsg("Failed to open keyring file %s :%m", file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
/* Write key to the end of file */
curr_pos = FileSize(file);
bytes_written = FileWrite(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_WRITE);
curr_pos = lseek(fd, 0, SEEK_END);
bytes_written = pg_pwrite(fd, key, sizeof(keyInfo), curr_pos);
if (bytes_written != sizeof(keyInfo))
{
FileClose(file);
ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(),
errmsg("keyring file \"%s\" can't be written: %m",
file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
FileClose(file);
{
close(fd);
ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(),
errmsg("keyring file \"%s\" can't be written: %m",
file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
if (pg_fsync(fd) != 0)
{
close(fd);
ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
close(fd);
return KEYRING_CODE_SUCCESS;
}

@ -31,6 +31,10 @@
#include "keyring/keyring_vault.h"
#include "utils/builtins.h"
#include "pg_tde_defs.h"
#include "smgr/pg_tde_smgr.h"
#ifdef PERCONA_FORK
#include "catalog/tde_global_catalog.h"
#endif
#define MAX_ON_INSTALLS 5
@ -59,6 +63,11 @@ tde_shmem_request(void)
{
Size sz = TdeRequiredSharedMemorySize();
int required_locks = TdeRequiredLocksCount();
#ifdef PERCONA_FORK
sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
#endif
if (prev_shmem_request_hook)
prev_shmem_request_hook();
RequestAddinShmemSpace(sz);
@ -74,6 +83,14 @@ tde_shmem_startup(void)
TdeShmemInit();
AesInit();
#ifdef PERCONA_FORK
TDEGlCatShmemInit();
TDEGlCatKeyInit();
TDEXLogShmemInit();
TDEXLogSmgrInit();
#endif
}
void
@ -86,7 +103,10 @@ _PG_init(void)
keyringRegisterVariables();
InitializeMasterKeyInfo();
#ifdef PERCONA_FORK
XLogInitGUC();
TDEGlCatInitGUC();
#endif
prev_shmem_request_hook = shmem_request_hook;
shmem_request_hook = tde_shmem_request;
prev_shmem_startup_hook = shmem_startup_hook;
@ -98,6 +118,8 @@ _PG_init(void)
InstallFileKeyring();
InstallVaultV2Keyring();
RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr);
RegisterStorageMgr();
}
Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS)

@ -0,0 +1,147 @@
/*-------------------------------------------------------------------------
*
* pg_tde_event_capture.c
* event trigger logic to identify if we are creating the encrypted table or not.
*
* IDENTIFICATION
* contrib/pg_tde/src/pg_tde_event_capture.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "fmgr.h"
#include "utils/rel.h"
#include "utils/builtins.h"
#include "catalog/pg_class.h"
#include "access/table.h"
#include "catalog/pg_event_trigger.h"
#include "catalog/namespace.h"
#include "commands/event_trigger.h"
#include "common/pg_tde_utils.h"
#include "pg_tde_event_capture.h"
/* Global variable that gets set at DDL start and cleared out at DDL end */
TdeCreateEvent tdeCurrentCreateEvent = {.relation = NULL};
static void reset_current_tde_create_event(void);
PG_FUNCTION_INFO_V1(pg_tde_ddl_command_start_capture);
PG_FUNCTION_INFO_V1(pg_tde_ddl_command_end_capture);
/*
 * Accessor for the create-event state. Hands out a pointer to the
 * tdeCurrentCreateEvent global itself (not a copy).
 */
TdeCreateEvent *
GetCurrentTdeCreateEvent(void)
{
	TdeCreateEvent *current_event = &tdeCurrentCreateEvent;

	return current_event;
}
/*
 * pg_tde_ddl_command_start_capture is an event trigger function triggered
 * at the start of any DDL command execution.
 *
 * The function specifically focuses on CREATE INDEX and CREATE TABLE statements,
 * aiming to determine if the created table, or the table on which an index is
 * being created, utilizes the pg_tde access method for encryption.
 * Once it confirms the table's encryption requirement or usage,
 * it updates the table information in the tdeCurrentCreateEvent global variable.
 * This information can be accessed by SMGR or any other component
 * during the execution of this DDL statement.
 *
 * Always returns NULL. Without PERCONA_FORK the function does nothing else.
 */
Datum
pg_tde_ddl_command_start_capture(PG_FUNCTION_ARGS)
{
	/* TODO: verify update_compare_indexes failure related to this */
#ifdef PERCONA_FORK
	EventTriggerData *trigdata;
	Node	   *parsetree;

	/* Ensure this function is being called as an event trigger */
	if (!CALLED_AS_EVENT_TRIGGER(fcinfo))	/* internal error */
		ereport(ERROR,
				(errmsg("Function can only be fired by event trigger manager")));

	trigdata = (EventTriggerData *) fcinfo->context;
	parsetree = trigdata->parsetree;

	elog(DEBUG2, "EVENT TRIGGER (%s) %s", trigdata->event, nodeToString(parsetree));

	/* Start from a clean state; stale data from an earlier DDL must not leak in */
	reset_current_tde_create_event();

	if (IsA(parsetree, IndexStmt))
	{
		IndexStmt  *stmt = (IndexStmt *) parsetree;
		Oid			relationId = RangeVarGetRelid(stmt->relation, NoLock, true);

		tdeCurrentCreateEvent.eventType = TDE_INDEX_CREATE_EVENT;
		tdeCurrentCreateEvent.baseTableOid = relationId;
		tdeCurrentCreateEvent.relation = stmt->relation;

		if (relationId != InvalidOid)
		{
			LOCKMODE	lockmode = AccessShareLock; /* TODO. Verify lock mode? */
			Relation	rel = table_open(relationId, lockmode);

			/*
			 * NOTE(review): the index path checks the "pg_tde" access method
			 * while the CREATE TABLE path below checks "pg_tde2" -- confirm
			 * that this difference is intended.
			 */
			if (rel->rd_rel->relam == get_tde_table_am_oid())
			{
				/* We are creating the index on encrypted table */
				/* set the global state */
				tdeCurrentCreateEvent.encryptMode = true;
			}

			/*
			 * Close the relation on every path. Previously it was closed
			 * only when the table was NOT encrypted, which left the
			 * relation open for encrypted tables.
			 */
			table_close(rel, lockmode);
		}
		else
			ereport(DEBUG1, (errmsg("Failed to get relation Oid for relation:%s", stmt->relation->relname)));
	}
	else if (IsA(parsetree, CreateStmt))
	{
		CreateStmt *stmt = (CreateStmt *) parsetree;

		tdeCurrentCreateEvent.eventType = TDE_TABLE_CREATE_EVENT;
		tdeCurrentCreateEvent.relation = stmt->relation;

		if (stmt->accessMethod && !strcmp(stmt->accessMethod, "pg_tde2"))
		{
			tdeCurrentCreateEvent.encryptMode = true;
		}
	}
#endif
	PG_RETURN_NULL();
}
/*
 * Trigger function called at the end of DDL statement execution.
 * It logs the captured event at DEBUG1 and then clears the
 * tdeCurrentCreateEvent global variable.
 *
 * Always returns NULL. Without PERCONA_FORK the function does nothing else.
 */
Datum
pg_tde_ddl_command_end_capture(PG_FUNCTION_ARGS)
{
#ifdef PERCONA_FORK
	/* Ensure this function is being called as an event trigger */
	if (!CALLED_AS_EVENT_TRIGGER(fcinfo))	/* internal error */
		ereport(ERROR,
				(errmsg("Function can only be fired by event trigger manager")));

	/* Oid is an unsigned type, so it is printed with %u rather than %d */
	elog(DEBUG1, "Type:%s EncryptMode:%s, Oid:%u, Relation:%s ",
		 (tdeCurrentCreateEvent.eventType == TDE_INDEX_CREATE_EVENT) ? "CREATE INDEX" :
		 (tdeCurrentCreateEvent.eventType == TDE_TABLE_CREATE_EVENT) ? "CREATE TABLE" : "UNKNOWN",
		 tdeCurrentCreateEvent.encryptMode ? "true" : "false",
		 tdeCurrentCreateEvent.baseTableOid,
		 tdeCurrentCreateEvent.relation ? tdeCurrentCreateEvent.relation->relname : "UNKNOWN");

	/* All we need to do is to clear the event state */
	reset_current_tde_create_event();
#endif
	PG_RETURN_NULL();
}
static void
reset_current_tde_create_event(void)
{
tdeCurrentCreateEvent.encryptMode = false;
tdeCurrentCreateEvent.eventType = TDE_UNKNOWN_CREATE_EVENT;
tdeCurrentCreateEvent.baseTableOid = InvalidOid;
tdeCurrentCreateEvent.relation = NULL;
}

@ -0,0 +1,213 @@
#include "smgr/pg_tde_smgr.h"
#include "postgres.h"
#include "storage/smgr.h"
#include "storage/md.h"
#include "catalog/catalog.h"
#include "encryption/enc_aes.h"
#include "access/pg_tde_tdemap.h"
#include "pg_tde_event_capture.h"
#ifdef PERCONA_FORK
// TODO: implement proper IV
// iv should be based on blocknum + relfile, available in the API
static char iv[16] = {0,};
/*
 * Returns the relation key to use for I/O on reln, or NULL when the relation
 * must be read/written without encryption.
 *
 * NULL is returned when:
 *   - the relation is a catalog relation,
 *   - this function is already active further up the call stack,
 *   - no master key is available for the relation's database.
 *
 * While an encrypted CREATE TABLE / CREATE INDEX is in progress (as recorded
 * in the current TdeCreateEvent) a new key map entry is created; otherwise
 * the existing key, if any, is looked up.
 */
static RelKeyData*
tde_smgr_get_key(SMgrRelation reln)
{
	/*
	 * TODO: This recursion counter is a dirty hack until the metadata is in
	 * the catalog, as otherwise we would call GetMasterKey recursively and
	 * deadlock.
	 *
	 * NOTE(review): the counter is not restored if one of the calls below
	 * leaves this function non-locally (e.g. via ereport(ERROR)) -- confirm
	 * whether that can happen.
	 */
	static int	recursion = 0;
	TdeCreateEvent *event;
	RelKeyData *rkd;

	/* do not try to encrypt/decrypt catalog tables */
	if (IsCatalogRelationOid(reln->smgr_rlocator.locator.relNumber))
		return NULL;

	if (recursion != 0)
		return NULL;

	recursion++;

	/*
	 * GetMasterKey() takes (dbOid, spcOid, keyring). The database OID of
	 * the locator must be passed here, not the relation number.
	 */
	if (GetMasterKey(reln->smgr_rlocator.locator.dbOid,
					 reln->smgr_rlocator.locator.spcOid,
					 NULL) == NULL)
	{
		recursion--;
		return NULL;
	}

	event = GetCurrentTdeCreateEvent();

	/*
	 * CREATE TABLE: we have to generate the key.
	 * CREATE INDEX: for now keep it simple and create a separate key for
	 * indexes. Later we might modify the map infrastructure to support the
	 * same keys.
	 */
	if (event->encryptMode == true &&
		(event->eventType == TDE_TABLE_CREATE_EVENT ||
		 event->eventType == TDE_INDEX_CREATE_EVENT))
	{
		recursion--;
		return pg_tde_create_key_map_entry(&reln->smgr_rlocator.locator);
	}

	/* otherwise, see if we have a key for the relation, and return it if yes */
	rkd = GetRelationKey(reln->smgr_rlocator.locator);
	recursion--;

	return rkd;
}
/*
 * smgr_writev callback: writes nblocks blocks starting at blocknum.
 *
 * When the relation has no key (tde_smgr_get_key() returns NULL) the buffers
 * are passed to mdwritev() unchanged. Otherwise every block is encrypted into
 * a temporary buffer and the encrypted copies are handed to mdwritev().
 *
 * The key lookup now happens before any allocation, so the unencrypted
 * pass-through path no longer leaks the scratch buffers, and allocation
 * failures are reported instead of dereferencing NULL.
 */
void
tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			 const void **buffers, BlockNumber nblocks, bool skipFsync)
{
	RelKeyData *rkd;
	char	   *local_blocks;
	char	   *local_blocks_aligned;
	const void **local_buffers;

	AesInit();

	rkd = tde_smgr_get_key(reln);
	if (rkd == NULL || nblocks == 0)
	{
		/* nothing to encrypt: plain write, no scratch memory needed */
		mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
		return;
	}

	/*
	 * One extra block of slack so the start can be rounded up to
	 * PG_IO_ALIGN_SIZE (assumes PG_IO_ALIGN_SIZE <= BLCKSZ -- TODO confirm).
	 */
	local_blocks = malloc((size_t) BLCKSZ * ((size_t) nblocks + 1));
	local_buffers = malloc(sizeof(*local_buffers) * (size_t) nblocks);
	if (local_blocks == NULL || local_buffers == NULL)
	{
		free(local_blocks);
		free(local_buffers);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	}

	local_blocks_aligned = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);

	for (BlockNumber i = 0; i < nblocks; ++i)
	{
		/* writable view of the slot; local_buffers[] itself is const void * */
		void	   *dst = &local_blocks_aligned[(size_t) i * BLCKSZ];
		int			out_len = BLCKSZ;

		local_buffers[i] = dst;
		AesEncrypt(rkd->internal_key.key, iv, ((char **) buffers)[i], BLCKSZ, dst, &out_len);
	}

	/*
	 * NOTE(review): if mdwritev() can exit non-locally (ereport(ERROR)), the
	 * malloc'd buffers below are not released -- confirm.
	 */
	mdwritev(reln, forknum, blocknum,
			 local_buffers, nblocks, skipFsync);

	free(local_blocks);
	free(local_buffers);
}
/*
 * smgr_extend callback: extends the relation fork with one block.
 *
 * When the relation has no key (tde_smgr_get_key() returns NULL) the buffer
 * is passed to mdextend() unchanged. Otherwise the block is encrypted into a
 * temporary buffer which is then handed to mdextend().
 *
 * The key lookup now happens before the allocation, so the unencrypted
 * pass-through path no longer leaks the scratch buffer, and an allocation
 * failure is reported instead of dereferencing NULL.
 */
void
tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			 const void *buffer, bool skipFsync)
{
	RelKeyData *rkd;
	char	   *local_blocks;
	char	   *local_blocks_aligned;
	int			out_len = BLCKSZ;

	AesInit();

	rkd = tde_smgr_get_key(reln);
	if (rkd == NULL)
	{
		/* nothing to encrypt: plain extend, no scratch memory needed */
		mdextend(reln, forknum, blocknum, buffer, skipFsync);
		return;
	}

	/*
	 * One extra block of slack so the start can be rounded up to
	 * PG_IO_ALIGN_SIZE (assumes PG_IO_ALIGN_SIZE <= BLCKSZ -- TODO confirm).
	 */
	local_blocks = malloc(BLCKSZ * (1 + 1));
	if (local_blocks == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	local_blocks_aligned = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);

	AesEncrypt(rkd->internal_key.key, iv, ((char *) buffer), BLCKSZ, local_blocks_aligned, &out_len);

	/*
	 * NOTE(review): if mdextend() can exit non-locally (ereport(ERROR)), the
	 * malloc'd buffer below is not released -- confirm.
	 */
	mdextend(reln, forknum, blocknum, local_blocks_aligned, skipFsync);

	free(local_blocks);
}
/*
 * smgr_readv callback: reads nblocks blocks via mdreadv() and, when the
 * relation has a key, decrypts each block in place.
 *
 * Postgres creates all-zero blocks in an optimized route, which we do not try
 * to encrypt. Instead a block is treated as such a zero block at decryption
 * time when its first 32 bytes are zero, and it is then left as is.
 * This could be a security issue later, but it is a good first prototype.
 */
void
tde_mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			void **buffers, BlockNumber nblocks)
{
	RelKeyData *rel_key;
	char	  **pages = (char **) buffers;

	AesInit();
	mdreadv(reln, forknum, blocknum, buffers, nblocks);

	rel_key = tde_smgr_get_key(reln);
	if (rel_key == NULL)
		return;

	for (int blk = 0; blk < nblocks; ++blk)
	{
		char	   *page = pages[blk];
		int			probe = 0;
		int			out_len;

		/* advance over the leading zero bytes, looking at no more than 32 */
		while (probe < 32 && page[probe] == 0)
			probe++;

		/* whole 32-byte prefix is zero: leave the block untouched */
		if (probe == 32)
			continue;

		out_len = BLCKSZ;
		AesDecrypt(rel_key->internal_key.key, iv, page, BLCKSZ, page, &out_len);
	}
}
/*
 * smgr_create callback.
 *
 * This is the only function that gets called during actual CREATE TABLE/INDEX
 * (EVENT TRIGGER), so we create the key here by loading it. Later calls then
 * decide to encrypt or not based on the existence of the key.
 */
void
tde_mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
	/* called only for its side effect of creating/loading the key */
	(void) tde_smgr_get_key(reln);

	/* a void function must not "return <expression>;" in ISO C */
	mdcreate(reln, forknum, isRedo);
}
/* Id returned by smgr_register() for the TDE storage manager */
static SMgrId tde_smgr_id;
/*
 * Callback table of the "tde" storage manager.
 *
 * Only create, extend, readv and writev are routed through the TDE wrappers
 * defined in this file; every other callback uses the md* function directly.
 */
static const struct f_smgr tde_smgr = {
.name = "tde",
.smgr_init = mdinit,
.smgr_shutdown = NULL,
.smgr_open = mdopen,
.smgr_close = mdclose,
.smgr_create = tde_mdcreate, /* creates/loads the relation key, then mdcreate */
.smgr_exists = mdexists,
.smgr_unlink = mdunlink,
.smgr_extend = tde_mdextend, /* encrypts the new block when a key exists */
.smgr_zeroextend = mdzeroextend,
.smgr_prefetch = mdprefetch,
.smgr_readv = tde_mdreadv, /* decrypts blocks after reading when a key exists */
.smgr_writev = tde_mdwritev, /* encrypts blocks before writing when a key exists */
.smgr_writeback = mdwriteback,
.smgr_nblocks = mdnblocks,
.smgr_truncate = mdtruncate,
.smgr_immedsync = mdimmedsync,
};
/*
 * Registers the "tde" storage manager with smgr_register() and stores the
 * resulting id in both tde_smgr_id and storage_manager_id.
 */
void RegisterStorageMgr(void)
{
	tde_smgr_id = smgr_register(&tde_smgr, 0);

	/* TODO: figure out how this part should work in a real extension */
	storage_manager_id = tde_smgr_id;
}
#else
/*
 * Build without PERCONA_FORK: there is no storage manager to register,
 * so this is intentionally a no-op.
 */
void RegisterStorageMgr(void)
{
}
#endif /* PERCONA_FORK */

@ -52,8 +52,6 @@ pg_tde_xact_callback(XactEvent event, void *arg)
{
pending_delete_cleanup();
}
pg_tde_cleanup_path_vars();
}
void

@ -1,13 +0,0 @@
CREATE EXTENSION pg_tde;
-- server restart
CREATE TABLE test_enc(id SERIAL,k INTEGER,PRIMARY KEY (id)) USING pg_tde;
INSERT INTO test_enc (k) VALUES (5),(6);
SELECT * FROM test_enc ORDER BY id ASC;
1|5
2|6
-- server restart
SELECT * FROM test_enc ORDER BY id ASC;
1|5
2|6
DROP TABLE test_enc;
DROP EXTENSION pg_tde;
Loading…
Cancel
Save