Merge pull request #227 from dutow/smgrmerge

Merge the smgr branch back to main
pull/209/head
Zsolt Parragi 1 year ago committed by GitHub
commit a21bfac9b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      Makefile.in
  2. 4
      meson.build
  3. 27
      pg_tde--1.0.sql
  4. 12
      src/access/pg_tde_prune.c
  5. 504
      src/access/pg_tde_tdemap.c
  6. 7
      src/access/pg_tde_vacuumlazy.c
  7. 294
      src/access/pg_tde_xlog.c
  8. 25
      src/access/pg_tdeam.c
  9. 10
      src/access/pg_tdeam_handler.c
  10. 248
      src/catalog/tde_global_catalog.c
  11. 3
      src/catalog/tde_keyring.c
  12. 130
      src/catalog/tde_master_key.c
  13. 6
      src/common/pg_tde_utils.c
  14. 17
      src/include/access/pg_tde_tdemap.h
  15. 29
      src/include/access/pg_tde_xlog.h
  16. 3
      src/include/access/pg_tdeam.h
  17. 41
      src/include/catalog/tde_global_catalog.h
  18. 7
      src/include/catalog/tde_keyring.h
  19. 10
      src/include/catalog/tde_master_key.h
  20. 1
      src/include/common/pg_tde_utils.h
  21. 3
      src/include/pg_tde_defines.h
  22. 33
      src/include/pg_tde_event_capture.h
  23. 4
      src/include/smgr/pg_tde_smgr.h
  24. BIN
      src/keyring/.keyring_api.c.swp
  25. 69
      src/keyring/keyring_file.c
  26. 24
      src/pg_tde.c
  27. 147
      src/pg_tde_event_capture.c
  28. 213
      src/smgr/pg_tde_smgr.c
  29. 2
      src/transam/pg_tde_xact_handler.c
  30. 13
      t/results/001_basic.out

@ -42,11 +42,14 @@ src/keyring/keyring_curl.o \
src/keyring/keyring_file.o \ src/keyring/keyring_file.o \
src/keyring/keyring_vault.o \ src/keyring/keyring_vault.o \
src/keyring/keyring_api.o \ src/keyring/keyring_api.o \
src/catalog/tde_global_catalog.o \
src/catalog/tde_keyring.o \ src/catalog/tde_keyring.o \
src/catalog/tde_master_key.o \ src/catalog/tde_master_key.o \
src/common/pg_tde_shmem.o \ src/common/pg_tde_shmem.o \
src/common/pg_tde_utils.o \ src/common/pg_tde_utils.o \
src/smgr/pg_tde_smgr.o \
src/pg_tde_defs.o \ src/pg_tde_defs.o \
src/pg_tde_event_capture.o \
src/pg_tde.o src/pg_tde.o
override PG_CPPFLAGS += @tde_CPPFLAGS@ override PG_CPPFLAGS += @tde_CPPFLAGS@

@ -39,12 +39,16 @@ pg_tde_sources = files(
'src/keyring/keyring_vault.c', 'src/keyring/keyring_vault.c',
'src/keyring/keyring_api.c', 'src/keyring/keyring_api.c',
'src/smgr/pg_tde_smgr.c',
'src/catalog/tde_global_catalog.c',
'src/catalog/tde_keyring.c', 'src/catalog/tde_keyring.c',
'src/catalog/tde_master_key.c', 'src/catalog/tde_master_key.c',
'src/common/pg_tde_shmem.c', 'src/common/pg_tde_shmem.c',
'src/common/pg_tde_utils.c', 'src/common/pg_tde_utils.c',
'src/pg_tde_defs.c', 'src/pg_tde_defs.c',
'src/pg_tde.c', 'src/pg_tde.c',
'src/pg_tde_event_capture.c',
) )
incdir = include_directories('src/include', '.') incdir = include_directories('src/include', '.')

@ -87,6 +87,12 @@ RETURNS table_am_handler
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE C; LANGUAGE C;
-- Table access method
CREATE FUNCTION pg_tde2am_handler(internal)
RETURNS table_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE FUNCTION pgtde_is_encrypted(table_name VARCHAR) CREATE FUNCTION pgtde_is_encrypted(table_name VARCHAR)
RETURNS boolean RETURNS boolean
AS $$ AS $$
@ -129,5 +135,26 @@ CREATE FUNCTION pg_tde_version() RETURNS TEXT AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE ACCESS METHOD pg_tde TYPE TABLE HANDLER pg_tdeam_handler; CREATE ACCESS METHOD pg_tde TYPE TABLE HANDLER pg_tdeam_handler;
COMMENT ON ACCESS METHOD pg_tde IS 'pg_tde table access method'; COMMENT ON ACCESS METHOD pg_tde IS 'pg_tde table access method';
CREATE ACCESS METHOD pg_tde2 TYPE TABLE HANDLER pg_tde2am_handler;
COMMENT ON ACCESS METHOD pg_tde2 IS 'pg_tde2 table access method';
-- Per database extension initialization -- Per database extension initialization
SELECT pg_tde_extension_initialize(); SELECT pg_tde_extension_initialize();
CREATE OR REPLACE FUNCTION pg_tde_ddl_command_start_capture()
RETURNS event_trigger
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE OR REPLACE FUNCTION pg_tde_ddl_command_end_capture()
RETURNS event_trigger
AS 'MODULE_PATHNAME'
LANGUAGE C;
CREATE EVENT TRIGGER pg_tde_trigger_create_index
ON ddl_command_start
EXECUTE FUNCTION pg_tde_ddl_command_start_capture();
CREATE EVENT TRIGGER pg_tde_trigger_create_index_2
ON ddl_command_end
EXECUTE FUNCTION pg_tde_ddl_command_end_capture();

@ -127,6 +127,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
if (RecoveryInProgress()) if (RecoveryInProgress())
return; return;
#if PG_VERSION_NUM < 170000
/* /*
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
* currently are broken, and discussion of what to do about them is * currently are broken, and discussion of what to do about them is
@ -135,7 +136,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
*/ */
if (old_snapshot_threshold == 0) if (old_snapshot_threshold == 0)
SnapshotTooOldMagicForTest(); SnapshotTooOldMagicForTest();
#endif
/* /*
* First check whether there's any chance there's something to prune, * First check whether there's any chance there's something to prune,
* determining the appropriate horizon is a waste if there's no prune_xid * determining the appropriate horizon is a waste if there's no prune_xid
@ -166,14 +167,14 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer)
if (!GlobalVisTestIsRemovableXid(vistest, prune_xid)) if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
{ {
if (!OldSnapshotThresholdActive()) #if PG_VERSION_NUM < 170000
if ( !OldSnapshotThresholdActive())
return; return;
if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest), if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
relation, relation,
&limited_xmin, &limited_ts)) &limited_xmin, &limited_ts))
return; return;
#endif
if (!TransactionIdPrecedes(prune_xid, limited_xmin)) if (!TransactionIdPrecedes(prune_xid, limited_xmin))
return; return;
} }
@ -539,6 +540,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
*/ */
if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after)) if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
res = HEAPTUPLE_DEAD; res = HEAPTUPLE_DEAD;
#if PG_VERSION_NUM < 170000
else if (OldSnapshotThresholdActive()) else if (OldSnapshotThresholdActive())
{ {
/* haven't determined limited horizon yet, requests */ /* haven't determined limited horizon yet, requests */
@ -566,7 +568,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
res = HEAPTUPLE_DEAD; res = HEAPTUPLE_DEAD;
} }
} }
#endif
return res; return res;
} }

File diff suppressed because it is too large Load Diff

@ -2828,8 +2828,11 @@ should_attempt_truncation(LVRelState *vacrel)
{ {
BlockNumber possibly_freeable; BlockNumber possibly_freeable;
if (!vacrel->do_rel_truncate || VacuumFailsafeActive || if (!vacrel->do_rel_truncate || VacuumFailsafeActive
old_snapshot_threshold >= 0) #if PG_VERSION_NUM < 170000
|| old_snapshot_threshold >= 0
#endif
)
return false; return false;
possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages; possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;

@ -12,13 +12,34 @@
#include "postgres.h" #include "postgres.h"
#include "pg_tde_defines.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "access/xloginsert.h" #include "access/xloginsert.h"
#include "catalog/pg_tablespace_d.h"
#include "storage/bufmgr.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "access/pg_tde_tdemap.h" #include "access/pg_tde_tdemap.h"
#include "access/pg_tde_xlog.h" #include "access/pg_tde_xlog.h"
#include "catalog/tde_master_key.h" #include "encryption/enc_tde.h"
#ifdef PERCONA_FORK
#include "catalog/tde_global_catalog.h"
static char *TDEXLogEncryptBuf = NULL;
/* GUC */
static bool EncryptXLog = false;
static XLogPageHeaderData EncryptCurrentPageHrd;
static XLogPageHeaderData DecryptCurrentPageHrd;
static ssize_t TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset);
static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix);
static int XLOGChooseNumBuffers(void);
#endif
/* /*
* TDE fork XLog * TDE fork XLog
@ -103,3 +124,274 @@ pg_tde_rmgr_identify(uint8 info)
return NULL; return NULL;
} }
#ifdef PERCONA_FORK
/*
* -------------------------
* XLog Storage Manager
*/
/*
 * Register the "pg_tde.wal_encrypt" boolean GUC.
 *
 * The setting is stored in EncryptXLog, boots as false and is registered
 * with PGC_POSTMASTER context.  No flags and no check/assign/show hooks
 * are installed.
 */
void
XLogInitGUC(void)
{
	DefineCustomBoolVariable("pg_tde.wal_encrypt",
							 "Enable/Disable encryption of WAL.",
							 NULL,
							 &EncryptXLog,
							 false,
							 PGC_POSTMASTER,
							 0,
							 NULL, NULL, NULL);
}
static int
XLOGChooseNumBuffers(void)
{
int xbuffers;
xbuffers = NBuffers / 32;
if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
xbuffers = (wal_segment_size / XLOG_BLCKSZ);
if (xbuffers < 8)
xbuffers = 8;
return xbuffers;
}
/*
* Defines the size of the XLog encryption buffer
*/
Size
TDEXLogEncryptBuffSize(void)
{
int xbuffers;
xbuffers = (XLOGbuffers == -1) ? XLOGChooseNumBuffers() : XLOGbuffers;
return (Size) XLOG_BLCKSZ * xbuffers;
}
/*
 * Set up the shared-memory buffer used to hold encrypted XLog pages.
 *
 * The buffer has to be able to hold the XLog buffers
 * (XLOG_BLCKSZ * wal_buffers).  It cannot be (re)allocated from
 * pg_tde_xlog_seg_write() according to the actual write size, because that
 * code runs inside a critical section where allocations are not allowed.
 *
 * The buffer is only touched during XLogWrite(), which is expected to run
 * with WALWriteLock held, so no additional locking is done here.
 *
 * Nothing is allocated when WAL encryption is disabled.
 */
void
TDEXLogShmemInit(void)
{
	bool		found;
	void	   *raw_buf;

	if (!EncryptXLog)
		return;

	raw_buf = ShmemInitStruct("TDE XLog Encryption Buffer",
							  XLOG_TDE_ENC_BUFF_ALIGNED_SIZE,
							  &found);

	/* round the start of the buffer up to the I/O alignment boundary */
	TDEXLogEncryptBuf = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, raw_buf);

	elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
}
/*
 * Install the TDE XLog storage manager by passing tde_xlog_smgr to
 * SetXLogSmgr().
 *
 * NOTE(review): tde_xlog_smgr is defined outside this view; presumably it
 * points at pg_tde_xlog_seg_read()/pg_tde_xlog_seg_write() -- confirm.
 */
void
TDEXLogSmgrInit(void)
{
SetXLogSmgr(&tde_xlog_smgr);
}
/*
 * Write "count" bytes from "buf" to the WAL segment file "fd" at "offset".
 *
 * With WAL encryption disabled this is a plain pg_pwrite(); otherwise the
 * pages are encrypted first by TDEXLogWriteEncryptedPages().  Returns
 * whatever the chosen writer returns.
 */
ssize_t
pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset)
{
	if (!EncryptXLog)
		return pg_pwrite(fd, buf, count, offset);

	return TDEXLogWriteEncryptedPages(fd, buf, count, offset);
}
/*
 * Encrypt XLog page(s) from buf and write them to the segment file.
 *
 * fd, buf, count and offset have the same meaning as for pg_pwrite().  The
 * encrypted image is assembled in TDEXLogEncryptBuf at the same relative
 * offsets as in buf and then written with a single pg_pwrite(), whose result
 * is returned.
 *
 * Page headers are copied unencrypted; for pages that carry data the
 * XLP_ENCRYPTED flag is set in the copy of the header.  Only the payload
 * after the header is encrypted.  The header of the page currently being
 * processed is kept in the static EncryptCurrentPageHrd so that a later call
 * which continues in the middle of the same page can still derive the IV.
 */
static ssize_t
TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset)
{
	char		iv_prefix[16] = {0,};
	size_t		data_size = 0;
	XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd;
	RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID);
	off_t		enc_off;
	size_t		page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
	uint32		iv_ctr = 0;

#ifdef TDE_XLOG_DEBUG
	elog(DEBUG1, "write encrypted WAL, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset);
#endif

	/*
	 * Go through the buf page-by-page and encrypt them. We may start or
	 * finish writing from/in the middle of the page (walsender or
	 * `full_page_writes = off`). So preserve a page header for the IV init
	 * data.
	 *
	 * TODO: check if walsender restarts from the beginning of the page in
	 * case of the crash.
	 */
	for (enc_off = 0; enc_off < count;)
	{
		data_size = Min(page_size, count);

		if (page_size == XLOG_BLCKSZ)
		{
			XLogPageHeader enc_buf_page;

			memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD);

			/*
			 * Need to use a separate buf for the encryption so the page
			 * remains non-crypted in the XLog buf (XLogInsert has to have
			 * access to records' lsn).
			 */
			enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off);
			memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) XLogPageHeaderSize(curr_page_hdr));

			/*
			 * A zeroed page is written out as is (see below), so it must not
			 * be flagged as encrypted.
			 */
			if (curr_page_hdr->xlp_magic != 0)
				enc_buf_page->xlp_info |= XLP_ENCRYPTED;

			enc_off += XLogPageHeaderSize(curr_page_hdr);
			data_size -= XLogPageHeaderSize(curr_page_hdr);
			/* it's a beginning of the page */
			iv_ctr = 0;
		}
		else
		{
			/* we're in the middle of the page */
			iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
		}

		/* never go past the end of the caller's data */
		if (data_size + enc_off > count)
		{
			data_size = count - enc_off;
		}

		/*
		 * The page is zeroed (no data), no sense to encrypt. This may happen
		 * when base_backup or other requests XLOG SWITCH and some pages in
		 * XLog buffer still not used.
		 */
		if (curr_page_hdr->xlp_magic == 0)
		{
			/* ensure all the page is {0} */
			Assert(data_size == 0 ||
				   ((*((char *) buf + enc_off) == 0) &&
					memcmp((char *) buf + enc_off, (char *) buf + enc_off + 1, data_size - 1) == 0));

			/*
			 * Copy the payload to the matching position of the output
			 * buffer.  Copying it to the start of the page instead would
			 * leave the tail of the page in the output buffer holding stale
			 * bytes from an earlier write.
			 */
			memcpy(TDEXLogEncryptBuf + enc_off, (char *) buf + enc_off, data_size);
		}
		else
		{
			SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
			PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size,
								TDEXLogEncryptBuf + enc_off, key);
		}

		/* every following page is processed from its beginning */
		page_size = XLOG_BLCKSZ;
		enc_off += data_size;
	}

	return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset);
}
/*
 * Read XLog pages from the segment file and decrypt them if needed.
 *
 * Reads up to "count" bytes at "offset" from "fd" into "buf" with pg_pread()
 * and then walks the result page by page.  For every page whose header has
 * XLP_ENCRYPTED set, the payload after the header is decrypted in place and
 * the flag is cleared in the header stored in "buf".
 *
 * Returns the value returned by pg_pread().  A zero or negative result
 * skips the page loop entirely.
 */
ssize_t
pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset)
{
ssize_t readsz;
char iv_prefix[16] = {0,};
size_t data_size = 0;
/* static storage: keeps the last seen page header across calls */
XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd;
RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID);
/* bytes left in the first page, which may be entered mid-page */
size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ;
off_t dec_off;
uint32 iv_ctr = 0;
#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "read from a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset);
#endif
readsz = pg_pread(fd, buf, count, offset);
/*
 * Read the buf page by page and decrypt encrypted pages.
 * We may start or finish reading from/in the middle of the page (walreceiver)
 * in such a case we should preserve the last read page header for
 * the IV data and the encryption state.
 *
 * TODO: check if walsender/receiver restarts from the beginning of the page
 * in case of the crash.
 */
for (dec_off = 0; dec_off < readsz;)
{
data_size = Min(page_size, readsz);
if (page_size == XLOG_BLCKSZ)
{
/* remember the on-disk header (still carrying XLP_ENCRYPTED, if set) */
memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD);
/* set the flag to "not encrypted" for the walreceiver */
((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED;
Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0);
/*
 * NOTE(review): if fewer bytes than the page header size remain in the
 * read, the subtraction below wraps around (data_size is unsigned) --
 * confirm callers never issue such short reads.
 */
dec_off += XLogPageHeaderSize(curr_page_hdr);
data_size -= XLogPageHeaderSize(curr_page_hdr);
/* it's a beginning of the page */
iv_ctr = 0;
}
else
{
/*
 * we're in the middle of the page; the header size comes from the
 * header preserved by a previous pass or call
 */
iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr);
}
/* do not go past the end of what was actually read */
if ((data_size + dec_off) > readsz)
{
data_size = readsz - dec_off;
}
if (curr_page_hdr->xlp_info & XLP_ENCRYPTED)
{
SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix);
/* decrypt in place: source and destination are the same region of buf */
PG_TDE_DECRYPT_DATA(
iv_prefix, iv_ctr,
(char *) buf + dec_off, data_size, (char *) buf + dec_off, key);
}
/* every following page is processed from its beginning */
page_size = XLOG_BLCKSZ;
dec_off += data_size;
}
return readsz;
}
/*
 * Fill the first 12 bytes of iv_prefix with the big-endian encoding of
 * the timeline id (uint32, bytes 0..3) followed by the LSN (uint64,
 * bytes 4..11).  Remaining bytes of iv_prefix are left untouched.
 */
static void
SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix)
{
	int			pos = 0;
	int			shift;

	/* timeline id, most significant byte first */
	for (shift = 24; shift >= 0; shift -= 8)
		iv_prefix[pos++] = ((tli >> shift) & 0xFF);

	/* LSN, most significant byte first */
	for (shift = 56; shift >= 0; shift -= 8)
		iv_prefix[pos++] = ((lsn >> shift) & 0xFF);
}
#endif

@ -431,7 +431,9 @@ pg_tde_getpage(TableScanDesc sscan, BlockNumber block)
LockBuffer(buffer, BUFFER_LOCK_SHARE); LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page); TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page);
#endif
lines = PageGetMaxOffsetNumber(page); lines = PageGetMaxOffsetNumber(page);
ntup = 0; ntup = 0;
@ -570,9 +572,9 @@ pg_tde_gettup_start_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */ /* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf); page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
*linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1; *linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1;
if (ScanDirectionIsForward(dir)) if (ScanDirectionIsForward(dir))
@ -603,9 +605,9 @@ pg_tde_gettup_continue_page(HeapScanDesc scan, ScanDirection dir, int *linesleft
/* Caller is responsible for ensuring buffer is locked if needed */ /* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf); page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
if (ScanDirectionIsForward(dir)) if (ScanDirectionIsForward(dir))
{ {
*lineoff = OffsetNumberNext(scan->rs_coffset); *lineoff = OffsetNumberNext(scan->rs_coffset);
@ -870,8 +872,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan,
/* continue from previously returned page/tuple */ /* continue from previously returned page/tuple */
block = scan->rs_cblock; /* current page */ block = scan->rs_cblock; /* current page */
page = BufferGetPage(scan->rs_cbuf); page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
lineindex = scan->rs_cindex + dir; lineindex = scan->rs_cindex + dir;
if (ScanDirectionIsForward(dir)) if (ScanDirectionIsForward(dir))
linesleft = scan->rs_ntuples - lineindex; linesleft = scan->rs_ntuples - lineindex;
@ -890,7 +893,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan,
{ {
pg_tde_getpage((TableScanDesc) scan, block); pg_tde_getpage((TableScanDesc) scan, block);
page = BufferGetPage(scan->rs_cbuf); page = BufferGetPage(scan->rs_cbuf);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
#endif
linesleft = scan->rs_ntuples; linesleft = scan->rs_ntuples;
lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1; lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1;
@ -1107,10 +1112,10 @@ pg_tde_getnext(TableScanDesc sscan, ScanDirection direction)
* rather than the AM oid, is that this allows to write regression tests * rather than the AM oid, is that this allows to write regression tests
* that create another AM reusing the heap handler. * that create another AM reusing the heap handler.
*/ */
if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine())) if (unlikely(sscan->rs_rd->rd_tableam != GetPGTdeamTableAmRoutine()))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("only heap AM is supported"))); errmsg_internal("only pg_tde AM is supported")));
/* /*
* We don't expect direct calls to pg_tde_getnext with valid CheckXidAlive * We don't expect direct calls to pg_tde_getnext with valid CheckXidAlive
@ -1380,8 +1385,9 @@ pg_tde_fetch(Relation relation,
*/ */
LockBuffer(buffer, BUFFER_LOCK_SHARE); LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, relation, page); TestForOldSnapshot(snapshot, relation, page);
#endif
/* /*
* We'd better check for out-of-range offnum in case of VACUUM since the * We'd better check for out-of-range offnum in case of VACUUM since the
* TID was obtained. * TID was obtained.
@ -1671,8 +1677,9 @@ pg_tde_get_latest_tid(TableScanDesc sscan,
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid)); buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
LockBuffer(buffer, BUFFER_LOCK_SHARE); LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
#if PG_VERSION_NUM < 170000
TestForOldSnapshot(snapshot, relation, page); TestForOldSnapshot(snapshot, relation, page);
#endif
/* /*
* Check for bogus item number. This is not treated as an error * Check for bogus item number. This is not treated as an error
* condition because it can happen while following a t_ctid link. We * condition because it can happen while following a t_ctid link. We

@ -55,6 +55,7 @@
#include "utils/rel.h" #include "utils/rel.h"
PG_FUNCTION_INFO_V1(pg_tdeam_handler); PG_FUNCTION_INFO_V1(pg_tdeam_handler);
PG_FUNCTION_INFO_V1(pg_tde2am_handler);
static void reform_and_rewrite_tuple(HeapTuple tuple, static void reform_and_rewrite_tuple(HeapTuple tuple,
@ -645,7 +646,7 @@ pg_tdeam_relation_set_new_filelocator(Relation rel,
ereport(DEBUG1, ereport(DEBUG1,
(errmsg("creating key file for relation %s", RelationGetRelationName(rel)))); (errmsg("creating key file for relation %s", RelationGetRelationName(rel))));
pg_tde_create_key_map_entry(newrlocator, rel); pg_tde_create_key_map_entry(newrlocator);
} }
} }
@ -2633,7 +2634,6 @@ static const TableAmRoutine pg_tdeam_methods = {
.scan_sample_next_tuple = pg_tdeam_scan_sample_next_tuple .scan_sample_next_tuple = pg_tdeam_scan_sample_next_tuple
}; };
const TableAmRoutine * const TableAmRoutine *
GetPGTdeamTableAmRoutine(void) GetPGTdeamTableAmRoutine(void)
{ {
@ -2646,6 +2646,12 @@ pg_tdeam_handler(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(&pg_tdeam_methods); PG_RETURN_POINTER(&pg_tdeam_methods);
} }
/*
 * Handler function for the "pg_tde2" table access method.
 *
 * Returns the routine table obtained from GetHeapamTableAmRoutine() rather
 * than pg_tdeam_methods.
 */
Datum
pg_tde2am_handler(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(GetHeapamTableAmRoutine());
}
bool bool
is_pg_tde_rel(Relation rel) is_pg_tde_rel(Relation rel)
{ {

@ -0,0 +1,248 @@
/*-------------------------------------------------------------------------
*
* tde_global_catalog.c
* Global catalog key management
*
*
* IDENTIFICATION
* src/catalog/tde_global_catalog.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef PERCONA_FORK
#include "storage/shmem.h"
#include "utils/guc.h"
#include "access/pg_tde_tdemap.h"
#include "catalog/tde_global_catalog.h"
#include "catalog/tde_keyring.h"
#include "catalog/tde_master_key.h"
#include <openssl/rand.h>
#include <openssl/err.h>
#include <sys/time.h>
#define MASTER_KEY_DEFAULT_NAME "tde-global-catalog-key"
/* TODO: not sure if we need an option of multiple master keys for the global catalog */
/*
 * Slots of the global catalog master key cache.  Each value is an index
 * into EncryptionStateData.master_keys.
 */
typedef enum
{
/* master key slot used for the XLog internal key */
TDE_GCAT_XLOG_KEY,
/* must be last */
TDE_GCAT_KEYS_COUNT
} GlobalCatalogKeyTypes;
/*
 * Global catalog encryption state, allocated by TDEGlCatShmemInit() via
 * ShmemInitStruct().
 */
typedef struct EncryptionStateData
{
/* keyring description; points into the same allocation, after this struct */
GenericKeyring *keyring;
/* cached master keys, indexed by GlobalCatalogKeyTypes */
TDEMasterKey master_keys[TDE_GCAT_KEYS_COUNT];
} EncryptionStateData;
static EncryptionStateData * EncryptionState = NULL;
/* GUC */
static char *KRingProviderType = NULL;
static char *KRingProviderFilePath = NULL;
static void init_gl_catalog_keys(void);
static void init_keyring(void);
static TDEMasterKey * create_master_key(const char *key_name,
GenericKeyring * keyring, Oid dbOid, Oid spcOid,
bool ensure_new_key);
/*
 * Register the GUCs that describe the keyring used for the global catalog:
 * "pg_tde.global_keyring_type" and "pg_tde.global_keyring_file_path".
 *
 * Both are string settings with a NULL boot value, PGC_POSTMASTER context,
 * no flags and no hooks.
 */
void
TDEGlCatInitGUC(void)
{
	DefineCustomStringVariable("pg_tde.global_keyring_type",	/* name */
							   "Keyring type for global catalog",	/* short_desc */
							   NULL,	/* long_desc */
							   &KRingProviderType,	/* value address */
							   NULL,	/* boot value */
							   PGC_POSTMASTER,	/* context */
							   0,	/* flags */
							   NULL,	/* check_hook */
							   NULL,	/* assign_hook */
							   NULL);	/* show_hook */

	DefineCustomStringVariable("pg_tde.global_keyring_file_path",	/* name */
							   "Keyring file options for global catalog",	/* short_desc */
							   NULL,	/* long_desc */
							   &KRingProviderFilePath,	/* value address */
							   NULL,	/* boot value */
							   PGC_POSTMASTER,	/* context */
							   0,	/* flags */
							   NULL,	/* check_hook */
							   NULL,	/* assign_hook */
							   NULL);	/* show_hook */
}
/*
 * Amount of shared memory needed for the global catalog encryption state.
 *
 * The layout matches TDEGlCatShmemInit(): an EncryptionStateData padded to
 * a MAXALIGN boundary, followed by room for a KeyringProviders.  The struct
 * size has to be aligned before the keyring size is added, otherwise the
 * request may be smaller than what the initializer actually uses.
 */
Size
TDEGlCatEncStateSize(void)
{
	Size		size;

	/* the keyring area starts at the next MAXALIGN boundary */
	size = MAXALIGN(sizeof(EncryptionStateData));
	size = add_size(size, sizeof(KeyringProviders));

	return MAXALIGN(size);
}
/*
 * Attach to (and, on first use, initialize) the shared global catalog
 * encryption state.
 *
 * The allocation holds an EncryptionStateData followed, at the next
 * MAXALIGN boundary, by the keyring description that
 * EncryptionState->keyring points to.
 *
 * The contents are zeroed only when the segment has just been created.
 * If ShmemInitStruct() reports that the structure already exists, it is
 * left alone so that an already configured keyring and cached master keys
 * are not wiped.
 */
void
TDEGlCatShmemInit(void)
{
	bool		found;
	char	   *allocptr;

	EncryptionState = (EncryptionStateData *)
		ShmemInitStruct("TDE XLog Encryption State",
						TDEGlCatEncStateSize(), &found);

	/* somebody already initialized the state, just use it */
	if (found)
		return;

	allocptr = ((char *) EncryptionState) + MAXALIGN(sizeof(EncryptionStateData));
	EncryptionState->keyring = (GenericKeyring *) allocptr;

	memset(EncryptionState->keyring, 0, sizeof(KeyringProviders));
	memset(EncryptionState->master_keys, 0, sizeof(TDEMasterKey) * TDE_GCAT_KEYS_COUNT);
}
void
TDEGlCatKeyInit(void)
{
char db_map_path[MAXPGPATH] = {0};
init_keyring();
pg_tde_set_db_file_paths(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
db_map_path, NULL);
if (access(db_map_path, F_OK) == -1)
{
init_gl_catalog_keys();
}
else
{
/* put an internal key into the cache */
GetGlCatInternalKey(XLOG_TDE_OID);
}
}
/*
 * Return the cached global catalog master key (the TDE_GCAT_XLOG_KEY
 * slot), or NULL when the slot is still empty, i.e. its keyLength is 0.
 */
TDEMasterKey *
TDEGetGlCatKeyFromCache(void)
{
	TDEMasterKey *slot = EncryptionState->master_keys + TDE_GCAT_XLOG_KEY;

	return (slot->keyLength != 0) ? slot : NULL;
}
/*
 * Copy *mkey into the TDE_GCAT_XLOG_KEY slot of the global catalog master
 * key cache.  The cache keeps its own copy; mkey itself is not retained.
 */
void
TDEPutGlCatKeyInCache(TDEMasterKey * mkey)
{
	TDEMasterKey *slot = &EncryptionState->master_keys[TDE_GCAT_XLOG_KEY];

	memcpy(slot, mkey, sizeof(TDEMasterKey));
}
/*
 * Look up the internal key of the global-space object obj_id, using the
 * global catalog keyring.  The result is whatever
 * GetRelationKeyWithKeyring() returns.
 */
RelKeyData *
GetGlCatInternalKey(Oid obj_id)
{
	GenericKeyring *gl_keyring = EncryptionState->keyring;

	return GetRelationKeyWithKeyring(GLOBAL_SPACE_RLOCATOR(obj_id), gl_keyring);
}
/*
* TODO: should be aligned with the rest of the keyring_provider code after its
* refactoring
*
* TODO: add Vault
*/
static void
init_keyring(void)
{
EncryptionState->keyring->type = get_keyring_provider_from_typename(KRingProviderType);
switch (EncryptionState->keyring->type)
{
case FILE_KEY_PROVIDER:
FileKeyring * kring = (FileKeyring *) EncryptionState->keyring;
strncpy(kring->file_name, KRingProviderFilePath, sizeof(kring->file_name));
break;
}
}
/*
 * Create the global catalog master key and the internal key for XLog.
 *
 * The master key is obtained through create_master_key().  A random
 * internal key is generated, wrapped with the master key, written to the
 * key map, and put into the internal key map; finally the master key is
 * stored in the global catalog cache.
 *
 * Failure to generate random bytes is reported as FATAL.
 *
 * Keys are created during the cluster start only, so no locks needed here.
 */
static void
init_gl_catalog_keys(void)
{
	InternalKey int_key;
	RelKeyData *rel_key_data;
	RelKeyData *enc_rel_key_data;
	RelFileLocator *rlocator;
	TDEMasterKey *mkey;

	mkey = create_master_key(MASTER_KEY_DEFAULT_NAME,
							 EncryptionState->keyring,
							 GLOBAL_DATA_TDE_OID, GLOBALTABLESPACE_OID, false);

	memset(&int_key, 0, sizeof(InternalKey));

	/*
	 * Create and store an internal key for XLog.
	 *
	 * RAND_bytes() returns 1 on success only; both 0 and -1 are failures, so
	 * test for "!= 1" rather than for a zero result.
	 */
	if (RAND_bytes(int_key.key, INTERNAL_KEY_LEN) != 1)
	{
		ereport(FATAL,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("could not generate internal key for \"WAL\": %s",
						ERR_error_string(ERR_get_error(), NULL))));
	}

	rlocator = &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID);
	rel_key_data = tde_create_rel_key(rlocator->relNumber, &int_key, &mkey->keyInfo);
	enc_rel_key_data = tde_encrypt_rel_key(mkey, rel_key_data, rlocator);
	pg_tde_write_key_map_entry(rlocator, enc_rel_key_data, &mkey->keyInfo);

	/*
	 * TODO: move global catalog internal keys into own cache. This cache
	 * should be in the TopMemoryContext because of SSL contexts (see
	 * https://github.com/Percona-Lab/pg_tde/pull/214#discussion_r1648998317)
	 */
	pg_tde_put_key_into_map(rlocator->relNumber, rel_key_data);
	TDEPutGlCatKeyInCache(mkey);
}
/*
 * Build a master key named key_name for the given database/tablespace.
 *
 * The key material is taken from the keyring: first the latest versioned
 * key is looked up via load_latest_versioned_key_name(); if that yields
 * nothing, a new key is generated and stored in the keyring.  An ERROR is
 * raised if no key could be obtained either way.
 *
 * Returns a palloc'd TDEMasterKey.  The structure is zeroed before it is
 * filled in, so fields and padding not set here hold zeroes and the key
 * name is always NUL-terminated.
 */
static TDEMasterKey *
create_master_key(const char *key_name, GenericKeyring * keyring,
				  Oid dbOid, Oid spcOid, bool ensure_new_key)
{
	TDEMasterKey *masterKey;
	keyInfo    *key_info = NULL;

	masterKey = palloc(sizeof(TDEMasterKey));
	/* the struct gets copied around and persisted; do not leave garbage in it */
	memset(masterKey, 0, sizeof(TDEMasterKey));

	masterKey->keyInfo.databaseId = dbOid;
	masterKey->keyInfo.tablespaceId = spcOid;
	masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION;
	masterKey->keyInfo.keyringId = keyring->key_id;

	/* leave the last byte alone so the (zeroed) name stays terminated */
	strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN - 1);
	gettimeofday(&masterKey->keyInfo.creationTime, NULL);

	key_info = load_latest_versioned_key_name(&masterKey->keyInfo, keyring, ensure_new_key);

	/* nothing usable in the keyring yet: create and store a fresh key */
	if (key_info == NULL)
		key_info = KeyringGenerateNewKeyAndStore(keyring, masterKey->keyInfo.keyId.versioned_name, INTERNAL_KEY_LEN, false);

	if (key_info == NULL)
	{
		ereport(ERROR,
				(errmsg("failed to retrieve master key")));
	}

	masterKey->keyLength = key_info->data.len;
	memcpy(masterKey->keyData, key_info->data.data, key_info->data.len);

	return masterKey;
}
#endif /* PERCONA_FORK */

@ -50,13 +50,12 @@ PG_FUNCTION_INFO_V1(keyring_delete_dependency_check_trigger);
#define FILE_KEYRING_TYPE_KEY "type" #define FILE_KEYRING_TYPE_KEY "type"
static FileKeyring *load_file_keyring_provider_options(Datum keyring_options); static FileKeyring *load_file_keyring_provider_options(Datum keyring_options);
static ProviderType get_keyring_provider_from_typename(char *provider_type);
static GenericKeyring *load_keyring_provider_options(ProviderType provider_type, Datum keyring_options); static GenericKeyring *load_keyring_provider_options(ProviderType provider_type, Datum keyring_options);
static VaultV2Keyring *load_vaultV2_keyring_provider_options(Datum keyring_options); static VaultV2Keyring *load_vaultV2_keyring_provider_options(Datum keyring_options);
static void debug_print_kerying(GenericKeyring *keyring); static void debug_print_kerying(GenericKeyring *keyring);
static GenericKeyring *load_keyring_provider_from_tuple(HeapTuple tuple, TupleDesc tupDesc); static GenericKeyring *load_keyring_provider_from_tuple(HeapTuple tuple, TupleDesc tupDesc);
static ProviderType ProviderType
get_keyring_provider_from_typename(char *provider_type) get_keyring_provider_from_typename(char *provider_type)
{ {
if (provider_type == NULL) if (provider_type == NULL)

@ -29,8 +29,9 @@
#include <sys/time.h> #include <sys/time.h>
#include "access/pg_tde_tdemap.h" #include "access/pg_tde_tdemap.h"
#ifdef PERCONA_FORK
#define DEFAULT_MASTER_KEY_VERSION 1 #include "catalog/tde_global_catalog.h"
#endif
typedef struct TdeMasterKeySharedState typedef struct TdeMasterKeySharedState
{ {
@ -67,12 +68,10 @@ static Size required_shared_mem_size(void);
static int required_locks_count(void); static int required_locks_count(void);
static void shared_memory_shutdown(int code, Datum arg); static void shared_memory_shutdown(int code, Datum arg);
static void master_key_startup_cleanup(int tde_tbl_count, void *arg); static void master_key_startup_cleanup(int tde_tbl_count, void *arg);
static keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key); static void clear_master_key_cache(Oid databaseId) ;
static void clear_master_key_cache(Oid databaseId, Oid tablespaceId) ;
static inline dshash_table *get_master_key_Hash(void); static inline dshash_table *get_master_key_Hash(void);
static TDEMasterKey *get_master_key_from_cache(Oid dbOid); static TDEMasterKey *get_master_key_from_cache(Oid dbOid);
static void push_master_key_to_cache(TDEMasterKey *masterKey); static void push_master_key_to_cache(TDEMasterKey *masterKey);
static TDEMasterKey *set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key);
static const TDEShmemSetupRoutine master_key_info_shmem_routine = { static const TDEShmemSetupRoutine master_key_info_shmem_routine = {
.init_shared_state = initialize_shared_state, .init_shared_state = initialize_shared_state,
@ -214,23 +213,41 @@ save_master_key_info(TDEMasterKeyInfo *master_key_info)
* throws an error. * throws an error.
*/ */
TDEMasterKey * TDEMasterKey *
GetMasterKey(void) GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring)
{ {
TDEMasterKey *masterKey = NULL; TDEMasterKey *masterKey = NULL;
TDEMasterKeyInfo *masterKeyInfo = NULL; TDEMasterKeyInfo *masterKeyInfo = NULL;
GenericKeyring *keyring = NULL;
const keyInfo *keyInfo = NULL; const keyInfo *keyInfo = NULL;
KeyringReturnCodes keyring_ret; KeyringReturnCodes keyring_ret;
Oid dbOid = MyDatabaseId;
LWLock *lock_files = tde_lwlock_mk_files(); LWLock *lock_files = tde_lwlock_mk_files();
LWLock *lock_cache = tde_lwlock_mk_cache(); LWLock *lock_cache = tde_lwlock_mk_cache();
// TODO: This recursion counter is a dirty hack until the metadata is in the catalog
// As otherwise we would call GetMasterKey recursively and deadlock
static int recursion = 0;
if(recursion > 0)
{
return NULL;
}
recursion++;
LWLockAcquire(lock_cache, LW_SHARED); LWLockAcquire(lock_cache, LW_SHARED);
masterKey = get_master_key_from_cache(dbOid); #ifdef PERCONA_FORK
/* Global catalog has its own cache */
if (spcOid == GLOBALTABLESPACE_OID)
masterKey = TDEGetGlCatKeyFromCache();
else
#endif
masterKey = get_master_key_from_cache(dbOid);
LWLockRelease(lock_cache); LWLockRelease(lock_cache);
if (masterKey) if (masterKey)
{
recursion--;
return masterKey; return masterKey;
}
/* /*
* We should hold an exclusive lock here to ensure that a valid master key, if found, is added * We should hold an exclusive lock here to ensure that a valid master key, if found, is added
@ -239,38 +256,44 @@ GetMasterKey(void)
LWLockAcquire(lock_files, LW_SHARED); LWLockAcquire(lock_files, LW_SHARED);
LWLockAcquire(lock_cache, LW_EXCLUSIVE); LWLockAcquire(lock_cache, LW_EXCLUSIVE);
masterKey = get_master_key_from_cache(dbOid); #ifdef PERCONA_FORK
/* Global catalog has its own cache */
if (spcOid == GLOBALTABLESPACE_OID)
masterKey = TDEGetGlCatKeyFromCache();
else
#endif
masterKey = get_master_key_from_cache(dbOid);
if (masterKey) if (masterKey)
{ {
LWLockRelease(lock_cache); LWLockRelease(lock_cache);
LWLockRelease(lock_files); LWLockRelease(lock_files);
recursion--;
return masterKey; return masterKey;
} }
/* Master key not present in cache. Load from the keyring */ /* Master key not present in cache. Load from the keyring */
masterKeyInfo = pg_tde_get_master_key(dbOid); masterKeyInfo = pg_tde_get_master_key(dbOid, spcOid);
if (masterKeyInfo == NULL) if (masterKeyInfo == NULL)
{ {
LWLockRelease(lock_cache); LWLockRelease(lock_cache);
LWLockRelease(lock_files); LWLockRelease(lock_files);
ereport(ERROR, recursion--;
(errmsg("Master key does not exists for the database"),
errhint("Use set_master_key interface to set the master key")));
return NULL; return NULL;
} }
/* Load the master key from keyring and store it in cache */
keyring = GetKeyProviderByID(masterKeyInfo->keyringId);
if (keyring == NULL) if (keyring == NULL)
{ {
LWLockRelease(lock_cache); keyring = GetKeyProviderByID(masterKeyInfo->keyringId);
LWLockRelease(lock_files); if (keyring == NULL)
{
LWLockRelease(lock_cache);
LWLockRelease(lock_files);
ereport(ERROR, recursion--;
(errmsg("Key provider with ID:\"%d\" does not exists", masterKeyInfo->keyringId))); return NULL;
return NULL; }
} }
keyInfo = KeyringGetKey(keyring, masterKeyInfo->keyId.versioned_name, false, &keyring_ret); keyInfo = KeyringGetKey(keyring, masterKeyInfo->keyId.versioned_name, false, &keyring_ret);
@ -279,8 +302,7 @@ GetMasterKey(void)
LWLockRelease(lock_cache); LWLockRelease(lock_cache);
LWLockRelease(lock_files); LWLockRelease(lock_files);
ereport(ERROR, recursion--;
(errmsg("failed to retrieve master key \"%s\" from keyring.", masterKeyInfo->keyId.versioned_name)));
return NULL; return NULL;
} }
@ -290,8 +312,13 @@ GetMasterKey(void)
memcpy(masterKey->keyData, keyInfo->data.data, keyInfo->data.len); memcpy(masterKey->keyData, keyInfo->data.data, keyInfo->data.len);
masterKey->keyLength = keyInfo->data.len; masterKey->keyLength = keyInfo->data.len;
Assert(MyDatabaseId == masterKey->keyInfo.databaseId); Assert(dbOid == masterKey->keyInfo.databaseId);
push_master_key_to_cache(masterKey); #ifdef PERCONA_FORK
if (spcOid == GLOBALTABLESPACE_OID)
TDEPutGlCatKeyInCache(masterKey);
else
#endif
push_master_key_to_cache(masterKey);
/* Release the exclusive locks here */ /* Release the exclusive locks here */
LWLockRelease(lock_cache); LWLockRelease(lock_cache);
@ -300,6 +327,7 @@ GetMasterKey(void)
if (masterKeyInfo) if (masterKeyInfo)
pfree(masterKeyInfo); pfree(masterKeyInfo);
recursion--;
return masterKey; return masterKey;
} }
@ -313,12 +341,11 @@ GetMasterKey(void)
* to make sure if some other caller has not added a master key for * to make sure if some other caller has not added a master key for
* same database while we were waiting for the lock. * same database while we were waiting for the lock.
*/ */
TDEMasterKey *
static TDEMasterKey * set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring,
set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key) Oid dbOid, Oid spcOid, bool ensure_new_key)
{ {
TDEMasterKey *masterKey = NULL; TDEMasterKey *masterKey = NULL;
Oid dbOid = MyDatabaseId;
LWLock *lock_files = tde_lwlock_mk_files(); LWLock *lock_files = tde_lwlock_mk_files();
LWLock *lock_cache = tde_lwlock_mk_cache(); LWLock *lock_cache = tde_lwlock_mk_cache();
bool is_dup_key = false; bool is_dup_key = false;
@ -334,14 +361,15 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool
/* TODO: Add the key in the cache? */ /* TODO: Add the key in the cache? */
if (is_dup_key == false) if (is_dup_key == false)
is_dup_key = (pg_tde_get_master_key(dbOid) != NULL); is_dup_key = (pg_tde_get_master_key(dbOid, spcOid) != NULL);
if (is_dup_key == false) if (is_dup_key == false)
{ {
const keyInfo *keyInfo = NULL; const keyInfo *keyInfo = NULL;
masterKey = palloc(sizeof(TDEMasterKey)); masterKey = palloc(sizeof(TDEMasterKey));
masterKey->keyInfo.databaseId = MyDatabaseId; masterKey->keyInfo.databaseId = dbOid;
masterKey->keyInfo.tablespaceId = spcOid;
masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION; masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION;
masterKey->keyInfo.keyringId = keyring->key_id; masterKey->keyInfo.keyringId = keyring->key_id;
strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN); strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN);
@ -396,7 +424,10 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool
bool bool
SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key) SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key)
{ {
TDEMasterKey *master_key = set_master_key_with_keyring(key_name, GetKeyProviderByName(provider_name), ensure_new_key); TDEMasterKey *master_key = set_master_key_with_keyring(key_name,
GetKeyProviderByName(provider_name),
MyDatabaseId, MyDatabaseTableSpace,
ensure_new_key);
return (master_key != NULL); return (master_key != NULL);
} }
@ -404,10 +435,11 @@ SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_ke
bool bool
RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key) RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key)
{ {
TDEMasterKey *master_key = GetMasterKey(); TDEMasterKey *master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL);
TDEMasterKey new_master_key; TDEMasterKey new_master_key;
const keyInfo *keyInfo = NULL; const keyInfo *keyInfo = NULL;
GenericKeyring *keyring; GenericKeyring *keyring;
bool is_rotated;
/* /*
* Let's set everything the same as the older master key and * Let's set everything the same as the older master key and
@ -446,8 +478,13 @@ RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool en
new_master_key.keyLength = keyInfo->data.len; new_master_key.keyLength = keyInfo->data.len;
memcpy(new_master_key.keyData, keyInfo->data.data, keyInfo->data.len); memcpy(new_master_key.keyData, keyInfo->data.data, keyInfo->data.len);
clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace); is_rotated = pg_tde_perform_rotate_key(master_key, &new_master_key);
return pg_tde_perform_rotate_key(master_key, &new_master_key); if (is_rotated) {
clear_master_key_cache(master_key->keyInfo.databaseId);
push_master_key_to_cache(&new_master_key);
}
return is_rotated;
} }
/* /*
@ -459,7 +496,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec)
bool ret; bool ret;
ret = pg_tde_write_map_keydata_files(xlrec->map_size, xlrec->buff, xlrec->keydata_size, &xlrec->buff[xlrec->map_size]); ret = pg_tde_write_map_keydata_files(xlrec->map_size, xlrec->buff, xlrec->keydata_size, &xlrec->buff[xlrec->map_size]);
clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace); clear_master_key_cache(MyDatabaseId);
return ret; return ret;
} }
@ -469,7 +506,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec)
* If ensure_new_key is true, then we will keep on incrementing the version number * If ensure_new_key is true, then we will keep on incrementing the version number
* till we get a key name that is not present in the keyring * till we get a key name that is not present in the keyring
*/ */
static keyInfo * keyInfo *
load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key) load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key)
{ {
KeyringReturnCodes kr_ret; KeyringReturnCodes kr_ret;
@ -553,7 +590,7 @@ GetMasterKeyProviderId(void)
} }
{ {
/* Master key not present in cache. Try Loading it from the info file */ /* Master key not present in cache. Try Loading it from the info file */
masterKeyInfo = pg_tde_get_master_key(dbOid); masterKeyInfo = pg_tde_get_master_key(dbOid, MyDatabaseTableSpace);
if (masterKeyInfo) if (masterKeyInfo)
{ {
keyringId = masterKeyInfo->keyringId; keyringId = masterKeyInfo->keyringId;
@ -609,7 +646,7 @@ static void
push_master_key_to_cache(TDEMasterKey *masterKey) push_master_key_to_cache(TDEMasterKey *masterKey)
{ {
TDEMasterKey *cacheEntry = NULL; TDEMasterKey *cacheEntry = NULL;
Oid databaseId = MyDatabaseId; Oid databaseId = masterKey->keyInfo.databaseId;
bool found = false; bool found = false;
cacheEntry = dshash_find_or_insert(get_master_key_Hash(), cacheEntry = dshash_find_or_insert(get_master_key_Hash(),
&databaseId, &found); &databaseId, &found);
@ -653,18 +690,18 @@ master_key_startup_cleanup(int tde_tbl_count, void* arg)
void void
cleanup_master_key_info(Oid databaseId, Oid tablespaceId) cleanup_master_key_info(Oid databaseId, Oid tablespaceId)
{ {
clear_master_key_cache(databaseId, tablespaceId); clear_master_key_cache(databaseId);
/* /*
* TODO: Although should never happen. Still verify if any table in the * TODO: Although should never happen. Still verify if any table in the
* database is using tde * database is using tde
*/ */
/* Remove the tde files */ /* Remove the tde files */
pg_tde_delete_tde_files(databaseId); pg_tde_delete_tde_files(databaseId, tablespaceId);
} }
static void static void
clear_master_key_cache(Oid databaseId, Oid tablespaceId) clear_master_key_cache(Oid databaseId)
{ {
TDEMasterKey *cache_entry; TDEMasterKey *cache_entry;
@ -737,9 +774,14 @@ Datum pg_tde_master_key_info(PG_FUNCTION_ARGS)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context that cannot accept type record"))); errmsg("function returning record called in context that cannot accept type record")));
master_key = GetMasterKey(); master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL);
if (master_key == NULL) if (master_key == NULL)
PG_RETURN_NULL(); {
ereport(ERROR,
(errmsg("Master key does not exists for the database"),
errhint("Use set_master_key interface to set the master key")));
PG_RETURN_NULL();
}
keyring = GetKeyProviderByID(master_key->keyInfo.keyringId); keyring = GetKeyProviderByID(master_key->keyInfo.keyringId);

@ -33,6 +33,12 @@ get_tde_table_am_oid(void)
return get_table_am_oid("pg_tde", false); return get_table_am_oid("pg_tde", false);
} }
/*
 * Looks up the table access method named "pg_tde2" through
 * get_table_am_oid() and hands back the resulting OID.
 */
Oid
get_tde2_table_am_oid(void)
{
	Oid			tde2_am_oid = get_table_am_oid("pg_tde2", false);

	return tde2_am_oid;
}
/* /*
* Returns the list of OIDs for all TDE tables in a database * Returns the list of OIDs for all TDE tables in a database
*/ */

@ -10,6 +10,7 @@
#include "utils/rel.h" #include "utils/rel.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "catalog/pg_tablespace_d.h"
#include "catalog/tde_master_key.h" #include "catalog/tde_master_key.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/relfilelocator.h" #include "storage/relfilelocator.h"
@ -46,22 +47,28 @@ typedef struct XLogRelKey
RelKeyData relKey; RelKeyData relKey;
} XLogRelKey; } XLogRelKey;
extern void pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel); extern RelKeyData* pg_tde_create_key_map_entry(const RelFileLocator *newrlocator);
extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_key_data, TDEMasterKeyInfo *master_key_info); extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_key_data, TDEMasterKeyInfo *master_key_info);
extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator); extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator);
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset); extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset);
extern RelKeyData *pg_tde_get_key_from_fork(const RelFileLocator *rlocator);
extern RelKeyData *GetRelationKey(RelFileLocator rel); extern RelKeyData *GetRelationKey(RelFileLocator rel);
extern RelKeyData *GetRelationKeyWithKeyring(RelFileLocator rel, GenericKeyring *keyring);
extern void pg_tde_cleanup_path_vars(void); extern void pg_tde_delete_tde_files(Oid dbOid, Oid spcOid);
extern void pg_tde_delete_tde_files(Oid dbOid);
extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid); extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid, Oid spcOid);
extern bool pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info); extern bool pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info);
extern bool pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key); extern bool pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key);
extern bool pg_tde_write_map_keydata_files(off_t map_size, char *m_file_data, off_t keydata_size, char *k_file_data); extern bool pg_tde_write_map_keydata_files(off_t map_size, char *m_file_data, off_t keydata_size, char *k_file_data);
extern RelKeyData* tde_create_rel_key(Oid rel_id, InternalKey *key, TDEMasterKeyInfo *master_key_info);
extern RelKeyData *tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const RelFileLocator *rlocator);
extern RelKeyData *tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, const RelFileLocator *rlocator);
extern void pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_path, char *keydata_path);
const char * tde_sprint_key(InternalKey *k); const char * tde_sprint_key(InternalKey *k);
extern void pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key);
#endif /*PG_TDE_MAP_H*/ #endif /*PG_TDE_MAP_H*/

@ -9,7 +9,12 @@
#ifndef PG_TDE_XLOG_H #ifndef PG_TDE_XLOG_H
#define PG_TDE_XLOG_H #define PG_TDE_XLOG_H
#include "postgres.h"
#include "access/xlog.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#ifdef PERCONA_FORK
#include "access/xlog_smgr.h"
#endif
/* TDE XLOG resource manager */ /* TDE XLOG resource manager */
#define XLOG_TDE_ADD_RELATION_KEY 0x00 #define XLOG_TDE_ADD_RELATION_KEY 0x00
@ -32,4 +37,28 @@ static const RmgrData pg_tde_rmgr = {
.rm_identify = pg_tde_rmgr_identify .rm_identify = pg_tde_rmgr_identify
}; };
#ifdef PERCONA_FORK
/* XLog encryption stuff */
extern Size TDEXLogEncryptBuffSize(void);
#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE)
extern void TDEXLogShmemInit(void);
extern ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset);
extern ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset);
static const XLogSmgr tde_xlog_smgr = {
.seg_read = pg_tde_xlog_seg_read,
.seg_write = pg_tde_xlog_seg_write,
};
extern void TDEXLogSmgrInit(void);
extern void XLogInitGUC(void);
#endif
#endif /* PG_TDE_XLOG_H */ #endif /* PG_TDE_XLOG_H */

@ -333,4 +333,7 @@ extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation,
/* Defined in pg_tdeam_handler.c */ /* Defined in pg_tdeam_handler.c */
extern bool is_pg_tde_rel(Relation rel); extern bool is_pg_tde_rel(Relation rel);
const TableAmRoutine *
GetPGTdeamTableAmRoutine(void);
#endif /* PG_TDEAM_H */ #endif /* PG_TDEAM_H */

@ -0,0 +1,41 @@
/*-------------------------------------------------------------------------
*
* tde_global_catalog.h
* Global catalog key management
*
* src/include/catalog/tde_global_catalog.h
*
*-------------------------------------------------------------------------
*/
#ifndef TDE_GLOBAL_CATALOG_H
#define TDE_GLOBAL_CATALOG_H
#include "postgres.h"
#include "catalog/tde_master_key.h"
/*
* Needed for global data (WAL etc.) key identification in caches and storage.
* We take the IDs of the oid type's operators, so no overlap with "real"
* catalog objects is possible.
*/
#define GLOBAL_DATA_TDE_OID 607 /* Global objects fake "db" */
#define XLOG_TDE_OID 608
#define GLOBAL_SPACE_RLOCATOR(_obj_oid) (RelFileLocator) { \
GLOBALTABLESPACE_OID, \
GLOBAL_DATA_TDE_OID, \
_obj_oid \
}
extern void TDEGlCatInitGUC(void);
extern Size TDEGlCatEncStateSize(void);
extern void TDEGlCatShmemInit(void);
extern void TDEGlCatKeyInit(void);
extern TDEMasterKey *TDEGetGlCatKeyFromCache(void);
extern void TDEPutGlCatKeyInCache(TDEMasterKey *mkey);
extern RelKeyData *GetGlCatInternalKey(Oid obj_id);
#endif /*TDE_GLOBAL_CATALOG_H*/

@ -54,8 +54,15 @@ typedef struct VaultV2Keyring
char vault_mount_path[MAXPGPATH]; char vault_mount_path[MAXPGPATH];
} VaultV2Keyring; } VaultV2Keyring;
/*
 * Union of the provider-specific keyring structs: it can hold either a
 * FileKeyring or a VaultV2Keyring.
 */
typedef union KeyringProviders
{
FileKeyring file;
VaultV2Keyring vault;
} KeyringProviders;
extern List *GetAllKeyringProviders(void); extern List *GetAllKeyringProviders(void);
extern GenericKeyring *GetKeyProviderByName(const char *provider_name); extern GenericKeyring *GetKeyProviderByName(const char *provider_name);
extern GenericKeyring *GetKeyProviderByID(int provider_id); extern GenericKeyring *GetKeyProviderByID(int provider_id);
extern ProviderType get_keyring_provider_from_typename(char *provider_type);
#endif /*TDE_KEYRING_H*/ #endif /*TDE_KEYRING_H*/

@ -17,6 +17,7 @@
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#define DEFAULT_MASTER_KEY_VERSION 1
#define MASTER_KEY_NAME_LEN TDE_KEY_NAME_LEN #define MASTER_KEY_NAME_LEN TDE_KEY_NAME_LEN
#define MAX_MASTER_KEY_VERSION_NUM 100000 #define MAX_MASTER_KEY_VERSION_NUM 100000
@ -68,9 +69,16 @@ extern LWLock *tde_lwlock_mk_cache(void);
extern bool save_master_key_info(TDEMasterKeyInfo *masterKeyInfo); extern bool save_master_key_info(TDEMasterKeyInfo *masterKeyInfo);
extern Oid GetMasterKeyProviderId(void); extern Oid GetMasterKeyProviderId(void);
extern TDEMasterKey* GetMasterKey(void); extern TDEMasterKey* GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring);
extern bool SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key); extern bool SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key);
extern bool RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key); extern bool RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key);
extern bool xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec); extern bool xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec);
extern TDEMasterKey *set_master_key_with_keyring(const char *key_name,
GenericKeyring *keyring,
Oid dbOid, Oid spcOid,
bool ensure_new_key);
extern keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info,
GenericKeyring *keyring,
bool ensure_new_key);
#endif /*PG_TDE_MASTER_KEY_H*/ #endif /*PG_TDE_MASTER_KEY_H*/

@ -12,6 +12,7 @@
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
extern Oid get_tde_table_am_oid(void); extern Oid get_tde_table_am_oid(void);
extern Oid get_tde2_table_am_oid(void);
extern List *get_all_tde_tables(void); extern List *get_all_tde_tables(void);
extern int get_tde_tables_count(void); extern int get_tde_tables_count(void);

@ -22,6 +22,7 @@
//#define ENCRYPTION_DEBUG 1 //#define ENCRYPTION_DEBUG 1
//#define KEYRING_DEBUG 1 //#define KEYRING_DEBUG 1
//#define TDE_FORK_DEBUG 1 //#define TDE_FORK_DEBUG 1
// #define TDE_XLOG_DEBUG 1
#define pg_tde_fill_tuple heap_fill_tuple #define pg_tde_fill_tuple heap_fill_tuple
#define pg_tde_form_tuple heap_form_tuple #define pg_tde_form_tuple heap_form_tuple
@ -37,8 +38,6 @@
#define pgstat_count_pg_tde_insert pgstat_count_heap_insert #define pgstat_count_pg_tde_insert pgstat_count_heap_insert
#define pg_tde_getattr heap_getattr #define pg_tde_getattr heap_getattr
#define GetPGTdeamTableAmRoutine GetHeapamTableAmRoutine
#define TDE_PageAddItem(rel, oid, blkno, page, item, size, offsetNumber, overwrite, is_heap) \ #define TDE_PageAddItem(rel, oid, blkno, page, item, size, offsetNumber, overwrite, is_heap) \
PGTdePageAddItemExtended(rel, oid, blkno, page, item, size, offsetNumber, \ PGTdePageAddItemExtended(rel, oid, blkno, page, item, size, offsetNumber, \
((overwrite) ? PAI_OVERWRITE : 0) | \ ((overwrite) ? PAI_OVERWRITE : 0) | \

@ -0,0 +1,33 @@
/*-------------------------------------------------------------------------
*
* pg_tde_event_capture.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_TDE_EVENT_CAPTURE_H
#define PG_TDE_EVENT_CAPTURE_H
#include "postgres.h"
#include "nodes/parsenodes.h"
/* Kind of CREATE statement recorded in a TdeCreateEvent. */
typedef enum TdeCreateEventType
{
TDE_UNKNOWN_CREATE_EVENT, /* value used when no create statement is recorded */
TDE_TABLE_CREATE_EVENT, /* CREATE TABLE */
TDE_INDEX_CREATE_EVENT /* CREATE INDEX */
} TdeCreateEventType;
/* Description of the CREATE statement currently being tracked. */
typedef struct TdeCreateEvent
{
TdeCreateEventType eventType; /* DDL statement type */
bool encryptMode; /* true when the table uses encryption */
Oid baseTableOid; /* Oid of the table on which the index is being
* created. For a create table statement this
* contains InvalidOid */
RangeVar *relation; /* Reference to the parsed relation from
* create statement */
} TdeCreateEvent;
extern TdeCreateEvent * GetCurrentTdeCreateEvent(void);
#endif

@ -0,0 +1,4 @@
#pragma once
/*
 * Declared with an explicit (void) parameter list so that this is a real
 * prototype: an empty "()" leaves the parameters unspecified in C11 and
 * disables argument checking at call sites.
 */
extern void RegisterStorageMgr(void);

Binary file not shown.

@ -1,11 +1,11 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* keyring_file.c * keyring_file.c
* Implements the file provider keyring * Implements the file provider keyring
* routines. * routines.
* *
* IDENTIFICATION * IDENTIFICATION
* contrib/pg_tde/src/keyring/keyring_file.c * contrib/pg_tde/src/keyring/keyring_file.c
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -40,21 +40,21 @@ static keyInfo*
get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, KeyringReturnCodes *return_code) get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, KeyringReturnCodes *return_code)
{ {
keyInfo* key = NULL; keyInfo* key = NULL;
File file = -1; int fd = -1;
FileKeyring* file_keyring = (FileKeyring*)keyring; FileKeyring* file_keyring = (FileKeyring*)keyring;
off_t bytes_read = 0; off_t bytes_read = 0;
off_t curr_pos = 0; off_t curr_pos = 0;
*return_code = KEYRING_CODE_SUCCESS; *return_code = KEYRING_CODE_SUCCESS;
file = PathNameOpenFile(file_keyring->file_name, PG_BINARY); fd = BasicOpenFile(file_keyring->file_name, PG_BINARY);
if (file < 0) if (fd < 0)
return NULL; return NULL;
key = palloc(sizeof(keyInfo)); key = palloc(sizeof(keyInfo));
while(true) while(true)
{ {
bytes_read = FileRead(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_READ); bytes_read = pg_pread(fd, key, sizeof(keyInfo), curr_pos);
curr_pos += bytes_read; curr_pos += bytes_read;
if (bytes_read == 0 ) if (bytes_read == 0 )
@ -62,13 +62,13 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error,
/* /*
* Empty keyring file is considered as a valid keyring file that has no keys * Empty keyring file is considered as a valid keyring file that has no keys
*/ */
FileClose(file); close(fd);
pfree(key); pfree(key);
return NULL; return NULL;
} }
if (bytes_read != sizeof(keyInfo)) if (bytes_read != sizeof(keyInfo))
{ {
FileClose(file); close(fd);
pfree(key); pfree(key);
/* Corrupt file */ /* Corrupt file */
*return_code = KEYRING_CODE_DATA_CORRUPTED; *return_code = KEYRING_CODE_DATA_CORRUPTED;
@ -81,21 +81,21 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error,
} }
if (strncasecmp(key->name.name, key_name, sizeof(key->name.name)) == 0) if (strncasecmp(key->name.name, key_name, sizeof(key->name.name)) == 0)
{ {
FileClose(file); close(fd);
return key; return key;
} }
} }
FileClose(file); close(fd);
pfree(key); pfree(key);
return NULL; return NULL;
} }
static KeyringReturnCodes static KeyringReturnCodes
set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error) set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error)
{ {
off_t bytes_written = 0; off_t bytes_written = 0;
off_t curr_pos = 0; off_t curr_pos = 0;
File file; int fd;
FileKeyring* file_keyring = (FileKeyring*)keyring; FileKeyring* file_keyring = (FileKeyring*)keyring;
keyInfo *existing_key; keyInfo *existing_key;
KeyringReturnCodes return_code = KEYRING_CODE_SUCCESS; KeyringReturnCodes return_code = KEYRING_CODE_SUCCESS;
@ -111,26 +111,35 @@ set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error)
return KEYRING_CODE_INVALID_OPERATION; return KEYRING_CODE_INVALID_OPERATION;
} }
file = PathNameOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY); fd = BasicOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY);
if (file < 0) if (fd < 0)
{ {
ereport(throw_error?ERROR:WARNING, ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("Failed to open keyring file %s :%m", file_keyring->file_name))); errmsg("Failed to open keyring file %s :%m", file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
} }
/* Write key to the end of file */ /* Write key to the end of file */
curr_pos = FileSize(file); curr_pos = lseek(fd, 0, SEEK_END);
bytes_written = FileWrite(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_WRITE); bytes_written = pg_pwrite(fd, key, sizeof(keyInfo), curr_pos);
if (bytes_written != sizeof(keyInfo)) if (bytes_written != sizeof(keyInfo))
{ {
FileClose(file); close(fd);
ereport(throw_error?ERROR:WARNING, ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("keyring file \"%s\" can't be written: %m", errmsg("keyring file \"%s\" can't be written: %m",
file_keyring->file_name))); file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
} }
FileClose(file); if (pg_fsync(fd) != 0)
{
close(fd);
ereport(throw_error?ERROR:WARNING,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
file_keyring->file_name)));
return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE;
}
close(fd);
return KEYRING_CODE_SUCCESS; return KEYRING_CODE_SUCCESS;
} }

@ -31,6 +31,10 @@
#include "keyring/keyring_vault.h" #include "keyring/keyring_vault.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "pg_tde_defs.h" #include "pg_tde_defs.h"
#include "smgr/pg_tde_smgr.h"
#ifdef PERCONA_FORK
#include "catalog/tde_global_catalog.h"
#endif
#define MAX_ON_INSTALLS 5 #define MAX_ON_INSTALLS 5
@ -59,6 +63,11 @@ tde_shmem_request(void)
{ {
Size sz = TdeRequiredSharedMemorySize(); Size sz = TdeRequiredSharedMemorySize();
int required_locks = TdeRequiredLocksCount(); int required_locks = TdeRequiredLocksCount();
#ifdef PERCONA_FORK
sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE);
#endif
if (prev_shmem_request_hook) if (prev_shmem_request_hook)
prev_shmem_request_hook(); prev_shmem_request_hook();
RequestAddinShmemSpace(sz); RequestAddinShmemSpace(sz);
@ -74,6 +83,14 @@ tde_shmem_startup(void)
TdeShmemInit(); TdeShmemInit();
AesInit(); AesInit();
#ifdef PERCONA_FORK
TDEGlCatShmemInit();
TDEGlCatKeyInit();
TDEXLogShmemInit();
TDEXLogSmgrInit();
#endif
} }
void void
@ -86,7 +103,10 @@ _PG_init(void)
keyringRegisterVariables(); keyringRegisterVariables();
InitializeMasterKeyInfo(); InitializeMasterKeyInfo();
#ifdef PERCONA_FORK
XLogInitGUC();
TDEGlCatInitGUC();
#endif
prev_shmem_request_hook = shmem_request_hook; prev_shmem_request_hook = shmem_request_hook;
shmem_request_hook = tde_shmem_request; shmem_request_hook = tde_shmem_request;
prev_shmem_startup_hook = shmem_startup_hook; prev_shmem_startup_hook = shmem_startup_hook;
@ -98,6 +118,8 @@ _PG_init(void)
InstallFileKeyring(); InstallFileKeyring();
InstallVaultV2Keyring(); InstallVaultV2Keyring();
RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr); RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr);
RegisterStorageMgr();
} }
Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS) Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS)

@ -0,0 +1,147 @@
/*-------------------------------------------------------------------------
*
* pg_tde_event_capture.c
* event trigger logic to identify if we are creating the encrypted table or not.
*
* IDENTIFICATION
* contrib/pg_tde/src/pg_tde_event_capture.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "fmgr.h"
#include "utils/rel.h"
#include "utils/builtins.h"
#include "catalog/pg_class.h"
#include "access/table.h"
#include "catalog/pg_event_trigger.h"
#include "catalog/namespace.h"
#include "commands/event_trigger.h"
#include "common/pg_tde_utils.h"
#include "pg_tde_event_capture.h"
/* Global variable that gets set at ddl start and cleared out at ddl end */
TdeCreateEvent tdeCurrentCreateEvent = {.relation = NULL};
static void reset_current_tde_create_event(void);
PG_FUNCTION_INFO_V1(pg_tde_ddl_command_start_capture);
PG_FUNCTION_INFO_V1(pg_tde_ddl_command_end_capture);
/*
 * Gives callers access to the file-level tdeCurrentCreateEvent state by
 * returning its address.
 */
TdeCreateEvent *
GetCurrentTdeCreateEvent(void)
{
	TdeCreateEvent *current_event = &tdeCurrentCreateEvent;

	return current_event;
}
/*
 * pg_tde_ddl_command_start_capture is an event trigger function triggered
 * at the start of any DDL command execution.
 *
 * Only CREATE INDEX (IndexStmt) and CREATE TABLE (CreateStmt) statements are
 * inspected; the result is recorded in the tdeCurrentCreateEvent global
 * variable, which is reset first on every call:
 *
 *	- CREATE INDEX: the base table is resolved and opened; encryptMode is set
 *	  when its access method equals get_tde_table_am_oid().  The relation is
 *	  closed again in every case, it is only opened to look at its access
 *	  method.
 *	- CREATE TABLE: encryptMode is set when the statement explicitly names
 *	  the "pg_tde2" access method.
 *
 * The recorded state can be read through GetCurrentTdeCreateEvent() during
 * the execution of the DDL statement.
 *
 * The whole body is compiled only when PERCONA_FORK is defined; otherwise
 * the function does nothing.  It always returns NULL.
 */
Datum
pg_tde_ddl_command_start_capture(PG_FUNCTION_ARGS)
{
	/* TODO: verify update_compare_indexes failure related to this */
#ifdef PERCONA_FORK
	EventTriggerData *trigdata;
	Node	   *parsetree;

	/* Ensure this function is being called as an event trigger */
	if (!CALLED_AS_EVENT_TRIGGER(fcinfo))	/* internal error */
		ereport(ERROR,
				(errmsg("Function can only be fired by event trigger manager")));

	trigdata = (EventTriggerData *) fcinfo->context;
	parsetree = trigdata->parsetree;

	elog(DEBUG2, "EVENT TRIGGER (%s) %s", trigdata->event, nodeToString(parsetree));

	/* Start from a clean state so nothing from an earlier statement leaks in */
	reset_current_tde_create_event();

	if (IsA(parsetree, IndexStmt))
	{
		IndexStmt  *stmt = (IndexStmt *) parsetree;
		Oid			relationId = RangeVarGetRelid(stmt->relation, NoLock, true);

		tdeCurrentCreateEvent.eventType = TDE_INDEX_CREATE_EVENT;
		tdeCurrentCreateEvent.baseTableOid = relationId;
		tdeCurrentCreateEvent.relation = stmt->relation;

		if (relationId != InvalidOid)
		{
			LOCKMODE	lockmode = AccessShareLock; /* TODO. Verify lock mode? */
			Relation	rel = table_open(relationId, lockmode);

			if (rel->rd_rel->relam == get_tde_table_am_oid())
			{
				/* We are creating the index on encrypted table */
				/* set the global state */
				tdeCurrentCreateEvent.encryptMode = true;
			}

			/*
			 * Always release the relation, whichever access method it uses.
			 * Leaving it open for encrypted tables would leak the relcache
			 * reference taken by table_open().
			 */
			table_close(rel, lockmode);
		}
		else
			ereport(DEBUG1, (errmsg("Failed to get relation Oid for relation:%s", stmt->relation->relname)));
	}
	else if (IsA(parsetree, CreateStmt))
	{
		CreateStmt *stmt = (CreateStmt *) parsetree;

		tdeCurrentCreateEvent.eventType = TDE_TABLE_CREATE_EVENT;
		tdeCurrentCreateEvent.relation = stmt->relation;

		/* accessMethod is NULL when the statement has no USING clause */
		if (stmt->accessMethod && !strcmp(stmt->accessMethod, "pg_tde2"))
		{
			tdeCurrentCreateEvent.encryptMode = true;
		}
	}
#endif
	PG_RETURN_NULL();
}
/*
 * Event trigger function called at the end of DDL statement execution.
 *
 * It logs the state recorded in tdeCurrentCreateEvent at DEBUG1 and then
 * clears that global variable.  The body is compiled only when PERCONA_FORK
 * is defined.  It always returns NULL.
 */
Datum
pg_tde_ddl_command_end_capture(PG_FUNCTION_ARGS)
{
#ifdef PERCONA_FORK
	/* Ensure this function is being called as an event trigger */
	if (!CALLED_AS_EVENT_TRIGGER(fcinfo))	/* internal error */
		ereport(ERROR,
				(errmsg("Function can only be fired by event trigger manager")));

	/* Oid is an unsigned type, so it is printed with %u rather than %d */
	elog(DEBUG1, "Type:%s EncryptMode:%s, Oid:%u, Relation:%s ",
		 (tdeCurrentCreateEvent.eventType == TDE_INDEX_CREATE_EVENT) ? "CREATE INDEX" :
		 (tdeCurrentCreateEvent.eventType == TDE_TABLE_CREATE_EVENT) ? "CREATE TABLE" : "UNKNOWN",
		 tdeCurrentCreateEvent.encryptMode ? "true" : "false",
		 tdeCurrentCreateEvent.baseTableOid,
		 tdeCurrentCreateEvent.relation ? tdeCurrentCreateEvent.relation->relname : "UNKNOWN");

	/* All we need to do is to clear the event state */
	reset_current_tde_create_event();
#endif
	PG_RETURN_NULL();
}
/*
 * Put tdeCurrentCreateEvent back into its "nothing captured" state:
 * unknown event type, no relation, invalid base table OID, encryption off.
 */
static void
reset_current_tde_create_event(void)
{
	tdeCurrentCreateEvent.relation = NULL;
	tdeCurrentCreateEvent.baseTableOid = InvalidOid;
	tdeCurrentCreateEvent.eventType = TDE_UNKNOWN_CREATE_EVENT;
	tdeCurrentCreateEvent.encryptMode = false;
}

@ -0,0 +1,213 @@
#include "smgr/pg_tde_smgr.h"
#include "postgres.h"
#include "storage/smgr.h"
#include "storage/md.h"
#include "catalog/catalog.h"
#include "encryption/enc_aes.h"
#include "access/pg_tde_tdemap.h"
#include "pg_tde_event_capture.h"
#ifdef PERCONA_FORK
/*
 * Initialization vector handed to every AesEncrypt()/AesDecrypt() call in
 * this file.  It is a single all-zero 16-byte buffer, i.e. the same IV is
 * used for every block of every relation.
 *
 * TODO: implement proper IV
 * iv should be based on blocknum + relfile, available in the API
 *
 * NOTE(review): the security impact of the shared IV depends on the cipher
 * mode used by AesEncrypt(), which is not visible here -- confirm.
 */
static char iv[16] = {0,};
/*
 * Find (or create) the encryption key for the relation behind "reln".
 *
 * Returns NULL, meaning "do not encrypt/decrypt", when:
 *   - the relation is a catalog relation,
 *   - the function is re-entered while a lookup is already in progress,
 *   - GetMasterKey() returns NULL.
 *
 * Otherwise, if the current DDL event is a CREATE TABLE or CREATE INDEX in
 * encrypt mode, a new key map entry is created and returned; for any other
 * case the result of GetRelationKey() is returned (which may be NULL when
 * the relation has no key).
 */
static RelKeyData *
tde_smgr_get_key(SMgrRelation reln)
{
	/*
	 * TODO: This recursion counter is a dirty hack until the metadata is in
	 * the catalog.  As otherwise we would call GetMasterKey recursively and
	 * deadlock.
	 */
	static int	recursion = 0;
	RelKeyData *rkd = NULL;
	bool		need_new_key = false;

	/* do not try to encrypt/decrypt catalog tables */
	if (IsCatalogRelationOid(reln->smgr_rlocator.locator.relNumber))
		return NULL;

	if (recursion != 0)
		return NULL;

	recursion++;

	/*
	 * The guard must be dropped even when one of the calls below raises an
	 * ERROR.  Otherwise the counter would stay non-zero and every later call
	 * in this backend would take the "recursion != 0" exit above and
	 * return NULL.
	 */
	PG_TRY();
	{
		/*
		 * NOTE(review): the first argument is the relation number; confirm
		 * against GetMasterKey()'s signature that this is the intended
		 * identifier.
		 */
		if (GetMasterKey(reln->smgr_rlocator.locator.relNumber,
						 reln->smgr_rlocator.locator.spcOid, NULL) != NULL)
		{
			TdeCreateEvent *event = GetCurrentTdeCreateEvent();

			/*
			 * CREATE TABLE has to generate a key.  CREATE INDEX keeps it
			 * simple for now and gets a separate key as well; later the map
			 * infrastructure might be modified to share the table's key.
			 */
			need_new_key = (event->encryptMode == true &&
							(event->eventType == TDE_TABLE_CREATE_EVENT ||
							 event->eventType == TDE_INDEX_CREATE_EVENT));

			/* otherwise, see if we already have a key for the relation */
			if (!need_new_key)
				rkd = GetRelationKey(reln->smgr_rlocator.locator);
		}
	}
	PG_FINALLY();
	{
		recursion--;
	}
	PG_END_TRY();

	/* As before, the key is created only after the guard has been released. */
	if (need_new_key)
		rkd = pg_tde_create_key_map_entry(&reln->smgr_rlocator.locator);

	return rkd;
}
/*
 * smgr_writev callback: write "nblocks" blocks starting at "blocknum".
 *
 * When tde_smgr_get_key() returns no key, the caller's buffers are passed to
 * mdwritev() unchanged.  Otherwise every block is encrypted into a private,
 * I/O-aligned scratch area and the encrypted copies are written instead; the
 * caller's buffers are never modified.
 */
void
tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			 const void **buffers, BlockNumber nblocks, bool skipFsync)
{
	RelKeyData *rkd;
	char	   *local_blocks;
	char	   *local_blocks_aligned;
	const void **local_buffers;

	AesInit();

	/*
	 * Decide whether to encrypt before allocating anything, so the
	 * pass-through path has nothing to release (the scratch buffers used to
	 * be leaked here).
	 */
	rkd = tde_smgr_get_key(reln);
	if (rkd == NULL)
	{
		mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
		return;
	}

	/* One extra block leaves room to round the start up to the I/O alignment. */
	local_blocks = malloc((size_t) BLCKSZ * ((size_t) nblocks + 1));
	local_buffers = malloc(sizeof(void *) * nblocks);
	if (local_blocks == NULL || (local_buffers == NULL && nblocks > 0))
	{
		free(local_blocks);
		free(local_buffers);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	}
	local_blocks_aligned = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);

	for (BlockNumber i = 0; i < nblocks; ++i)
	{
		void	   *dst = &local_blocks_aligned[(size_t) i * BLCKSZ];
		int			out_len = BLCKSZ;

		AesEncrypt(rkd->internal_key.key, iv, ((char **) buffers)[i], BLCKSZ, dst, &out_len);
		local_buffers[i] = dst;
	}

	/*
	 * NOTE(review): if mdwritev() raises an ERROR the malloc'd scratch
	 * buffers are not released; consider PG_TRY/PG_FINALLY or a memory
	 * context allocation.
	 */
	mdwritev(reln, forknum, blocknum,
			 local_buffers, nblocks, skipFsync);

	free(local_blocks);
	free(local_buffers);
}
/*
 * smgr_extend callback: add one block at "blocknum".
 *
 * When tde_smgr_get_key() returns no key, "buffer" is handed to mdextend()
 * unchanged.  Otherwise the block is encrypted into a private, I/O-aligned
 * scratch block and that copy is written; "buffer" itself is not modified.
 */
void
tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			 const void *buffer, bool skipFsync)
{
	RelKeyData *rkd;
	char	   *local_blocks;
	char	   *local_blocks_aligned;
	int			out_len = BLCKSZ;

	AesInit();

	/*
	 * Look the key up first so the pass-through path allocates nothing (the
	 * scratch block used to be leaked here).
	 */
	rkd = tde_smgr_get_key(reln);
	if (rkd == NULL)
	{
		mdextend(reln, forknum, blocknum, buffer, skipFsync);
		return;
	}

	/* One extra block leaves room to round the start up to the I/O alignment. */
	local_blocks = malloc((size_t) BLCKSZ * (1 + 1));
	if (local_blocks == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	local_blocks_aligned = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);

	AesEncrypt(rkd->internal_key.key, iv, ((char *) buffer), BLCKSZ, local_blocks_aligned, &out_len);

	/*
	 * NOTE(review): if mdextend() raises an ERROR the malloc'd scratch block
	 * is not released; consider PG_TRY/PG_FINALLY or a memory context
	 * allocation.
	 */
	mdextend(reln, forknum, blocknum, local_blocks_aligned, skipFsync);

	free(local_blocks);
}
/*
 * smgr_readv callback: read "nblocks" blocks starting at "blocknum" with
 * mdreadv() and, if the relation has a key, decrypt each block in place.
 *
 * Postgres creates all zero blocks in an optimized route, which we do not
 * try to encrypt.  Instead a block is treated as such at decryption time and
 * left as is when its leading 32 bytes are all zero.  This could be a
 * security issue later, but it is a good first prototype.
 */
void
tde_mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			void **buffers, BlockNumber nblocks)
{
	const int	zero_probe_len = 32;	/* bytes inspected by the zero check */
	char	  **pages = (char **) buffers;
	RelKeyData *key_data;

	AesInit();
	mdreadv(reln, forknum, blocknum, buffers, nblocks);

	key_data = tde_smgr_get_key(reln);
	if (key_data == NULL)
		return;					/* unencrypted relation: nothing to undo */

	for (int blk = 0; blk < nblocks; ++blk)
	{
		char	   *page = pages[blk];
		int			probe = 0;
		int			out_len;

		/* Advance over leading zero bytes, stopping at the first non-zero. */
		while (probe < zero_probe_len && page[probe] == 0)
			probe++;

		/* The whole probed prefix was zero: leave the block untouched. */
		if (probe == zero_probe_len)
			continue;

		out_len = BLCKSZ;
		AesDecrypt(key_data->internal_key.key, iv, page, BLCKSZ, page, &out_len);
	}
}
/*
 * smgr_create callback.
 *
 * This is the only function that gets called during actual CREATE
 * TABLE/INDEX (EVENT TRIGGER), so the key is created here by loading it; the
 * returned key itself is not needed.  Later calls then decide to encrypt or
 * not based on the existence of the key.
 */
void
tde_mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
	(void) tde_smgr_get_key(reln);

	/*
	 * Plain call: ISO C does not allow "return <expression>;" in a function
	 * returning void.
	 */
	mdcreate(reln, forknum, isRedo);
}
/* Id returned by smgr_register() for the table below; set in RegisterStorageMgr(). */
static SMgrId tde_smgr_id;
/*
 * Storage manager callback table for "tde".
 *
 * Only create, extend, readv and writev are replaced by the tde_* wrappers
 * defined in this file; every other callback points straight at the
 * corresponding md* function.
 */
static const struct f_smgr tde_smgr = {
.name = "tde",
.smgr_init = mdinit,
.smgr_shutdown = NULL,
.smgr_open = mdopen,
.smgr_close = mdclose,
/* wrapper: makes sure the relation key exists, then calls mdcreate */
.smgr_create = tde_mdcreate,
.smgr_exists = mdexists,
.smgr_unlink = mdunlink,
/* wrapper: encrypts the new block before calling mdextend */
.smgr_extend = tde_mdextend,
/* zero-filled extension is not wrapped; tde_mdreadv skips such blocks */
.smgr_zeroextend = mdzeroextend,
.smgr_prefetch = mdprefetch,
/* wrapper: decrypts blocks in place after mdreadv */
.smgr_readv = tde_mdreadv,
/* wrapper: encrypts blocks into scratch buffers before mdwritev */
.smgr_writev = tde_mdwritev,
.smgr_writeback = mdwriteback,
.smgr_nblocks = mdnblocks,
.smgr_truncate = mdtruncate,
.smgr_immedsync = mdimmedsync,
};
/*
 * Register the "tde" storage manager and make it the active one by storing
 * the id returned by smgr_register() in storage_manager_id.
 *
 * Defined with a (void) parameter list so the definition is a real
 * prototype rather than an old-style unspecified-argument function.
 */
void
RegisterStorageMgr(void)
{
	tde_smgr_id = smgr_register(&tde_smgr, 0);

	/* TODO: figure out how this part should work in a real extension */
	storage_manager_id = tde_smgr_id;
}
#else
/*
 * Stub used when PERCONA_FORK is not defined: there is no storage manager
 * to register, so this does nothing.
 *
 * Defined with a (void) parameter list so the definition is a real
 * prototype rather than an old-style unspecified-argument function.
 */
void
RegisterStorageMgr(void)
{
}
#endif /* PERCONA_FORK */

@ -52,8 +52,6 @@ pg_tde_xact_callback(XactEvent event, void *arg)
{ {
pending_delete_cleanup(); pending_delete_cleanup();
} }
pg_tde_cleanup_path_vars();
} }
void void

@ -1,13 +0,0 @@
CREATE EXTENSION pg_tde;
-- server restart
CREATE TABLE test_enc(id SERIAL,k INTEGER,PRIMARY KEY (id)) USING pg_tde;
INSERT INTO test_enc (k) VALUES (5),(6);
SELECT * FROM test_enc ORDER BY id ASC;
1|5
2|6
-- server restart
SELECT * FROM test_enc ORDER BY id ASC;
1|5
2|6
DROP TABLE test_enc;
DROP EXTENSION pg_tde;
Loading…
Cancel
Save