@ -45,7 +45,7 @@ static void *EncryptionCryptCtx = NULL;
static WalEncryptionKey EncryptionKey =
static WalEncryptionKey EncryptionKey =
{
{
. type = MAP_ENTRY_EMPTY ,
. type = MAP_ENTRY_EMPTY ,
. start_ lsn = InvalidXLogRecPtr ,
. wal_start = { . tli = 0 , . lsn = InvalidXLogRecPtr } ,
} ;
} ;
/*
/*
@ -65,7 +65,12 @@ static WalEncryptionKey EncryptionKey =
typedef struct EncryptionStateData
typedef struct EncryptionStateData
{
{
pg_atomic_uint64 enc_key_lsn ; /* to sync with readers */
/*
* To sync with readers . We sync on LSN only and TLI here just to
* communicate its value to readers .
*/
pg_atomic_uint32 enc_key_tli ;
pg_atomic_uint64 enc_key_lsn ;
} EncryptionStateData ;
} EncryptionStateData ;
static EncryptionStateData * EncryptionState = NULL ;
static EncryptionStateData * EncryptionState = NULL ;
@ -78,10 +83,24 @@ TDEXLogGetEncKeyLsn()
return ( XLogRecPtr ) pg_atomic_read_u64 ( & EncryptionState - > enc_key_lsn ) ;
return ( XLogRecPtr ) pg_atomic_read_u64 ( & EncryptionState - > enc_key_lsn ) ;
}
}
static TimeLineID
TDEXLogGetEncKeyTli ( )
{
return ( TimeLineID ) pg_atomic_read_u32 ( & EncryptionState - > enc_key_tli ) ;
}
static void
static void
TDEXLogSetEncKeyLsn ( XLogRecPtr start_lsn )
TDEXLogSetEncKeyLocation ( WalLocation loc )
{
{
pg_atomic_write_u64 ( & EncryptionState - > enc_key_lsn , start_lsn ) ;
/*
* Write TLI first and then LSN . The barrier ensures writes won ' t be
* reordered . When reading , the opposite must be done ( with a matching
* barrier in between ) , so we always see a valid TLI after observing a
* valid LSN .
*/
pg_atomic_write_u32 ( & EncryptionState - > enc_key_tli , loc . tli ) ;
pg_write_barrier ( ) ;
pg_atomic_write_u64 ( & EncryptionState - > enc_key_lsn , loc . lsn ) ;
}
}
static Size TDEXLogEncryptBuffSize ( void ) ;
static Size TDEXLogEncryptBuffSize ( void ) ;
@ -166,7 +185,8 @@ TDEXLogShmemInit(void)
typedef struct EncryptionStateData
typedef struct EncryptionStateData
{
{
XLogRecPtr enc_key_lsn ; /* to sync with reader */
TimeLineID enc_key_tli ;
XLogRecPtr enc_key_lsn ;
} EncryptionStateData ;
} EncryptionStateData ;
static EncryptionStateData EncryptionStateD = { 0 } ;
static EncryptionStateData EncryptionStateD = { 0 } ;
@ -181,10 +201,17 @@ TDEXLogGetEncKeyLsn()
return ( XLogRecPtr ) EncryptionState - > enc_key_lsn ;
return ( XLogRecPtr ) EncryptionState - > enc_key_lsn ;
}
}
static TimeLineID
TDEXLogGetEncKeyTli ( )
{
return ( TimeLineID ) EncryptionState - > enc_key_tli ;
}
static void
static void
TDEXLogSetEncKeyLsn ( XLogRecPtr start_lsn )
TDEXLogSetEncKeyLocation ( WalLocation loc )
{
{
EncryptionState - > enc_key_lsn = EncryptionKey . start_lsn ;
EncryptionState - > enc_key_tli = loc . tli ;
EncryptionState - > enc_key_lsn = loc . lsn ;
}
}
# endif /* FRONTEND */
# endif /* FRONTEND */
@ -216,7 +243,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
else if ( key )
else if ( key )
{
{
EncryptionKey = * key ;
EncryptionKey = * key ;
TDEXLogSetEncKeyLs n ( EncryptionKey . start_lsn ) ;
TDEXLogSetEncKeyLocatio n ( EncryptionKey . wal_ start) ;
}
}
if ( key )
if ( key )
@ -231,7 +258,7 @@ TDEXLogSmgrInitWriteReuseKey()
if ( key )
if ( key )
{
{
EncryptionKey = * key ;
EncryptionKey = * key ;
TDEXLogSetEncKeyLs n ( EncryptionKey . start_lsn ) ;
TDEXLogSetEncKeyLocatio n ( EncryptionKey . wal_ start) ;
pfree ( key ) ;
pfree ( key ) ;
}
}
}
}
@ -252,8 +279,8 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
# endif
# endif
# ifdef TDE_XLOG_DEBUG
# ifdef TDE_XLOG_DEBUG
elog ( DEBUG1 , " write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X " ,
elog ( DEBUG1 , " write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %u_% X/%X " ,
count , offset , offset , LSN_FORMAT_ARGS ( segno ) , LSN_FORMAT_ARGS ( key - > start_ lsn) ) ;
count , offset , offset , LSN_FORMAT_ARGS ( segno ) , key - > wal_start . tli , LSN_FORMAT_ARGS ( key - > wal_start . lsn ) ) ;
# endif
# endif
CalcXLogPageIVPrefix ( tli , segno , key - > base_iv , iv_prefix ) ;
CalcXLogPageIVPrefix ( tli , segno , key - > base_iv , iv_prefix ) ;
@ -279,13 +306,13 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
*/
*/
if ( EncryptionKey . type ! = MAP_ENTRY_EMPTY & & TDEXLogGetEncKeyLsn ( ) = = 0 )
if ( EncryptionKey . type ! = MAP_ENTRY_EMPTY & & TDEXLogGetEncKeyLsn ( ) = = 0 )
{
{
XLogRecPtr lsn ;
WalLocation loc = { . tli = tli } ;
XLogSegNoOffsetToRecPtr ( segno , offset , segSize , lsn ) ;
XLogSegNoOffsetToRecPtr ( segno , offset , segSize , loc . l sn ) ;
pg_tde_wal_last_key_set_lsn ( lsn ) ;
pg_tde_wal_last_key_set_location ( loc ) ;
EncryptionKey . start_lsn = lsn ;
EncryptionKey . wal_ start = loc ;
TDEXLogSetEncKeyLsn ( lsn ) ;
TDEXLogSetEncKeyLocation ( EncryptionKey . wal_start ) ;
}
}
if ( EncryptionKey . type = = TDE_KEY_TYPE_WAL_ENCRYPTED )
if ( EncryptionKey . type = = TDE_KEY_TYPE_WAL_ENCRYPTED )
@ -304,12 +331,12 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
ssize_t readsz ;
ssize_t readsz ;
WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ( ) ;
WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ( ) ;
XLogRecPtr write_key_lsn ;
XLogRecPtr write_key_lsn ;
XLogRecPtr data_start ;
WalLocation data_end = { . tli = tli } ;
XLogRecPtr data_end ;
WalLocation data_start = { . tli = tli } ;
# ifdef TDE_XLOG_DEBUG
# ifdef TDE_XLOG_DEBUG
elog ( DEBUG1 , " read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X " ,
elog ( DEBUG1 , " read from a WAL segment, size: %lu offset: %ld [%lX], seg: %u_% X/%X " ,
count , offset , offset , LSN_FORMAT_ARGS ( segno ) ) ;
count , offset , offset , tli , LSN_FORMAT_ARGS ( segno ) ) ;
# endif
# endif
readsz = pg_pread ( fd , buf , count , offset ) ;
readsz = pg_pread ( fd , buf , count , offset ) ;
@ -319,30 +346,38 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
if ( ! keys )
if ( ! keys )
{
{
WalLocation start = { . tli = 1 , . lsn = 0 } ;
/* cache is empty, try to read keys from disk */
/* cache is empty, try to read keys from disk */
keys = pg_tde_fetch_wal_keys ( InvalidXLogRecPtr ) ;
keys = pg_tde_fetch_wal_keys ( start ) ;
}
}
/*
* The barrier ensures that we always read a vaild TLI after the valid
* LSN . See the comment in TDEXLogSetEncKeyLocation ( )
*/
write_key_lsn = TDEXLogGetEncKeyLsn ( ) ;
write_key_lsn = TDEXLogGetEncKeyLsn ( ) ;
pg_read_barrier ( ) ;
if ( ! XLogRecPtrIsInvalid ( write_key_lsn ) )
if ( ! XLogRecPtrIsInvalid ( write_key_lsn ) )
{
{
WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ( ) ;
WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ( ) ;
WalLocation write_loc = { . tli = TDEXLogGetEncKeyTli ( ) , . lsn = write_key_lsn } ;
Assert ( last_key ) ;
Assert ( last_key ) ;
/* write has generated a new key, need to fetch it */
/* write has generated a new key, need to fetch it */
if ( last_key - > start_lsn < write_key_lsn )
if ( wal_location_cmp ( last_key - > start , write_loc ) < 0 )
{
{
pg_tde_fetch_wal_keys ( write_key_lsn ) ;
pg_tde_fetch_wal_keys ( write_loc ) ;
/* in case cache was empty before */
/* in case cache was empty before */
keys = pg_tde_get_wal_cache_keys ( ) ;
keys = pg_tde_get_wal_cache_keys ( ) ;
}
}
}
}
XLogSegNoOffsetToRecPtr ( segno , offset , segSize , data_start ) ;
XLogSegNoOffsetToRecPtr ( segno , offset , segSize , data_start . lsn ) ;
XLogSegNoOffsetToRecPtr ( segno , offset + readsz , segSize , data_end ) ;
XLogSegNoOffsetToRecPtr ( segno , offset + readsz , segSize , data_end . lsn ) ;
/*
/*
* TODO : this is higly ineffective . We should get rid of linked list and
* TODO : this is higly ineffective . We should get rid of linked list and
@ -351,24 +386,24 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
for ( WALKeyCacheRec * curr_key = keys ; curr_key ! = NULL ; curr_key = curr_key - > next )
for ( WALKeyCacheRec * curr_key = keys ; curr_key ! = NULL ; curr_key = curr_key - > next )
{
{
# ifdef TDE_XLOG_DEBUG
# ifdef TDE_XLOG_DEBUG
elog ( DEBUG1 , " WAL key %X/%X-%X/%X, encrypted: %s " ,
elog ( DEBUG1 , " WAL key %u_% X/%X - %u_ %X/%X, encrypted: %s " ,
LSN_FORMAT_ARGS ( curr_key - > start_ lsn ) ,
curr_key - > start . tli , LSN_FORMAT_ARGS ( curr_key - > start . lsn ) ,
LSN_FORMAT_ARGS ( curr_key - > end_ lsn ) ,
curr_key - > end . tli , LSN_FORMAT_ARGS ( curr_key - > end . lsn ) ,
curr_key - > key . type = = TDE_KEY_TYPE_WAL_ENCRYPTED ? " yes " : " no " ) ;
curr_key - > key . type = = TDE_KEY_TYPE_WAL_ENCRYPTED ? " yes " : " no " ) ;
# endif
# endif
if ( curr_key - > key . start_lsn ! = InvalidXLogRecPtr & &
if ( wal_location_valid ( curr_key - > key . wal_start ) & &
curr_key - > key . type = = TDE_KEY_TYPE_WAL_ENCRYPTED )
curr_key - > key . type = = TDE_KEY_TYPE_WAL_ENCRYPTED )
{
{
/*
/*
* Check if the key ' s range overlaps with the buffer ' s and decypt
* Check if the key ' s range overlaps with the buffer ' s and decypt
* the part that does .
* the part that does .
*/
*/
if ( data_start < curr_key - > end_lsn & & data_end > curr_key - > start_lsn )
if ( wal_location_cmp ( data_start , curr_key - > end ) < 0 & & wal_location_cmp ( data_end , curr_key - > start ) > 0 )
{
{
char iv_prefix [ 16 ] ;
char iv_prefix [ 16 ] ;
off_t dec_off = XLogSegmentOffset ( Max ( data_start , curr_key - > start_ lsn ) , segSize ) ;
off_t dec_off = XLogSegmentOffset ( Max ( data_start . lsn , curr_key - > start . lsn ) , segSize ) ;
off_t dec_end = XLogSegmentOffset ( Min ( data_end , curr_key - > end_ lsn ) , segSize ) ;
off_t dec_end = XLogSegmentOffset ( Min ( data_end . lsn , curr_key - > end . lsn ) , segSize ) ;
size_t dec_sz ;
size_t dec_sz ;
char * dec_buf = ( char * ) buf + ( dec_off - offset ) ;
char * dec_buf = ( char * ) buf + ( dec_off - offset ) ;
@ -385,8 +420,8 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
dec_sz = dec_end - dec_off ;
dec_sz = dec_end - dec_off ;
# ifdef TDE_XLOG_DEBUG
# ifdef TDE_XLOG_DEBUG
elog ( DEBUG1 , " decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X " ,
elog ( DEBUG1 , " decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_% X/%X " ,
dec_off , dec_off - offset , dec_sz , LSN_FORMAT_ARGS ( curr_key - > key - > start_ lsn) ) ;
dec_off , dec_off - offset , dec_sz , curr_key - > key . wal_start . tli , LSN_FORMAT_ARGS ( curr_key - > key . wal_start . lsn ) ) ;
# endif
# endif
pg_tde_stream_crypt ( iv_prefix ,
pg_tde_stream_crypt ( iv_prefix ,
dec_off ,
dec_off ,