TDE TupleTableSlot for storing decrypted tuple along with the buffer … (#197)

* TDE TupleTableSlot for storing decrypted tuple along with the buffer tuple

Tuple data in the shared buffer is encrypted. To store a tuple in a
TupleTableSlot, the tuple data is decrypted into newly allocated memory. This
memory needs to be properly cleaned up. However, with the existing
BufferHeapTupleTableSlot, there is no way to free this memory until the end of
the current query executor cycle.

To address this, this commit introduces TDEBufferHeapTupleTableSlot, a clone of
BufferHeapTupleTableSlot that keeps a reference to the allocated decrypted tuple
and frees it when the tuple slot is cleared. Most of the code is borrowed from
the BufferHeapTupleTableSlot implementation, ensuring that
TDEBufferHeapTupleTableSlot can be cast to BufferHeapTupleTableSlot.

Apart from the above, a workaround to clear the decrypted tuple pointer
is added to the TDEBufferHeapTupleTableSlot for cases when the
slot is reused while the previously decrypted tuple was cleared out by
MemoryContext deletion, instead of through the slot cleanup callback.
pull/209/head
Muhammad Usama 1 year ago committed by GitHub
parent 2e2053ee60
commit fefe7b7dea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      Makefile.in
  2. 89
      expected/change_access_method.out
  3. 2
      meson.build
  4. 43
      sql/change_access_method.sql
  5. 552
      src/access/pg_tde_slot.c
  6. 3
      src/access/pg_tdeam.c
  7. 27
      src/access/pg_tdeam_handler.c
  8. 42
      src/encryption/enc_tde.c
  9. 50
      src/include/access/pg_tde_slot.h
  10. 6
      src/include/encryption/enc_tde.h

@ -15,6 +15,7 @@ pgtde_is_encrypted \
test_issue_153_fix \
multi_insert \
trigger_on_view \
change_access_method \
insert_update_delete \
keyprovider_dependency \
vault_v2_test
@ -22,6 +23,7 @@ TAP_TESTS = 1
OBJS = src/encryption/enc_tde.o \
src/encryption/enc_aes.o \
src/access/pg_tde_slot.o \
src/access/pg_tde_io.o \
src/access/pg_tdeam_visibility.o \
src/access/pg_tde_tdemap.o \

@ -0,0 +1,89 @@
CREATE EXTENSION pg_tde;
SELECT pg_tde_add_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per');
pg_tde_add_key_provider_file
------------------------------
1
(1 row)
SELECT pg_tde_set_master_key('test-db-master-key','file-vault');
pg_tde_set_master_key
-----------------------
t
(1 row)
CREATE TABLE country_table (
country_id serial primary key,
country_name text unique not null,
continent text not null
) using pg_tde;
INSERT INTO country_table (country_name, continent)
VALUES ('Japan', 'Asia'),
('UK', 'Europe'),
('USA', 'North America');
SELECT * FROM country_table;
country_id | country_name | continent
------------+--------------+---------------
1 | Japan | Asia
2 | UK | Europe
3 | USA | North America
(3 rows)
SELECT pgtde_is_encrypted('country_table');
pgtde_is_encrypted
--------------------
t
(1 row)
-- Try changing the encrypted table to an unencrypted table
ALTER TABLE country_table SET access method heap;
-- Insert some more data
INSERT INTO country_table (country_name, continent)
VALUES ('France', 'Europe'),
('Germany', 'Europe'),
('Canada', 'North America');
SELECT * FROM country_table;
country_id | country_name | continent
------------+--------------+---------------
1 | Japan | Asia
2 | UK | Europe
3 | USA | North America
4 | France | Europe
5 | Germany | Europe
6 | Canada | North America
(6 rows)
SELECT pgtde_is_encrypted('country_table');
pgtde_is_encrypted
--------------------
f
(1 row)
-- Change it back to encrypted
ALTER TABLE country_table SET access method pg_tde;
INSERT INTO country_table (country_name, continent)
VALUES ('China', 'Asia'),
('Brazil', 'South America'),
('Australia', 'Oceania');
SELECT * FROM country_table;
country_id | country_name | continent
------------+--------------+---------------
1 | Japan | Asia
2 | UK | Europe
3 | USA | North America
4 | France | Europe
5 | Germany | Europe
6 | Canada | North America
7 | China | Asia
8 | Brazil | South America
9 | Australia | Oceania
(9 rows)
SELECT pgtde_is_encrypted('country_table');
pgtde_is_encrypted
--------------------
t
(1 row)
DROP TABLE country_table;
DROP EXTENSION pg_tde;

@ -17,6 +17,7 @@ pg_tde_sources = files(
'src/pg_tde.c',
'src/transam/pg_tde_xact_handler.c',
'src/access/pg_tde_tdemap.c',
'src/access/pg_tde_slot.c',
'src/access/pg_tdeam.c',
'src/access/pg_tdeam_handler.c',
'src/access/pg_tdeam_visibility.c',
@ -89,6 +90,7 @@ tests += {
'multi_insert',
'keyprovider_dependency',
'trigger_on_view',
'change_access_method',
'insert_update_delete',
'vault_v2_test',
],

@ -0,0 +1,43 @@
CREATE EXTENSION pg_tde;
SELECT pg_tde_add_key_provider_file('file-vault','/tmp/pg_tde_test_keyring.per');
SELECT pg_tde_set_master_key('test-db-master-key','file-vault');
CREATE TABLE country_table (
country_id serial primary key,
country_name text unique not null,
continent text not null
) using pg_tde;
INSERT INTO country_table (country_name, continent)
VALUES ('Japan', 'Asia'),
('UK', 'Europe'),
('USA', 'North America');
SELECT * FROM country_table;
SELECT pgtde_is_encrypted('country_table');
-- Try changing the encrypted table to an unencrypted table
ALTER TABLE country_table SET access method heap;
-- Insert some more data
INSERT INTO country_table (country_name, continent)
VALUES ('France', 'Europe'),
('Germany', 'Europe'),
('Canada', 'North America');
SELECT * FROM country_table;
SELECT pgtde_is_encrypted('country_table');
-- Change it back to encrypted
ALTER TABLE country_table SET access method pg_tde;
INSERT INTO country_table (country_name, continent)
VALUES ('China', 'Asia'),
('Brazil', 'South America'),
('Australia', 'Oceania');
SELECT * FROM country_table;
SELECT pgtde_is_encrypted('country_table');
DROP TABLE country_table;
DROP EXTENSION pg_tde;

@ -0,0 +1,552 @@
/*-------------------------------------------------------------------------
*
* pg_tde_slot.c
* pg_tde TupleTableSlot implementation code
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 2024, Percona
*
*
* IDENTIFICATION
* contrib/pg_tde/access/pg_tde_slot.c
*
*
*/
#include "postgres.h"
#include "access/pg_tde_slot.h"
#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/tupdesc_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/expandeddatum.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
#include "encryption/enc_tde.h"
/*
* TTSOpsTDEBufferHeapTuple is effectively the same as the TTSOpsBufferHeapTuple
* slot implementation. The only difference is that it keeps a reference to the
* decrypted tuple and frees it during the clear slot operation.
*
* This is only a forward declaration; the callback table itself is filled in
* further down in this file, after the callbacks have been defined.
*/
const TupleTableSlotOps TTSOpsTDEBufferHeapTuple;
/* Prototypes for helpers that are called before their definitions appear. */
static pg_attribute_always_inline void pg_tde_slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp, int natts);
static inline void pg_tde_tts_buffer_heap_store_tuple(TupleTableSlot *slot,
HeapTuple tuple,
Buffer buffer,
bool transfer_pin);
/*
 * Slot "init" callback: a newly created TDE slot does not own a decrypted
 * tuple yet, so start with a NULL pointer.
 */
static void
pg_tde_tts_buffer_heap_init(TupleTableSlot *slot)
{
	TDEBufferHeapTupleTableSlot *tdeslot;

	tdeslot = (TDEBufferHeapTupleTableSlot *) slot;
	tdeslot->decrypted_tuple = NULL;
}
/*
* Slot "release" callback. The body is empty: the decrypted tuple held by the
* slot is freed by the clear callback (pg_tde_tts_buffer_heap_clear), not here.
*/
static void
pg_tde_tts_buffer_heap_release(TupleTableSlot *slot)
{
}
/*
 * Slot "clear" callback.
 *
 * Frees the decrypted tuple copy owned by the slot (if any), frees a
 * materialized tuple (if TTS_FLAG_SHOULDFREE is set), drops the buffer pin
 * (if one is held) and finally resets the slot to the empty state.
 */
static void
pg_tde_tts_buffer_heap_clear(TupleTableSlot *slot)
{
	TDEBufferHeapTupleTableSlot *tdeslot = (TDEBufferHeapTupleTableSlot *) slot;

	/* Give back the private decrypted copy, then forget about it. */
	if (tdeslot->decrypted_tuple != NULL)
	{
		heap_freetuple(tdeslot->decrypted_tuple);
		tdeslot->decrypted_tuple = NULL;
	}

	/*
	 * A tuple that lives in a buffer is never ours to free, but a tuple that
	 * was materialized out of a buffer is; TTS_FLAG_SHOULDFREE tells the two
	 * apart.
	 */
	if (TTS_SHOULDFREE(slot))
	{
		/* Materializing must already have dropped the buffer pin. */
		Assert(!BufferIsValid(tdeslot->buffer));

		heap_freetuple(tdeslot->base.tuple);
		slot->tts_flags &= ~TTS_FLAG_SHOULDFREE;
	}

	if (BufferIsValid(tdeslot->buffer))
		ReleaseBuffer(tdeslot->buffer);

	/* Reset everything that described the previous contents. */
	tdeslot->buffer = InvalidBuffer;
	tdeslot->base.tuple = NULL;
	tdeslot->base.off = 0;
	ItemPointerSetInvalid(&slot->tts_tid);
	slot->tts_flags |= TTS_FLAG_EMPTY;
	slot->tts_nvalid = 0;
}
/*
 * Slot "getsomeattrs" callback: deform the slot's heap tuple so that the
 * attributes up to natts are available in the slot's value/isnull arrays.
 * The slot must not be empty.
 */
static void
pg_tde_tts_buffer_heap_getsomeattrs(TupleTableSlot *slot, int natts)
{
	BufferHeapTupleTableSlot *bufslot;

	Assert(!TTS_EMPTY(slot));

	bufslot = (BufferHeapTupleTableSlot *) slot;
	pg_tde_slot_deform_heap_tuple(slot,
								  bufslot->base.tuple,
								  &bufslot->base.off,
								  natts);
}
/*
 * Slot "getsysattr" callback: fetch system attribute attnum from the heap
 * tuple stored in the slot.
 *
 * Raises ERRCODE_FEATURE_NOT_SUPPORTED when the slot has no heap tuple
 * attached, because there is then nothing to read system columns from.
 */
static Datum
pg_tde_tts_buffer_heap_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;

	Assert(!TTS_EMPTY(slot));

	/*
	 * Some code paths reach this point with a slot that has not been
	 * materialized; system columns cannot be produced in that state.
	 */
	if (bufslot->base.tuple == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot retrieve a system column in this context")));

	return heap_getsysattr(bufslot->base.tuple,
						   attnum,
						   slot->tts_tupleDescriptor,
						   isnull);
}
/*
 * Slot "materialize" callback: make the slot own a private copy of its
 * tuple, allocated in the slot's memory context (tts_mcxt).
 *
 * Does nothing if the slot is already materialized. Otherwise the tuple is
 * either copied (when a heap tuple is attached) or formed from the slot's
 * value/isnull arrays (when none is attached), and any buffer pin is dropped.
 * The slot's decrypted_tuple pointer is not touched here.
 */
static void
pg_tde_tts_buffer_heap_materialize(TupleTableSlot *slot)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;
	MemoryContext callerContext;

	Assert(!TTS_EMPTY(slot));

	/* Already materialized: the slot owns its tuple, nothing left to do. */
	if (TTS_SHOULDFREE(slot))
		return;

	callerContext = MemoryContextSwitchTo(slot->tts_mcxt);

	/*
	 * Force deforming to start over. Otherwise tts_values[] entries could
	 * keep pointing into the old, non-materialized tuple, which may be gone
	 * by the time they are accessed.
	 */
	slot->tts_nvalid = 0;
	bufslot->base.off = 0;

	if (bufslot->base.tuple)
	{
		bufslot->base.tuple = heap_copytuple(bufslot->base.tuple);

		/*
		 * A heap tuple stored in this kind of slot normally comes with a
		 * buffer, unless the slot is materialized or virtual.
		 */
		if (likely(BufferIsValid(bufslot->buffer)))
			ReleaseBuffer(bufslot->buffer);
		bufslot->buffer = InvalidBuffer;
	}
	else
	{
		/*
		 * No tuple attached: the slot holds a virtual tuple. Storing virtual
		 * tuples in a buffer slot is allowed, so those must be
		 * materializable as well.
		 */
		bufslot->base.tuple = heap_form_tuple(slot->tts_tupleDescriptor,
											  slot->tts_values,
											  slot->tts_isnull);
	}

	/*
	 * TTS_FLAG_SHOULDFREE is set only after the buffer has been released, so
	 * there is never a transient state in which a slot with that flag still
	 * owns a buffer (which would trip the assertions elsewhere). Should
	 * ReleaseBuffer() error out, the copied tuple is leaked, which is
	 * considered harmless.
	 */
	slot->tts_flags |= TTS_FLAG_SHOULDFREE;

	MemoryContextSwitchTo(callerContext);
}
/*
 * Helper for pg_tde_tts_buffer_heap_copyslot: replace the contents of
 * dstslot with a private copy of srcslot's tuple, allocated in dstslot's
 * memory context and owned (TTS_FLAG_SHOULDFREE) by dstslot.
 */
static void
pg_tde_tts_buffer_heap_copy_into(TupleTableSlot *dstslot, TupleTableSlot *srcslot)
{
	BufferHeapTupleTableSlot *bdstslot = (BufferHeapTupleTableSlot *) dstslot;
	MemoryContext oldContext;

	ExecClearTuple(dstslot);
	dstslot->tts_flags &= ~TTS_FLAG_EMPTY;
	oldContext = MemoryContextSwitchTo(dstslot->tts_mcxt);
	bdstslot->base.tuple = ExecCopySlotHeapTuple(srcslot);
	dstslot->tts_flags |= TTS_FLAG_SHOULDFREE;
	MemoryContextSwitchTo(oldContext);
}

/*
 * Slot "copyslot" callback: make dstslot hold the same tuple as srcslot.
 *
 * Three cases:
 *
 * 1. The source is a different kind of slot, is materialized, or is virtual:
 *    dstslot receives its own copy of the tuple.
 *
 * 2. The source is a TDE buffer slot that holds a decrypted tuple: the
 *    source's tuple data then lives in memory owned by the source slot
 *    (its decrypted_tuple, which the source's clear callback frees), not in
 *    the pinned buffer. Sharing that pointer would leave dstslot dangling as
 *    soon as the source is cleared, so dstslot receives its own copy here
 *    too. tts_tid is set from the copied tuple so that it matches what the
 *    reference path would have stored.
 *
 * 3. Otherwise the tuple data really is in the buffer: dstslot takes a new
 *    pin on the buffer and references the in-buffer tuple.
 */
static void
pg_tde_tts_buffer_heap_copyslot(TupleTableSlot *dstslot, TupleTableSlot *srcslot)
{
	BufferHeapTupleTableSlot *bsrcslot = (BufferHeapTupleTableSlot *) srcslot;
	BufferHeapTupleTableSlot *bdstslot = (BufferHeapTupleTableSlot *) dstslot;

	if (dstslot->tts_ops != srcslot->tts_ops ||
		TTS_SHOULDFREE(srcslot) ||
		!bsrcslot->base.tuple)
	{
		pg_tde_tts_buffer_heap_copy_into(dstslot, srcslot);
	}
	else if (((TDEBufferHeapTupleTableSlot *) srcslot)->decrypted_tuple != NULL)
	{
		/*
		 * Safe to look at decrypted_tuple: the first test above established
		 * that srcslot uses the same slot ops as dstslot, i.e. it is a TDE
		 * buffer slot as well.
		 */
		pg_tde_tts_buffer_heap_copy_into(dstslot, srcslot);
		dstslot->tts_tid = bdstslot->base.tuple->t_self;
	}
	else
	{
		Assert(BufferIsValid(bsrcslot->buffer));

		pg_tde_tts_buffer_heap_store_tuple(dstslot, bsrcslot->base.tuple,
										   bsrcslot->buffer, false);

		/*
		 * The HeapTupleData portion of the source tuple might be shorter
		 * lived than the destination slot. Therefore copy the HeapTuple into
		 * our slot's tupdata, which is guaranteed to live long enough (its
		 * t_data still points into the buffer, which dstslot now has pinned).
		 */
		memcpy(&bdstslot->base.tupdata, bdstslot->base.tuple, sizeof(HeapTupleData));
		bdstslot->base.tuple = &bdstslot->base.tupdata;
	}
}
/*
 * Slot "get_heap_tuple" callback: return the heap tuple held by the slot,
 * materializing the slot first if it has no heap tuple attached. The
 * returned tuple is the slot's own, not a copy.
 */
static HeapTuple
pg_tde_tts_buffer_heap_get_heap_tuple(TupleTableSlot *slot)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;

	Assert(!TTS_EMPTY(slot));

	if (bufslot->base.tuple == NULL)
		pg_tde_tts_buffer_heap_materialize(slot);

	return bufslot->base.tuple;
}
/*
 * Slot "copy_heap_tuple" callback: return a copy (made with heap_copytuple)
 * of the heap tuple held by the slot, materializing the slot first if it has
 * no heap tuple attached.
 */
static HeapTuple
pg_tde_tts_buffer_heap_copy_heap_tuple(TupleTableSlot *slot)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;

	Assert(!TTS_EMPTY(slot));

	if (bufslot->base.tuple == NULL)
		pg_tde_tts_buffer_heap_materialize(slot);

	return heap_copytuple(bufslot->base.tuple);
}
/*
 * Slot "copy_minimal_tuple" callback: build a MinimalTuple from the heap
 * tuple held by the slot, materializing the slot first if it has no heap
 * tuple attached.
 */
static MinimalTuple
pg_tde_tts_buffer_heap_copy_minimal_tuple(TupleTableSlot *slot)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;

	Assert(!TTS_EMPTY(slot));

	if (bufslot->base.tuple == NULL)
		pg_tde_tts_buffer_heap_materialize(slot);

	return minimal_tuple_from_heap_tuple(bufslot->base.tuple);
}
/*
 * Store a heap tuple, together with the buffer it belongs to, into the slot.
 *
 * slot:         destination slot
 * tuple:        tuple to reference (not copied)
 * buffer:       buffer the tuple belongs to, or InvalidBuffer
 * transfer_pin: if true, the caller's pin on buffer is handed over to the
 *               slot; if false, the slot acquires a pin of its own
 *
 * A previously materialized tuple is freed first. The decrypted_tuple
 * pointer of a TDE slot is not touched by this helper.
 */
static inline void
pg_tde_tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple,
								   Buffer buffer, bool transfer_pin)
{
	BufferHeapTupleTableSlot *bufslot = (BufferHeapTupleTableSlot *) slot;

	if (TTS_SHOULDFREE(slot))
	{
		/* A materialized slot cannot be holding a buffer pin. */
		Assert(!BufferIsValid(bufslot->buffer));

		heap_freetuple(bufslot->base.tuple);
		slot->tts_flags &= ~TTS_FLAG_SHOULDFREE;
	}

	/* Point the slot at the new tuple and restart deforming from scratch. */
	bufslot->base.tuple = tuple;
	bufslot->base.off = 0;
	slot->tts_tid = tuple->t_self;
	slot->tts_nvalid = 0;
	slot->tts_flags &= ~TTS_FLAG_EMPTY;

	/*
	 * While the slot holds a pointer to a tuple on a disk page, that page
	 * must stay pinned. The caller is assumed to hold a pin already; it is
	 * either taken over (transfer_pin) or an additional pin is acquired.
	 *
	 * The case where the slot already holds a pin on the very same buffer
	 * is handled separately, so that the pin is not released and then
	 * re-acquired for nothing. That situation is common during seqscans.
	 */
	if (bufslot->buffer == buffer)
	{
		/*
		 * Same page as before, so the slot's existing pin is kept. With
		 * transfer_pin the caller has given up its pin without knowing
		 * about this shortcut, so that surplus pin is released here.
		 */
		if (transfer_pin && BufferIsValid(buffer))
			ReleaseBuffer(buffer);
	}
	else
	{
		if (BufferIsValid(bufslot->buffer))
			ReleaseBuffer(bufslot->buffer);

		bufslot->buffer = buffer;

		if (!transfer_pin && BufferIsValid(buffer))
			IncrBufferRefCount(buffer);
	}
}
/*
* pg_tde_slot_deform_heap_tuple
* Given a TupleTableSlot, extract data from the given heap tuple
* into the slot's Datum/isnull arrays. Data is extracted up through the
* natts'th column (caller must ensure this is a legal column number);
* natts is additionally clamped to the number of attributes the tuple
* header reports.
*
* This is essentially an incremental version of heap_deform_tuple:
* on each call we extract attributes up to the one needed, without
* re-computing information about previously extracted attributes.
* slot->tts_nvalid is the number of attributes already extracted.
*
* slot: slot whose tts_values/tts_isnull arrays are filled in
* tuple: heap tuple to read the attribute data from
* offp: in/out; offset into the tuple data at which extraction resumes,
* updated to the offset just past the last extracted attribute
* natts: number of leading attributes that should be valid afterwards
*
* Side effects: updates slot->tts_nvalid and TTS_FLAG_SLOW in
* slot->tts_flags, and may store attcacheoff values into the attributes of
* the slot's tuple descriptor.
*
* This is marked as always inline, so the different offp for different types
* of slots gets optimized away.
*/
static pg_attribute_always_inline void
pg_tde_slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
int natts)
{
TupleDesc tupleDesc = slot->tts_tupleDescriptor;
Datum *values = slot->tts_values;
bool *isnull = slot->tts_isnull;
HeapTupleHeader tup = tuple->t_data;
bool hasnulls = HeapTupleHasNulls(tuple);
int attnum;
char *tp; /* ptr to tuple data */
uint32 off; /* offset in tuple data */
bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */
bool slow; /* true once attcacheoff can no longer be used/set */
/* We can only fetch as many attributes as the tuple has. */
natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), natts);
/*
* Check whether the first call for this tuple, and initialize or restore
* loop state.
*/
attnum = slot->tts_nvalid;
if (attnum == 0)
{
/* Start from the first attribute */
off = 0;
slow = false;
}
else
{
/* Restore state from previous execution */
off = *offp;
slow = TTS_SLOW(slot);
}
/* Attribute data starts t_hoff bytes into the tuple header. */
tp = (char *) tup + tup->t_hoff;
for (; attnum < natts; attnum++)
{
Form_pg_attribute thisatt = TupleDescAttr(tupleDesc, attnum);
if (hasnulls && att_isnull(attnum, bp))
{
values[attnum] = (Datum) 0;
isnull[attnum] = true;
slow = true; /* can't use attcacheoff anymore */
continue;
}
isnull[attnum] = false;
/* Use the cached offset when it is still trustworthy. */
if (!slow && thisatt->attcacheoff >= 0)
off = thisatt->attcacheoff;
else if (thisatt->attlen == -1)
{
/*
* We can only cache the offset for a varlena attribute if the
* offset is already suitably aligned, so that there would be no
* pad bytes in any case: then the offset will be valid for either
* an aligned or unaligned value.
*/
if (!slow &&
off == att_align_nominal(off, thisatt->attalign))
thisatt->attcacheoff = off;
else
{
off = att_align_pointer(off, thisatt->attalign, -1,
tp + off);
slow = true;
}
}
else
{
/* not varlena, so safe to use att_align_nominal */
off = att_align_nominal(off, thisatt->attalign);
if (!slow)
thisatt->attcacheoff = off;
}
/* Fetch the value, then advance past it. */
values[attnum] = fetchatt(thisatt, tp + off);
off = att_addlength_pointer(off, thisatt->attlen, tp + off);
if (thisatt->attlen <= 0)
slow = true; /* can't use attcacheoff anymore */
}
/*
* Save state for next execution
*/
slot->tts_nvalid = attnum;
*offp = off;
if (slow)
slot->tts_flags |= TTS_FLAG_SLOW;
else
slot->tts_flags &= ~TTS_FLAG_SLOW;
}
/*
 * Callback table for TDE buffer heap tuple slots. Slots of this kind are
 * sized as TDEBufferHeapTupleTableSlot so that they have room for the
 * decrypted tuple pointer.
 */
const TupleTableSlotOps TTSOpsTDEBufferHeapTuple = {
	.base_slot_size = sizeof(TDEBufferHeapTupleTableSlot),

	/* slot lifecycle */
	.init = pg_tde_tts_buffer_heap_init,
	.release = pg_tde_tts_buffer_heap_release,
	.clear = pg_tde_tts_buffer_heap_clear,

	/* attribute access */
	.getsomeattrs = pg_tde_tts_buffer_heap_getsomeattrs,
	.getsysattr = pg_tde_tts_buffer_heap_getsysattr,

	/* materializing and copying */
	.materialize = pg_tde_tts_buffer_heap_materialize,
	.copyslot = pg_tde_tts_buffer_heap_copyslot,
	.get_heap_tuple = pg_tde_tts_buffer_heap_get_heap_tuple,

	/* A buffer heap tuple table slot can not "own" a minimal tuple. */
	.get_minimal_tuple = NULL,
	.copy_heap_tuple = pg_tde_tts_buffer_heap_copy_heap_tuple,
	.copy_minimal_tuple = pg_tde_tts_buffer_heap_copy_minimal_tuple
};
/* --------------------------------
 *		PGTdeExecStoreBufferHeapTuple
 *
 *		TDE counterpart of ExecStoreBufferHeapTuple: store an on-disk
 *		physical tuple from a buffer into a TDE buffer heap tuple slot,
 *		decrypting it on the way.
 *
 *		rel:	relation the tuple belongs to
 *		tuple:	tuple to store
 *		slot:	TTSOpsTDEBufferHeapTuple type slot to store it in
 *		buffer: disk buffer the tuple is in (must be valid)
 *
 * Unless rel is a TOAST relation, the tuple is copied with heap_copytuple()
 * in the current memory context, the copy is decrypted, and the slot keeps
 * the copy in decrypted_tuple. Note that tuple->t_data is then redirected to
 * the decrypted copy, i.e. the caller's HeapTupleData is modified.
 *
 * The slot acquires its own pin on the buffer, which is held until the slot
 * is cleared.
 *
 * Raises an error if slot is not a TTSOpsTDEBufferHeapTuple slot.
 *
 * Return value is just the passed-in slot pointer.
 *
 * NOTE(review): a decrypted_tuple pointer left in the slot by a previous
 * store is overwritten here without being freed -- confirm that this is
 * intended for slots that are re-used without being cleared in between.
 * --------------------------------
 */
TupleTableSlot *
PGTdeExecStoreBufferHeapTuple(Relation rel,
							  HeapTuple tuple,
							  TupleTableSlot *slot,
							  Buffer buffer)
{
	TDEBufferHeapTupleTableSlot *tdeslot = (TDEBufferHeapTupleTableSlot *) slot;

	/* sanity checks */
	Assert(rel != NULL);
	Assert(tuple != NULL);
	Assert(slot != NULL);
	Assert(slot->tts_tupleDescriptor != NULL);
	Assert(BufferIsValid(buffer));

	if (unlikely(!TTS_IS_TDE_BUFFERTUPLE(slot)))
		elog(ERROR, "trying to store an on-disk heap tuple into wrong type of slot");

	if (rel->rd_rel->relkind == RELKIND_TOASTVALUE)
	{
		/* No decrypted copy is made for TOAST relations. */
		tdeslot->decrypted_tuple = NULL;
	}
	else
	{
		RelKeyData *key = GetRelationKey(rel->rd_locator);

		tdeslot->decrypted_tuple = heap_copytuple(tuple);
		PG_TDE_DECRYPT_TUPLE_EX(tuple, tdeslot->decrypted_tuple, key, "ExecStoreBuffer");
		/* TODO: revisit this */
		tuple->t_data = tdeslot->decrypted_tuple->t_data;
	}

	pg_tde_tts_buffer_heap_store_tuple(slot, tuple, buffer, false);
	slot->tts_tableOid = tuple->t_tableOid;

	return slot;
}
/*
 * Like PGTdeExecStoreBufferHeapTuple, but transfer an existing pin from the
 * caller to the slot, i.e. the caller doesn't need to, and may not, release
 * the pin.
 *
 * Unless rel is a TOAST relation, the tuple is copied with heap_copytuple()
 * in the current memory context, the copy is decrypted and kept in the
 * slot's decrypted_tuple, and tuple->t_data is redirected to the decrypted
 * copy (so the caller's HeapTupleData is modified).
 *
 * Raises an error if slot is not a TTSOpsTDEBufferHeapTuple slot. Returns
 * the passed-in slot pointer.
 *
 * NOTE(review): a decrypted_tuple pointer left in the slot by a previous
 * store is overwritten here without being freed -- confirm that this is
 * intended for slots that are re-used without being cleared in between.
 */
TupleTableSlot *
PGTdeExecStorePinnedBufferHeapTuple(Relation rel,
									HeapTuple tuple,
									TupleTableSlot *slot,
									Buffer buffer)
{
	TDEBufferHeapTupleTableSlot *tdeslot = (TDEBufferHeapTupleTableSlot *) slot;

	/* sanity checks */
	Assert(rel != NULL);
	Assert(tuple != NULL);
	Assert(slot != NULL);
	Assert(slot->tts_tupleDescriptor != NULL);
	Assert(BufferIsValid(buffer));

	if (unlikely(!TTS_IS_TDE_BUFFERTUPLE(slot)))
		elog(ERROR, "trying to store an on-disk heap tuple into wrong type of slot");

	if (rel->rd_rel->relkind == RELKIND_TOASTVALUE)
	{
		/* No decrypted copy is made for TOAST relations. */
		tdeslot->decrypted_tuple = NULL;
	}
	else
	{
		RelKeyData *key = GetRelationKey(rel->rd_locator);

		tdeslot->decrypted_tuple = heap_copytuple(tuple);
		PG_TDE_DECRYPT_TUPLE_EX(tuple, tdeslot->decrypted_tuple, key, "ExecStorePinnedBuffer");
		/* TODO: revisit this */
		tuple->t_data = tdeslot->decrypted_tuple->t_data;
	}

	pg_tde_tts_buffer_heap_store_tuple(slot, tuple, buffer, true);
	slot->tts_tableOid = tuple->t_tableOid;

	return slot;
}
/*
 * Workaround: make the slot forget its decrypted tuple without freeing it.
 *
 * Intended for the situation where the memory context that contained the
 * decrypted tuple has already been deleted: resetting the pointer here
 * keeps the subsequent slot cleanup from trying to free that tuple again.
 * The caller is responsible for passing a TDE buffer heap tuple slot; no
 * check is made.
 */
void
TdeSlotForgetDecryptedTuple(TupleTableSlot *slot)
{
	TDEBufferHeapTupleTableSlot *tdeslot;

	tdeslot = (TDEBufferHeapTupleTableSlot *) slot;
	tdeslot->decrypted_tuple = NULL;
}

@ -39,6 +39,7 @@
#include "access/pg_tdetoast.h"
#include "access/pg_tde_io.h"
#include "access/pg_tde_visibilitymap.h"
#include "access/pg_tde_slot.h"
#include "encryption/enc_tde.h"
#include "access/bufmask.h"
@ -1157,6 +1158,7 @@ pg_tde_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot
if (scan->rs_ctup.t_data == NULL)
{
TdeSlotForgetDecryptedTuple(slot);
ExecClearTuple(slot);
return false;
}
@ -1264,6 +1266,7 @@ pg_tde_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
if (scan->rs_ctup.t_data == NULL)
{
TdeSlotForgetDecryptedTuple(slot);
ExecClearTuple(slot);
return false;
}

@ -22,6 +22,8 @@
#include "postgres.h"
#include "access/pg_tde_slot.h"
#include "access/pg_tdeam.h"
#include "access/pg_tdetoast.h"
#include "access/pg_tde_rewrite.h"
@ -76,7 +78,7 @@ static const TableAmRoutine pg_tdeam_methods;
static const TupleTableSlotOps *
pg_tdeam_slot_callbacks(Relation relation)
{
return &TTSOpsBufferHeapTuple;
return &TTSOpsTDEBufferHeapTuple;
}
@ -129,7 +131,7 @@ pg_tdeam_index_fetch_tuple(struct IndexFetchTableData *scan,
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool got_pg_tde_tuple;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(TTS_IS_TDE_BUFFERTUPLE(slot));
/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
if (!*call_again)
@ -195,7 +197,7 @@ pg_tdeam_fetch_row_version(Relation relation,
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
Buffer buffer;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(TTS_IS_TDE_BUFFERTUPLE(slot));
bslot->base.tupdata.t_self = *tid;
if (pg_tde_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
@ -226,7 +228,7 @@ pg_tdeam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool res;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(TTS_IS_TDE_BUFFERTUPLE(slot));
Assert(BufferIsValid(bslot->buffer));
/*
@ -380,7 +382,7 @@ pg_tdeam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
tmfd->traversed = false;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(TTS_IS_TDE_BUFFERTUPLE(slot));
tuple_lock_retry:
tuple->t_self = *tid;
@ -1054,7 +1056,7 @@ pg_tdeam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
OffsetNumber maxoffset;
BufferHeapTupleTableSlot *hslot;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(TTS_IS_TDE_BUFFERTUPLE(slot));
hslot = (BufferHeapTupleTableSlot *) slot;
targpage = BufferGetPage(hscan->rs_cbuf);
@ -1180,7 +1182,7 @@ pg_tdeam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
/* Now release the lock and pin on the page */
UnlockReleaseBuffer(hscan->rs_cbuf);
hscan->rs_cbuf = InvalidBuffer;
TdeSlotForgetDecryptedTuple(slot);
/* also prevent old slot contents from having pin on page */
ExecClearTuple(slot);
@ -2452,7 +2454,16 @@ pg_tdeam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
*/
if (!pagemode)
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/*
* Hack:
* An earlier use of this TupleTableSlot may have deleted the memory
* context holding the decrypted tuple without calling the clear slot
* function. In that case the slot still holds a non-NULL pointer to the
* decrypted tuple, which is now invalid. So we need to explicitly reset
* the decrypted tuple pointer before calling the clear slot function.
*/
TdeSlotForgetDecryptedTuple(slot);
ExecClearTuple(slot);
return false;
}

@ -3,6 +3,7 @@
#include "postgres.h"
#include "utils/memutils.h"
#include "access/pg_tde_slot.h"
#include "access/pg_tde_tdemap.h"
#include "encryption/enc_tde.h"
#include "encryption/enc_aes.h"
@ -223,47 +224,6 @@ PGTdePageAddItemExtended(RelFileLocator rel,
return off;
}
TupleTableSlot *
PGTdeExecStoreBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
{
if (rel->rd_rel->relkind != RELKIND_TOASTVALUE)
{
MemoryContext oldContext;
HeapTuple decrypted_tuple;
RelKeyData *key = GetRelationKey(rel->rd_locator);
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
decrypted_tuple = heap_copytuple(tuple);
MemoryContextSwitchTo(oldContext);
PG_TDE_DECRYPT_TUPLE_EX(tuple, decrypted_tuple, key, "ExecStoreBuffer");
/* TODO: revisit this */
tuple->t_data = decrypted_tuple->t_data;
}
return ExecStoreBufferHeapTuple(tuple, slot, buffer);
}
TupleTableSlot *
PGTdeExecStorePinnedBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
{
if (rel->rd_rel->relkind != RELKIND_TOASTVALUE)
{
MemoryContext oldContext;
HeapTuple decrypted_tuple;
RelKeyData *key = GetRelationKey(rel->rd_locator);
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
decrypted_tuple = heap_copytuple(tuple);
MemoryContextSwitchTo(oldContext);
PG_TDE_DECRYPT_TUPLE_EX(tuple, decrypted_tuple, key, "ExecStoreBuffer");
/* TODO: revisit this */
tuple->t_data = decrypted_tuple->t_data;
}
return ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
}
/*
* Provide a simple interface to encrypt a given key.
*

@ -0,0 +1,50 @@
/*-------------------------------------------------------------------------
*
* pg_tde_slot.h
* TupleSlot implementation for TDE
*
* src/include/access/pg_tde_slot.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_TDE_SLOT_H
#define PG_TDE_SLOT_H
#include "postgres.h"
#include "executor/tuptable.h"
#include "utils/relcache.h"
/*
* Heap tuple residing in a buffer, plus the decrypted copy of its data.
*
* The slot callbacks in pg_tde_slot.c cast slots of this type to
* BufferHeapTupleTableSlot, so "base" and "buffer" must stay the leading
* members, in this order; new members may only be added after them.
*/
typedef struct TDEBufferHeapTupleTableSlot
{
pg_node_attr(abstract)
HeapTupleTableSlot base;
/*
 * If buffer is not InvalidBuffer, then the slot is holding a pin on the
 * indicated buffer page; drop the pin when we release the slot's
 * reference to that buffer. (TTS_FLAG_SHOULDFREE should not be set in
 * such a case, since presumably base.tuple is pointing into the buffer.)
 */
Buffer buffer; /* tuple's buffer, or InvalidBuffer */
/*
 * Decrypted copy of the stored tuple, or NULL if the slot holds none.
 * Freed when the slot is cleared.
 */
HeapTuple decrypted_tuple; /* decrypted tuple */
} TDEBufferHeapTupleTableSlot;
/* Slot callbacks for TDEBufferHeapTupleTableSlot; defined in pg_tde_slot.c. */
extern PGDLLIMPORT const TupleTableSlotOps TTSOpsTDEBufferHeapTuple;
/* True if the slot uses the TDE buffer heap tuple callbacks. */
#define TTS_IS_TDE_BUFFERTUPLE(slot) ((slot)->tts_ops == &TTSOpsTDEBufferHeapTuple)
/*
 * Store a tuple from a buffer into a TDE slot, decrypting it first (except
 * for TOAST relations). The caller's pin on the buffer is transferred to
 * the slot.
 */
extern TupleTableSlot *PGTdeExecStorePinnedBufferHeapTuple(Relation rel,
HeapTuple tuple,
TupleTableSlot *slot,
Buffer buffer);
/*
 * Same as above, but the slot acquires its own pin on the buffer; the
 * caller keeps its pin.
 */
extern TupleTableSlot *PGTdeExecStoreBufferHeapTuple(Relation rel,
HeapTuple tuple,
TupleTableSlot *slot,
Buffer buffer);
/* Reset the slot's decrypted tuple pointer to NULL without freeing it. */
extern void TdeSlotForgetDecryptedTuple(TupleTableSlot *slot);
#endif /* PG_TDE_SLOT_H */

@ -30,12 +30,6 @@ PGTdePageAddItemExtended(RelFileLocator rel, Oid oid, BlockNumber bn, Page page,
OffsetNumber offsetNumber,
int flags);
/* Wrapper functions for reading decrypted tuple into a given slot */
extern TupleTableSlot *
PGTdeExecStoreBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer);
extern TupleTableSlot *
PGTdeExecStorePinnedBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer);
/* Function Macros over crypt */
#define PG_TDE_ENCRYPT_DATA(_iv_prefix, _iv_prefix_len, _data, _data_len, _out, _key) \

Loading…
Cancel
Save