Add zstd compression support for TOAST with extended header format

pull/256/head
Dharin Shah 4 days ago
parent 630a93799d
commit 3f0ca998e7
  1. 172
      src/backend/access/common/detoast.c
  2. 199
      src/backend/access/common/toast_compression.c
  3. 169
      src/backend/access/common/toast_internals.c
  4. 38
      src/backend/replication/logical/reorderbuffer.c
  5. 4
      src/backend/utils/adt/varlena.c
  6. 6
      src/backend/utils/misc/guc_parameters.dat
  7. 3
      src/backend/utils/misc/guc_tables.c
  8. 41
      src/include/access/detoast.h
  9. 47
      src/include/access/toast_compression.h
  10. 4
      src/include/access/toast_internals.h
  11. 152
      src/include/varatt.h
  12. 20
      src/test/modules/test_toast_ext/Makefile
  13. 40
      src/test/modules/test_toast_ext/expected/test_toast_ext.out
  14. 11
      src/test/modules/test_toast_ext/sql/test_toast_ext.sql
  15. 19
      src/test/modules/test_toast_ext/test_toast_ext--1.0.sql
  16. 200
      src/test/modules/test_toast_ext/test_toast_ext.c
  17. 5
      src/test/modules/test_toast_ext/test_toast_ext.control

@ -16,6 +16,7 @@
#include "access/detoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/toast_compression.h"
#include "access/toast_internals.h"
#include "common/int.h"
#include "common/pg_lzcompress.h"
@ -225,12 +226,47 @@ detoast_attr_slice(struct varlena *attr,
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
int32 max_size;
bool is_compressed;
bool is_pglz = false;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* Handle both legacy 16-byte and extended 20-byte on-disk TOAST
* pointers. Check the vartag to determine which format.
*/
if (VARTAG_EXTERNAL(attr) == VARTAG_ONDISK_EXTENDED)
{
struct varatt_external_extended toast_pointer_ext;
uint8 ext_method;
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr);
max_size = VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(toast_pointer_ext);
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(toast_pointer_ext);
/* Check if this is pglz for slice optimization */
if (is_compressed &&
VARATT_EXTERNAL_HAS_FLAG(toast_pointer_ext, TOAST_EXT_FLAG_COMPRESSION))
{
ext_method = VARATT_EXTERNAL_GET_EXT_COMPRESSION_METHOD(toast_pointer_ext);
is_pglz = (ext_method == TOAST_PGLZ_EXT_METHOD);
}
}
else
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer);
/* Check if this is pglz for slice optimization */
if (is_compressed)
is_pglz = (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
TOAST_PGLZ_COMPRESSION_ID);
}
/* fast path for non-compressed external datums */
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
if (!is_compressed)
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
/*
@ -240,19 +276,16 @@ detoast_attr_slice(struct varlena *attr,
*/
if (slicelimit >= 0)
{
int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
/*
* Determine maximum amount of compressed data needed for a prefix
* of a given length (after decompression).
*
* At least for now, if it's LZ4 data, we'll have to fetch the
* whole thing, because there doesn't seem to be an API call to
* determine how much compressed data we need to be sure of being
* able to decompress the required slice.
* At least for now, if it's LZ4 or zstd data, we'll have to fetch
* the whole thing, because there doesn't seem to be an API call
* to determine how much compressed data we need to be sure of
* being able to decompress the required slice.
*/
if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
TOAST_PGLZ_COMPRESSION_ID)
if (is_pglz)
max_size = pglz_maximum_compressed_size(slicelimit, max_size);
/*
@ -344,20 +377,42 @@ toast_fetch_datum(struct varlena *attr)
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
Oid toastrelid;
Oid valueid;
bool is_compressed;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* Handle both legacy 16-byte and extended 20-byte on-disk TOAST pointers.
* Check the vartag to determine which format we're dealing with.
*/
if (VARTAG_EXTERNAL(attr) == VARTAG_ONDISK_EXTENDED)
{
struct varatt_external_extended toast_pointer_ext;
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(toast_pointer_ext);
toastrelid = toast_pointer_ext.va_toastrelid;
valueid = toast_pointer_ext.va_valueid;
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(toast_pointer_ext);
}
else
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
toastrelid = toast_pointer.va_toastrelid;
valueid = toast_pointer.va_valueid;
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer);
}
result = (struct varlena *) palloc(attrsize + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
if (is_compressed)
SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
else
SET_VARSIZE(result, attrsize + VARHDRSZ);
@ -369,10 +424,10 @@ toast_fetch_datum(struct varlena *attr)
/*
* Open the toast relation and its indexes
*/
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
toastrel = table_open(toastrelid, AccessShareLock);
/* Fetch all chunks */
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
table_relation_fetch_toast_slice(toastrel, valueid,
attrsize, 0, attrsize, result);
/* Close toast table */
@ -398,23 +453,45 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
Oid toastrelid;
Oid valueid;
bool is_compressed;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* Handle both legacy 16-byte and extended 20-byte on-disk TOAST pointers.
* Check the vartag to determine which format we're dealing with.
*/
if (VARTAG_EXTERNAL(attr) == VARTAG_ONDISK_EXTENDED)
{
struct varatt_external_extended toast_pointer_ext;
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(toast_pointer_ext);
toastrelid = toast_pointer_ext.va_toastrelid;
valueid = toast_pointer_ext.va_valueid;
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(toast_pointer_ext);
}
else
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
toastrelid = toast_pointer.va_toastrelid;
valueid = toast_pointer.va_valueid;
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer);
}
/*
* It's nonsense to fetch slices of a compressed datum unless the slice is
* a prefix -- this isn't lo_*, so we can't return a compressed datum which
* is meaningful to toast later.
*/
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
Assert(!is_compressed || 0 == sliceoffset);
if (sliceoffset >= attrsize)
{
@ -427,7 +504,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
* space required by va_tcinfo, which is stored at the beginning as an
* int32 value.
*/
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
if (is_compressed && slicelength > 0)
slicelength = slicelength + sizeof(int32);
/*
@ -440,7 +517,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
if (is_compressed)
SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
else
SET_VARSIZE(result, slicelength + VARHDRSZ);
@ -449,10 +526,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
return result; /* Can save a lot of work at this point! */
/* Open the toast relation */
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
toastrel = table_open(toastrelid, AccessShareLock);
/* Fetch all chunks */
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
table_relation_fetch_toast_slice(toastrel, valueid,
attrsize, sliceoffset, slicelength,
result);
@ -485,6 +562,14 @@ toast_decompress_datum(struct varlena *attr)
return pglz_decompress_datum(attr);
case TOAST_LZ4_COMPRESSION_ID:
return lz4_decompress_datum(attr);
case TOAST_EXTENDED_COMPRESSION_ID:
/*
* Extended compression method. For inline compressed data,
* TOAST_EXTENDED_COMPRESSION_ID currently means zstd. Future
* extended methods for inline data would need to store the
* actual method ID in the compressed payload.
*/
return zstd_decompress_datum(attr);
default:
elog(ERROR, "invalid compression method id %d", cmid);
return NULL; /* keep compiler quiet */
@ -528,6 +613,12 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
return pglz_decompress_datum_slice(attr, slicelength);
case TOAST_LZ4_COMPRESSION_ID:
return lz4_decompress_datum_slice(attr, slicelength);
case TOAST_EXTENDED_COMPRESSION_ID:
/*
* Extended compression method. For inline compressed data,
* TOAST_EXTENDED_COMPRESSION_ID currently means zstd.
*/
return zstd_decompress_datum_slice(attr, slicelength);
default:
elog(ERROR, "invalid compression method id %d", cmid);
return NULL; /* keep compiler quiet */
@ -549,11 +640,15 @@ toast_raw_datum_size(Datum value)
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
/*
* va_rawsize is the size of the original datum -- including header.
* It's at offset 0 in both varatt_external and varatt_external_extended,
* so we can read just the first 4 bytes regardless of format.
*/
int32 va_rawsize;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
memcpy(&va_rawsize, VARDATA_EXTERNAL(attr), sizeof(va_rawsize));
result = va_rawsize;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
@ -609,11 +704,18 @@ toast_datum_size(Datum value)
* Attribute is stored externally - return the extsize whether
* compressed or not. We do not count the size of the toast pointer
* ... should we?
*
* va_extinfo is at offset 4 in both varatt_external and
* varatt_external_extended, so we can read the first 8 bytes
* regardless of format.
*/
struct varatt_external toast_pointer;
struct {
int32 va_rawsize;
uint32 va_extinfo;
} common;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
memcpy(&common, VARDATA_EXTERNAL(attr), sizeof(common));
result = common.va_extinfo & VARLENA_EXTSIZE_MASK;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{

@ -17,13 +17,19 @@
#include <lz4.h>
#endif
#ifdef USE_ZSTD
#include <zstd.h>
#endif
#include "access/detoast.h"
#include "access/toast_compression.h"
#include "common/pg_lzcompress.h"
#include "utils/memutils.h"
#include "varatt.h"
/* GUC */
int default_toast_compression = TOAST_PGLZ_COMPRESSION;
bool use_extended_toast_header = true; /* default: use new 20-byte format */
#define NO_COMPRESSION_SUPPORT(method) \
ereport(ERROR, \
@ -249,11 +255,16 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
* Extract compression ID from a varlena.
*
* Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
*
* For external data stored in extended format (VARTAG_ONDISK_EXTENDED),
* the actual compression method is stored in va_data[0]. We map that
* back to the appropriate ToastCompressionId for legacy compatibility.
*/
ToastCompressionId
toast_get_compression_id(struct varlena *attr)
{
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
vartag_external tag;
/*
* If it is stored externally then fetch the compression method id from
@ -262,12 +273,52 @@ toast_get_compression_id(struct varlena *attr)
*/
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
tag = VARTAG_EXTERNAL(attr);
if (tag == VARTAG_ONDISK)
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
}
else
{
struct varatt_external_extended toast_pointer_ext;
uint8 ext_method;
Assert(tag == VARTAG_ONDISK_EXTENDED);
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr);
if (VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(toast_pointer_ext))
{
/*
* Extended format stores the actual method in va_data[0].
* Map it back to ToastCompressionId for reporting purposes.
*/
ext_method = VARATT_EXTERNAL_GET_EXT_COMPRESSION_METHOD(toast_pointer_ext);
switch (ext_method)
{
case TOAST_PGLZ_EXT_METHOD:
cmid = TOAST_PGLZ_COMPRESSION_ID;
break;
case TOAST_LZ4_EXT_METHOD:
cmid = TOAST_LZ4_COMPRESSION_ID;
break;
case TOAST_ZSTD_EXT_METHOD:
cmid = TOAST_EXTENDED_COMPRESSION_ID;
break;
case TOAST_UNCOMPRESSED_EXT_METHOD:
/* Uncompressed data in extended format */
cmid = TOAST_INVALID_COMPRESSION_ID;
break;
default:
elog(ERROR, "invalid extended compression method %d",
ext_method);
}
}
}
}
else if (VARATT_IS_COMPRESSED(attr))
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
@ -275,6 +326,133 @@ toast_get_compression_id(struct varlena *attr)
return cmid;
}
/*
* Zstandard (zstd) compression/decompression for TOAST (extended methods).
*
* These routines use the same basic shape as the pglz and LZ4 helpers,
* but are only available when PostgreSQL is built with USE_ZSTD.
*/
/*
 * Compress a varlena using ZSTD at the given compression level.
 *
 * Returns a newly palloc'd compressed varlena whose total size has been
 * recorded with SET_VARSIZE_COMPRESSED, or NULL if the compressed payload
 * is not strictly smaller than the input payload (so that callers can store
 * the datum uncompressed or try a different method).
 *
 * Raises an error, rather than returning NULL, if:
 *	- the server was built without zstd support,
 *	- the worst-case output size would exceed MaxAllocSize, or
 *	- the zstd library itself reports a compression failure.
 */
static struct varlena *
zstd_compress_datum_internal(const struct varlena *value, int level)
{
#ifndef USE_ZSTD
	NO_COMPRESSION_SUPPORT("zstd");
	return NULL;				/* keep compiler quiet */
#else
	Size		valsize;
	Size		max_size;
	Size		out_size;
	struct varlena *tmp;
	size_t		rc;

	valsize = VARSIZE_ANY_EXHDR(value);

	/*
	 * Compute an upper bound for the compressed size and allocate enough
	 * space for the compressed payload plus the varlena header.
	 *
	 * NOTE(review): newer libzstd releases may return an error code (a very
	 * large size_t) from ZSTD_compressBound() for oversized input; the limit
	 * check below would reject that as well -- confirm against the zstd
	 * manual for the minimum supported library version.
	 */
	max_size = ZSTD_compressBound(valsize);
	if (max_size > (Size) (MaxAllocSize - VARHDRSZ_COMPRESSED))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("compressed data would exceed maximum allocation size")));

	tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);

	rc = ZSTD_compress((char *) tmp + VARHDRSZ_COMPRESSED, max_size,
					   VARDATA_ANY(value), valsize, level);

	/*
	 * A failure here is a library/internal problem while producing new data,
	 * not evidence of corrupted stored data, so do not report it as
	 * ERRCODE_DATA_CORRUPTED.
	 */
	if (ZSTD_isError(rc))
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("zstd compression failed: %s",
								 ZSTD_getErrorName(rc))));

	out_size = (Size) rc;

	/*
	 * If the compressed representation is not smaller than the original
	 * payload, give up and return NULL so that callers can fall back to
	 * storing the datum uncompressed or with a different method.
	 */
	if (out_size >= valsize)
	{
		pfree(tmp);
		return NULL;
	}

	SET_VARSIZE_COMPRESSED(tmp, out_size + VARHDRSZ_COMPRESSED);

	return tmp;
#endif							/* USE_ZSTD */
}
/*
 * Compress a varlena using ZSTD at the library's default compression level.
 *
 * Returns whatever zstd_compress_datum_internal() returns: the compressed
 * varlena, or NULL if compression does not save space.  In builds without
 * USE_ZSTD the helper raises the "not supported" error.
 *
 * The helper is called unconditionally so that the static function is always
 * referenced; otherwise builds without zstd would see it as defined but
 * unused.
 */
struct varlena *
zstd_compress_datum(const struct varlena *value)
{
#ifdef USE_ZSTD
	int			level = ZSTD_CLEVEL_DEFAULT;
#else
	int			level = 0;		/* ignored: the helper errors out first */
#endif

	return zstd_compress_datum_internal(value, level);
}
/*
 * Decompress a varlena that was compressed using ZSTD.
 *
 * The expected uncompressed payload size is read from the compressed
 * datum's header; the result is a freshly palloc'd, uncompressed varlena
 * of exactly that payload size.  An error is raised if the library reports
 * a failure or produces a different number of bytes, and also if the server
 * was built without zstd support.
 */
struct varlena *
zstd_decompress_datum(const struct varlena *value)
{
#ifdef USE_ZSTD
	const char *src = (const char *) value + VARHDRSZ_COMPRESSED;
	Size		expected = VARDATA_COMPRESSED_GET_EXTSIZE(value);
	struct varlena *out;
	size_t		produced;

	/* room for the uncompressed payload plus a regular varlena header */
	out = (struct varlena *) palloc(expected + VARHDRSZ);

	produced = ZSTD_decompress(VARDATA(out), expected,
							   src,
							   VARSIZE(value) - VARHDRSZ_COMPRESSED);

	/* success means no library error and exactly the advertised length */
	if (!ZSTD_isError(produced) && produced == expected)
	{
		SET_VARSIZE(out, expected + VARHDRSZ);
		return out;
	}

	ereport(ERROR,
			(errcode(ERRCODE_DATA_CORRUPTED),
			 errmsg_internal("compressed zstd data is corrupt or truncated")));
	return NULL;				/* keep compiler quiet */
#else
	NO_COMPRESSION_SUPPORT("zstd");
	return NULL;				/* keep compiler quiet */
#endif							/* USE_ZSTD */
}
/*
 * Decompress part of a varlena that was compressed using ZSTD.
 *
 * No streaming/partial decompression is attempted: the requested
 * slicelength is ignored and the complete datum is decompressed via
 * zstd_decompress_datum(), so the result may be longer than requested.
 * NOTE(review): callers are presumably expected to trim the result to the
 * slice they need -- confirm against the detoast slice path.
 */
struct varlena *
zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength)
{
	struct varlena *whole;

	/* The slice length is deliberately unused for now. */
	(void) slicelength;

	whole = zstd_decompress_datum(value);

	return whole;
}
/*
* CompressionNameToMethod - Get compression method from compression name
*
@ -293,6 +471,13 @@ CompressionNameToMethod(const char *compression)
#endif
return TOAST_LZ4_COMPRESSION;
}
else if (strcmp(compression, "zstd") == 0)
{
#ifndef USE_ZSTD
NO_COMPRESSION_SUPPORT("zstd");
#endif
return TOAST_ZSTD_COMPRESSION;
}
return InvalidCompressionMethod;
}
@ -309,6 +494,8 @@ GetCompressionMethodName(char method)
return "pglz";
case TOAST_LZ4_COMPRESSION:
return "lz4";
case TOAST_ZSTD_COMPRESSION:
return "zstd";
default:
elog(ERROR, "invalid compression method %c", method);
return NULL; /* keep compiler quiet */

@ -18,6 +18,7 @@
#include "access/heapam.h"
#include "access/heaptoast.h"
#include "access/table.h"
#include "access/toast_compression.h"
#include "access/toast_internals.h"
#include "access/xact.h"
#include "catalog/catalog.h"
@ -71,6 +72,22 @@ toast_compress_datum(Datum value, char cmethod)
tmp = lz4_compress_datum((const struct varlena *) DatumGetPointer(value));
cmid = TOAST_LZ4_COMPRESSION_ID;
break;
case TOAST_ZSTD_COMPRESSION:
/*
* Zstd requires the extended TOAST header format. If the GUC
* use_extended_toast_header is off, fall back to pglz.
*/
if (!use_extended_toast_header)
{
tmp = pglz_compress_datum((const struct varlena *) DatumGetPointer(value));
cmid = TOAST_PGLZ_COMPRESSION_ID;
}
else
{
tmp = zstd_compress_datum((const struct varlena *) DatumGetPointer(value));
cmid = TOAST_EXTENDED_COMPRESSION_ID;
}
break;
default:
elog(ERROR, "invalid compression method %c", cmethod);
}
@ -125,12 +142,15 @@ toast_save_datum(Relation rel, Datum value,
CommandId mycid = GetCurrentCommandId(true);
struct varlena *result;
struct varatt_external toast_pointer;
struct varatt_external_extended toast_pointer_ext;
int32 chunk_seq = 0;
char *data_p;
int32 data_todo;
Pointer dval = DatumGetPointer(value);
int num_indexes;
int validIndex;
bool use_extended = false;
uint8 ext_method = 0;
Assert(!VARATT_IS_EXTERNAL(dval));
@ -167,14 +187,60 @@ toast_save_datum(Relation rel, Datum value,
}
else if (VARATT_IS_COMPRESSED(dval))
{
ToastCompressionId cmid;
data_p = VARDATA(dval);
data_todo = VARSIZE(dval) - VARHDRSZ;
/* rawsize in a compressed datum is just the size of the payload */
toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ;
/* Get compression method from compressed datum */
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval);
/*
* Decide whether to use the extended 20-byte TOAST pointer format.
*
* Extended compression methods (zstd) always require the extended
* format. Legacy methods (pglz, lz4) can use either format - they
* use the extended format when use_extended_toast_header is enabled,
* otherwise they use the legacy 16-byte format for backward
* compatibility.
*/
if (cmid == TOAST_EXTENDED_COMPRESSION_ID)
{
use_extended = true;
ext_method = TOAST_ZSTD_EXT_METHOD;
}
else if (use_extended_toast_header)
{
/*
* When use_extended_toast_header is enabled, store pglz/lz4 using
* the new 20-byte extended format. This is a stepping stone toward
* eventually deprecating the legacy 16-byte format.
*/
use_extended = true;
switch (cmid)
{
case TOAST_PGLZ_COMPRESSION_ID:
ext_method = TOAST_PGLZ_EXT_METHOD;
break;
case TOAST_LZ4_COMPRESSION_ID:
ext_method = TOAST_LZ4_EXT_METHOD;
break;
default:
/* Should not happen, but fall back to legacy format */
use_extended = false;
break;
}
}
/* set external size and compression method */
VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval));
if (use_extended)
VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
VARATT_EXTERNAL_EXTENDED_CMID);
else
VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, cmid);
/* Assert that the numbers look like it's compressed */
Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
}
@ -184,6 +250,12 @@ toast_save_datum(Relation rel, Datum value,
data_todo = VARSIZE(dval) - VARHDRSZ;
toast_pointer.va_rawsize = VARSIZE(dval);
toast_pointer.va_extinfo = data_todo;
/*
* Note: We don't use extended format for uncompressed data, even when
* use_extended_toast_header is enabled. Extended format is only for
* compressed data, where we need to store the compression method.
*/
}
/*
@ -225,15 +297,36 @@ toast_save_datum(Relation rel, Datum value,
toast_pointer.va_valueid = InvalidOid;
if (oldexternal != NULL)
{
struct varatt_external old_toast_pointer;
Oid old_toastrelid;
Oid old_valueid;
Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
/*
* Extract toastrelid and valueid from the old pointer.
* Handle both legacy 16-byte and extended 20-byte formats.
*/
if (VARTAG_EXTERNAL(oldexternal) == VARTAG_ONDISK_EXTENDED)
{
struct varatt_external_extended old_toast_pointer_ext;
VARATT_EXTERNAL_GET_POINTER_EXTENDED(old_toast_pointer_ext, oldexternal);
old_toastrelid = old_toast_pointer_ext.va_toastrelid;
old_valueid = old_toast_pointer_ext.va_valueid;
}
else
{
struct varatt_external old_toast_pointer;
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
old_toastrelid = old_toast_pointer.va_toastrelid;
old_valueid = old_toast_pointer.va_valueid;
}
if (old_toastrelid == rel->rd_toastoid)
{
/* This value came from the old toast table; reuse its OID */
toast_pointer.va_valueid = old_toast_pointer.va_valueid;
toast_pointer.va_valueid = old_valueid;
/*
* There is a corner case here: the table rewrite might have
@ -357,11 +450,37 @@ toast_save_datum(Relation rel, Datum value,
table_close(toastrel, NoLock);
/*
* Create the TOAST pointer value that we'll return
* Create the TOAST pointer value that we'll return. Use the extended
* format (20-byte payload) whenever use_extended was set above: always for
* zstd, and for pglz/lz4 when use_extended_toast_header is enabled.
*/
result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
if (use_extended)
{
/*
* Build extended TOAST pointer. Copy the common fields from
* toast_pointer, then set the extended-format-specific fields.
*/
toast_pointer_ext.va_rawsize = toast_pointer.va_rawsize;
toast_pointer_ext.va_extinfo = toast_pointer.va_extinfo;
toast_pointer_ext.va_valueid = toast_pointer.va_valueid;
toast_pointer_ext.va_toastrelid = toast_pointer.va_toastrelid;
/* Set extended format fields */
toast_pointer_ext.va_flags = TOAST_EXT_FLAG_COMPRESSION;
toast_pointer_ext.va_data[0] = ext_method;
toast_pointer_ext.va_data[1] = 0;
toast_pointer_ext.va_data[2] = 0;
result = (struct varlena *) palloc(TOAST_POINTER_SIZE_EXTENDED);
SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK_EXTENDED);
memcpy(VARDATA_EXTERNAL(result), &toast_pointer_ext, sizeof(toast_pointer_ext));
}
else
{
/* Standard 16-byte TOAST pointer */
result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
}
return PointerGetDatum(result);
}
@ -377,6 +496,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
struct varatt_external toast_pointer;
struct varatt_external_extended toast_pointer_ext;
Relation toastrel;
Relation *toastidxs;
ScanKeyData toastkey;
@ -384,17 +504,36 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
HeapTuple toasttup;
int num_indexes;
int validIndex;
Oid toastrelid;
Oid valueid;
bool is_extended;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
return;
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* Must copy to access aligned fields. Handle both legacy (16-byte) and
* extended (20-byte) on-disk TOAST pointers based on the tag.
*/
is_extended = (VARTAG_EXTERNAL(attr) == VARTAG_ONDISK_EXTENDED);
if (!is_extended)
{
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
toastrelid = toast_pointer.va_toastrelid;
valueid = toast_pointer.va_valueid;
}
else
{
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr);
toastrelid = toast_pointer_ext.va_toastrelid;
valueid = toast_pointer_ext.va_valueid;
}
/*
* Open the toast relation and its indexes
*/
toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);
toastrel = table_open(toastrelid, RowExclusiveLock);
/* Fetch valid relation used for process */
validIndex = toast_open_indexes(toastrel,
@ -408,7 +547,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
ScanKeyInit(&toastkey,
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(toast_pointer.va_valueid));
ObjectIdGetDatum(valueid));
/*
* Find all the chunks. (We don't actually care whether we see them in

@ -5137,11 +5137,17 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
/* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
struct varatt_external_extended toast_pointer_ext;
struct varatt_indirect redirect_pointer;
struct varlena *new_datum = NULL;
struct varlena *reconstructed;
dlist_iter it;
Size data_done = 0;
bool is_extended;
Oid valueid;
int32 rawsize;
int32 extsize;
bool is_compressed;
if (attr->attisdropped)
continue;
@ -5161,14 +5167,36 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
if (!VARATT_IS_EXTERNAL(varlena))
continue;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
/*
* Handle both legacy 16-byte and extended 20-byte on-disk TOAST
* pointers based on the tag.
*/
is_extended = VARATT_IS_EXTERNAL_ONDISK(varlena) &&
(VARTAG_EXTERNAL(varlena) == VARTAG_ONDISK_EXTENDED);
if (is_extended)
{
VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, varlena);
valueid = toast_pointer_ext.va_valueid;
rawsize = toast_pointer_ext.va_rawsize;
extsize = VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(toast_pointer_ext);
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(toast_pointer_ext);
}
else
{
VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
valueid = toast_pointer.va_valueid;
rawsize = toast_pointer.va_rawsize;
extsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
is_compressed = VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer);
}
/*
* Check whether the toast tuple changed, replace if so.
*/
ent = (ReorderBufferToastEnt *)
hash_search(txn->toast_hash,
&toast_pointer.va_valueid,
&valueid,
HASH_FIND,
NULL);
if (ent == NULL)
@ -5179,7 +5207,7 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
free[natt] = true;
reconstructed = palloc0(toast_pointer.va_rawsize);
reconstructed = palloc0(rawsize);
ent->reconstructed = reconstructed;
@ -5204,10 +5232,10 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
VARSIZE(chunk) - VARHDRSZ);
data_done += VARSIZE(chunk) - VARHDRSZ;
}
Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
Assert(data_done == extsize);
/* make sure its marked as compressed or not */
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
if (is_compressed)
SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
else
SET_VARSIZE(reconstructed, data_done + VARHDRSZ);

@ -4206,6 +4206,10 @@ pg_column_compression(PG_FUNCTION_ARGS)
case TOAST_LZ4_COMPRESSION_ID:
result = "lz4";
break;
case TOAST_EXTENDED_COMPRESSION_ID:
/* Extended format currently only supports zstd */
result = "zstd";
break;
default:
elog(ERROR, "invalid compression method id %d", cmid);
}

@ -739,6 +739,12 @@
options => 'default_toast_compression_options',
},
{ name => 'use_extended_toast_header', type => 'bool', context => 'PGC_USERSET', group => 'CLIENT_CONN_STATEMENT',
short_desc => 'Use 20-byte extended TOAST header format (required for zstd).',
variable => 'use_extended_toast_header',
boot_val => 'true',
},
{ name => 'default_transaction_deferrable', type => 'bool', context => 'PGC_USERSET', group => 'CLIENT_CONN_STATEMENT',
short_desc => 'Sets the default deferrable status of new transactions.',
variable => 'DefaultXactDeferrable',

@ -460,6 +460,9 @@ static const struct config_enum_entry default_toast_compression_options[] = {
{"pglz", TOAST_PGLZ_COMPRESSION, false},
#ifdef USE_LZ4
{"lz4", TOAST_LZ4_COMPRESSION, false},
#endif
#ifdef USE_ZSTD
{"zstd", TOAST_ZSTD_COMPRESSION, false},
#endif
{NULL, 0, false}
};

@ -14,25 +14,58 @@
/*
* Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
* into a local "struct varatt_external" toast pointer. This should be
* just a memcpy, but some versions of gcc seem to produce broken code
* that assumes the datum contents are aligned. Introducing an explicit
* intermediate "varattrib_1b_e *" variable seems to fix it.
* into a local "struct varatt_external" toast pointer.
*
* This currently supports only the legacy on-disk TOAST pointer format,
* which has VARTAG_ONDISK and a payload size of sizeof(varatt_external).
* Extended on-disk pointers (VARTAG_ONDISK_EXTENDED) must be accessed via
* VARATT_EXTERNAL_GET_POINTER_EXTENDED().
*
* This should be just a memcpy, but some versions of gcc seem to produce
* broken code that assumes the datum contents are aligned. Introducing
* an explicit intermediate "varattrib_1b_e *" variable seems to fix it.
*/
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
do { \
varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
Assert(VARATT_IS_EXTERNAL(attre)); \
Assert(VARTAG_EXTERNAL(attre) == VARTAG_ONDISK); \
Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
/*
* Variant of VARATT_EXTERNAL_GET_POINTER for the extended on-disk TOAST
* pointer format. Callers should only use this when they have already
* established that the tag is VARTAG_ONDISK_EXTENDED.
*/
#define VARATT_EXTERNAL_GET_POINTER_EXTENDED(toast_pointer_ext, attr) \
do { \
varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
Assert(VARATT_IS_EXTERNAL(attre)); \
Assert(VARTAG_EXTERNAL(attre) == VARTAG_ONDISK_EXTENDED); \
Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer_ext) + VARHDRSZ_EXTERNAL); \
memcpy(&(toast_pointer_ext), VARDATA_EXTERNAL(attre), sizeof(toast_pointer_ext)); \
} while (0)
/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_external))
/* Size of an EXTERNAL datum that contains an indirection pointer */
#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect))
/* Size of an EXTERNAL datum that contains an extended TOAST pointer */
#define TOAST_POINTER_SIZE_EXTENDED (VARHDRSZ_EXTERNAL + sizeof(varatt_external_extended))
/*
 * Validation helpers for TOAST pointer sizes.
 *
 * TOAST_POINTER_SIZE_IS_VALID(size) is true when size equals one of the
 * three external-datum sizes defined above: standard, extended, or indirect.
 * The argument is expanded up to three times, so it must not have side
 * effects.
 *
 * TOAST_POINTER_IS_EXTENDED_SIZE(size) is true only when size equals the
 * extended TOAST pointer size.
 */
#define TOAST_POINTER_SIZE_IS_VALID(size) \
((size) == TOAST_POINTER_SIZE || \
(size) == TOAST_POINTER_SIZE_EXTENDED || \
(size) == INDIRECT_POINTER_SIZE)
#define TOAST_POINTER_IS_EXTENDED_SIZE(size) \
((size) == TOAST_POINTER_SIZE_EXTENDED)
/* ----------
* detoast_external_attr() -
*

@ -13,14 +13,21 @@
#ifndef TOAST_COMPRESSION_H
#define TOAST_COMPRESSION_H
#include "varatt.h"
/*
* GUC support.
*
* default_toast_compression is an integer for purposes of the GUC machinery,
* but the value is one of the char values defined below, as they appear in
* pg_attribute.attcompression, e.g. TOAST_PGLZ_COMPRESSION.
*
* use_extended_toast_header controls whether to use the 20-byte extended
* TOAST pointer format (required for zstd) instead of the legacy 16-byte
* format. When false, zstd compression falls back to pglz.
*/
extern PGDLLIMPORT int default_toast_compression;
extern PGDLLIMPORT bool use_extended_toast_header;
/*
* Built-in compression method ID. The toast compression header will store
@ -39,6 +46,7 @@ typedef enum ToastCompressionId
TOAST_PGLZ_COMPRESSION_ID = 0,
TOAST_LZ4_COMPRESSION_ID = 1,
TOAST_INVALID_COMPRESSION_ID = 2,
TOAST_EXTENDED_COMPRESSION_ID = 3, /* extended format for future methods */
} ToastCompressionId;
/*
@ -48,6 +56,7 @@ typedef enum ToastCompressionId
*/
#define TOAST_PGLZ_COMPRESSION 'p'
#define TOAST_LZ4_COMPRESSION 'l'
#define TOAST_ZSTD_COMPRESSION 'z'
#define InvalidCompressionMethod '\0'
#define CompressionMethodIsValid(cm) ((cm) != InvalidCompressionMethod)
@ -65,9 +74,47 @@ extern struct varlena *lz4_decompress_datum(const struct varlena *value);
extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
int32 slicelength);
/* zstd compression/decompression routines (extended methods) */
extern struct varlena *zstd_compress_datum(const struct varlena *value);
extern struct varlena *zstd_decompress_datum(const struct varlena *value);
extern struct varlena *zstd_decompress_datum_slice(const struct varlena *value,
int32 slicelength);
/* other stuff */
extern ToastCompressionId toast_get_compression_id(struct varlena *attr);
extern char CompressionNameToMethod(const char *compression);
extern const char *GetCompressionMethodName(char method);
/*
* TOAST_EXTENDED_COMPRESSION_ID (value 3) in va_extinfo bits 30-31
* signals that the data uses an extended compression method. For inline
* compressed data, this currently means zstd. For external TOAST pointers,
* the extended format (varatt_external_extended) stores the actual method
* in va_data[0].
*
* Note: TOAST_EXTENDED_COMPRESSION_ID is defined in the ToastCompressionId
* enum above, matching VARATT_EXTERNAL_EXTENDED_CMID from varatt.h.
*/
/*
 * Feature flags for extended TOAST pointers (varatt_external_extended),
 * stored in the va_flags byte.  Bits 2-7 are reserved for future use;
 * ExtendedFlagsAreValid() rejects any value that has one of them set.
 */
#define TOAST_EXT_FLAG_COMPRESSION 0x01 /* va_data[0] = method ID */
#define TOAST_EXT_FLAG_CHECKSUM 0x02 /* va_data[1-2] = checksum */
/*
 * Extended compression method IDs for use with extended TOAST format.
 * Stored in va_data[0] when TOAST_EXT_FLAG_COMPRESSION is set.
 *
 * IDs 0 and 1 have the same numeric values as TOAST_PGLZ_COMPRESSION_ID and
 * TOAST_LZ4_COMPRESSION_ID.  Beware that the numbering diverges after that:
 * 2 means zstd here but TOAST_INVALID_COMPRESSION_ID in ToastCompressionId,
 * and 3 means "uncompressed" here but TOAST_EXTENDED_COMPRESSION_ID there.
 * Do not mix the two ID spaces.
 */
#define TOAST_PGLZ_EXT_METHOD 0
#define TOAST_LZ4_EXT_METHOD 1
#define TOAST_ZSTD_EXT_METHOD 2
#define TOAST_UNCOMPRESSED_EXT_METHOD 3
/*
 * Validation macros for extended format.
 *
 * ExtendedCompressionMethodIsValid(method) is true when "method" fits in the
 * single byte (va_data[0]) that stores it, i.e. lies in 0..255.  The test is
 * done by masking rather than with "<= 255" so that negative values are
 * rejected too instead of silently passing.
 *
 * ExtendedFlagsAreValid(flags) is true when no bit other than the known
 * TOAST_EXT_FLAG_* bits is set.
 *
 * Both macros evaluate their argument exactly once.
 */
#define ExtendedCompressionMethodIsValid(method) \
	(((method) & ~0xFF) == 0)
#define ExtendedFlagsAreValid(flags) \
	(((flags) & ~(TOAST_EXT_FLAG_COMPRESSION | TOAST_EXT_FLAG_CHECKSUM)) == 0)
#endif /* TOAST_COMPRESSION_H */

@ -36,11 +36,13 @@ typedef struct toast_compress_header
#define TOAST_COMPRESS_METHOD(ptr) \
(((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS)
/*
 * Fill in the tcinfo word of a toast_compress_header.
 *
 * "len" is stored in the low VARLENA_EXTSIZE_BITS bits and "cm_method" in
 * the bits above them.  Accepted methods are pglz, lz4 and
 * TOAST_EXTENDED_COMPRESSION_ID; anything else trips the assertion in
 * assert-enabled builds.
 *
 * This is a macro: "ptr", "len" and "cm_method" are evaluated more than once.
 */
#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \
	do { \
		Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \
		Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \
			   (cm_method) == TOAST_LZ4_COMPRESSION_ID || \
			   (cm_method) == TOAST_EXTENDED_COMPRESSION_ID); \
		((toast_compress_header *) (ptr))->tcinfo = \
			(len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \
	} while (0)

@ -45,6 +45,13 @@ typedef struct varatt_external
#define VARLENA_EXTSIZE_BITS 30
#define VARLENA_EXTSIZE_MASK ((1U << VARLENA_EXTSIZE_BITS) - 1)
/*
* Compression method ID stored in the 2 high-order bits of va_extinfo.
* Value 3 indicates an extended TOAST pointer format (varatt_external_extended).
* This constant is also defined in toast_compression.h for use by TOAST code.
*/
#define VARATT_EXTERNAL_EXTENDED_CMID 3
/*
* struct varatt_indirect is a "TOAST pointer" representing an out-of-line
* Datum that's stored in memory, not in an external toast relation.
@ -76,6 +83,28 @@ typedef struct varatt_expanded
ExpandedObjectHeader *eohptr;
} varatt_expanded;
/*
 * Extended TOAST pointer, extending varatt_external from 16 to 20 bytes.
 *
 * Identified by compression method ID 3 in va_extinfo bits 30-31.  The
 * va_flags field indicates which optional features are enabled; the three
 * va_data bytes contain feature-specific data.
 *
 * The one-byte va_flags plus the three-byte va_data array occupy exactly one
 * four-byte slot between va_extinfo and va_valueid, so the struct is expected
 * to contain no padding (the test_toast_ext module checks the offsets
 * 0/4/8/9/12/16 and the 20-byte total).
 *
 * Like varatt_external, stored unaligned and requires memcpy for access.
 *
 * This struct must be defined before VARTAG_SIZE() which uses sizeof().
 */
typedef struct varatt_external_extended
{
int32 va_rawsize; /* Original data size (includes header) */
uint32 va_extinfo; /* External saved size (low 30 bits) + extended
* indicator (high 2 bits, value = 3) */
uint8 va_flags; /* Feature flags indicating enabled extensions */
uint8 va_data[3]; /* Extension data - interpretation depends on
* va_flags */
Oid va_valueid; /* Unique ID of value within TOAST table */
Oid va_toastrelid; /* RelID of TOAST table containing it */
} varatt_external_extended;
/*
* Type tag for the various sorts of "TOAST pointer" datums. The peculiar
* value for VARTAG_ONDISK comes from a requirement for on-disk compatibility
@ -86,7 +115,17 @@ typedef enum vartag_external
VARTAG_INDIRECT = 1,
VARTAG_EXPANDED_RO = 2,
VARTAG_EXPANDED_RW = 3,
VARTAG_ONDISK = 18
VARTAG_ONDISK = 18,
/*
* VARTAG_ONDISK_EXTENDED is used for the extended TOAST pointer format,
* which increases the on-disk payload from 16 to 20 bytes. The first
* 8 bytes (va_rawsize, va_extinfo) are layout-compatible with
* struct varatt_external so that existing code inspecting those fields
* continues to work. Older PostgreSQL versions do not know about this
* tag and therefore must not be used to read clusters that contain it.
*/
VARTAG_ONDISK_EXTENDED = 19
} vartag_external;
/* Is a TOAST pointer either type of expanded-object pointer? */
@ -97,7 +136,14 @@ VARTAG_IS_EXPANDED(vartag_external tag)
return ((tag & ~1) == VARTAG_EXPANDED_RO);
}
/* Size of the data part of a "TOAST pointer" datum */
/*
* Size of the data part of a "TOAST pointer" datum.
*
* For on-disk TOAST pointers we now support two payload sizes:
* the original 16-byte format (VARTAG_ONDISK) described by struct
* varatt_external, and a 20-byte extended format
* (VARTAG_ONDISK_EXTENDED) described by struct varatt_external_extended.
*/
static inline Size
VARTAG_SIZE(vartag_external tag)
{
@ -107,6 +153,8 @@ VARTAG_SIZE(vartag_external tag)
return sizeof(varatt_expanded);
else if (tag == VARTAG_ONDISK)
return sizeof(varatt_external);
else if (tag == VARTAG_ONDISK_EXTENDED)
return sizeof(varatt_external_extended);
else
{
Assert(false);
@ -360,7 +408,13 @@ VARATT_IS_EXTERNAL(const void *PTR)
static inline bool
VARATT_IS_EXTERNAL_ONDISK(const void *PTR)
{
	vartag_external tag;

	/*
	 * Returns true for an external datum carrying either on-disk tag: the
	 * legacy VARTAG_ONDISK or the extended VARTAG_ONDISK_EXTENDED.  Callers
	 * that need to know which payload layout to read must inspect
	 * VARTAG_EXTERNAL() themselves.
	 */

	/* Only external datums carry a tag byte at all */
	if (!VARATT_IS_EXTERNAL(PTR))
		return false;

	tag = VARTAG_EXTERNAL(PTR);
	return tag == VARTAG_ONDISK || tag == VARTAG_ONDISK_EXTENDED;
}
/* Is varlena datum an indirect pointer? */
@ -516,11 +570,11 @@ VARATT_EXTERNAL_GET_COMPRESS_METHOD(struct varatt_external toast_pointer)
}
/*
 * Set size and compress method of an externally-stored varlena datum.
 *
 * "len" is stored in the low VARLENA_EXTSIZE_BITS bits of va_extinfo and
 * "cm" in the bits above them.  Accepted values for "cm" are pglz, lz4 and
 * VARATT_EXTERNAL_EXTENDED_CMID; anything else trips the assertion in
 * assert-enabled builds.
 *
 * This has to remain a macro; beware multiple evaluations!
 */
#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \
	do { \
		Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \
			   (cm) == TOAST_LZ4_COMPRESSION_ID || \
			   (cm) == VARATT_EXTERNAL_EXTENDED_CMID); \
		((toast_pointer).va_extinfo = \
			(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
	} while (0)
@ -539,4 +593,92 @@ VARATT_EXTERNAL_IS_COMPRESSED(struct varatt_external toast_pointer)
(Size) (toast_pointer.va_rawsize - VARHDRSZ);
}
/* Macros for extended TOAST pointers (varatt_external_extended) */
/*
* Check if a TOAST pointer uses the extended on-disk format.
*
* Callers must have already verified VARATT_IS_EXTERNAL_ONDISK() before
* calling this; here we look only at the compression-method bits embedded
* in va_extinfo.
*/
static inline bool
VARATT_EXTERNAL_IS_EXTENDED(struct varatt_external toast_pointer)
{
return VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
VARATT_EXTERNAL_EXTENDED_CMID;
}
/* Get feature flags from extended pointer */
static inline uint8
VARATT_EXTERNAL_GET_FLAGS(struct varatt_external_extended toast_pointer_ext)
{
return toast_pointer_ext.va_flags;
}
/* Overwrite the va_flags byte of an extended pointer */
#define VARATT_EXTERNAL_SET_FLAGS(ptr_ext, newflags) \
	do { \
		(ptr_ext).va_flags = (newflags); \
	} while (0)
/* Yields 1 if any bit of "flag" is set in va_flags, else 0 */
#define VARATT_EXTERNAL_HAS_FLAG(ptr_ext, flag) \
	(!!((ptr_ext).va_flags & (flag)))
/* Yields the va_data extension array of an extended pointer */
#define VARATT_EXTERNAL_GET_EXT_DATA(ptr_ext) \
	((ptr_ext).va_data)
/*
 * Fetch the extended compression method ID, i.e. the first va_data byte.
 * The value is only meaningful when TOAST_EXT_FLAG_COMPRESSION is set.
 */
static inline uint8
VARATT_EXTERNAL_GET_EXT_COMPRESSION_METHOD(struct varatt_external_extended toast_pointer_ext)
{
	const uint8 *ext_data = toast_pointer_ext.va_data;

	return ext_data[0];
}
/* Store the extended compression method ID in the first va_data byte */
#define VARATT_EXTERNAL_SET_EXT_COMPRESSION_METHOD(ptr_ext, method_id) \
	do { \
		*(ptr_ext).va_data = (method_id); \
	} while (0)
/* Get extsize and compress method from extended pointer (same as standard) */
static inline Size
VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(struct varatt_external_extended toast_pointer_ext)
{
return toast_pointer_ext.va_extinfo & VARLENA_EXTSIZE_MASK;
}
static inline uint32
VARATT_EXTERNAL_GET_COMPRESS_METHOD_EXTENDED(struct varatt_external_extended toast_pointer_ext)
{
	/* the method ID lives in the bits above the 30-bit size field */
	uint32		extinfo = toast_pointer_ext.va_extinfo;

	return extinfo >> VARLENA_EXTSIZE_BITS;
}
/*
 * Initialize the size word, feature flags and extension data of an extended
 * pointer.
 *
 * va_extinfo receives "len" in its low VARLENA_EXTSIZE_BITS bits and
 * VARATT_EXTERNAL_EXTENDED_CMID in the bits above them; va_flags is set to
 * "flags"; every byte of va_data is zeroed.  The memset length is taken from
 * the field itself so it cannot drift out of sync with the struct definition.
 *
 * This is a macro: "toast_pointer_ext" and "len" are evaluated more than
 * once.
 */
#define VARATT_EXTERNAL_SET_SIZE_AND_EXT_FLAGS(toast_pointer_ext, len, flags) \
	do { \
		Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \
		(toast_pointer_ext).va_extinfo = \
			(len) | ((uint32) VARATT_EXTERNAL_EXTENDED_CMID << VARLENA_EXTSIZE_BITS); \
		(toast_pointer_ext).va_flags = (flags); \
		memset((toast_pointer_ext).va_data, 0, \
			   sizeof((toast_pointer_ext).va_data)); \
	} while (0)
/*
 * Convenience macro for setting extended pointer with compression method.
 *
 * The order of the two steps matters: the first one zeroes va_data, the
 * second one then stores "method" in va_data[0].
 *
 * NOTE(review): TOAST_EXT_FLAG_COMPRESSION appears to be defined in
 * access/toast_compression.h rather than in this header, so code expanding
 * this macro presumably has to include that header as well -- confirm.
 */
#define VARATT_EXTERNAL_SET_SIZE_AND_EXT_COMPRESSION(toast_pointer_ext, len, method) \
do { \
VARATT_EXTERNAL_SET_SIZE_AND_EXT_FLAGS(toast_pointer_ext, len, TOAST_EXT_FLAG_COMPRESSION); \
VARATT_EXTERNAL_SET_EXT_COMPRESSION_METHOD(toast_pointer_ext, method); \
} while (0)
/* Test if extended pointer is compressed (same logic as standard) */
static inline bool
VARATT_EXTERNAL_IS_COMPRESSED_EXTENDED(struct varatt_external_extended toast_pointer_ext)
{
return VARATT_EXTERNAL_GET_EXTSIZE_EXTENDED(toast_pointer_ext) <
(Size) (toast_pointer_ext.va_rawsize - VARHDRSZ);
}
#endif

@ -0,0 +1,20 @@
# src/test/modules/test_toast_ext/Makefile
#
# Builds the test_toast_ext loadable module and runs its regression test.

# One shared library built from a single object file
MODULE_big = test_toast_ext
OBJS = test_toast_ext.o
# Extension name and its install script
EXTENSION = test_toast_ext
DATA = test_toast_ext--1.0.sql
# Regression test to run (sql/test_toast_ext.sql vs expected/test_toast_ext.out)
REGRESS = test_toast_ext
# With USE_PGXS set, build against an installed PostgreSQL located through
# pg_config; otherwise build inside the source tree.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/test_toast_ext
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

@ -0,0 +1,40 @@
-- Test extended TOAST header structures (Phase 0)
CREATE EXTENSION test_toast_ext;
-- Test 1: Structure sizes
SELECT test_toast_structure_sizes();
test_toast_structure_sizes
---------------------------------------------------------------
PASS: varatt_external is 16 bytes +
PASS: varatt_external_extended is 20 bytes +
PASS: TOAST_POINTER_SIZE is 18 bytes +
PASS: TOAST_POINTER_SIZE_EXTENDED is 22 bytes +
PASS: All field offsets correct (no padding) +
+
Result: ALL TESTS PASSED +
(1 row)
-- Test 2: Flag validation
SELECT test_toast_flag_validation();
test_toast_flag_validation
----------------------------------------------------------
PASS: Valid flags (0x00-0x03) accepted +
PASS: Invalid flags (0x04+) rejected +
PASS: Compression methods 0-255 valid +
PASS: Compression method IDs correct +
+
Result: ALL TESTS PASSED +
(1 row)
-- Test 3: Compression ID constants
SELECT test_toast_compression_ids();
test_toast_compression_ids
---------------------------------------------------------
PASS: Standard compression IDs correct (0,1,2,3) +
PASS: PGLZ/LZ4 IDs consistent between formats +
+
Result: ALL TESTS PASSED +
(1 row)

@ -0,0 +1,11 @@
-- Test extended TOAST header structures (Phase 0)
CREATE EXTENSION test_toast_ext;
-- Test 1: Structure sizes
SELECT test_toast_structure_sizes();
-- Test 2: Flag validation
SELECT test_toast_flag_validation();
-- Test 3: Compression ID constants
SELECT test_toast_compression_ids();

@ -0,0 +1,19 @@
/* src/test/modules/test_toast_ext/test_toast_ext--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION test_toast_ext" to load this file. \quit
-- Each function below takes no arguments and returns a text report made of
-- PASS/FAIL lines; the C implementations live in test_toast_ext.c.

-- Checks sizes and field offsets of the TOAST pointer structs
CREATE FUNCTION test_toast_structure_sizes()
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
-- Checks the extended flag / method validation macros
CREATE FUNCTION test_toast_flag_validation()
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
-- Checks the numeric values of the compression ID constants
CREATE FUNCTION test_toast_compression_ids()
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;

@ -0,0 +1,200 @@
/*-------------------------------------------------------------------------
*
* test_toast_ext.c
* Test module for extended TOAST header structures (Phase 0)
*
* Copyright (c) 2025, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "access/detoast.h"
#include "access/toast_compression.h"
#include "utils/builtins.h"
#include "varatt.h"
PG_MODULE_MAGIC;
/*
 * test_toast_structure_sizes
 *
 * Check the sizes of varatt_external and varatt_external_extended, the
 * derived TOAST_POINTER_SIZE constants, and the field offsets of the
 * extended struct.
 *
 * Returns a text report with one PASS or FAIL line per check group followed
 * by an overall result line.  The "field offsets" PASS line is emitted only
 * when every individual offset check succeeded.
 */
PG_FUNCTION_INFO_V1(test_toast_structure_sizes);
Datum
test_toast_structure_sizes(PG_FUNCTION_ARGS)
{
	StringInfoData buf;
	bool		all_passed = true;
	bool		offsets_ok = true;

	initStringInfo(&buf);

	/* Test standard structure size */
	if (sizeof(varatt_external) != 16)
	{
		appendStringInfo(&buf, "FAIL: varatt_external is %zu bytes, expected 16\n",
						 sizeof(varatt_external));
		all_passed = false;
	}
	else
		appendStringInfoString(&buf, "PASS: varatt_external is 16 bytes\n");

	/* Test extended structure size */
	if (sizeof(varatt_external_extended) != 20)
	{
		appendStringInfo(&buf, "FAIL: varatt_external_extended is %zu bytes, expected 20\n",
						 sizeof(varatt_external_extended));
		all_passed = false;
	}
	else
		appendStringInfoString(&buf, "PASS: varatt_external_extended is 20 bytes\n");

	/* Test TOAST pointer sizes */
	if (TOAST_POINTER_SIZE != 18)
	{
		appendStringInfo(&buf, "FAIL: TOAST_POINTER_SIZE is %zu, expected 18\n",
						 (Size) TOAST_POINTER_SIZE);
		all_passed = false;
	}
	else
		appendStringInfoString(&buf, "PASS: TOAST_POINTER_SIZE is 18 bytes\n");

	if (TOAST_POINTER_SIZE_EXTENDED != 22)
	{
		appendStringInfo(&buf, "FAIL: TOAST_POINTER_SIZE_EXTENDED is %zu, expected 22\n",
						 (Size) TOAST_POINTER_SIZE_EXTENDED);
		all_passed = false;
	}
	else
		appendStringInfoString(&buf, "PASS: TOAST_POINTER_SIZE_EXTENDED is 22 bytes\n");

	/*
	 * Test field offsets.  Each failure is reported on its own line; the
	 * group-level PASS line is printed only if none of them failed.
	 */
	if (offsetof(varatt_external_extended, va_rawsize) != 0)
	{
		appendStringInfoString(&buf, "FAIL: va_rawsize offset\n");
		offsets_ok = false;
	}
	if (offsetof(varatt_external_extended, va_extinfo) != 4)
	{
		appendStringInfoString(&buf, "FAIL: va_extinfo offset\n");
		offsets_ok = false;
	}
	if (offsetof(varatt_external_extended, va_flags) != 8)
	{
		appendStringInfoString(&buf, "FAIL: va_flags offset\n");
		offsets_ok = false;
	}
	if (offsetof(varatt_external_extended, va_data) != 9)
	{
		appendStringInfoString(&buf, "FAIL: va_data offset\n");
		offsets_ok = false;
	}
	if (offsetof(varatt_external_extended, va_valueid) != 12)
	{
		appendStringInfoString(&buf, "FAIL: va_valueid offset\n");
		offsets_ok = false;
	}
	if (offsetof(varatt_external_extended, va_toastrelid) != 16)
	{
		appendStringInfoString(&buf, "FAIL: va_toastrelid offset\n");
		offsets_ok = false;
	}

	if (offsets_ok)
		appendStringInfoString(&buf, "PASS: All field offsets correct (no padding)\n");
	else
		all_passed = false;

	if (all_passed)
		appendStringInfoString(&buf, "\nResult: ALL TESTS PASSED\n");
	else
		appendStringInfoString(&buf, "\nResult: SOME TESTS FAILED\n");

	PG_RETURN_TEXT_P(cstring_to_text(buf.data));
}
/*
 * test_toast_flag_validation
 *
 * Exercise ExtendedFlagsAreValid() and ExtendedCompressionMethodIsValid(),
 * and check the numeric values of the TOAST_*_EXT_METHOD constants.
 *
 * Returns a text report with one PASS line per check group (or the
 * individual FAIL lines of that group) followed by an overall result line.
 * A group's PASS line is emitted only when every check in it succeeded.
 */
PG_FUNCTION_INFO_V1(test_toast_flag_validation);
Datum
test_toast_flag_validation(PG_FUNCTION_ARGS)
{
	StringInfoData buf;
	bool		all_passed = true;
	bool		group_ok;

	initStringInfo(&buf);

	/* Test valid flags */
	group_ok = true;
	if (!ExtendedFlagsAreValid(0x00))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x00 should be valid\n");
		group_ok = false;
	}
	if (!ExtendedFlagsAreValid(0x01))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x01 should be valid\n");
		group_ok = false;
	}
	if (!ExtendedFlagsAreValid(0x02))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x02 should be valid\n");
		group_ok = false;
	}
	if (!ExtendedFlagsAreValid(0x03))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x03 should be valid\n");
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: Valid flags (0x00-0x03) accepted\n");
	else
		all_passed = false;

	/* Test invalid flags */
	group_ok = true;
	if (ExtendedFlagsAreValid(0x04))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x04 should be invalid\n");
		group_ok = false;
	}
	if (ExtendedFlagsAreValid(0x08))
	{
		appendStringInfoString(&buf, "FAIL: flags 0x08 should be invalid\n");
		group_ok = false;
	}
	if (ExtendedFlagsAreValid(0xFF))
	{
		appendStringInfoString(&buf, "FAIL: flags 0xFF should be invalid\n");
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: Invalid flags (0x04+) rejected\n");
	else
		all_passed = false;

	/* Test compression method validation */
	group_ok = true;
	if (!ExtendedCompressionMethodIsValid(0))
	{
		appendStringInfoString(&buf, "FAIL: method 0 should be valid\n");
		group_ok = false;
	}
	if (!ExtendedCompressionMethodIsValid(255))
	{
		appendStringInfoString(&buf, "FAIL: method 255 should be valid\n");
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: Compression methods 0-255 valid\n");
	else
		all_passed = false;

	/* Test compression method IDs */
	group_ok = true;
	if (TOAST_PGLZ_EXT_METHOD != 0)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_PGLZ_EXT_METHOD should be 0\n");
		group_ok = false;
	}
	if (TOAST_LZ4_EXT_METHOD != 1)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_LZ4_EXT_METHOD should be 1\n");
		group_ok = false;
	}
	if (TOAST_ZSTD_EXT_METHOD != 2)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_ZSTD_EXT_METHOD should be 2\n");
		group_ok = false;
	}
	if (TOAST_UNCOMPRESSED_EXT_METHOD != 3)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_UNCOMPRESSED_EXT_METHOD should be 3\n");
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: Compression method IDs correct\n");
	else
		all_passed = false;

	if (all_passed)
		appendStringInfoString(&buf, "\nResult: ALL TESTS PASSED\n");
	else
		appendStringInfoString(&buf, "\nResult: SOME TESTS FAILED\n");

	PG_RETURN_TEXT_P(cstring_to_text(buf.data));
}
/*
 * test_toast_compression_ids
 *
 * Check the numeric values of the ToastCompressionId constants and that the
 * pglz/lz4 extended method IDs equal their standard counterparts.
 *
 * Returns a text report with one PASS line per check group (or the
 * individual FAIL lines of that group) followed by an overall result line.
 * A group's PASS line is emitted only when every check in it succeeded.
 */
PG_FUNCTION_INFO_V1(test_toast_compression_ids);
Datum
test_toast_compression_ids(PG_FUNCTION_ARGS)
{
	StringInfoData buf;
	bool		all_passed = true;
	bool		group_ok;

	initStringInfo(&buf);

	/* Standard compression IDs */
	group_ok = true;
	if (TOAST_PGLZ_COMPRESSION_ID != 0)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_PGLZ_COMPRESSION_ID != 0\n");
		group_ok = false;
	}
	if (TOAST_LZ4_COMPRESSION_ID != 1)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_LZ4_COMPRESSION_ID != 1\n");
		group_ok = false;
	}
	if (TOAST_INVALID_COMPRESSION_ID != 2)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_INVALID_COMPRESSION_ID != 2\n");
		group_ok = false;
	}
	if (TOAST_EXTENDED_COMPRESSION_ID != 3)
	{
		appendStringInfoString(&buf, "FAIL: TOAST_EXTENDED_COMPRESSION_ID != 3\n");
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: Standard compression IDs correct (0,1,2,3)\n");
	else
		all_passed = false;

	/* Extended compression IDs match standard where applicable */
	group_ok = true;
	if (TOAST_PGLZ_EXT_METHOD != TOAST_PGLZ_COMPRESSION_ID)
	{
		appendStringInfo(&buf, "FAIL: PGLZ IDs don't match (standard=%d, extended=%d)\n",
						 TOAST_PGLZ_COMPRESSION_ID, TOAST_PGLZ_EXT_METHOD);
		group_ok = false;
	}
	if (TOAST_LZ4_EXT_METHOD != TOAST_LZ4_COMPRESSION_ID)
	{
		appendStringInfo(&buf, "FAIL: LZ4 IDs don't match (standard=%d, extended=%d)\n",
						 TOAST_LZ4_COMPRESSION_ID, TOAST_LZ4_EXT_METHOD);
		group_ok = false;
	}
	if (group_ok)
		appendStringInfoString(&buf, "PASS: PGLZ/LZ4 IDs consistent between formats\n");
	else
		all_passed = false;

	if (all_passed)
		appendStringInfoString(&buf, "\nResult: ALL TESTS PASSED\n");
	else
		appendStringInfoString(&buf, "\nResult: SOME TESTS FAILED\n");

	PG_RETURN_TEXT_P(cstring_to_text(buf.data));
}

@ -0,0 +1,5 @@
# test_toast_ext extension
comment = 'Test module for extended TOAST header structures'
default_version = '1.0'
module_pathname = '$libdir/test_toast_ext'
relocatable = true
Loading…
Cancel
Save