Grab the low-hanging fruit from forcing sizeof(Datum) to 8.

Remove conditionally-compiled code for smaller Datum widths,
and simplify comments that describe cases no longer of interest.
I also fixed up a few more places that were not using
DatumGetIntXX where they should, and made some cosmetic
adjustments such as using sizeof(int64) not sizeof(Datum)
in places where that fit better with the surrounding code.

One thing I remembered while preparing this part is that SP-GiST
stores pass-by-value prefix keys as Datums, so that the on-disk
representation depends on sizeof(Datum). That's even more
unfortunate than the existing commentary makes it out to be,
because now there is a hazard that the change of sizeof(Datum)
will break SP-GiST indexes on 32-bit machines. It appears that
there are no existing SP-GiST opclasses that are actually
affected; and if there are some that I didn't find, the number
of installations that are using them on 32-bit machines is
doubtless tiny. So I'm proceeding on the assumption that we
can get away with this, but it's something to worry about.

(gininsert.c looks like it has a similar problem, but it's okay
because the "tuples" it's constructing are just transient data
within the tuplesort step. That's pretty poorly documented
though, so I added some comments.)

Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us
1 month ago · 6aebedc384
parent 2a600a93c7
commit 6aebedc384
17 changed files with 72 additions and 284 deletions
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@ -2051,8 +2051,7 @@ PG_MODULE_MAGIC_EXT(
    </para>
    <para>
-     By-value  types  can  only be 1, 2, or 4 bytes in length
+     By-value types can only be 1, 2, 4, or 8 bytes in length.
-     (also 8 bytes, if <literal>sizeof(Datum)</literal> is 8 on your machine).
     You should be careful to define your types such that they will be the
     same size (in bytes) on all architectures.  For example, the
     <literal>long</literal> type is dangerous because it is 4 bytes on some
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@ -2189,7 +2189,10 @@ typedef struct
 * we simply copy the whole Datum, so that we don't have to care about stuff
 * like endianess etc. We could make it a little bit smaller, but it's not
 * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
- * start of the TID list anyway. So we wouldn't save anything.
+ * start of the TID list anyway. So we wouldn't save anything. (This would
+ * not be a good idea for the permanent in-index data, since we'd prefer
+ * that that not depend on sizeof(Datum). But this is just a transient
+ * representation to use while sorting the data.)
 *
 * The TID list is serialized as compressed - it's highly compressible, and
 * we already have ginCompressPostingList for this purpose. The list may be
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@ -1707,8 +1707,8 @@ gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
 * Abbreviated version of Z-order comparison
 *
 * The abbreviated format is a Z-order value computed from the two 32-bit
- * floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
+ * floats.  Now that sizeof(Datum) is always 8, the 64-bit Z-order value
- * abbreviated Datum, otherwise use its most significant bits.
+ * always fits fully in the abbreviated Datum.
 */
 static Datum
 gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
@ -1718,11 +1718,7 @@ gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
 	z = point_zorder_internal(p->x, p->y);
-#if SIZEOF_DATUM == 8
+	return UInt64GetDatum(z);
-	return (Datum) z;
-#else
-	return (Datum) (z >> 32);
-#endif
 }
 /*
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@ -278,32 +278,12 @@ btint8cmp(PG_FUNCTION_ARGS)
 		PG_RETURN_INT32(A_LESS_THAN_B);
 }
-#if SIZEOF_DATUM < 8
-static int
-btint8fastcmp(Datum x, Datum y, SortSupport ssup)
-{
-	int64		a = DatumGetInt64(x);
-	int64		b = DatumGetInt64(y);
-	if (a > b)
-		return A_GREATER_THAN_B;
-	else if (a == b)
-		return 0;
-	else
-		return A_LESS_THAN_B;
-}
-#endif
 Datum
 btint8sortsupport(PG_FUNCTION_ARGS)
 {
 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-#if SIZEOF_DATUM >= 8
 	ssup->comparator = ssup_datum_signed_cmp;
-#else
-	ssup->comparator = btint8fastcmp;
-#endif
 	PG_RETURN_VOID();
 }
--- a/src/backend/catalog/pg_type.c
+++ b/src/backend/catalog/pg_type.c
@ -285,8 +285,7 @@ TypeCreate(Oid newTypeOid,
 						 errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d",
 								alignment, internalSize)));
 		}
-#if SIZEOF_DATUM == 8
+		else if (internalSize == (int16) sizeof(int64))
-		else if (internalSize == (int16) sizeof(Datum))
 		{
 			if (alignment != TYPALIGN_DOUBLE)
 				ereport(ERROR,
@ -294,7 +293,6 @@ TypeCreate(Oid newTypeOid,
 						 errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d",
 								alignment, internalSize)));
 		}
-#endif
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
--- a/src/backend/utils/adt/mac.c
+++ b/src/backend/utils/adt/mac.c
@ -481,33 +481,26 @@ macaddr_abbrev_convert(Datum original, SortSupport ssup)
 	Datum		res;
 	/*
-	 * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
+	 * Zero out the 8-byte Datum and copy in the 6 bytes of the MAC address.
-	 * the MAC address in. There will be two bytes of zero padding on the end
+	 * There will be two bytes of zero padding on the end of the least
-	 * of the least significant bits.
+	 * significant bits.
 	 */
-#if SIZEOF_DATUM == 8
+	StaticAssertStmt(sizeof(res) >= sizeof(macaddr),
-	memset(&res, 0, SIZEOF_DATUM);
+					 "Datum is too small for macaddr");
+	memset(&res, 0, sizeof(res));
 	memcpy(&res, authoritative, sizeof(macaddr));
-#else							/* SIZEOF_DATUM != 8 */
-	memcpy(&res, authoritative, SIZEOF_DATUM);
-#endif
 	uss->input_count += 1;
 	/*
-	 * Cardinality estimation. The estimate uses uint32, so on a 64-bit
+	 * Cardinality estimation. The estimate uses uint32, so XOR the two 32-bit
-	 * architecture, XOR the two 32-bit halves together to produce slightly
+	 * halves together to produce slightly more entropy. The two zeroed bytes
-	 * more entropy. The two zeroed bytes won't have any practical impact on
+	 * won't have any practical impact on this operation.
-	 * this operation.
 	 */
 	if (uss->estimating)
 	{
 		uint32		tmp;
-#if SIZEOF_DATUM == 8
+		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
-#else							/* SIZEOF_DATUM != 8 */
-		tmp = (uint32) res;
-#endif
 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
 	}
--- a/src/backend/utils/adt/network.c
+++ b/src/backend/utils/adt/network.c
@ -567,24 +567,11 @@ network_abbrev_abort(int memtupcount, SortSupport ssup)
 *
 * When generating abbreviated keys for SortSupport, we pack as much as we can
 * into a datum while ensuring that when comparing those keys as integers,
- * these rules will be respected. Exact contents depend on IP family and datum
+ * these rules will be respected. Exact contents depend on IP family:
- * size.
 *
 * IPv4
 * ----
 *
- * 4 byte datums:
- *
- * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in
- * every case below) followed by all but 1 of the netmasked bits.
- *
- * +----------+---------------------+
- * | 1 bit IP |   31 bits network   |     (1 bit network
- * |  family  |     (truncated)     |      omitted)
- * +----------+---------------------+
- *
- * 8 byte datums:
- *
 * We have space to store all netmasked bits, followed by the netmask size,
 * followed by 25 bits of the subnet (25 bits is usually more than enough in
 * practice). cidr datums always have all-zero subnet bits.
@ -597,15 +584,6 @@ network_abbrev_abort(int memtupcount, SortSupport ssup)
 * IPv6
 * ----
 *
- * 4 byte datums:
- *
- * +----------+---------------------+
- * | 1 bit IP |   31 bits network   |    (up to 97 bits
- * |  family  |     (truncated)     |   network omitted)
- * +----------+---------------------+
- *
- * 8 byte datums:
- *
 * +----------+---------------------------------+
 * | 1 bit IP |         63 bits network         |    (up to 65 bits
 * |  family  |           (truncated)           |   network omitted)
@ -628,8 +606,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 	/*
 	 * Get an unsigned integer representation of the IP address by taking its
 	 * first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take
-	 * the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes
+	 * the first 8 bytes of an IPv6 address.
-	 * otherwise.
 	 *
 	 * We're consuming an array of unsigned char, so byteswap on little endian
 	 * systems (an inet's ipaddr field stores the most significant byte
@ -659,7 +636,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 		ipaddr_datum = DatumBigEndianToNative(ipaddr_datum);
 		/* Initialize result with ipfamily (most significant) bit set */
-		res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1);
+		res = ((Datum) 1) << (sizeof(Datum) * BITS_PER_BYTE - 1);
 	}
 	/*
@ -668,8 +645,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 	 * while low order bits go in "subnet" component when there is space for
 	 * one. This is often accomplished by generating a temp datum subnet
 	 * bitmask, which we may reuse later when generating the subnet bits
-	 * themselves.  (Note that subnet bits are only used with IPv4 datums on
+	 * themselves.
-	 * platforms where datum is 8 bytes.)
 	 *
 	 * The number of bits in subnet is used to generate a datum subnet
 	 * bitmask. For example, with a /24 IPv4 datum there are 8 subnet bits
@ -681,14 +657,14 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 	subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative);
 	Assert(subnet_size >= 0);
 	/* subnet size must work with prefix ipaddr cases */
-	subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE;
+	subnet_size %= sizeof(Datum) * BITS_PER_BYTE;
 	if (ip_bits(authoritative) == 0)
 	{
 		/* Fit as many ipaddr bits as possible into subnet */
 		subnet_bitmask = ((Datum) 0) - 1;
 		network = 0;
 	}
-	else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE)
+	else if (ip_bits(authoritative) < sizeof(Datum) * BITS_PER_BYTE)
 	{
 		/* Split ipaddr bits between network and subnet */
 		subnet_bitmask = (((Datum) 1) << subnet_size) - 1;
@ -701,12 +677,11 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 		network = ipaddr_datum;
 	}
-#if SIZEOF_DATUM == 8
 	if (ip_family(authoritative) == PGSQL_AF_INET)
 	{
 		/*
-		 * IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size,
+		 * IPv4: keep all 32 netmasked bits, netmask size, and most
-		 * and most significant 25 subnet bits
+		 * significant 25 subnet bits
 		 */
 		Datum		netmask_size = (Datum) ip_bits(authoritative);
 		Datum		subnet;
@ -750,12 +725,11 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 		res |= network | netmask_size | subnet;
 	}
 	else
-#endif
 	{
 		/*
-		 * 4 byte datums, or IPv6 with 8 byte datums: Use as many of the
+		 * IPv6: Use as many of the netmasked bits as will fit in final
-		 * netmasked bits as will fit in final abbreviated key. Avoid
+		 * abbreviated key. Avoid clobbering the ipfamily bit that was set
-		 * clobbering the ipfamily bit that was set earlier.
+		 * earlier.
 		 */
 		res |= network >> 1;
 	}
@ -767,11 +741,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 	{
 		uint32		tmp;
-#if SIZEOF_DATUM == 8
+		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
-#else							/* SIZEOF_DATUM != 8 */
-		tmp = (uint32) res;
-#endif
 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
 	}
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@ -392,30 +392,21 @@ typedef struct NumericSumAccum
 /*
 * We define our own macros for packing and unpacking abbreviated-key
- * representations for numeric values in order to avoid depending on
+ * representations, just to have a notational indication that that's
- * USE_FLOAT8_BYVAL.  The type of abbreviation we use is based only on
+ * what we're doing.  Now that sizeof(Datum) is always 8, we can rely
- * the size of a datum, not the argument-passing convention for float8.
+ * on fitting an int64 into Datum.
 *
- * The range of abbreviations for finite values is from +PG_INT64/32_MAX
+ * The range of abbreviations for finite values is from +PG_INT64_MAX
- * to -PG_INT64/32_MAX.  NaN has the abbreviation PG_INT64/32_MIN, and we
+ * to -PG_INT64_MAX.  NaN has the abbreviation PG_INT64_MIN, and we
 * define the sort ordering to make that work out properly (see further
 * comments below).  PINF and NINF share the abbreviations of the largest
 * and smallest finite abbreviation classes.
 */
-#define NUMERIC_ABBREV_BITS (SIZEOF_DATUM * BITS_PER_BYTE)
+#define NumericAbbrevGetDatum(X) Int64GetDatum(X)
-#if SIZEOF_DATUM == 8
+#define DatumGetNumericAbbrev(X) DatumGetInt64(X)
-#define NumericAbbrevGetDatum(X) ((Datum) (X))
-#define DatumGetNumericAbbrev(X) ((int64) (X))
 #define NUMERIC_ABBREV_NAN		 NumericAbbrevGetDatum(PG_INT64_MIN)
 #define NUMERIC_ABBREV_PINF		 NumericAbbrevGetDatum(-PG_INT64_MAX)
 #define NUMERIC_ABBREV_NINF		 NumericAbbrevGetDatum(PG_INT64_MAX)
-#else
-#define NumericAbbrevGetDatum(X) ((Datum) (X))
-#define DatumGetNumericAbbrev(X) ((int32) (X))
-#define NUMERIC_ABBREV_NAN		 NumericAbbrevGetDatum(PG_INT32_MIN)
-#define NUMERIC_ABBREV_PINF		 NumericAbbrevGetDatum(-PG_INT32_MAX)
-#define NUMERIC_ABBREV_NINF		 NumericAbbrevGetDatum(PG_INT32_MAX)
-#endif
 /* ----------
@ -2096,12 +2087,11 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
 * while this could be worked on itself, the abbreviation strategy gives more
 * speedup in many common cases.
 *
- * Two different representations are used for the abbreviated form, one in
+ * The abbreviated format is an int64. The representation is negated relative
- * int32 and one in int64, whichever fits into a by-value Datum.  In both cases
+ * to the original value, because we use the largest negative value for NaN,
- * the representation is negated relative to the original value, because we use
+ * which sorts higher than other values. We convert the absolute value of the
- * the largest negative value for NaN, which sorts higher than other values. We
+ * numeric to a 63-bit positive value, and then negate it if the original
- * convert the absolute value of the numeric to a 31-bit or 63-bit positive
+ * number was positive.
- * value, and then negate it if the original number was positive.
 *
 * We abort the abbreviation process if the abbreviation cardinality is below
 * 0.01% of the row count (1 per 10k non-null rows).  The actual break-even
@ -2328,7 +2318,7 @@ numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
 }
 /*
- * Abbreviate a NumericVar according to the available bit size.
+ * Abbreviate a NumericVar into the 64-bit sortsupport size.
 *
 * The 31-bit value is constructed as:
 *
@ -2372,9 +2362,6 @@ numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
 * with all bits zero. This allows simple comparisons to work on the composite
 * value.
 */
-#if NUMERIC_ABBREV_BITS == 64
 static Datum
 numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
 {
@ -2426,84 +2413,6 @@ numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
 	return NumericAbbrevGetDatum(result);
 }
-#endif							/* NUMERIC_ABBREV_BITS == 64 */
-#if NUMERIC_ABBREV_BITS == 32
-static Datum
-numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
-{
-	int			ndigits = var->ndigits;
-	int			weight = var->weight;
-	int32		result;
-	if (ndigits == 0 || weight < -11)
-	{
-		result = 0;
-	}
-	else if (weight > 20)
-	{
-		result = PG_INT32_MAX;
-	}
-	else
-	{
-		NumericDigit nxt1 = (ndigits > 1) ? var->digits[1] : 0;
-		weight = (weight + 11) * 4;
-		result = var->digits[0];
-		/*
-		 * "result" now has 1 to 4 nonzero decimal digits. We pack in more
-		 * digits to make 7 in total (largest we can fit in 24 bits)
-		 */
-		if (result > 999)
-		{
-			/* already have 4 digits, add 3 more */
-			result = (result * 1000) + (nxt1 / 10);
-			weight += 3;
-		}
-		else if (result > 99)
-		{
-			/* already have 3 digits, add 4 more */
-			result = (result * 10000) + nxt1;
-			weight += 2;
-		}
-		else if (result > 9)
-		{
-			NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0;
-			/* already have 2 digits, add 5 more */
-			result = (result * 100000) + (nxt1 * 10) + (nxt2 / 1000);
-			weight += 1;
-		}
-		else
-		{
-			NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0;
-			/* already have 1 digit, add 6 more */
-			result = (result * 1000000) + (nxt1 * 100) + (nxt2 / 100);
-		}
-		result = result | (weight << 24);
-	}
-	/* the abbrev is negated relative to the original */
-	if (var->sign == NUMERIC_POS)
-		result = -result;
-	if (nss->estimating)
-	{
-		uint32		tmp = (uint32) result;
-		addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
-	}
-	return NumericAbbrevGetDatum(result);
-}
-#endif							/* NUMERIC_ABBREV_BITS == 32 */
 /*
 * Ordinary (non-sortsupport) comparisons follow.
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@ -2275,33 +2275,12 @@ timestamp_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2));
 }
-#if SIZEOF_DATUM < 8
-/* note: this is used for timestamptz also */
-static int
-timestamp_fastcmp(Datum x, Datum y, SortSupport ssup)
-{
-	Timestamp	a = DatumGetTimestamp(x);
-	Timestamp	b = DatumGetTimestamp(y);
-	return timestamp_cmp_internal(a, b);
-}
-#endif
 Datum
 timestamp_sortsupport(PG_FUNCTION_ARGS)
 {
 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-#if SIZEOF_DATUM >= 8
-	/*
-	 * If this build has pass-by-value timestamps, then we can use a standard
-	 * comparator function.
-	 */
 	ssup->comparator = ssup_datum_signed_cmp;
-#else
-	ssup->comparator = timestamp_fastcmp;
-#endif
 	PG_RETURN_VOID();
 }
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@ -398,11 +398,7 @@ uuid_abbrev_convert(Datum original, SortSupport ssup)
 	{
 		uint32		tmp;
-#if SIZEOF_DATUM == 8
+		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
-#else							/* SIZEOF_DATUM != 8 */
-		tmp = (uint32) res;
-#endif
 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
 	}
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@ -1671,14 +1671,13 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 		 *
 		 * Even apart from the risk of broken locales, it's possible that
 		 * there are platforms where the use of abbreviated keys should be
-		 * disabled at compile time.  Having only 4 byte datums could make
+		 * disabled at compile time.  For example, macOS's strxfrm()
-		 * worst-case performance drastically more likely, for example.
+		 * implementation is known to not effectively concentrate a
-		 * Moreover, macOS's strxfrm() implementation is known to not
+		 * significant amount of entropy from the original string in earlier
-		 * effectively concentrate a significant amount of entropy from the
+		 * transformed blobs.  It's possible that other supported platforms
-		 * original string in earlier transformed blobs.  It's possible that
+		 * are similarly encumbered.  So, if we ever get past disabling this
-		 * other supported platforms are similarly encumbered.  So, if we ever
+		 * categorically, we may still want or need to disable it for
-		 * get past disabling this categorically, we may still want or need to
+		 * particular platforms.
-		 * disable it for particular platforms.
 		 */
 		if (!pg_strxfrm_enabled(locale))
 			abbreviate = false;
@ -2132,18 +2131,12 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 	addHyperLogLog(&sss->full_card, hash);
 	/* Hash abbreviated key */
-#if SIZEOF_DATUM == 8
 	{
-		uint32		lohalf,
+		uint32		tmp;
-					hihalf;
-		lohalf = (uint32) res;
+		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		hihalf = (uint32) (res >> 32);
+		hash = DatumGetUInt32(hash_uint32(tmp));
-		hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
 	}
-#else							/* SIZEOF_DATUM != 8 */
-	hash = DatumGetUInt32(hash_uint32((uint32) res));
-#endif
 	addHyperLogLog(&sss->abbr_card, hash);
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@ -512,7 +512,6 @@ qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 	return state->base.comparetup_tiebreak(a, b, state);
 }
-#if SIZEOF_DATUM >= 8
 /* Used if first key's comparator is ssup_datum_signed_cmp */
 static pg_attribute_always_inline int
 qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
@ -535,7 +534,6 @@ qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 	return state->base.comparetup_tiebreak(a, b, state);
 }
-#endif
 /* Used if first key's comparator is ssup_datum_int32_cmp */
 static pg_attribute_always_inline int
@ -578,7 +576,6 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 #define ST_DEFINE
 #include "lib/sort_template.h"
-#if SIZEOF_DATUM >= 8
 #define ST_SORT qsort_tuple_signed
 #define ST_ELEMENT_TYPE SortTuple
 #define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state)
@ -587,7 +584,6 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 #define ST_SCOPE static
 #define ST_DEFINE
 #include "lib/sort_template.h"
-#endif
 #define ST_SORT qsort_tuple_int32
 #define ST_ELEMENT_TYPE SortTuple
@ -2692,7 +2688,6 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 									 state);
 				return;
 			}
-#if SIZEOF_DATUM >= 8
 			else if (state->base.sortKeys[0].comparator == ssup_datum_signed_cmp)
 			{
 				qsort_tuple_signed(state->memtuples,
@ -2700,7 +2695,6 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 								   state);
 				return;
 			}
-#endif
 			else if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp)
 			{
 				qsort_tuple_int32(state->memtuples,
@ -3146,7 +3140,6 @@ ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
 		return 0;
 }
-#if SIZEOF_DATUM >= 8
 int
 ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup)
 {
@ -3160,7 +3153,6 @@ ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup)
 	else
 		return 0;
 }
-#endif
 int
 ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup)
--- a/src/include/access/gin_tuple.h
+++ b/src/include/access/gin_tuple.h
@ -15,7 +15,9 @@
 #include "utils/sortsupport.h"
 /*
- * Data for one key in a GIN index.
+ * Data for one key in a GIN index.  (This is not the permanent in-index
+ * representation, but just a convenient format to use during the tuplesort
+ * stage of building a new GIN index.)
 */
 typedef struct GinTuple
 {
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@ -285,10 +285,12 @@ typedef struct SpGistCache
 * If the prefix datum is of a pass-by-value type, it is stored in its
 * Datum representation, that is its on-disk representation is of length
 * sizeof(Datum).  This is a fairly unfortunate choice, because in no other
- * place does Postgres use Datum as an on-disk representation; it creates
+ * place does Postgres use Datum as an on-disk representation.  Formerly it
- * an unnecessary incompatibility between 32-bit and 64-bit builds.  But the
+ * meant an unnecessary incompatibility between 32-bit and 64-bit builds, and
- * compatibility loss is mostly theoretical since MAXIMUM_ALIGNOF typically
+ * as of v19 it instead creates a hazard for binary upgrades on 32-bit builds.
- * differs between such builds, too.  Anyway we're stuck with it now.
+ * Fortunately, that hazard seems mostly theoretical for lack of affected
+ * opclasses.  Going forward, we will be using a fixed size of Datum so that
+ * there's no longer any pressing reason to change this.
 */
 typedef struct SpGistInnerTupleData
 {
@ -377,8 +379,8 @@ typedef SpGistNodeTupleData *SpGistNodeTuple;
 *
 * size must be a multiple of MAXALIGN; also, it must be at least SGDTSIZE
 * so that the tuple can be converted to REDIRECT status later.  (This
- * restriction only adds bytes for a NULL leaf datum stored on a 32-bit
+ * restriction only adds bytes for a NULL leaf datum; otherwise alignment
- * machine; otherwise alignment restrictions force it anyway.)
+ * restrictions force it anyway.)
 */
 typedef struct SpGistLeafTupleData
 {
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@ -39,9 +39,6 @@ att_isnull(int ATT, const bits8 *BITS)
 * return the correct number of bytes fetched from the data area and extended
 * to Datum form.
 *
- * On machines where Datum is 8 bytes, we support fetching 8-byte byval
- * attributes; otherwise, only 1, 2, and 4-byte values are supported.
- *
 * Note that T must already be properly aligned for this to work correctly.
 */
 #define fetchatt(A,T) fetch_att(T, (A)->attbyval, (A)->attlen)
@ -62,10 +59,8 @@ fetch_att(const void *T, bool attbyval, int attlen)
 				return Int16GetDatum(*((const int16 *) T));
 			case sizeof(int32):
 				return Int32GetDatum(*((const int32 *) T));
-#if SIZEOF_DATUM == 8
+			case sizeof(int64):
-			case sizeof(Datum):
+				return Int64GetDatum(*((const int64 *) T));
-				return *((const Datum *) T);
-#endif
 			default:
 				elog(ERROR, "unsupported byval length: %d", attlen);
 				return 0;
@ -221,11 +216,9 @@ store_att_byval(void *T, Datum newdatum, int attlen)
 		case sizeof(int32):
 			*(int32 *) T = DatumGetInt32(newdatum);
 			break;
-#if SIZEOF_DATUM == 8
+		case sizeof(int64):
-		case sizeof(Datum):
+			*(int64 *) T = DatumGetInt64(newdatum);
-			*(Datum *) T = newdatum;
 			break;
-#endif
 		default:
 			elog(ERROR, "unsupported byval length: %d", attlen);
 	}
--- a/src/include/port/pg_bswap.h
+++ b/src/include/port/pg_bswap.h
@ -130,8 +130,7 @@ pg_bswap64(uint64 x)
 /*
 * Rearrange the bytes of a Datum from big-endian order into the native byte
- * order.  On big-endian machines, this does nothing at all.  Note that the C
+ * order.  On big-endian machines, this does nothing at all.
- * type Datum is an unsigned integer type on all platforms.
 *
 * One possible application of the DatumBigEndianToNative() macro is to make
 * bitwise comparisons cheaper.  A simple 3-way comparison of Datums
@ -139,23 +138,11 @@ pg_bswap64(uint64 x)
 * the same result as a memcmp() of the corresponding original Datums, but can
 * be much cheaper.  It's generally safe to do this on big-endian systems
 * without any special transformation occurring first.
- *
- * If SIZEOF_DATUM is not defined, then postgres.h wasn't included and these
- * macros probably shouldn't be used, so we define nothing.  Note that
- * SIZEOF_DATUM == 8 would evaluate as 0 == 8 in that case, potentially
- * leading to the wrong implementation being selected and confusing errors, so
- * defining nothing is safest.
 */
-#ifdef SIZEOF_DATUM
 #ifdef WORDS_BIGENDIAN
 #define		DatumBigEndianToNative(x)	(x)
 #else							/* !WORDS_BIGENDIAN */
-#if SIZEOF_DATUM == 8
+#define		DatumBigEndianToNative(x)	UInt64GetDatum(pg_bswap64(DatumGetUInt64(x)))
-#define		DatumBigEndianToNative(x)	pg_bswap64(x)
-#else							/* SIZEOF_DATUM != 8 */
-#define		DatumBigEndianToNative(x)	pg_bswap32(x)
-#endif							/* SIZEOF_DATUM == 8 */
 #endif							/* WORDS_BIGENDIAN */
-#endif							/* SIZEOF_DATUM */
 #endif							/* PG_BSWAP_H */
--- a/src/include/utils/sortsupport.h
+++ b/src/include/utils/sortsupport.h
@ -262,7 +262,6 @@ ApplyUnsignedSortComparator(Datum datum1, bool isNull1,
 	return compare;
 }
-#if SIZEOF_DATUM >= 8
 static inline int
 ApplySignedSortComparator(Datum datum1, bool isNull1,
 						  Datum datum2, bool isNull2,
@ -296,7 +295,6 @@ ApplySignedSortComparator(Datum datum1, bool isNull1,
 	return compare;
 }
-#endif
 static inline int
 ApplyInt32SortComparator(Datum datum1, bool isNull1,
@ -376,9 +374,7 @@ ApplySortAbbrevFullComparator(Datum datum1, bool isNull1,
 * are eligible for faster sorting.
 */
 extern int	ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup);
-#if SIZEOF_DATUM >= 8
 extern int	ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup);
-#endif
 extern int	ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup);
 /* Other functions in utils/sort/sortsupport.c */