From cb7b2e5e8efb3e5fb08052425cd00f067a56f877 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 12 Feb 2026 11:32:49 -0600 Subject: [PATCH] Remove some unnecessary optimizations in popcount code. Over the past few releases, we've added a huge amount of complexity to our popcount implementations. Commits fbe327e5b4, 79e232ca01, 8c6653516c, and 25dc485074 did some preliminary refactoring, but many opportunities remain. In particular, if we disclaim interest in micro-optimizing this code for 32-bit builds and in unnecessary alignment checks on x86-64, we can remove a decent chunk of code. I cannot find public discussion or benchmarks for the code this commit removes, but it seems unlikely that this change will noticeably impact performance on affected systems. Suggested-by: John Naylor Reviewed-by: John Naylor Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com --- src/include/port/pg_bitutils.h | 16 +------- src/port/pg_bitutils.c | 30 --------------- src/port/pg_popcount_x86.c | 67 ++++++---------------------------- 3 files changed, 14 insertions(+), 99 deletions(-) diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 35761f509ec..20c11b79c61 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -333,13 +333,7 @@ pg_popcount(const char *buf, int bytes) * We set the threshold to the point at which we'll first use special * instructions in the optimized version. */ -#if SIZEOF_VOID_P >= 8 - int threshold = 8; -#else - int threshold = 4; -#endif - - if (bytes < threshold) + if (bytes < 8) { uint64 popcnt = 0; @@ -364,13 +358,7 @@ pg_popcount_masked(const char *buf, int bytes, bits8 mask) * We set the threshold to the point at which we'll first use special * instructions in the optimized version. */ -#if SIZEOF_VOID_P >= 8 - int threshold = 8; -#else - int threshold = 4; -#endif - - if (bytes < threshold) + if (bytes < 8) { uint64 popcnt = 0; diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index ffda75825e5..bec06c06fc3 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -167,20 +167,6 @@ pg_popcount_portable(const char *buf, int bytes) bytes -= 8; } - buf = (const char *) words; - } -#else - /* Process in 32-bit chunks if the buffer is aligned. */ - if (buf == (const char *) TYPEALIGN(4, buf)) - { - const uint32 *words = (const uint32 *) buf; - - while (bytes >= 4) - { - popcnt += pg_popcount32_portable(*words++); - bytes -= 4; - } - buf = (const char *) words; } #endif @@ -215,22 +201,6 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask) bytes -= 8; } - buf = (const char *) words; - } -#else - /* Process in 32-bit chunks if the buffer is aligned. */ - uint32 maskv = ~((uint32) 0) / 0xFF * mask; - - if (buf == (const char *) TYPEALIGN(4, buf)) - { - const uint32 *words = (const uint32 *) buf; - - while (bytes >= 4) - { - popcnt += pg_popcount32_portable(*words++ & maskv); - bytes -= 4; - } - buf = (const char *) words; } #endif diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c index 245f0167d00..7aebf69898b 100644 --- a/src/port/pg_popcount_x86.c +++ b/src/port/pg_popcount_x86.c @@ -376,40 +376,20 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); * pg_popcount_sse42 * Returns the number of 1-bits in buf */ +pg_attribute_no_sanitize_alignment() static uint64 pg_popcount_sse42(const char *buf, int bytes) { uint64 popcnt = 0; + const uint64 *words = (const uint64 *) buf; -#if SIZEOF_VOID_P >= 8 - /* Process in 64-bit chunks if the buffer is aligned. */ - if (buf == (const char *) TYPEALIGN(8, buf)) + while (bytes >= 8) { - const uint64 *words = (const uint64 *) buf; - - while (bytes >= 8) - { - popcnt += pg_popcount64_sse42(*words++); - bytes -= 8; - } - - buf = (const char *) words; + popcnt += pg_popcount64_sse42(*words++); + bytes -= 8; } -#else - /* Process in 32-bit chunks if the buffer is aligned. */ - if (buf == (const char *) TYPEALIGN(4, buf)) - { - const uint32 *words = (const uint32 *) buf; - while (bytes >= 4) - { - popcnt += pg_popcount32_sse42(*words++); - bytes -= 4; - } - - buf = (const char *) words; - } -#endif + buf = (const char *) words; /* Process any remaining bytes */ while (bytes--) @@ -422,44 +402,21 @@ pg_popcount_sse42(const char *buf, int bytes) * pg_popcount_masked_sse42 * Returns the number of 1-bits in buf after applying the mask to each byte */ +pg_attribute_no_sanitize_alignment() static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask) { uint64 popcnt = 0; - -#if SIZEOF_VOID_P >= 8 - /* Process in 64-bit chunks if the buffer is aligned */ uint64 maskv = ~UINT64CONST(0) / 0xFF * mask; + const uint64 *words = (const uint64 *) buf; - if (buf == (const char *) TYPEALIGN(8, buf)) + while (bytes >= 8) { - const uint64 *words = (const uint64 *) buf; - - while (bytes >= 8) - { - popcnt += pg_popcount64_sse42(*words++ & maskv); - bytes -= 8; - } - - buf = (const char *) words; + popcnt += pg_popcount64_sse42(*words++ & maskv); + bytes -= 8; } -#else - /* Process in 32-bit chunks if the buffer is aligned. */ - uint32 maskv = ~((uint32) 0) / 0xFF * mask; - - if (buf == (const char *) TYPEALIGN(4, buf)) - { - const uint32 *words = (const uint32 *) buf; - - while (bytes >= 4) - { - popcnt += pg_popcount32_sse42(*words++ & maskv); - bytes -= 4; - } - buf = (const char *) words; - } -#endif + buf = (const char *) words; /* Process any remaining bytes */ while (bytes--)