Remove some unnecessary optimizations in popcount code.

Over the past few releases, we've added a huge amount of complexity
to our popcount implementations.  Commits fbe327e5b4, 79e232ca01,
8c6653516c, and 25dc485074 did some preliminary refactoring, but
many opportunities for simplification remain.  In particular, if we
disclaim interest in micro-optimizing this code for 32-bit builds
and in unnecessary alignment checks on x86-64, we can remove a
decent chunk of code.
I cannot find public discussion or benchmarks for the code this
commit removes, but it seems unlikely that this change will
noticeably impact performance on affected systems.

Suggested-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com
pull/272/head
Nathan Bossart 3 days ago
parent 88327092ff
commit cb7b2e5e8e
  1. 16
      src/include/port/pg_bitutils.h
  2. 30
      src/port/pg_bitutils.c
  3. 67
      src/port/pg_popcount_x86.c

@@ -333,13 +333,7 @@ pg_popcount(const char *buf, int bytes)
* We set the threshold to the point at which we'll first use special
* instructions in the optimized version.
*/
#if SIZEOF_VOID_P >= 8
int threshold = 8;
#else
int threshold = 4;
#endif
if (bytes < threshold)
if (bytes < 8)
{
uint64 popcnt = 0;
@@ -364,13 +358,7 @@ pg_popcount_masked(const char *buf, int bytes, bits8 mask)
* We set the threshold to the point at which we'll first use special
* instructions in the optimized version.
*/
#if SIZEOF_VOID_P >= 8
int threshold = 8;
#else
int threshold = 4;
#endif
if (bytes < threshold)
if (bytes < 8)
{
uint64 popcnt = 0;

@@ -167,20 +167,6 @@ pg_popcount_portable(const char *buf, int bytes)
bytes -= 8;
}
buf = (const char *) words;
}
#else
/* Process in 32-bit chunks if the buffer is aligned. */
if (buf == (const char *) TYPEALIGN(4, buf))
{
const uint32 *words = (const uint32 *) buf;
while (bytes >= 4)
{
popcnt += pg_popcount32_portable(*words++);
bytes -= 4;
}
buf = (const char *) words;
}
#endif
@@ -215,22 +201,6 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
bytes -= 8;
}
buf = (const char *) words;
}
#else
/* Process in 32-bit chunks if the buffer is aligned. */
uint32 maskv = ~((uint32) 0) / 0xFF * mask;
if (buf == (const char *) TYPEALIGN(4, buf))
{
const uint32 *words = (const uint32 *) buf;
while (bytes >= 4)
{
popcnt += pg_popcount32_portable(*words++ & maskv);
bytes -= 4;
}
buf = (const char *) words;
}
#endif

@@ -376,40 +376,20 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
* pg_popcount_sse42
* Returns the number of 1-bits in buf
*/
pg_attribute_no_sanitize_alignment()
static uint64
pg_popcount_sse42(const char *buf, int bytes)
{
uint64 popcnt = 0;
const uint64 *words = (const uint64 *) buf;
#if SIZEOF_VOID_P >= 8
/* Process in 64-bit chunks if the buffer is aligned. */
if (buf == (const char *) TYPEALIGN(8, buf))
while (bytes >= 8)
{
const uint64 *words = (const uint64 *) buf;
while (bytes >= 8)
{
popcnt += pg_popcount64_sse42(*words++);
bytes -= 8;
}
buf = (const char *) words;
popcnt += pg_popcount64_sse42(*words++);
bytes -= 8;
}
#else
/* Process in 32-bit chunks if the buffer is aligned. */
if (buf == (const char *) TYPEALIGN(4, buf))
{
const uint32 *words = (const uint32 *) buf;
while (bytes >= 4)
{
popcnt += pg_popcount32_sse42(*words++);
bytes -= 4;
}
buf = (const char *) words;
}
#endif
buf = (const char *) words;
/* Process any remaining bytes */
while (bytes--)
@@ -422,44 +402,21 @@ pg_popcount_sse42(const char *buf, int bytes)
* pg_popcount_masked_sse42
* Returns the number of 1-bits in buf after applying the mask to each byte
*/
pg_attribute_no_sanitize_alignment()
static uint64
pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask)
{
uint64 popcnt = 0;
#if SIZEOF_VOID_P >= 8
/* Process in 64-bit chunks if the buffer is aligned */
uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
const uint64 *words = (const uint64 *) buf;
if (buf == (const char *) TYPEALIGN(8, buf))
while (bytes >= 8)
{
const uint64 *words = (const uint64 *) buf;
while (bytes >= 8)
{
popcnt += pg_popcount64_sse42(*words++ & maskv);
bytes -= 8;
}
buf = (const char *) words;
popcnt += pg_popcount64_sse42(*words++ & maskv);
bytes -= 8;
}
#else
/* Process in 32-bit chunks if the buffer is aligned. */
uint32 maskv = ~((uint32) 0) / 0xFF * mask;
if (buf == (const char *) TYPEALIGN(4, buf))
{
const uint32 *words = (const uint32 *) buf;
while (bytes >= 4)
{
popcnt += pg_popcount32_sse42(*words++ & maskv);
bytes -= 4;
}
buf = (const char *) words;
}
#endif
buf = (const char *) words;
/* Process any remaining bytes */
while (bytes--)

Loading…
Cancel
Save