@ -106,19 +106,23 @@ const uint8 pg_number_of_ones[256] = {
/*
 * Forward declarations for the portable ("slow") implementations.  These are
 * declared regardless of whether TRY_POPCNT_FAST is defined.
 */
static inline int pg_popcount32_slow ( uint32 word ) ;
static inline int pg_popcount64_slow ( uint64 word ) ;
static uint64 pg_popcount_slow ( const char * buf , int bytes ) ;
static uint64 pg_popcount_masked_slow ( const char * buf , int bytes , bits8 mask ) ;
# ifdef TRY_POPCNT_FAST
/*
 * Availability probe consulted when selecting an implementation.
 * NOTE(review): its definition is not in view here; presumably it reports
 * whether the fast variants can be used on this CPU -- confirm.
 */
static bool pg_popcount_available ( void ) ;
/*
 * "_choose" variants: the initial targets of the function pointers defined
 * below.  They run the selection logic and then forward the call.
 */
static int pg_popcount32_choose ( uint32 word ) ;
static int pg_popcount64_choose ( uint64 word ) ;
static uint64 pg_popcount_choose ( const char * buf , int bytes ) ;
static uint64 pg_popcount_masked_choose ( const char * buf , int bytes , bits8 mask ) ;
/* "_fast" variants, only declared when TRY_POPCNT_FAST is defined. */
static inline int pg_popcount32_fast ( uint32 word ) ;
static inline int pg_popcount64_fast ( uint64 word ) ;
static uint64 pg_popcount_fast ( const char * buf , int bytes ) ;
static uint64 pg_popcount_masked_fast ( const char * buf , int bytes , bits8 mask ) ;
/*
 * Dispatch pointers.  Each starts out aimed at its "_choose" function;
 * choose_popcount_functions() later repoints them at a concrete
 * implementation.
 */
int ( * pg_popcount32 ) ( uint32 word ) = pg_popcount32_choose ;
int ( * pg_popcount64 ) ( uint64 word ) = pg_popcount64_choose ;
uint64 ( * pg_popcount_optimized ) ( const char * buf , int bytes ) = pg_popcount_choose ;
uint64 ( * pg_popcount_masked_optimized ) ( const char * buf , int bytes , bits8 mask ) = pg_popcount_masked_choose ;
# endif /* TRY_POPCNT_FAST */
# ifdef TRY_POPCNT_FAST
@ -156,17 +160,22 @@ choose_popcount_functions(void)
pg_popcount32 = pg_popcount32_fast ;
pg_popcount64 = pg_popcount64_fast ;
pg_popcount_optimized = pg_popcount_fast ;
pg_popcount_masked_optimized = pg_popcount_masked_fast ;
}
else
{
pg_popcount32 = pg_popcount32_slow ;
pg_popcount64 = pg_popcount64_slow ;
pg_popcount_optimized = pg_popcount_slow ;
pg_popcount_masked_optimized = pg_popcount_masked_slow ;
}
# ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
if ( pg_popcount_avx512_available ( ) )
{
pg_popcount_optimized = pg_popcount_avx512 ;
pg_popcount_masked_optimized = pg_popcount_masked_avx512 ;
}
# endif
}
@ -191,6 +200,13 @@ pg_popcount_choose(const char *buf, int bytes)
return pg_popcount_optimized ( buf , bytes ) ;
}
/*
 * pg_popcount_masked_choose
 *		Initial target of the pg_popcount_masked_optimized function pointer.
 *
 * Runs choose_popcount_functions(), which repoints
 * pg_popcount_masked_optimized at a concrete implementation, and then
 * forwards this call through the updated pointer.  This mirrors
 * pg_popcount_choose, which forwards through pg_popcount_optimized.
 *
 * buf/bytes describe the input buffer; mask is passed through unchanged.
 * Returns whatever the selected implementation returns.
 */
static uint64
pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
{
	choose_popcount_functions();

	/* The pointer no longer refers to this function, so this cannot recurse. */
	return pg_popcount_masked_optimized(buf, bytes, mask);
}
/*
* pg_popcount32_fast
* Return the number of 1 bits set in word
@ -271,6 +287,56 @@ pg_popcount_fast(const char *buf, int bytes)
return popcnt ;
}
/*
 * pg_popcount_masked_fast
 *		Returns the number of 1-bits in buf after applying the mask to each byte
 *
 * buf is the start of the input and bytes is its length.  Every input byte is
 * ANDed with mask before its set bits are counted.  A non-positive length
 * yields 0.
 */
static uint64
pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
{
	uint64		popcnt = 0;

#if SIZEOF_VOID_P >= 8
	/*
	 * Replicate the byte mask into every byte of a word: all-ones divided by
	 * 0xFF is 0x0101...01, and multiplying that by mask copies mask into each
	 * byte position.
	 */
	uint64		maskv = ~UINT64CONST(0) / 0xFF * mask;

	/* Process in 64-bit chunks if the buffer is aligned */
	if (buf == (const char *) TYPEALIGN(8, buf))
	{
		const uint64 *words = (const uint64 *) buf;

		while (bytes >= 8)
		{
			popcnt += pg_popcount64_fast(*words++ & maskv);
			bytes -= 8;
		}

		/* Continue byte-wise from wherever the word loop stopped */
		buf = (const char *) words;
	}
#else
	/* Same byte-replication trick as above, for a 32-bit word */
	uint32		maskv = ~((uint32) 0) / 0xFF * mask;

	/* Process in 32-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(4, buf))
	{
		const uint32 *words = (const uint32 *) buf;

		while (bytes >= 4)
		{
			popcnt += pg_popcount32_fast(*words++ & maskv);
			bytes -= 4;
		}

		/* Continue byte-wise from wherever the word loop stopped */
		buf = (const char *) words;
	}
#endif

	/*
	 * Process any remaining bytes.  The "> 0" test keeps a negative length
	 * from running the loop past the buffer.
	 */
	while (bytes-- > 0)
		popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];

	return popcnt;
}
# endif /* TRY_POPCNT_FAST */
@ -370,6 +436,56 @@ pg_popcount_slow(const char *buf, int bytes)
return popcnt ;
}
/*
 * pg_popcount_masked_slow
 *		Returns the number of 1-bits in buf after applying the mask to each byte
 *
 * buf is the start of the input and bytes is its length.  Every input byte is
 * ANDed with mask before its set bits are counted.  A non-positive length
 * yields 0.
 */
static uint64
pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
{
	uint64		popcnt = 0;

#if SIZEOF_VOID_P >= 8
	/*
	 * Replicate the byte mask into every byte of a word: all-ones divided by
	 * 0xFF is 0x0101...01, and multiplying that by mask copies mask into each
	 * byte position.
	 */
	uint64		maskv = ~UINT64CONST(0) / 0xFF * mask;

	/* Process in 64-bit chunks if the buffer is aligned */
	if (buf == (const char *) TYPEALIGN(8, buf))
	{
		const uint64 *words = (const uint64 *) buf;

		while (bytes >= 8)
		{
			popcnt += pg_popcount64_slow(*words++ & maskv);
			bytes -= 8;
		}

		/* Continue byte-wise from wherever the word loop stopped */
		buf = (const char *) words;
	}
#else
	/* Same byte-replication trick as above, for a 32-bit word */
	uint32		maskv = ~((uint32) 0) / 0xFF * mask;

	/* Process in 32-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(4, buf))
	{
		const uint32 *words = (const uint32 *) buf;

		while (bytes >= 4)
		{
			popcnt += pg_popcount32_slow(*words++ & maskv);
			bytes -= 4;
		}

		/* Continue byte-wise from wherever the word loop stopped */
		buf = (const char *) words;
	}
#endif

	/*
	 * Process any remaining bytes.  The "> 0" test keeps a negative length
	 * from running the loop past the buffer.
	 */
	while (bytes-- > 0)
		popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];

	return popcnt;
}
# ifndef TRY_POPCNT_FAST
/*
@ -401,4 +517,14 @@ pg_popcount_optimized(const char *buf, int bytes)
return pg_popcount_slow ( buf , bytes ) ;
}
/*
 * pg_popcount_masked_optimized
 *		Count the 1-bits in buf once every byte has been ANDed with mask.
 *
 * This definition sits inside the "#ifndef TRY_POPCNT_FAST" section, so it
 * delegates straight to the portable implementation.
 */
uint64
pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
{
	uint64		nbits = pg_popcount_masked_slow(buf, bytes, mask);

	return nbits;
}
# endif /* !TRY_POPCNT_FAST */