mirror of https://github.com/postgres/postgres
Use SSE2 intrinsics to speed up the search, where available. Otherwise, use a simple 'for' loop. The motivation to add this now is to speed up XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer arrays are optimized. Other types are left for future work, as is the extension of this technique to non-x86 platforms.

Nathan Bossart

Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada
Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13

pull/101/head
parent
356dd2ce5b
commit
b6ef167564
@ -0,0 +1,103 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* pg_lfind.h |
||||
* Optimized linear search routines. |
||||
* |
||||
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* src/include/port/pg_lfind.h |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#ifndef PG_LFIND_H |
||||
#define PG_LFIND_H |
||||
|
||||
#include "port/simd.h" |
||||
|
||||
/*
|
||||
* pg_lfind32 |
||||
* |
||||
* Return true if there is an element in 'base' that equals 'key', otherwise |
||||
* return false. |
||||
*/ |
||||
static inline bool |
||||
pg_lfind32(uint32 key, uint32 *base, uint32 nelem) |
||||
{ |
||||
uint32 i = 0; |
||||
|
||||
/* Use SIMD intrinsics where available. */ |
||||
#ifdef USE_SSE2 |
||||
|
||||
/*
|
||||
* A 16-byte register only has four 4-byte lanes. For better |
||||
* instruction-level parallelism, each loop iteration operates on a block |
||||
* of four registers. Testing has showed this is ~40% faster than using a |
||||
* block of two registers. |
||||
*/ |
||||
const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ |
||||
uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */ |
||||
|
||||
#if defined(USE_ASSERT_CHECKING) |
||||
bool assert_result = false; |
||||
|
||||
/* pre-compute the result for assert checking */ |
||||
for (i = 0; i < nelem; i++) |
||||
{ |
||||
if (key == base[i]) |
||||
{ |
||||
assert_result = true; |
||||
break; |
||||
} |
||||
} |
||||
#endif |
||||
|
||||
for (i = 0; i < iterations; i += 16) |
||||
{ |
||||
/* load the next block into 4 registers holding 4 values each */ |
||||
const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]); |
||||
const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]); |
||||
const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]); |
||||
const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]); |
||||
|
||||
/* compare each value to the key */ |
||||
const __m128i result1 = _mm_cmpeq_epi32(keys, vals1); |
||||
const __m128i result2 = _mm_cmpeq_epi32(keys, vals2); |
||||
const __m128i result3 = _mm_cmpeq_epi32(keys, vals3); |
||||
const __m128i result4 = _mm_cmpeq_epi32(keys, vals4); |
||||
|
||||
/* combine the results into a single variable */ |
||||
const __m128i tmp1 = _mm_or_si128(result1, result2); |
||||
const __m128i tmp2 = _mm_or_si128(result3, result4); |
||||
const __m128i result = _mm_or_si128(tmp1, tmp2); |
||||
|
||||
/* see if there was a match */ |
||||
if (_mm_movemask_epi8(result) != 0) |
||||
{ |
||||
#if defined(USE_ASSERT_CHECKING) |
||||
Assert(assert_result == true); |
||||
#endif |
||||
return true; |
||||
} |
||||
} |
||||
#endif /* USE_SSE2 */ |
||||
|
||||
/* Process the remaining elements one at a time. */ |
||||
for (; i < nelem; i++) |
||||
{ |
||||
if (key == base[i]) |
||||
{ |
||||
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) |
||||
Assert(assert_result == true); |
||||
#endif |
||||
return true; |
||||
} |
||||
} |
||||
|
||||
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) |
||||
Assert(assert_result == false); |
||||
#endif |
||||
return false; |
||||
} |
||||
|
||||
#endif /* PG_LFIND_H */ |
@ -0,0 +1,4 @@ |
||||
# Generated subdirectories |
||||
/log/ |
||||
/results/ |
||||
/tmp_check/ |
@ -0,0 +1,23 @@ |
||||
# src/test/modules/test_lfind/Makefile
|
||||
|
||||
MODULE_big = test_lfind
|
||||
OBJS = \
|
||||
$(WIN32RES) \
|
||||
test_lfind.o
|
||||
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
|
||||
|
||||
EXTENSION = test_lfind
|
||||
DATA = test_lfind--1.0.sql
|
||||
|
||||
REGRESS = test_lfind
|
||||
|
||||
ifdef USE_PGXS |
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS) |
||||
else |
||||
subdir = src/test/modules/test_lfind
|
||||
top_builddir = ../../../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
endif |
@ -0,0 +1,12 @@ |
||||
CREATE EXTENSION test_lfind; |
||||
-- |
||||
-- These tests don't produce any interesting output. We're checking that |
||||
-- the operations complete without crashing or hanging and that none of their |
||||
-- internal sanity tests fail. |
||||
-- |
||||
SELECT test_lfind(); |
||||
test_lfind |
||||
------------ |
||||
|
||||
(1 row) |
||||
|
@ -0,0 +1,8 @@ |
||||
CREATE EXTENSION test_lfind; |
||||
|
||||
-- |
||||
-- These tests don't produce any interesting output. We're checking that |
||||
-- the operations complete without crashing or hanging and that none of their |
||||
-- internal sanity tests fail. |
||||
-- |
||||
SELECT test_lfind(); |
@ -0,0 +1,8 @@ |
||||
/* src/test/modules/test_lfind/test_lfind--1.0.sql */ |
||||
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION |
||||
\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit |
||||
|
||||
CREATE FUNCTION test_lfind() |
||||
RETURNS pg_catalog.void |
||||
AS 'MODULE_PATHNAME' LANGUAGE C; |
@ -0,0 +1,52 @@ |
||||
/*--------------------------------------------------------------------------
|
||||
* |
||||
* test_lfind.c |
||||
* Test correctness of optimized linear search functions. |
||||
* |
||||
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* src/test/modules/test_lfind/test_lfind.c |
||||
* |
||||
* ------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "fmgr.h" |
||||
#include "port/pg_lfind.h" |
||||
|
||||
PG_MODULE_MAGIC; |
||||
|
||||
PG_FUNCTION_INFO_V1(test_lfind); |
||||
|
||||
Datum |
||||
test_lfind(PG_FUNCTION_ARGS) |
||||
{ |
||||
#define TEST_ARRAY_SIZE 135 |
||||
uint32 test_array[TEST_ARRAY_SIZE] = {0}; |
||||
|
||||
test_array[8] = 1; |
||||
test_array[64] = 2; |
||||
test_array[TEST_ARRAY_SIZE - 1] = 3; |
||||
|
||||
if (pg_lfind32(1, test_array, 4)) |
||||
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||
if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE)) |
||||
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||
|
||||
if (pg_lfind32(2, test_array, 32)) |
||||
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||
if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE)) |
||||
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||
|
||||
if (pg_lfind32(3, test_array, 96)) |
||||
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||
if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE)) |
||||
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||
|
||||
if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE)) |
||||
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
@ -0,0 +1,4 @@ |
||||
comment = 'Test code for optimized linear search functions' |
||||
default_version = '1.0' |
||||
module_pathname = '$libdir/test_lfind' |
||||
relocatable = true |
Loading…
Reference in new issue