mirror of https://github.com/postgres/postgres
Use SSE2 intrinsics to speed up the search, where available. Otherwise, use a simple 'for' loop. The motivation to add this now is to speed up XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer arrays are optimized. Other types are left for future work, as is the extension of this technique to non-x86 platforms. Nathan Bossart Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13 pull/101/head
parent
356dd2ce5b
commit
b6ef167564
@ -0,0 +1,103 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* pg_lfind.h |
||||||
|
* Optimized linear search routines. |
||||||
|
* |
||||||
|
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/include/port/pg_lfind.h |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#ifndef PG_LFIND_H |
||||||
|
#define PG_LFIND_H |
||||||
|
|
||||||
|
#include "port/simd.h" |
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_lfind32 |
||||||
|
* |
||||||
|
* Return true if there is an element in 'base' that equals 'key', otherwise |
||||||
|
* return false. |
||||||
|
*/ |
||||||
|
static inline bool |
||||||
|
pg_lfind32(uint32 key, uint32 *base, uint32 nelem) |
||||||
|
{ |
||||||
|
uint32 i = 0; |
||||||
|
|
||||||
|
/* Use SIMD intrinsics where available. */ |
||||||
|
#ifdef USE_SSE2 |
||||||
|
|
||||||
|
/*
|
||||||
|
* A 16-byte register only has four 4-byte lanes. For better |
||||||
|
* instruction-level parallelism, each loop iteration operates on a block |
||||||
|
* of four registers. Testing has showed this is ~40% faster than using a |
||||||
|
* block of two registers. |
||||||
|
*/ |
||||||
|
const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ |
||||||
|
uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */ |
||||||
|
|
||||||
|
#if defined(USE_ASSERT_CHECKING) |
||||||
|
bool assert_result = false; |
||||||
|
|
||||||
|
/* pre-compute the result for assert checking */ |
||||||
|
for (i = 0; i < nelem; i++) |
||||||
|
{ |
||||||
|
if (key == base[i]) |
||||||
|
{ |
||||||
|
assert_result = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
for (i = 0; i < iterations; i += 16) |
||||||
|
{ |
||||||
|
/* load the next block into 4 registers holding 4 values each */ |
||||||
|
const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]); |
||||||
|
const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]); |
||||||
|
const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]); |
||||||
|
const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]); |
||||||
|
|
||||||
|
/* compare each value to the key */ |
||||||
|
const __m128i result1 = _mm_cmpeq_epi32(keys, vals1); |
||||||
|
const __m128i result2 = _mm_cmpeq_epi32(keys, vals2); |
||||||
|
const __m128i result3 = _mm_cmpeq_epi32(keys, vals3); |
||||||
|
const __m128i result4 = _mm_cmpeq_epi32(keys, vals4); |
||||||
|
|
||||||
|
/* combine the results into a single variable */ |
||||||
|
const __m128i tmp1 = _mm_or_si128(result1, result2); |
||||||
|
const __m128i tmp2 = _mm_or_si128(result3, result4); |
||||||
|
const __m128i result = _mm_or_si128(tmp1, tmp2); |
||||||
|
|
||||||
|
/* see if there was a match */ |
||||||
|
if (_mm_movemask_epi8(result) != 0) |
||||||
|
{ |
||||||
|
#if defined(USE_ASSERT_CHECKING) |
||||||
|
Assert(assert_result == true); |
||||||
|
#endif |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
#endif /* USE_SSE2 */ |
||||||
|
|
||||||
|
/* Process the remaining elements one at a time. */ |
||||||
|
for (; i < nelem; i++) |
||||||
|
{ |
||||||
|
if (key == base[i]) |
||||||
|
{ |
||||||
|
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) |
||||||
|
Assert(assert_result == true); |
||||||
|
#endif |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) |
||||||
|
Assert(assert_result == false); |
||||||
|
#endif |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
#endif /* PG_LFIND_H */ |
@ -0,0 +1,4 @@ |
|||||||
|
# Generated subdirectories |
||||||
|
/log/ |
||||||
|
/results/ |
||||||
|
/tmp_check/ |
@ -0,0 +1,23 @@ |
|||||||
|
# src/test/modules/test_lfind/Makefile
|
||||||
|
|
||||||
|
MODULE_big = test_lfind
|
||||||
|
OBJS = \
|
||||||
|
$(WIN32RES) \
|
||||||
|
test_lfind.o
|
||||||
|
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
|
||||||
|
|
||||||
|
EXTENSION = test_lfind
|
||||||
|
DATA = test_lfind--1.0.sql
|
||||||
|
|
||||||
|
REGRESS = test_lfind
|
||||||
|
|
||||||
|
ifdef USE_PGXS |
||||||
|
PG_CONFIG = pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
|
include $(PGXS) |
||||||
|
else |
||||||
|
subdir = src/test/modules/test_lfind
|
||||||
|
top_builddir = ../../../..
|
||||||
|
include $(top_builddir)/src/Makefile.global |
||||||
|
include $(top_srcdir)/contrib/contrib-global.mk |
||||||
|
endif |
@ -0,0 +1,12 @@ |
|||||||
|
CREATE EXTENSION test_lfind; |
||||||
|
-- |
||||||
|
-- These tests don't produce any interesting output. We're checking that |
||||||
|
-- the operations complete without crashing or hanging and that none of their |
||||||
|
-- internal sanity tests fail. |
||||||
|
-- |
||||||
|
SELECT test_lfind(); |
||||||
|
test_lfind |
||||||
|
------------ |
||||||
|
|
||||||
|
(1 row) |
||||||
|
|
@ -0,0 +1,8 @@ |
|||||||
|
CREATE EXTENSION test_lfind; |
||||||
|
|
||||||
|
-- |
||||||
|
-- These tests don't produce any interesting output. We're checking that |
||||||
|
-- the operations complete without crashing or hanging and that none of their |
||||||
|
-- internal sanity tests fail. |
||||||
|
-- |
||||||
|
SELECT test_lfind(); |
@ -0,0 +1,8 @@ |
|||||||
|
/* src/test/modules/test_lfind/test_lfind--1.0.sql */ |
||||||
|
|
||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION |
||||||
|
\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit |
||||||
|
|
||||||
|
CREATE FUNCTION test_lfind() |
||||||
|
RETURNS pg_catalog.void |
||||||
|
AS 'MODULE_PATHNAME' LANGUAGE C; |
@ -0,0 +1,52 @@ |
|||||||
|
/*--------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* test_lfind.c |
||||||
|
* Test correctness of optimized linear search functions. |
||||||
|
* |
||||||
|
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/test/modules/test_lfind/test_lfind.c |
||||||
|
* |
||||||
|
* ------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include "fmgr.h" |
||||||
|
#include "port/pg_lfind.h" |
||||||
|
|
||||||
|
PG_MODULE_MAGIC; |
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(test_lfind); |
||||||
|
|
||||||
|
Datum |
||||||
|
test_lfind(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
#define TEST_ARRAY_SIZE 135 |
||||||
|
uint32 test_array[TEST_ARRAY_SIZE] = {0}; |
||||||
|
|
||||||
|
test_array[8] = 1; |
||||||
|
test_array[64] = 2; |
||||||
|
test_array[TEST_ARRAY_SIZE - 1] = 3; |
||||||
|
|
||||||
|
if (pg_lfind32(1, test_array, 4)) |
||||||
|
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||||
|
if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE)) |
||||||
|
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||||
|
|
||||||
|
if (pg_lfind32(2, test_array, 32)) |
||||||
|
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||||
|
if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE)) |
||||||
|
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||||
|
|
||||||
|
if (pg_lfind32(3, test_array, 96)) |
||||||
|
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||||
|
if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE)) |
||||||
|
elog(ERROR, "pg_lfind32() did not find existing element"); |
||||||
|
|
||||||
|
if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE)) |
||||||
|
elog(ERROR, "pg_lfind32() found nonexistent element"); |
||||||
|
|
||||||
|
PG_RETURN_VOID(); |
||||||
|
} |
@ -0,0 +1,4 @@ |
|||||||
|
comment = 'Test code for optimized linear search functions' |
||||||
|
default_version = '1.0' |
||||||
|
module_pathname = '$libdir/test_lfind' |
||||||
|
relocatable = true |
Loading…
Reference in new issue