From 4c910f3bbe92aa4e84ff15fa27b4de2da0d7ae50 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 11 Mar 2026 07:36:10 +0900 Subject: [PATCH] bloom: Optimize bitmap scan path with streaming read This commit replaces the per-page buffer read look in blgetbitmap() with a reading stream, to improve scan efficiency, particularly useful for large bloom indexes. Some benchmarking with a large number of rows has shown a very nice improvement in terms of runtime and IO read reduction with test cases up to 10M rows for a bloom index scan. For the io_uring method, The author has reported a 3x in runtime with io_uring while I was at close to a 7x. For the worker method with 3 workers, the author has reported better numbers than myself in runtime, with the reduction in IO stats being appealing for all the cases measured. Author: Xuneng Zhou Reviewed-by: Michael Paquier Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/CABPTF7VrqfbcDXqGrdLQ2xaQ=K0RzExNuw6U_GGqzSJu32wfdQ@mail.gmail.com --- contrib/bloom/blscan.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c index 0535d45f2d8..1a0e42021ec 100644 --- a/contrib/bloom/blscan.c +++ b/contrib/bloom/blscan.c @@ -18,6 +18,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" +#include "storage/read_stream.h" /* * Begin scan of bloom index. @@ -76,11 +77,13 @@ int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) { int64 ntids = 0; - BlockNumber blkno = BLOOM_HEAD_BLKNO, + BlockNumber blkno, npages; int i; BufferAccessStrategy bas; BloomScanOpaque so = (BloomScanOpaque) scan->opaque; + BlockRangeReadStreamPrivate p; + ReadStream *stream; if (so->sign == NULL) { @@ -120,14 +123,29 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) if (scan->instrument) scan->instrument->nsearches++; + /* Scan all blocks except the metapage using streaming reads */ + p.current_blocknum = BLOOM_HEAD_BLKNO; + p.last_exclusive = npages; + + /* + * It is safe to use batchmode as block_range_read_stream_cb takes no + * locks. + */ + stream = read_stream_begin_relation(READ_STREAM_FULL | + READ_STREAM_USE_BATCHING, + bas, + scan->indexRelation, + MAIN_FORKNUM, + block_range_read_stream_cb, + &p, + 0); + for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++) { Buffer buffer; Page page; - buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, - blkno, RBM_NORMAL, bas); - + buffer = read_stream_next_buffer(stream, NULL); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); @@ -163,6 +181,9 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) UnlockReleaseBuffer(buffer); CHECK_FOR_INTERRUPTS(); } + + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); FreeAccessStrategy(bas); return ntids;