@@ -126,7 +126,7 @@
* use small integers.
*/
#define PARALLEL_VACUUM_KEY_SHARED 1
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
#define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2
#define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
#define PARALLEL_VACUUM_KEY_WAL_USAGE 5
@@ -149,26 +149,24 @@ typedef enum
} VacErrPhase ;
/*
* LVDeadTuples stores the dead tuple TIDs collected during the heap scan .
* This is allocated in the DSM segment in parallel mode and in local memory
* in non - parallel mode .
* LVDeadItems stores TIDs whose index tuples are deleted by index vacuuming .
* Each TID points to an LP_DEAD line pointer from a heap page that has been
* processed by lazy_scan_prune .
*
* Also needed by lazy_vacuum_heap_rel , which marks the same LP_DEAD line
* pointers as LP_UNUSED during second heap pass .
*/
typedef struct LVDeadTuples
typedef struct LVDeadItems
{
int max_tuples ; /* # slots allocated in array */
int num_tuples ; /* current # of entries */
/* List of TIDs of tuples we intend to delete */
/* NB: this list is ordered by TID address */
ItemPointerData itemptrs [ FLEXIBLE_ARRAY_MEMBER ] ; /* array of
* ItemPointerData */
} LVDeadTuples ;
int max_items ; /* # slots allocated in array */
int num_items ; /* current # of entries */
/* Sorted array of TIDs to delete from indexes */
ItemPointerData items [ FLEXIBLE_ARRAY_MEMBER ] ;
} LVDeadItems ;
/* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
#define SizeOfDeadTuples(cnt) \
add_size(offsetof(LVDeadTuples, itemptrs), \
mul_size(sizeof(ItemPointerData), cnt))
#define MAXDEADTUPLES(max_size) \
(((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
#define MAXDEADITEMS(avail_mem) \
(((avail_mem) - offsetof(LVDeadItems, items)) / sizeof(ItemPointerData))
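/*
* Illustrative aside, not part of the patch: assuming a 6-byte
* ItemPointerData and an 8-byte LVDeadItems header (two ints), the macro
* above gives MAXDEADITEMS(1024 * 1024) = (1048576 - 8) / 6 = 174761, so
* roughly 175 thousand dead-item TIDs fit in 1MB of memory.
*/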
/*
* Shared information among parallel workers . So this is allocated in the DSM
@@ -322,7 +320,7 @@ typedef struct LVRelState
/*
* State managed by lazy_scan_heap ( ) follows
*/
LVDeadTuples *dead_tuples; /* items to vacuum from indexes */
LVDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
BlockNumber rel_pages ; /* total number of pages */
BlockNumber scanned_pages ; /* number of pages we examined */
BlockNumber pinskipped_pages ; /* # of pages skipped due to a pin */
@@ -389,7 +387,7 @@ static void lazy_vacuum(LVRelState *vacrel);
static bool lazy_vacuum_all_indexes ( LVRelState * vacrel ) ;
static void lazy_vacuum_heap_rel ( LVRelState * vacrel ) ;
static int lazy_vacuum_heap_page ( LVRelState * vacrel , BlockNumber blkno ,
Buffer buffer, int tupindex, Buffer *vmbuffer);
Buffer buffer, int index, Buffer *vmbuffer);
static bool lazy_check_needs_freeze ( Buffer buf , bool * hastup ,
LVRelState * vacrel ) ;
static bool lazy_check_wraparound_failsafe ( LVRelState * vacrel ) ;
@@ -419,10 +417,10 @@ static bool should_attempt_truncation(LVRelState *vacrel);
static void lazy_truncate_heap ( LVRelState * vacrel ) ;
static BlockNumber count_nondeletable_pages ( LVRelState * vacrel ,
bool * lock_waiter_detected ) ;
static long compute_max_dead_tuples ( BlockNumber relblocks , bool hasindex ) ;
static void lazy_space_alloc ( LVRelState * vacrel , int nworkers ,
BlockNumber relblocks);
static void lazy_space_free ( LVRelState * vacrel ) ;
static int dead_items_max_items ( LVRelState * vacrel ) ;
static inline Size max_items_to_alloc_size ( int max_items ) ;
static void dead_items_alloc(LVRelState *vacrel, int nworkers);
static void dead_items_cleanup ( LVRelState * vacrel ) ;
static bool lazy_tid_reaped ( ItemPointer itemptr , void * state ) ;
static int vac_cmp_itemptr ( const void * left , const void * right ) ;
static bool heap_page_is_all_visible ( LVRelState * vacrel , Buffer buf ,
@@ -431,9 +429,7 @@ static int compute_parallel_vacuum_workers(LVRelState *vacrel,
int nrequested ,
bool * will_parallel_vacuum ) ;
static void update_index_statistics ( LVRelState * vacrel ) ;
static LVParallelState * begin_parallel_vacuum ( LVRelState * vacrel ,
BlockNumber nblocks ,
int nrequested ) ;
static void begin_parallel_vacuum ( LVRelState * vacrel , int nrequested ) ;
static void end_parallel_vacuum ( LVRelState * vacrel ) ;
static LVSharedIndStats * parallel_stats_for_idx ( LVShared * lvshared , int getidx ) ;
static bool parallel_processing_is_safe ( Relation indrel , LVShared * lvshared ) ;
@@ -894,7 +890,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
static void
lazy_scan_heap ( LVRelState * vacrel , VacuumParams * params , bool aggressive )
{
LVDeadTuples *dead_tuples;
LVDeadItems *dead_items;
BlockNumber nblocks ,
blkno ,
next_unskippable_block ,
@@ -952,23 +948,24 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
/*
* Before beginning scan, check if it's already necessary to apply
* failsafe
* Do failsafe precheck before calling dead_items_alloc. This ensures
* that parallel VACUUM won't be attempted when relfrozenxid is already
* dangerously old.
*/
lazy_check_wraparound_failsafe ( vacrel ) ;
/*
* Allocate the space for dead tuples . Note that this handles parallel
* VACUUM initialization as part of allocating shared memory space used
* for dead_tuples.
* for dead_items.
*/
lazy_space_alloc(vacrel, params->nworkers, nblocks);
dead_tuples = vacrel->dead_tuples;
dead_items_alloc(vacrel, params->nworkers);
dead_items = vacrel->dead_items;
/* Report that we're scanning the heap, advertising total # of blocks */
initprog_val [ 0 ] = PROGRESS_VACUUM_PHASE_SCAN_HEAP ;
initprog_val [ 1 ] = nblocks ;
initprog_val[2] = dead_tuples->max_tuples;
initprog_val[2] = dead_items->max_items;
pgstat_progress_update_multi_param ( 3 , initprog_index , initprog_val ) ;
/*
@@ -1156,11 +1153,11 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
/*
* Consider if we definitely have enough space to process TIDs on page
* already . If we are close to overrunning the available space for
* dead-tuple TIDs, pause and do a cycle of vacuuming before we tackle
* dead_items TIDs, pause and do a cycle of vacuuming before we tackle
* this page.
*/
if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
dead_tuples->num_tuples > 0)
Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
{
/*
* Before beginning index vacuuming , we release any pin we may
@@ -1271,7 +1268,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
}
/*
* By here we definitely have enough dead_tuples space for whatever
* By here we definitely have enough dead_items space for whatever
* LP_DEAD tids are on this page, we have the visibility map page set
* up in case we need to set this page's all_visible/all_frozen bit,
* and we have a cleanup lock . Any tuples on this page are now sure
@@ -1391,8 +1388,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
lazy_vacuum_heap_page ( vacrel , blkno , buf , 0 , & vmbuffer ) ;
/* Forget the now-vacuumed tuples */
dead_tuples->num_tuples = 0;
/* Forget the LP_DEAD items that we just vacuumed */
dead_items->num_items = 0;
/*
* Periodically perform FSM vacuuming to make newly-freed
@@ -1429,7 +1426,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
* with prunestate-driven visibility map and FSM steps (just like
* the two-pass strategy).
*/
Assert(dead_tuples->num_tuples == 0);
Assert(dead_items->num_items == 0);
}
/*
@@ -1587,7 +1584,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
}
/* Perform a final round of index and heap vacuuming */
if (dead_tuples->num_tuples > 0)
if (dead_items->num_items > 0)
lazy_vacuum ( vacrel ) ;
/*
@@ -1605,11 +1602,10 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
lazy_cleanup_all_indexes ( vacrel ) ;
/*
* Free resources managed by lazy_space_alloc ( ) . ( We must end parallel
* mode / free shared memory before updating index statistics . We cannot
* write while in parallel mode . )
* Free resources managed by dead_items_alloc . This will end parallel
* mode when needed ( it must end before we update index statistics ) .
*/
lazy_space_free ( vacrel ) ;
dead_items_cleanup ( vacrel ) ;
/* Update index statistics */
if ( vacrel - > nindexes > 0 & & vacrel - > do_index_cleanup )
@@ -1678,7 +1674,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
* The approach we take now is to restart pruning when the race condition is
* detected . This allows heap_page_prune ( ) to prune the tuples inserted by
* the now - aborted transaction . This is a little crude , but it guarantees
* that any items that make it into the dead_tuples array are simple LP_DEAD
* that any items that make it into the dead_items array are simple LP_DEAD
* line pointers , and that every remaining item with tuple storage is
* considered as a candidate for freezing .
*/
@@ -2025,12 +2021,11 @@ retry:
# endif
/*
* Now save details of the LP_DEAD items from the page in the dead_tuples
* array
* Now save details of the LP_DEAD items from the page in vacrel
*/
if ( lpdead_items > 0 )
{
LVDeadTuples *dead_tuples = vacrel->dead_tuples;
LVDeadItems *dead_items = vacrel->dead_items;
ItemPointerData tmp ;
Assert ( ! prunestate - > all_visible ) ;
@@ -2043,12 +2038,12 @@ retry:
for ( int i = 0 ; i < lpdead_items ; i + + )
{
ItemPointerSetOffsetNumber ( & tmp , deadoffsets [ i ] ) ;
dead_tuples->itemptrs[dead_tuples->num_tuples++] = tmp;
dead_items->items[dead_items->num_items++] = tmp;
}
Assert(dead_tuples->num_tuples <= dead_tuples->max_tuples);
Assert(dead_items->num_items <= dead_items->max_items);
pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
dead_tuples->num_tuples);
dead_items->num_items);
}
/* Finally, add page-local counts to whole-VACUUM counts */
@@ -2079,7 +2074,7 @@ lazy_vacuum(LVRelState *vacrel)
if ( ! vacrel - > do_index_vacuuming )
{
Assert ( ! vacrel - > do_index_cleanup ) ;
vacrel->dead_tuples->num_tuples = 0;
vacrel->dead_items->num_items = 0;
return ;
}
@@ -2108,7 +2103,7 @@ lazy_vacuum(LVRelState *vacrel)
BlockNumber threshold ;
Assert ( vacrel - > num_index_scans = = 0 ) ;
Assert(vacrel->lpdead_items == vacrel->dead_tuples->num_tuples);
Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
Assert ( vacrel - > do_index_vacuuming ) ;
Assert ( vacrel - > do_index_cleanup ) ;
@@ -2124,7 +2119,7 @@ lazy_vacuum(LVRelState *vacrel)
* to store the TIDs ( TIDs that now all point to LP_DEAD items ) must
* not exceed 32 MB . This limits the risk that we will bypass index
* vacuuming again and again until eventually there is a VACUUM whose
* dead_tuples space is not CPU cache resident.
* dead_items space is not CPU cache resident.
*
* We don't take any special steps to remember the LP_DEAD items (such
* as counting them in new_dead_tuples report to the stats collector)
@@ -2136,7 +2131,7 @@
*/
threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
bypass = (vacrel->lpdead_item_pages < threshold &&
vacrel->lpdead_items < MAXDEADTUPLES(32L * 1024L * 1024L));
vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
}
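/*
* Illustrative aside, not part of the patch: assuming the 6-byte
* ItemPointerData and 8-byte header noted earlier, MAXDEADITEMS(32MB)
* works out to (33554432 - 8) / 6, about 5.59 million TIDs. Assuming
* BYPASS_THRESHOLD_PAGES is 0.02, a 1,000,000-page table stays eligible
* for this bypass on its first round of index vacuuming until LP_DEAD
* items appear on at least 20,000 heap pages or the collected TIDs reach
* that ~5.59 million ceiling.
*/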
if ( bypass )
@@ -2186,7 +2181,7 @@ lazy_vacuum(LVRelState *vacrel)
* Forget the LP_DEAD items that we just vacuumed ( or just decided to not
* vacuum )
*/
vacrel->dead_tuples->num_tuples = 0;
vacrel->dead_items->num_items = 0;
}
/*
@@ -2260,7 +2255,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
* place).
*/
Assert(vacrel->num_index_scans > 0 ||
vacrel->dead_tuples->num_tuples == vacrel->lpdead_items);
vacrel->dead_items->num_items == vacrel->lpdead_items);
Assert ( allindexes | | vacrel - > failsafe_active ) ;
/*
@@ -2279,7 +2274,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
/*
* lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
*
* This routine marks LP_DEAD items in vacrel->dead_tuples array as LP_UNUSED.
* This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
* Pages that never had lazy_scan_prune record LP_DEAD items are not visited
* at all .
*
@@ -2297,7 +2292,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
static void
lazy_vacuum_heap_rel(LVRelState *vacrel)
{
int tupindex;
int index;
BlockNumber vacuumed_pages ;
PGRUsage ru0 ;
Buffer vmbuffer = InvalidBuffer ;
@@ -2319,8 +2314,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
pg_rusage_init(&ru0);
vacuumed_pages = 0;
tupindex = 0;
while (tupindex < vacrel->dead_tuples->num_tuples)
index = 0;
while (index < vacrel->dead_items->num_items)
{
BlockNumber tblk ;
Buffer buf ;
@@ -2329,13 +2324,12 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
vacuum_delay_point();
tblk = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[tupindex]);
tblk = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
vacrel - > blkno = tblk ;
buf = ReadBufferExtended ( vacrel - > rel , MAIN_FORKNUM , tblk , RBM_NORMAL ,
vacrel - > bstrategy ) ;
LockBuffer ( buf , BUFFER_LOCK_EXCLUSIVE ) ;
tupindex = lazy_vacuum_heap_page ( vacrel , tblk , buf , tupindex ,
& vmbuffer ) ;
index = lazy_vacuum_heap_page ( vacrel , tblk , buf , index , & vmbuffer ) ;
/* Now that we've vacuumed the page, record its available space */
page = BufferGetPage ( buf ) ;
@@ -2359,14 +2353,14 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
* We set all LP_DEAD items from the first heap pass to LP_UNUSED during
* the second heap pass. No more, no less.
*/
Assert(tupindex > 0);
Assert(index > 0);
Assert(vacrel->num_index_scans > 1 ||
(tupindex == vacrel->lpdead_items &&
(index == vacrel->lpdead_items &&
vacuumed_pages == vacrel->lpdead_item_pages));
ereport(elevel,
(errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
vacrel->relname, (long long) tupindex, vacuumed_pages),
vacrel->relname, (long long) index, vacuumed_pages),
errdetail_internal ( " %s " , pg_rusage_show ( & ru0 ) ) ) ) ;
/* Revert to the previous phase information for error traceback */
@@ -2375,27 +2369,27 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
/*
* lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
* vacrel->dead_tuples array.
* vacrel->dead_items array.
*
* Caller must have an exclusive buffer lock on the buffer ( though a full
* cleanup lock is also acceptable ) .
*
* tupindex is the index in vacrel->dead_tuples of the first dead tuple for
* this page. We assume the rest follow sequentially. The return value is
* the first tupindex after the tuples of this page.
* index is an offset into the vacrel->dead_items array for the first listed
* LP_DEAD item on the page. The return value is the first index immediately
* after all LP_DEAD items for the same page in the array.
*
* Prior to PostgreSQL 14 there were rare cases where this routine had to set
* tuples with storage to unused . These days it is strictly responsible for
* marking LP_DEAD stub line pointers as unused . This only happens for those
* LP_DEAD items on the page that were determined to be LP_DEAD items back
* when the same page was visited by lazy_scan_prune() (i.e. those whose TID
* was recorded in the dead_tuples array).
* was recorded in the dead_items array at the time).
*/
static int
lazy_vacuum_heap_page ( LVRelState * vacrel , BlockNumber blkno , Buffer buffer ,
int tupindex, Buffer *vmbuffer)
int index, Buffer *vmbuffer)
{
LVDeadTuples *dead_tuples = vacrel->dead_tuples;
LVDeadItems *dead_items = vacrel->dead_items;
Page page = BufferGetPage ( buffer ) ;
OffsetNumber unused [ MaxHeapTuplesPerPage ] ;
int uncnt = 0 ;
@@ -2414,16 +2408,16 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
START_CRIT_SECTION();
for (; tupindex < dead_tuples->num_tuples; tupindex++)
for (; index < dead_items->num_items; index++)
{
BlockNumber tblk ;
OffsetNumber toff ;
ItemId itemid ;
tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
if ( tblk ! = blkno )
break ; /* past end of tuples for this block */
toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
itemid = PageGetItemId ( page , toff ) ;
Assert ( ItemIdIsDead ( itemid ) & & ! ItemIdHasStorage ( itemid ) ) ;
@@ -2503,7 +2497,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
/* Revert to the previous phase information for error traceback */
restore_vacuum_error_info(vacrel, &saved_err_info);
return tupindex;
return index;
}
/*
@@ -3000,11 +2994,13 @@ lazy_cleanup_all_indexes(LVRelState *vacrel)
/*
* lazy_vacuum_one_index() -- vacuum index relation.
*
* Delete all the index entries pointing to tuples listed in
* dead_tuples, and update running statistics.
* Delete all the index tuples containing a TID collected in
* vacrel->dead_items array. Also update running statistics.
* Exact details depend on index AM's ambulkdelete routine.
*
* reltuples is the number of heap tuples to be passed to the
* bulkdelete callback. It's always assumed to be estimated.
* See indexam.sgml for more info.
*
* Returns bulk delete stats derived from input stats
*/
@@ -3040,11 +3036,11 @@ lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
/* Do bulk deletion */
istat = index_bulk_delete(&ivinfo, istat, lazy_tid_reaped,
(void *) vacrel->dead_tuples);
(void *) vacrel->dead_items);
ereport ( elevel ,
(errmsg("scanned index \"%s\" to remove %d row versions",
vacrel->indname, vacrel->dead_tuples->num_tuples),
vacrel->indname, vacrel->dead_items->num_items),
errdetail_internal ( " %s " , pg_rusage_show ( & ru0 ) ) ) ) ;
/* Revert to the previous phase information for error traceback */
@@ -3058,8 +3054,9 @@ lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
/*
* lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
*
* reltuples is the number of heap tuples and estimated_count is true
* if reltuples is an estimated value .
* Calls index AM's amvacuumcleanup routine. reltuples is the number
* of heap tuples and estimated_count is true if reltuples is an
* estimated value. See indexam.sgml for more info.
*
* Returns bulk delete stats derived from input stats
*/
@@ -3433,45 +3430,72 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
}
/*
* Return the maximum number of dead tuples we can record.
* Returns the number of dead TIDs that VACUUM should allocate space to
* store, given a heap rel of size vacrel->rel_pages, and given current
* maintenance_work_mem setting (or current autovacuum_work_mem setting,
* when applicable).
*
* See the comments at the head of this file for rationale .
*/
static long
compute_max_dead_tuples ( BlockNumber relblocks , bool hasindex )
static int
dead_items_max_items ( LVRelState * vacrel )
{
long maxtuples ;
int64 max_items;
int vac_work_mem = IsAutoVacuumWorkerProcess() &&
autovacuum_work_mem != -1 ?
autovacuum_work_mem : maintenance_work_mem;
if ( hasindex )
Assert ( ! IsParallelWorker ( ) ) ;
if ( vacrel - > nindexes > 0 )
{
maxtuples = MAXDEADTUPLES ( vac_work_mem * 1024L ) ;
maxtuples = Min ( maxtuples , INT_MAX ) ;
maxtuples = Min ( maxtuples , MAXDEADTUPLES ( MaxAllocSize ) ) ;
BlockNumber rel_pages = vacrel - > rel_pages ;
max_items = MAXDEADITEMS ( vac_work_mem * 1024L ) ;
max_items = Min ( max_items , INT_MAX ) ;
max_items = Min ( max_items , MAXDEADITEMS ( MaxAllocSize ) ) ;
/* curious coding here to ensure the multiplication can't overflow */
if ((BlockNumber) (maxtuples / MaxHeapTuplesPerPage) > relblocks)
maxtuples = relblocks * MaxHeapTuplesPerPage;
if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
max_items = rel_pages * MaxHeapTuplesPerPage;
/* stay sane if small maintenance_work_mem */
maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
max_items = Max(max_items, MaxHeapTuplesPerPage);
}
else
maxtuples = MaxHeapTuplesPerPage ;
{
/* One-pass case only stores a single heap page's TIDs at a time */
max_items = MaxHeapTuplesPerPage ;
}
return ( int ) max_items ;
}
return maxtuples ;
/*
* Returns the total required space for VACUUM's dead_items array given a
* max_items value returned by dead_items_max_items
*/
static inline Size
max_items_to_alloc_size ( int max_items )
{
Assert ( max_items > = MaxHeapTuplesPerPage ) ;
Assert ( max_items < = MAXDEADITEMS ( MaxAllocSize ) ) ;
return offsetof ( LVDeadItems , items ) + sizeof ( ItemPointerData ) * max_items ;
}
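/*
* Illustrative aside, not part of the patch: with the default 8KB block
* size MaxHeapTuplesPerPage is 291, so for a 1000-page table with indexes
* and ample maintenance_work_mem, dead_items_max_items() is capped by the
* relation size at 1000 * 291 = 291000 TIDs, and
* max_items_to_alloc_size(291000) comes to 8 + 6 * 291000 bytes, or
* roughly 1.7MB.
*/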
/*
* lazy_space_alloc - space allocation decisions for lazy vacuum
* Allocate dead_items ( either using palloc , or in dynamic shared memory ) .
* Sets dead_items in vacrel for caller .
*
* See the comments at the head of this file for rationale .
* Also handles parallel initialization as part of allocating dead_items in
* DSM when required .
*/
static void
lazy_space_alloc ( LVRelState * vacrel , int nworkers , BlockNumber nblocks )
dead_items_alloc(LVRelState *vacrel, int nworkers)
{
LVDeadTuples *dead_tuples;
long maxtuples;
LVDeadItems *dead_items;
int max_items;
/*
* Initialize state for a parallel vacuum . As of now , only one worker can
@@ -3496,30 +3520,33 @@ lazy_space_alloc(LVRelState *vacrel, int nworkers, BlockNumber nblocks)
vacrel - > relname ) ) ) ;
}
else
vacrel - > lps = begin_parallel_vacuum ( vacrel , nblocks , nworkers ) ;
begin_parallel_vacuum ( vacrel , nworkers ) ;
/* If parallel mode started, we're done */
/* If parallel mode started, vacrel->dead_items allocated in DSM */
if ( ParallelVacuumIsActive ( vacrel ) )
return ;
}
maxtuples = compute_max_dead_tuples ( nblocks , vacrel - > nindexes > 0 ) ;
dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
dead_tuples->num_tuples = 0;
dead_tuples->max_tuples = (int) maxtuples;
/* Serial VACUUM case */
max_items = dead_items_max_items(vacrel);
dead_items = (LVDeadItems *) palloc(max_items_to_alloc_size(max_items));
dead_items->max_items = max_items;
dead_items->num_items = 0;
vacrel->dead_tuples = dead_tuples;
vacrel->dead_items = dead_items;
}
/*
* lazy_space_free - free space allocated in lazy_space_alloc
* Perform cleanup for resources allocated in dead_items_alloc
*/
static void
lazy_space_free ( LVRelState * vacrel )
dead_items_cleanup ( LVRelState * vacrel )
{
if ( ! ParallelVacuumIsActive ( vacrel ) )
{
/* Don't bother with pfree here */
return ;
}
/*
* End parallel mode before updating index statistics as we cannot write
@@ -3533,24 +3560,24 @@ lazy_space_free(LVRelState *vacrel)
*
* This has the right signature to be an IndexBulkDeleteCallback.
*
* Assumes dead_tuples array is in sorted order.
* Assumes dead_items array is sorted (in ascending TID order).
*/
static bool
lazy_tid_reaped ( ItemPointer itemptr , void * state )
{
LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
LVDeadItems *dead_items = (LVDeadItems *) state;
int64 litem ,
ritem ,
item ;
ItemPointer res ;
litem = itemptr_encode(&dead_tuples->itemptrs[0]);
ritem = itemptr_encode(&dead_tuples->itemptrs[dead_tuples->num_tuples - 1]);
litem = itemptr_encode(&dead_items->items[0]);
ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
item = itemptr_encode ( itemptr ) ;
/*
* Doing a simple bound check before bsearch ( ) is useful to avoid the
* extra cost of bsearch(), especially if dead tuples on the heap are
* extra cost of bsearch(), especially if dead items on the heap are
* concentrated in a certain range . Since this function is called for
* every index tuple , it pays to be really fast .
*/
@@ -3558,8 +3585,8 @@ lazy_tid_reaped(ItemPointer itemptr, void *state)
return false;
res = (ItemPointer) bsearch((void *) itemptr,
(void *) dead_tuples->itemptrs,
dead_tuples->num_tuples,
(void *) dead_items->items,
dead_items->num_items,
sizeof ( ItemPointerData ) ,
vac_cmp_itemptr ) ;
@@ -3820,26 +3847,28 @@ update_index_statistics(LVRelState *vacrel)
}
/*
* This function prepares and returns parallel vacuum state if we can launch
* even one worker . This function is responsible for entering parallel mode ,
* create a parallel context , and then initialize the DSM segment .
* Try to enter parallel mode and create a parallel context . Then initialize
* shared memory state .
*
* On success ( when we can launch one or more workers ) , will set dead_items and
* lps in vacrel for caller . A set lps in vacrel state indicates that parallel
* VACUUM is currently active .
*/
static LVParallelState *
begin_parallel_vacuum ( LVRelState * vacrel , BlockNumber nblocks ,
int nrequested )
static void
begin_parallel_vacuum ( LVRelState * vacrel , int nrequested )
{
LVParallelState * lps = NULL ;
LVParallelState * lps ;
Relation * indrels = vacrel - > indrels ;
int nindexes = vacrel - > nindexes ;
ParallelContext * pcxt ;
LVShared * shared ;
LVDeadTuples *dead_tuples;
LVDeadItems *dead_items;
BufferUsage * buffer_usage ;
WalUsage * wal_usage ;
bool * will_parallel_vacuum ;
long maxtuples;
Size est_shared;
Size est_deadtuples;
int max_items;
Size est_shared_len;
Size est_dead_items_len;
int nindexes_mwm = 0 ;
int parallel_workers = 0 ;
int querylen ;
@@ -3858,12 +3887,11 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
parallel_workers = compute_parallel_vacuum_workers ( vacrel ,
nrequested ,
will_parallel_vacuum ) ;
/* Can't perform vacuum in parallel */
if ( parallel_workers < = 0 )
{
/* Can't perform vacuum in parallel -- lps not set in vacrel */
pfree ( will_parallel_vacuum ) ;
return lps ;
return ;
}
lps = ( LVParallelState * ) palloc0 ( sizeof ( LVParallelState ) ) ;
@@ -3875,7 +3903,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
lps - > pcxt = pcxt ;
/* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
est_shared = MAXALIGN ( add_size ( SizeOfLVShared , BITMAPLEN ( nindexes ) ) ) ;
est_shared_len = MAXALIGN ( add_size ( SizeOfLVShared , BITMAPLEN ( nindexes ) ) ) ;
for ( int idx = 0 ; idx < nindexes ; idx + + )
{
Relation indrel = indrels [ idx ] ;
@@ -3896,7 +3924,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
if ( indrel - > rd_indam - > amusemaintenanceworkmem )
nindexes_mwm + + ;
est_shared = add_size ( est_shared , sizeof ( LVSharedIndStats ) ) ;
est_shared_len = add_size ( est_shared_len , sizeof ( LVSharedIndStats ) ) ;
/*
* Remember the number of indexes that support parallel operation for
@@ -3909,13 +3937,13 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
if ( ( vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP ) ! = 0 )
lps - > nindexes_parallel_condcleanup + + ;
}
shm_toc_estimate_chunk ( & pcxt - > estimator , est_shared ) ;
shm_toc_estimate_chunk ( & pcxt - > estimator , est_shared_len ) ;
shm_toc_estimate_keys ( & pcxt - > estimator , 1 ) ;
/* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
maxtuples = compute_max_dead_tuples(nblocks, true);
est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
max_items = dead_items_max_items(vacrel);
est_dead_items_len = MAXALIGN(max_items_to_alloc_size(max_items));
shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
shm_toc_estimate_keys ( & pcxt - > estimator , 1 ) ;
/*
@@ -3946,8 +3974,8 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
InitializeParallelDSM ( pcxt ) ;
/* Prepare shared information */
shared = ( LVShared * ) shm_toc_allocate ( pcxt - > toc , est_shared ) ;
MemSet ( shared , 0 , est_shared ) ;
shared = ( LVShared * ) shm_toc_allocate ( pcxt - > toc , est_shared_len ) ;
MemSet ( shared , 0 , est_shared_len ) ;
shared - > relid = RelationGetRelid ( vacrel - > rel ) ;
shared - > elevel = elevel ;
shared - > maintenance_work_mem_worker =
@@ -3977,13 +4005,13 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
lps->lvshared = shared;
/* Prepare the dead tuple space */
dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
dead_tuples->max_tuples = maxtuples;
dead_tuples->num_tuples = 0;
MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
vacrel->dead_tuples = dead_tuples;
/* Prepare the dead_items space */
dead_items = (LVDeadItems *) shm_toc_allocate(pcxt->toc,
est_dead_items_len);
dead_items->max_items = max_items;
dead_items->num_items = 0;
MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
/*
* Allocate space for each worker's BufferUsage and WalUsage; no need to
@@ -4011,7 +4039,10 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
}
pfree ( will_parallel_vacuum ) ;
return lps ;
/* Success -- set dead_items and lps in leader's vacrel state */
vacrel - > dead_items = dead_items ;
vacrel - > lps = lps ;
}
/*
@@ -4141,7 +4172,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Relation rel ;
Relation * indrels ;
LVShared * lvshared ;
LVDeadTuples *dead_tuples;
LVDeadItems *dead_items;
BufferUsage * buffer_usage ;
WalUsage * wal_usage ;
int nindexes ;
@@ -4183,9 +4214,9 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
Assert(nindexes > 0);
/* Set dead tuple space */
dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
PARALLEL_VACUUM_KEY_DEAD_TUPLES,
/* Set dead_items space (set as worker's vacrel dead_items below) */
dead_items = (LVDeadItems *) shm_toc_lookup(toc,
PARALLEL_VACUUM_KEY_DEAD_ITEMS,
false ) ;
/* Set cost-based vacuum delay */
@@ -4216,7 +4247,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
vacrel . relname = pstrdup ( RelationGetRelationName ( rel ) ) ;
vacrel . indname = NULL ;
vacrel . phase = VACUUM_ERRCB_PHASE_UNKNOWN ; /* Not yet processing */
vacrel.dead_tuples = dead_tuples;
vacrel.dead_items = dead_items;
/* Setup error traceback support for ereport() */
errcallback . callback = vacuum_error_callback ;