Rearrange lazy_scan_heap to avoid visibility map race conditions.

We must set the visibility map bit before releasing our exclusive lock
on the heap page; otherwise, someone might clear the heap page bit
before we set the visibility map bit, leading to a situation where the
visibility map thinks the page is all-visible but it's really not.
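
To make the race concrete, here is a condensed sketch of the two orderings,
using only calls that appear in the diff below; the surrounding vacuum
context (onerel, buf, page, blkno, vmbuffer) is assumed, so this is an
outline of the locking protocol rather than standalone-compilable code.

    /* OLD (racy): drop the exclusive content lock, then set the VM bit. */
    PageSetAllVisible(page);
    MarkBufferDirty(buf);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);    /* <-- window: another backend can
                                             * clear the page-level bit before
                                             * the VM bit gets set */
    visibilitymap_pin(onerel, blkno, &vmbuffer);
    LockBuffer(buf, BUFFER_LOCK_SHARE);
    if (PageIsAllVisible(page))
        visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    /* NEW (safe): pin the VM page up front, and set the VM bit while the
     * heap page is still exclusively locked, so the page-level bit and the
     * VM bit cannot get out of step. */
    visibilitymap_pin(onerel, blkno, &vmbuffer);    /* before locking buf */
    /* ... lock the buffer, scan the page, decide it is all-visible ... */
    PageSetAllVisible(page);
    MarkBufferDirty(buf);
    visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
    UnlockReleaseBuffer(buf);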

This problem has existed since 8.4, but it wasn't critical before we
had index-only scans, since the worst-case scenario was that the page
wouldn't get vacuumed until the next scan_all vacuum.

Along the way, a couple of minor, related improvements: (1) if we
pause the heap scan to do an index vacuuming cycle, release any
visibility map page we're holding, since very long-running pins are
not good for a variety of reasons; and (2) warn if we see a page
that's marked all-visible in the visibility map but not at the page
level, since that should no longer happen (it was allowed in previous
releases, but not in 9.2).
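
Condensed into a loop skeleton, the pin management behind improvements (1)
and (2) looks roughly like this; need_index_vacuum_cycle and
do_index_vacuum_cycle() are hypothetical stand-ins for the dead-tuple-array
check and the index-vacuuming phase, and everything else is taken from the
hunks below.

    Buffer      vmbuffer = InvalidBuffer;
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        if (need_index_vacuum_cycle)    /* dead-tuple array nearly full */
        {
            /* (1) don't hold a VM page pin across the long index pass */
            if (BufferIsValid(vmbuffer))
            {
                ReleaseBuffer(vmbuffer);
                vmbuffer = InvalidBuffer;
            }
            do_index_vacuum_cycle();    /* hypothetical stand-in */
        }

        /* Re-pin; cheap if the correct VM page is already pinned. */
        visibilitymap_pin(onerel, blkno, &vmbuffer);

        /* ... scan the heap page; per (2), warn and clear the VM bit if
         * it is set while the page-level bit is clear ... */
    }

    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);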
commit 7ab9b2f3b7 (parent 85efd5f065, branch pull/3/head)
Robert Haas authored 14 years ago

1 changed file: src/backend/commands/vacuumlazy.c (85 lines)

@@ -490,6 +490,18 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
             vacrelstats->num_dead_tuples > 0)
         {
+            /*
+             * Before beginning index vacuuming, we release any pin we may hold
+             * on the visibility map page. This isn't necessary for correctness,
+             * but we do it anyway to avoid holding the pin across a lengthy,
+             * unrelated operation.
+             */
+            if (BufferIsValid(vmbuffer))
+            {
+                ReleaseBuffer(vmbuffer);
+                vmbuffer = InvalidBuffer;
+            }
+
             /* Log cleanup info before we touch indexes */
             vacuum_log_cleanup_info(onerel, vacrelstats);
@@ -510,6 +522,16 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
             vacrelstats->num_index_scans++;
         }
 
+        /*
+         * Pin the visibility map page in case we need to mark the page
+         * all-visible. In most cases this will be very cheap, because we'll
+         * already have the correct page pinned anyway. However, it's possible
+         * that (a) next_not_all_visible_block is covered by a different VM page
+         * than the current block or (b) we released our pin and did a cycle of
+         * index vacuuming.
+         */
+        visibilitymap_pin(onerel, blkno, &vmbuffer);
+
         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
                                  RBM_NORMAL, vac_strategy);
@@ -600,26 +622,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
             empty_pages++;
             freespace = PageGetHeapFreeSpace(page);
 
+            /* empty pages are always all-visible */
             if (!PageIsAllVisible(page))
             {
                 PageSetAllVisible(page);
-                SetBufferCommitInfoNeedsSave(buf);
+                MarkBufferDirty(buf);
+                visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
             }
 
-            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
-            /* Update the visibility map */
-            if (!all_visible_according_to_vm)
-            {
-                visibilitymap_pin(onerel, blkno, &vmbuffer);
-                LockBuffer(buf, BUFFER_LOCK_SHARE);
-                if (PageIsAllVisible(page))
-                    visibilitymap_set(onerel, blkno, InvalidXLogRecPtr,
-                                      vmbuffer);
-                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-            }
-
-            ReleaseBuffer(buf);
+            UnlockReleaseBuffer(buf);
             RecordPageWithFreeSpace(onerel, blkno, freespace);
             continue;
         }
@@ -834,11 +845,26 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         freespace = PageGetHeapFreeSpace(page);
 
-        /* Update the all-visible flag on the page */
-        if (!PageIsAllVisible(page) && all_visible)
+        /* mark page all-visible, if appropriate */
+        if (all_visible && !all_visible_according_to_vm)
         {
-            PageSetAllVisible(page);
-            MarkBufferDirty(buf);
+            if (!PageIsAllVisible(page))
+            {
+                PageSetAllVisible(page);
+                MarkBufferDirty(buf);
+            }
+            visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
         }
 
+        /*
+         * As of PostgreSQL 9.2, the visibility map bit should never be set if
+         * the page-level bit is clear.
+         */
+        else if (all_visible_according_to_vm && !PageIsAllVisible(page))
+        {
+            elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
+                 relname, blkno);
+            visibilitymap_clear(onerel, blkno, vmbuffer);
+        }
+
         /*
@@ -859,30 +885,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
             elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
                  relname, blkno);
             PageClearAllVisible(page);
-            SetBufferCommitInfoNeedsSave(buf);
-
-            /*
-             * Normally, we would drop the lock on the heap page before
-             * updating the visibility map, but since this case shouldn't
-             * happen anyway, don't worry about that.
-             */
-            visibilitymap_pin(onerel, blkno, &vmbuffer);
+            MarkBufferDirty(buf);
             visibilitymap_clear(onerel, blkno, vmbuffer);
         }
 
-        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
-        /* Update the visibility map */
-        if (!all_visible_according_to_vm && all_visible)
-        {
-            visibilitymap_pin(onerel, blkno, &vmbuffer);
-            LockBuffer(buf, BUFFER_LOCK_SHARE);
-            if (PageIsAllVisible(page))
-                visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
-            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-        }
-
-        ReleaseBuffer(buf);
+        UnlockReleaseBuffer(buf);
 
         /* Remember the location of the last page with nonremovable tuples */
         if (hastup)
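
One mechanical simplification worth noting: with the visibility-map update
moved inside the locked section, the trailing unlock-then-release pair in the
last two hunks collapses into UnlockReleaseBuffer(), whose definition in
src/backend/storage/buffer/bufmgr.c is essentially:

    void
    UnlockReleaseBuffer(Buffer buffer)
    {
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
        ReleaseBuffer(buffer);
    }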
