Fix a deadlock during ALTER SUBSCRIPTION ... DROP PUBLICATION.

A deadlock can occur when the DDL command and the apply worker acquire
catalog locks in different orders while dropping replication origins.

The issue is rare in PG16 and higher branches because, in most cases, the
tablesync worker performs the origin drop in those branches, and its
locking sequence does not conflict with DDL operations.

This patch ensures consistent lock acquisition to prevent such deadlocks.

As per buildfarm.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Author: Ajin Cherian <itsajin@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 14, where it was introduced
Discussion: https://postgr.es/m/bab95e12-6cc5-4ebb-80a8-3e41956aa297@gmail.com
REL_14_STABLE
Amit Kapila 2 months ago
parent 7ee7c1cd38
commit 41fb3f51cb
  1. 31
      src/backend/catalog/pg_subscription.c
  2. 25
      src/backend/replication/logical/tablesync.c
  3. 2
      src/include/catalog/pg_subscription_rel.h

@ -281,8 +281,8 @@ AddSubscriptionRelState(Oid subid, Oid relid, char state,
* Update the state of a subscription table. * Update the state of a subscription table.
*/ */
void void
UpdateSubscriptionRelState(Oid subid, Oid relid, char state, UpdateSubscriptionRelStateEx(Oid subid, Oid relid, char state,
XLogRecPtr sublsn) XLogRecPtr sublsn, bool already_locked)
{ {
Relation rel; Relation rel;
HeapTuple tup; HeapTuple tup;
@ -290,9 +290,24 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
Datum values[Natts_pg_subscription_rel]; Datum values[Natts_pg_subscription_rel];
bool replaces[Natts_pg_subscription_rel]; bool replaces[Natts_pg_subscription_rel];
LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); if (already_locked)
{
#ifdef USE_ASSERT_CHECKING
LOCKTAG tag;
Assert(CheckRelationOidLockedByMe(SubscriptionRelRelationId,
RowExclusiveLock, true));
SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0);
Assert(LockHeldByMe(&tag, AccessShareLock));
#endif
rel = table_open(SubscriptionRelRelationId, NoLock);
}
else
{
LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock);
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
}
/* Try finding existing mapping. */ /* Try finding existing mapping. */
tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP, tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP,
@ -326,6 +341,16 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
table_close(rel, NoLock); table_close(rel, NoLock);
} }
/*
* Update the state of a subscription table.
*/
void
UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
XLogRecPtr sublsn)
{
UpdateSubscriptionRelStateEx(subid, relid, state, sublsn, false);
}
/* /*
* Get state of subscription table. * Get state of subscription table.
* *

@ -366,6 +366,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
static HTAB *last_start_times = NULL; static HTAB *last_start_times = NULL;
ListCell *lc; ListCell *lc;
bool started_tx = false; bool started_tx = false;
Relation rel = NULL;
Assert(!IsTransactionState()); Assert(!IsTransactionState());
@ -463,7 +464,16 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
* refresh for the subscription where we remove the table * refresh for the subscription where we remove the table
* state and its origin and by this time the origin might be * state and its origin and by this time the origin might be
* already removed. So passing missing_ok = true. * already removed. So passing missing_ok = true.
*
* Lock the subscription and origin in the same order as we
* are doing during DDL commands to avoid deadlocks. See
* AlterSubscription_refresh.
*/ */
LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
0, AccessShareLock);
if (!rel)
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid, ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid,
rstate->relid, rstate->relid,
originname, originname,
@ -473,9 +483,9 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
/* /*
* Update the state to READY only after the origin cleanup. * Update the state to READY only after the origin cleanup.
*/ */
UpdateSubscriptionRelState(MyLogicalRepWorker->subid, UpdateSubscriptionRelStateEx(MyLogicalRepWorker->subid,
rstate->relid, rstate->state, rstate->relid, rstate->state,
rstate->lsn); rstate->lsn, true);
} }
} }
else else
@ -526,7 +536,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
* This is required to avoid any undetected deadlocks * This is required to avoid any undetected deadlocks
* due to any existing lock as deadlock detector won't * due to any existing lock as deadlock detector won't
* be able to detect the waits on the latch. * be able to detect the waits on the latch.
*
* Also close any tables prior to the commit.
*/ */
if (rel)
{
table_close(rel, NoLock);
rel = NULL;
}
CommitTransactionCommand(); CommitTransactionCommand();
pgstat_report_stat(false); pgstat_report_stat(false);
} }
@ -586,6 +603,10 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
} }
} }
/* Close table if opened */
if (rel)
table_close(rel, NoLock);
if (started_tx) if (started_tx)
{ {
CommitTransactionCommand(); CommitTransactionCommand();

@ -85,6 +85,8 @@ extern void AddSubscriptionRelState(Oid subid, Oid relid, char state,
XLogRecPtr sublsn); XLogRecPtr sublsn);
extern void UpdateSubscriptionRelState(Oid subid, Oid relid, char state, extern void UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
XLogRecPtr sublsn); XLogRecPtr sublsn);
extern void UpdateSubscriptionRelStateEx(Oid subid, Oid relid, char state,
XLogRecPtr sublsn, bool already_locked);
extern char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn); extern char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn);
extern void RemoveSubscriptionRel(Oid subid, Oid relid); extern void RemoveSubscriptionRel(Oid subid, Oid relid);

Loading…
Cancel
Save