Restructure subtransaction handling to reduce resource consumption,

as per recent discussions. Invent SubTransactionIds that are managed like CommandIds (ie, counter is reset at start of each top transaction), and use these instead of TransactionIds to keep track of subtransaction status in those modules that need it. This means that a subtransaction does not need an XID unless it actually inserts/modifies rows in the database. Accordingly, don't assign it an XID nor take a lock on the XID until it tries to do that. This saves a lot of overhead for subtransactions that are only used for error recovery (eg plpgsql exceptions). Also, arrange to release a subtransaction's XID lock as soon as the subtransaction exits, in both the commit and abort cases. This avoids holding many unique locks after a long series of subtransactions. The price is some additional overhead in XactLockTableWait, but that seems acceptable. Finally, restructure the state machine in xact.c to have a more orthogonal set of states for subtransactions.
21 years ago · 8f9f198603
parent 42c0d1f3cd
commit 8f9f198603
34 changed files with 1192 additions and 917 deletions
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.174 2004/09/11 18:28:32 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.175 2004/09/16 16:58:25 tgl Exp $
 *
 *
 * INTERFACE ROUTINES
@ -1108,6 +1108,7 @@ heap_get_latest_tid(Relation relation,
 Oid
 heap_insert(Relation relation, HeapTuple tup, CommandId cid)
 {
+	TransactionId xid = GetCurrentTransactionId();
 	Buffer		buffer;

 	if (relation->rd_rel->relhasoids)
@ -1139,7 +1140,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)

 	tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
 	tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
-	HeapTupleHeaderSetXmin(tup->t_data, GetCurrentTransactionId());
+	HeapTupleHeaderSetXmin(tup->t_data, xid);
 	HeapTupleHeaderSetCmin(tup->t_data, cid);
 	HeapTupleHeaderSetCmax(tup->t_data, 0);		/* zero out Datum fields */
 	tup->t_tableOid = relation->rd_id;
@ -1277,6 +1278,7 @@ heap_delete(Relation relation, ItemPointer tid,
 			ItemPointer ctid, CommandId cid,
 			Snapshot crosscheck, bool wait)
 {
+	TransactionId xid = GetCurrentTransactionId();
 	ItemId		lp;
 	HeapTupleData tp;
 	PageHeader	dp;
@ -1365,7 +1367,7 @@ l1:
 							   HEAP_XMAX_INVALID |
 							   HEAP_MARKED_FOR_UPDATE |
 							   HEAP_MOVED);
-	HeapTupleHeaderSetXmax(tp.t_data, GetCurrentTransactionId());
+	HeapTupleHeaderSetXmax(tp.t_data, xid);
 	HeapTupleHeaderSetCmax(tp.t_data, cid);
 	/* Make sure there is no forward chain link in t_ctid */
 	tp.t_data->t_ctid = tp.t_self;
@ -1495,6 +1497,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 			ItemPointer ctid, CommandId cid,
 			Snapshot crosscheck, bool wait)
 {
+	TransactionId xid = GetCurrentTransactionId();
 	ItemId		lp;
 	HeapTupleData oldtup;
 	PageHeader	dp;
@ -1603,7 +1606,7 @@ l2:

 	newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
 	newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
-	HeapTupleHeaderSetXmin(newtup->t_data, GetCurrentTransactionId());
+	HeapTupleHeaderSetXmin(newtup->t_data, xid);
 	HeapTupleHeaderSetCmin(newtup->t_data, cid);
 	HeapTupleHeaderSetCmax(newtup->t_data, 0);	/* zero out Datum fields */

@ -1644,7 +1647,7 @@ l2:
 									   HEAP_MARKED_FOR_UPDATE |
 									   HEAP_MOVED);
 		oldtup.t_data->t_infomask |= HEAP_XMAX_UNLOGGED;
-		HeapTupleHeaderSetXmax(oldtup.t_data, GetCurrentTransactionId());
+		HeapTupleHeaderSetXmax(oldtup.t_data, xid);
 		HeapTupleHeaderSetCmax(oldtup.t_data, cid);
 		already_marked = true;
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@ -1735,7 +1738,7 @@ l2:
 									   HEAP_XMAX_INVALID |
 									   HEAP_MARKED_FOR_UPDATE |
 									   HEAP_MOVED);
-		HeapTupleHeaderSetXmax(oldtup.t_data, GetCurrentTransactionId());
+		HeapTupleHeaderSetXmax(oldtup.t_data, xid);
 		HeapTupleHeaderSetCmax(oldtup.t_data, cid);
 	}

@ -1836,6 +1839,7 @@ int
 heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
 				 CommandId cid)
 {
+	TransactionId xid = GetCurrentTransactionId();
 	ItemPointer tid = &(tuple->t_self);
 	ItemId		lp;
 	PageHeader	dp;
@ -1912,7 +1916,7 @@ l3:
 								   HEAP_XMAX_INVALID |
 								   HEAP_MOVED);
 	tuple->t_data->t_infomask |= HEAP_MARKED_FOR_UPDATE;
-	HeapTupleHeaderSetXmax(tuple->t_data, GetCurrentTransactionId());
+	HeapTupleHeaderSetXmax(tuple->t_data, xid);
 	HeapTupleHeaderSetCmax(tuple->t_data, cid);
 	/* Make sure there is no forward chain link in t_ctid */
 	tuple->t_data->t_ctid = *tid;
@ -2584,6 +2588,7 @@ newsame:;
 static void
 _heap_unlock_tuple(void *data)
 {
+	TransactionId xid = GetCurrentTransactionId();
 	xl_heaptid *xltid = (xl_heaptid *) data;
 	Relation	reln = XLogOpenRelation(false, RM_HEAP_ID, xltid->node);
 	Buffer		buffer;
@ -2614,13 +2619,12 @@ _heap_unlock_tuple(void *data)

 	htup = (HeapTupleHeader) PageGetItem(page, lp);

-	if (!TransactionIdEquals(HeapTupleHeaderGetXmax(htup), GetCurrentTransactionId()))
+	if (!TransactionIdEquals(HeapTupleHeaderGetXmax(htup), xid))
 		elog(PANIC, "_heap_unlock_tuple: invalid xmax in rollback");
 	htup->t_infomask &= ~HEAP_XMAX_UNLOGGED;
 	htup->t_infomask |= HEAP_XMAX_INVALID;
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
-	return;
 }

 void
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.1 2004/08/01 20:57:59 tgl Exp $
+$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.2 2004/09/16 16:58:26 tgl Exp $

 The Transaction System
 ----------------------
@ -9,7 +9,7 @@ the mainloop's control code, which in turn implements user-visible
 transactions and savepoints.

 The middle layer of code is called by postgres.c before and after the
-processing of each query:
+processing of each query, or after detecting an error:

 		StartTransactionCommand
 		CommitTransactionCommand
@ -44,9 +44,9 @@ effects of previous commands within the same transaction.  Note that this is
 done automatically by CommitTransactionCommand after each query inside a
 transaction block, but some utility functions also do it internally to allow
 some operations (usually in the system catalogs) to be seen by future
-operations in the same utility command (for example, in DefineRelation it is
+operations in the same utility command.  (For example, in DefineRelation it is
 done after creating the heap so the pg_class row is visible, to be able to
-lock it).
+lock it.)


 For example, consider the following sequence of user commands:
@ -60,26 +60,26 @@ In the main processing loop, this results in the following function call
 sequence:

 	 /	StartTransactionCommand;
-	/	ProcessUtility;				<< BEGIN
-1) <			BeginTransactionBlock;
-	\	CommitTransactionCommand;
-	 \		StartTransaction;
+	/		StartTransaction;
+1) <		ProcessUtility;				<< BEGIN
+	\		BeginTransactionBlock;
+	 \	CommitTransactionCommand;

 	/	StartTransactionCommand;
-2) /		ProcessQuery;				<< SELECT * FROM foo
+2) /		ProcessQuery;				<< SELECT ...
   \		CommitTransactionCommand;
 	\		CommandCounterIncrement;

 	/	StartTransactionCommand;
-3) /		ProcessQuery;				<< INSERT INTO foo VALUES (...)
+3) /		ProcessQuery;				<< INSERT ...
   \		CommitTransactionCommand;
 	\		CommandCounterIncrement;

 	 /	StartTransactionCommand;
 	/	ProcessUtility;				<< COMMIT
 4) <			EndTransactionBlock;
-	\			CommitTransaction;
-	 \	CommitTransactionCommand;
+	\	CommitTransactionCommand;
+	 \		CommitTransaction;

 The point of this example is to demonstrate the need for
 StartTransactionCommand and CommitTransactionCommand to be state smart -- they
@ -118,7 +118,7 @@ to do all the real work.  The only difference is what state we enter after
 AbortTransaction does its work:

 * AbortCurrentTransaction leaves us in TBLOCK_ABORT,
-* UserAbortTransactionBlock leaves us in TBLOCK_ENDABORT
+* UserAbortTransactionBlock leaves us in TBLOCK_ABORT_END

 Low-level transaction abort handling is divided in two phases:
 * AbortTransaction executes as soon as we realize the transaction has
@ -126,7 +126,7 @@ Low-level transaction abort handling is divided in two phases:
  not delay other backends unnecessarily.
 * CleanupTransaction executes when we finally see a user COMMIT
  or ROLLBACK command; it cleans things up and gets us out of the transaction
-  internally.  In particular, we mustn't destroy TopTransactionContext until
+  completely.  In particular, we mustn't destroy TopTransactionContext until
  this point.

 Also, note that when a transaction is committed, we don't close it right away.
@ -163,28 +163,48 @@ called so the system returns to the parent transaction.
 One important point regarding subtransaction handling is that several may need
 to be closed in response to a single user command.  That's because savepoints
 have names, and we allow committing or rolling back a savepoint by name, which is
-not necessarily the one that was last opened.  In the case of subtransaction
-commit this is not a problem, and we close all the involved subtransactions
-right away by calling CommitTransactionToLevel, which in turn calls
-CommitSubTransaction and PopTransaction as many times as needed.
-
-In the case of subtransaction abort (when the user issues ROLLBACK TO
-<savepoint>), things are not so easy.  We have to keep the subtransactions
-open and return control to the main loop.  So what RollbackToSavepoint does is
-abort the innermost subtransaction and put it in TBLOCK_SUBENDABORT state, and
-put the rest in TBLOCK_SUBABORT_PENDING state.  Then we return control to the
-main loop, which will in turn return control to us by calling
-CommitTransactionCommand.  At this point we can close all subtransactions that
-are marked with the "abort pending" state.  When that's done, the outermost
-subtransaction is created again, to conform to SQL's definition of ROLLBACK TO.
+not necessarily the one that was last opened.  Also a COMMIT or ROLLBACK
+command must be able to close out the entire stack.  We handle this by having
+the utility command subroutine mark all the state stack entries as commit-
+pending or abort-pending, and then when the main loop reaches
+CommitTransactionCommand, the real work is done.  The main point of doing
+things this way is that if we get an error while popping state stack entries,
+the remaining stack entries still show what we need to do to finish up.
+
+In the case of ROLLBACK TO <savepoint>, we abort all the subtransactions up
+through the one identified by the savepoint name, and then re-create that
+subtransaction level with the same name.  So it's a completely new
+subtransaction as far as the internals are concerned.

 Other subsystems are allowed to start "internal" subtransactions, which are
 handled by BeginInternalSubtransaction.  This is to allow implementing
 exception handling, e.g. in PL/pgSQL.  ReleaseCurrentSubTransaction and
 RollbackAndReleaseCurrentSubTransaction allow the subsystem to close said
 subtransactions.  The main difference between this and the savepoint/release
-path is that BeginInternalSubtransaction is allowed when no explicit
-transaction block has been established, while DefineSavepoint is not.
+path is that we execute the complete state transition immediately in each
+subroutine, rather than deferring some work until CommitTransactionCommand.
+Another difference is that BeginInternalSubtransaction is allowed when no
+explicit transaction block has been established, while DefineSavepoint is not.
+
+
+Subtransaction numbering
+------------------------
+
+A top-level transaction is always given a TransactionId (XID) as soon as it is
+created.  This is necessary for a number of reasons, notably XMIN bookkeeping
+for VACUUM.  However, a subtransaction doesn't need its own XID unless it
+(or one of its child subxacts) writes tuples into the database.  Therefore,
+we postpone assigning XIDs to subxacts until and unless they call
+GetCurrentTransactionId.  The subsidiary actions of obtaining a lock on the
+XID and entering it into pg_subtrans and PG_PROC are done at the same time.
+
+Internally, a backend needs a way to identify subtransactions whether or not
+they have XIDs; but this need only lasts as long as the parent top transaction
+endures.  Therefore, we have SubTransactionId, which is somewhat like
+CommandId in that it's generated from a counter that we reset at the start of
+each top transaction.  The top-level transaction itself has SubTransactionId 1,
+and subtransactions have IDs 2 and up.  (Zero is reserved for
+InvalidSubTransactionId.)


 pg_clog and pg_subtrans
@ -197,27 +217,28 @@ there's a long running transaction or a backend sitting idle with an open
 transaction, it may be necessary to be able to read and write this information
 from disk.  They also allow information to be permanent across server restarts.

-pg_clog records the commit status for each transaction.  A transaction can be
-in progress, committed, aborted, or "sub-committed".  This last state means
-that it's a subtransaction that's no longer running, but its parent has not
-updated its state yet (either it is still running, or the backend crashed
-without updating its status).  A sub-committed transaction's status will be
-updated again to the final value as soon as the parent commits or aborts, or
-when the parent is detected to be aborted.
+pg_clog records the commit status for each transaction that has been assigned
+an XID.  A transaction can be in progress, committed, aborted, or
+"sub-committed".  This last state means that it's a subtransaction that's no
+longer running, but its parent has not updated its state yet (either it is
+still running, or the backend crashed without updating its status).  A
+sub-committed transaction's status will be updated again to the final value as
+soon as the parent commits or aborts, or when the parent is detected to be
+aborted.

 Savepoints are implemented using subtransactions.  A subtransaction is a
-transaction inside a transaction; it gets its own TransactionId, but its
-commit or abort status is not only dependent on whether it committed itself,
-but also whether its parent transaction committed.  To implement multiple
-savepoints in a transaction we allow unlimited transaction nesting depth, so
-any particular subtransaction's commit state is dependent on the commit status
-of each and every ancestor transaction.
+transaction inside a transaction; its commit or abort status is not only
+dependent on whether it committed itself, but also whether its parent
+transaction committed.  To implement multiple savepoints in a transaction we
+allow unlimited transaction nesting depth, so any particular subtransaction's
+commit state is dependent on the commit status of each and every ancestor
+transaction.

 The "subtransaction parent" (pg_subtrans) mechanism records, for each
-transaction, the TransactionId of its parent transaction.  This information is
-stored as soon as the subtransaction is created.  Top-level transactions do
-not have a parent, so they leave their pg_subtrans entries set to the default
-value of zero (InvalidTransactionId).
+transaction with an XID, the TransactionId of its parent transaction.  This
+information is stored as soon as the subtransaction is assigned an XID.
+Top-level transactions do not have a parent, so they leave their pg_subtrans
+entries set to the default value of zero (InvalidTransactionId).

 pg_subtrans is used to check whether the transaction in question is still
 running --- the main Xid of a transaction is recorded in the PGPROC struct,
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.169 2004/09/06 03:04:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.170 2004/09/16 16:58:26 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -747,7 +747,7 @@ begin:;
 	/* Insert record header */

 	record->xl_prev = Insert->PrevRecord;
-	record->xl_xid = GetCurrentTransactionId();
+	record->xl_xid = GetCurrentTransactionIdIfAny();
 	record->xl_len = len;		/* doesn't include backup blocks */
 	record->xl_info = info;
 	record->xl_rmid = rmid;
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@ -13,7 +13,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.70 2004/08/29 05:06:41 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.71 2004/09/16 16:58:27 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -108,16 +108,16 @@ static bool namespaceSearchPathValid = true;
 * in a particular backend session (this happens when a CREATE TEMP TABLE
 * command is first executed).	Thereafter it's the OID of the temp namespace.
 *
- * myTempNamespaceXID shows whether we've created the TEMP namespace in the
- * current transaction.  The TransactionId propagates up the transaction tree,
+ * myTempNamespaceSubID shows whether we've created the TEMP namespace in the
+ * current subtransaction.  The flag propagates up the subtransaction tree,
 * so the main transaction will correctly recognize the flag if all
- * intermediate subtransactions commit.  When it is InvalidTransactionId,
+ * intermediate subtransactions commit.  When it is InvalidSubTransactionId,
 * we either haven't made the TEMP namespace yet, or have successfully
 * committed its creation, depending on whether myTempNamespace is valid.
 */
 static Oid	myTempNamespace = InvalidOid;

-static TransactionId myTempNamespaceXID = InvalidTransactionId;
+static SubTransactionId myTempNamespaceSubID = InvalidSubTransactionId;

 /*
 * "Special" namespace for CREATE SCHEMA.  If set, it's the first search
@ -1696,8 +1696,8 @@ InitTempTableNamespace(void)
 	myTempNamespace = namespaceId;

 	/* It should not be done already. */
-	AssertState(myTempNamespaceXID == InvalidTransactionId);
-	myTempNamespaceXID = GetCurrentTransactionId();
+	AssertState(myTempNamespaceSubID == InvalidSubTransactionId);
+	myTempNamespaceSubID = GetCurrentSubTransactionId();

 	namespaceSearchPathValid = false;	/* need to rebuild list */
 }
@ -1716,7 +1716,7 @@ AtEOXact_Namespace(bool isCommit)
 	 * temp tables at backend shutdown.  (We only want to register the
 	 * callback once per session, so this is a good place to do it.)
 	 */
-	if (myTempNamespaceXID == GetCurrentTransactionId())
+	if (myTempNamespaceSubID != InvalidSubTransactionId)
 	{
 		if (isCommit)
 			on_shmem_exit(RemoveTempRelationsCallback, 0);
@ -1725,7 +1725,7 @@ AtEOXact_Namespace(bool isCommit)
 			myTempNamespace = InvalidOid;
 			namespaceSearchPathValid = false;	/* need to rebuild list */
 		}
-		myTempNamespaceXID = InvalidTransactionId;
+		myTempNamespaceSubID = InvalidSubTransactionId;
 	}

 	/*
@ -1742,21 +1742,21 @@ AtEOXact_Namespace(bool isCommit)
 * AtEOSubXact_Namespace
 *
 * At subtransaction commit, propagate the temp-namespace-creation
- * flag to the parent transaction.
+ * flag to the parent subtransaction.
 *
 * At subtransaction abort, forget the flag if we set it up.
 */
 void
-AtEOSubXact_Namespace(bool isCommit, TransactionId myXid,
-					  TransactionId parentXid)
+AtEOSubXact_Namespace(bool isCommit, SubTransactionId mySubid,
+					  SubTransactionId parentSubid)
 {
-	if (myTempNamespaceXID == myXid)
+	if (myTempNamespaceSubID == mySubid)
 	{
 		if (isCommit)
-			myTempNamespaceXID = parentXid;
+			myTempNamespaceSubID = parentSubid;
 		else
 		{
-			myTempNamespaceXID = InvalidTransactionId;
+			myTempNamespaceSubID = InvalidSubTransactionId;
 			/* TEMP namespace creation failed, so reset state */
 			myTempNamespace = InvalidOid;
 			namespaceSearchPathValid = false;	/* need to rebuild list */
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@ -14,7 +14,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.35 2004/09/13 20:06:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.36 2004/09/16 16:58:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -309,7 +309,7 @@ PersistHoldablePortal(Portal portal)
 	 * If we're preserving a holdable portal, we had better be inside the
 	 * transaction that originally created it.
 	 */
-	Assert(portal->createXact == GetCurrentTransactionId());
+	Assert(portal->createSubid != InvalidSubTransactionId);
 	Assert(queryDesc != NULL);

 	/*
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.116 2004/08/29 05:06:41 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.117 2004/09/16 16:58:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -23,6 +23,8 @@
 #include "miscadmin.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/resowner.h"
+

 /*
 * We don't want to log each fetching of a value from a sequence,
@ -754,25 +756,14 @@ static void
 init_sequence(RangeVar *relation, SeqTable *p_elm, Relation *p_rel)
 {
 	Oid			relid = RangeVarGetRelid(relation, false);
-	TransactionId thisxid = GetCurrentTransactionId();
-	SeqTable	elm;
+	TransactionId thisxid = GetTopTransactionId();
+	volatile SeqTable elm;
 	Relation	seqrel;

-	/* Look to see if we already have a seqtable entry for relation */
-	for (elm = seqtab; elm != NULL; elm = elm->next)
-	{
-		if (elm->relid == relid)
-			break;
-	}
-
 	/*
-	 * Open the sequence relation, acquiring AccessShareLock if we don't
-	 * already have a lock in the current xact.
+	 * Open the sequence relation.
 	 */
-	if (elm == NULL || elm->xid != thisxid)
-		seqrel = relation_open(relid, AccessShareLock);
-	else
-		seqrel = relation_open(relid, NoLock);
+	seqrel = relation_open(relid, NoLock);

 	if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE)
 		ereport(ERROR,
@ -780,6 +771,13 @@ init_sequence(RangeVar *relation, SeqTable *p_elm, Relation *p_rel)
 				 errmsg("\"%s\" is not a sequence",
 						relation->relname)));

+	/* Look to see if we already have a seqtable entry for relation */
+	for (elm = seqtab; elm != NULL; elm = elm->next)
+	{
+		if (elm->relid == relid)
+			break;
+	}
+
 	/*
 	 * Allocate new seqtable entry if we didn't find one.
 	 *
@ -799,14 +797,42 @@ init_sequence(RangeVar *relation, SeqTable *p_elm, Relation *p_rel)
 					(errcode(ERRCODE_OUT_OF_MEMORY),
 					 errmsg("out of memory")));
 		elm->relid = relid;
+		elm->xid = InvalidTransactionId;
 		/* increment is set to 0 until we do read_info (see currval) */
 		elm->last = elm->cached = elm->increment = 0;
 		elm->next = seqtab;
 		seqtab = elm;
 	}

-	/* Flag that we have a lock in the current xact. */
-	elm->xid = thisxid;
+	/*
+	 * If we haven't touched the sequence already in this transaction,
+	 * we need to acquire AccessShareLock.  We arrange for the lock to
+	 * be owned by the top transaction, so that we don't need to do it
+	 * more than once per xact.
+	 */
+	if (elm->xid != thisxid)
+	{
+		ResourceOwner currentOwner;
+
+		currentOwner = CurrentResourceOwner;
+		PG_TRY();
+		{
+			CurrentResourceOwner = TopTransactionResourceOwner;
+
+			LockRelation(seqrel, AccessShareLock);
+		}
+		PG_CATCH();
+		{
+			/* Ensure CurrentResourceOwner is restored on error */
+			CurrentResourceOwner = currentOwner;
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+		CurrentResourceOwner = currentOwner;
+
+		/* Flag that we have a lock in the current xact. */
+		elm->xid = thisxid;
+	}

 	*p_elm = elm;
 	*p_rel = seqrel;
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.131 2004/08/31 23:27:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.132 2004/09/16 16:58:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -71,14 +71,14 @@ typedef struct OnCommitItem
 	OnCommitAction oncommit;	/* what to do at end of xact */

 	/*
-	 * If this entry was created during this xact, it should be deleted at
-	 * xact abort.	Conversely, if this entry was deleted during this
-	 * xact, it should be removed at xact commit.  We leave deleted
-	 * entries in the list until commit so that we can roll back if
-	 * needed.
+	 * If this entry was created during the current transaction,
+	 * creating_subid is the ID of the creating subxact; if created in a prior
+	 * transaction, creating_subid is zero.  If deleted during the current
+	 * transaction, deleting_subid is the ID of the deleting subxact; if no
+	 * deletion request is pending, deleting_subid is zero.
 	 */
-	TransactionId creating_xid;
-	TransactionId deleting_xid;
+	SubTransactionId creating_subid;
+	SubTransactionId deleting_subid;
 } OnCommitItem;

 static List *on_commits = NIL;
@ -5821,8 +5821,8 @@ register_on_commit_action(Oid relid, OnCommitAction action)
 	oc = (OnCommitItem *) palloc(sizeof(OnCommitItem));
 	oc->relid = relid;
 	oc->oncommit = action;
-	oc->creating_xid = GetCurrentTransactionId();
-	oc->deleting_xid = InvalidTransactionId;
+	oc->creating_subid = GetCurrentSubTransactionId();
+	oc->deleting_subid = InvalidSubTransactionId;

 	on_commits = lcons(oc, on_commits);

@ -5845,7 +5845,7 @@ remove_on_commit_action(Oid relid)

 		if (oc->relid == relid)
 		{
-			oc->deleting_xid = GetCurrentTransactionId();
+			oc->deleting_subid = GetCurrentSubTransactionId();
 			break;
 		}
 	}
@ -5860,7 +5860,6 @@ remove_on_commit_action(Oid relid)
 void
 PreCommit_on_commit_actions(void)
 {
-	TransactionId xid = GetCurrentTransactionId();
 	ListCell   *l;

 	foreach(l, on_commits)
@ -5868,7 +5867,7 @@ PreCommit_on_commit_actions(void)
 		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

 		/* Ignore entry if already dropped in this xact */
-		if (oc->deleting_xid == xid)
+		if (oc->deleting_subid != InvalidSubTransactionId)
 			continue;

 		switch (oc->oncommit)
@ -5895,7 +5894,7 @@ PreCommit_on_commit_actions(void)
 					 * remove_on_commit_action, so the entry should get
 					 * marked as deleted.
 					 */
-					Assert(oc->deleting_xid == xid);
+					Assert(oc->deleting_subid != InvalidSubTransactionId);
 					break;
 				}
 		}
@ -5911,7 +5910,7 @@ PreCommit_on_commit_actions(void)
 * during abort, remove those created during this transaction.
 */
 void
-AtEOXact_on_commit_actions(bool isCommit, TransactionId xid)
+AtEOXact_on_commit_actions(bool isCommit)
 {
 	ListCell   *cur_item;
 	ListCell   *prev_item;
@ -5923,8 +5922,8 @@ AtEOXact_on_commit_actions(bool isCommit, TransactionId xid)
 	{
 		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

-		if (isCommit ? TransactionIdEquals(oc->deleting_xid, xid) :
-			TransactionIdEquals(oc->creating_xid, xid))
+		if (isCommit ? oc->deleting_subid != InvalidSubTransactionId :
+			oc->creating_subid != InvalidSubTransactionId)
 		{
 			/* cur_item must be removed */
 			on_commits = list_delete_cell(on_commits, cur_item, prev_item);
@ -5937,8 +5936,8 @@ AtEOXact_on_commit_actions(bool isCommit, TransactionId xid)
 		else
 		{
 			/* cur_item must be preserved */
-			oc->creating_xid = InvalidTransactionId;
-			oc->deleting_xid = InvalidTransactionId;
+			oc->creating_subid = InvalidSubTransactionId;
+			oc->deleting_subid = InvalidSubTransactionId;
 			prev_item = cur_item;
 			cur_item = lnext(prev_item);
 		}
@ -5953,8 +5952,8 @@ AtEOXact_on_commit_actions(bool isCommit, TransactionId xid)
 * this subtransaction as being the parent's responsibility.
 */
 void
-AtEOSubXact_on_commit_actions(bool isCommit, TransactionId childXid,
-							  TransactionId parentXid)
+AtEOSubXact_on_commit_actions(bool isCommit, SubTransactionId mySubid,
+							  SubTransactionId parentSubid)
 {
 	ListCell   *cur_item;
 	ListCell   *prev_item;
@ -5966,7 +5965,7 @@ AtEOSubXact_on_commit_actions(bool isCommit, TransactionId childXid,
 	{
 		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

-		if (!isCommit && TransactionIdEquals(oc->creating_xid, childXid))
+		if (!isCommit && oc->creating_subid == mySubid)
 		{
 			/* cur_item must be removed */
 			on_commits = list_delete_cell(on_commits, cur_item, prev_item);
@ -5979,10 +5978,10 @@ AtEOSubXact_on_commit_actions(bool isCommit, TransactionId childXid,
 		else
 		{
 			/* cur_item must be preserved */
-			if (TransactionIdEquals(oc->creating_xid, childXid))
-				oc->creating_xid = parentXid;
-			if (TransactionIdEquals(oc->deleting_xid, childXid))
-				oc->deleting_xid = isCommit ? parentXid : InvalidTransactionId;
+			if (oc->creating_subid == mySubid)
+				oc->creating_subid = parentSubid;
+			if (oc->deleting_subid == mySubid)
+				oc->deleting_subid = isCommit ? parentSubid : InvalidSubTransactionId;
 			prev_item = cur_item;
 			cur_item = lnext(prev_item);
 		}
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c
@ -6,7 +6,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.144 2004/08/29 05:06:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.145 2004/09/16 16:58:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -45,28 +45,30 @@
 extern bool Password_encryption;

 /*
- * The need-to-update-files flags are a pair of TransactionIds that show what
- * level of the transaction tree requested the update.	To register an update,
- * the transaction saves its own TransactionId in the flag, unless the value
- * was already set to a valid TransactionId.  If it aborts and the value is its
- * TransactionId, it resets the value to InvalidTransactionId.	If it commits,
- * it changes the value to its parent's TransactionId.  This way the value is
- * propagated up to the topmost transaction, which will update the files if a
- * valid TransactionId is detected.
+ * The need-to-update-files flags are a pair of SubTransactionIds that show
+ * what level of the subtransaction tree requested the update. To register
+ * an update, the subtransaction saves its own SubTransactionId in the flag,
+ * unless the value was already set to a valid SubTransactionId (which implies
+ * that it or a parent level has already requested the same).  If it aborts
+ * and the value is its SubTransactionId, it resets the flag to
+ * InvalidSubTransactionId. If it commits, it changes the value to its
+ * parent's SubTransactionId.  This way the value is propagated up to the
+ * top-level transaction, which will update the files if a valid
+ * SubTransactionId is detected.
 */
-static TransactionId user_file_update_xid = InvalidTransactionId;
-static TransactionId group_file_update_xid = InvalidTransactionId;
+static SubTransactionId user_file_update_subid = InvalidSubTransactionId;
+static SubTransactionId group_file_update_subid = InvalidSubTransactionId;

 #define user_file_update_needed() \
 	do { \
-		if (user_file_update_xid == InvalidTransactionId) \
-			user_file_update_xid = GetCurrentTransactionId(); \
+		if (user_file_update_subid == InvalidSubTransactionId) \
+			user_file_update_subid = GetCurrentSubTransactionId(); \
 	} while (0)

 #define group_file_update_needed() \
 	do { \
-		if (group_file_update_xid == InvalidTransactionId) \
-			group_file_update_xid = GetCurrentTransactionId(); \
+		if (group_file_update_subid == InvalidSubTransactionId) \
+			group_file_update_subid = GetCurrentSubTransactionId(); \
 	} while (0)


@ -451,14 +453,14 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
 	Relation	urel = NULL;
 	Relation	grel = NULL;

-	if (user_file_update_xid == InvalidTransactionId &&
-		group_file_update_xid == InvalidTransactionId)
+	if (user_file_update_subid == InvalidSubTransactionId &&
+		group_file_update_subid == InvalidSubTransactionId)
 		return;

 	if (!isCommit)
 	{
-		user_file_update_xid = InvalidTransactionId;
-		group_file_update_xid = InvalidTransactionId;
+		user_file_update_subid = InvalidSubTransactionId;
+		group_file_update_subid = InvalidSubTransactionId;
 		return;
 	}

@ -470,22 +472,22 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
 	 * pg_shadow or pg_group, which likely won't have gotten a strong
 	 * enough lock), so get the locks we need before writing anything.
 	 */
-	if (user_file_update_xid != InvalidTransactionId)
+	if (user_file_update_subid != InvalidSubTransactionId)
 		urel = heap_openr(ShadowRelationName, ExclusiveLock);
-	if (group_file_update_xid != InvalidTransactionId)
+	if (group_file_update_subid != InvalidSubTransactionId)
 		grel = heap_openr(GroupRelationName, ExclusiveLock);

 	/* Okay to write the files */
-	if (user_file_update_xid != InvalidTransactionId)
+	if (user_file_update_subid != InvalidSubTransactionId)
 	{
-		user_file_update_xid = InvalidTransactionId;
+		user_file_update_subid = InvalidSubTransactionId;
 		write_user_file(urel);
 		heap_close(urel, NoLock);
 	}

-	if (group_file_update_xid != InvalidTransactionId)
+	if (group_file_update_subid != InvalidSubTransactionId)
 	{
-		group_file_update_xid = InvalidTransactionId;
+		group_file_update_subid = InvalidSubTransactionId;
 		write_group_file(grel);
 		heap_close(grel, NoLock);
 	}
@ -503,24 +505,25 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
 * need-to-update-files flags.
 */
 void
-AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid,
-							   TransactionId parentXid)
+AtEOSubXact_UpdatePasswordFile(bool isCommit,
+							   SubTransactionId mySubid,
+							   SubTransactionId parentSubid)
 {
 	if (isCommit)
 	{
-		if (user_file_update_xid == myXid)
-			user_file_update_xid = parentXid;
+		if (user_file_update_subid == mySubid)
+			user_file_update_subid = parentSubid;

-		if (group_file_update_xid == myXid)
-			group_file_update_xid = parentXid;
+		if (group_file_update_subid == mySubid)
+			group_file_update_subid = parentSubid;
 	}
 	else
 	{
-		if (user_file_update_xid == myXid)
-			user_file_update_xid = InvalidTransactionId;
+		if (user_file_update_subid == mySubid)
+			user_file_update_subid = InvalidSubTransactionId;

-		if (group_file_update_xid == myXid)
-			group_file_update_xid = InvalidTransactionId;
+		if (group_file_update_subid == mySubid)
+			group_file_update_subid = InvalidSubTransactionId;
 	}
 }

--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/spi.c,v 1.127 2004/09/13 20:06:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/spi.c,v 1.128 2004/09/16 16:58:29 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -104,7 +104,7 @@ SPI_connect(void)
 	_SPI_current = &(_SPI_stack[_SPI_connected]);
 	_SPI_current->processed = 0;
 	_SPI_current->tuptable = NULL;
-	_SPI_current->connectXid = GetCurrentTransactionId();
+	_SPI_current->connectSubid = GetCurrentSubTransactionId();

 	/*
 	 * Create memory contexts for this procedure
@ -198,10 +198,10 @@ AtEOXact_SPI(bool isCommit)
 * Clean up SPI state at subtransaction commit or abort.
 *
 * During commit, there shouldn't be any unclosed entries remaining from
- * the current transaction; we throw them away if found.
+ * the current subtransaction; we emit a warning if any are found.
 */
 void
-AtEOSubXact_SPI(bool isCommit, TransactionId childXid)
+AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid)
 {
 	bool		found = false;

@ -209,7 +209,7 @@ AtEOSubXact_SPI(bool isCommit, TransactionId childXid)
 	{
 		_SPI_connection *connection = &(_SPI_stack[_SPI_connected]);

-		if (connection->connectXid != childXid)
+		if (connection->connectSubid != mySubid)
 			break;				/* couldn't be any underneath it either */

 		found = true;
@ -235,7 +235,7 @@ AtEOSubXact_SPI(bool isCommit, TransactionId childXid)
 		ereport(WARNING,
 				(errcode(ERRCODE_WARNING),
 				 errmsg("subtransaction left non-empty SPI stack"),
-				 errhint("Check for missing \"SPI_finish\" calls")));
+				 errhint("Check for missing \"SPI_finish\" calls.")));
 }


@ -1692,8 +1692,7 @@ _SPI_copy_plan(_SPI_plan *plan, int location)
 		parentcxt = _SPI_current->procCxt;
 	else if (location == _SPI_CPLAN_TOPCXT)
 		parentcxt = TopMemoryContext;
-	else
-/* (this case not currently used) */
+	else	/* (this case not currently used) */
 		parentcxt = CurrentMemoryContext;

 	/*
--- a/src/backend/libpq/be-fsstubs.c
+++ b/src/backend/libpq/be-fsstubs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.75 2004/09/11 15:56:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.76 2004/09/16 16:58:30 tgl Exp $
 *
 * NOTES
 *	  This should be moved to a more appropriate place.  It is here
@ -551,11 +551,11 @@ AtEOXact_LargeObject(bool isCommit)
 *		Take care of large objects at subtransaction commit/abort
 *
 * Reassign LOs created/opened during a committing subtransaction
- * to the parent transaction.  On abort, just close them.
+ * to the parent subtransaction.  On abort, just close them.
 */
 void
-AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid,
-						TransactionId parentXid)
+AtEOSubXact_LargeObject(bool isCommit, SubTransactionId mySubid,
+						SubTransactionId parentSubid)
 {
 	int			i;

@ -566,10 +566,10 @@ AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid,
 	{
 		LargeObjectDesc *lo = cookies[i];

-		if (lo != NULL && lo->xid == myXid)
+		if (lo != NULL && lo->subid == mySubid)
 		{
 			if (isCommit)
-				lo->xid = parentXid;
+				lo->subid = parentSubid;
 			else
 			{
 				/*
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@ -12,7 +12,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.47 2004/08/29 05:06:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.48 2004/09/16 16:58:31 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -269,7 +269,7 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
 		if (!strategy_hint_vacuum)
 		{
 			if (!cdb->t1_vacuum &&
-				!TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+				!TransactionIdEquals(cdb->t1_xid, GetTopTransactionId()))
 			{
 				STRAT_LIST_REMOVE(cdb);
 				STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
@ -286,7 +286,7 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
 				 */
 				if (cdb->t1_vacuum)
 				{
-					cdb->t1_xid = GetCurrentTransactionId();
+					cdb->t1_xid = GetTopTransactionId();
 					cdb->t1_vacuum = false;
 				}
 			}
@ -644,7 +644,8 @@ StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
 		 */
 		if (strategy_hint_vacuum)
 		{
-			if (TransactionIdIsCurrentTransactionId(strategy_vacuum_xid))
+			if (TransactionIdEquals(strategy_vacuum_xid,
+									GetTopTransactionId()))
 				STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
 			else
 			{
@ -661,7 +662,7 @@ StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
 		 * single UPDATE promoting a newcomer straight into T2. Also
 		 * remember if it was loaded for VACUUM.
 		 */
-		cdb_found->t1_xid = GetCurrentTransactionId();
+		cdb_found->t1_xid = GetTopTransactionId();
 		cdb_found->t1_vacuum = strategy_hint_vacuum;
 	}
 }
@ -727,7 +728,7 @@ void
 StrategyHintVacuum(bool vacuum_active)
 {
 	strategy_hint_vacuum = vacuum_active;
-	strategy_vacuum_xid = GetCurrentTransactionId();
+	strategy_vacuum_xid = GetTopTransactionId();
 }

 /*
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.112 2004/08/29 05:06:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.113 2004/09/16 16:58:32 tgl Exp $
 *
 * NOTES:
 *
@ -123,7 +123,7 @@ typedef struct vfd
 {
 	signed short fd;			/* current FD, or VFD_CLOSED if none */
 	unsigned short fdstate;		/* bitflags for VFD's state */
-	TransactionId create_xid;	/* for XACT_TEMPORARY fds, creating Xid */
+	SubTransactionId create_subid;	/* for TEMPORARY fds, creating subxact */
 	File		nextFree;		/* link to next free VFD, if in freelist */
 	File		lruMoreRecently;	/* doubly linked recency-of-use list */
 	File		lruLessRecently;
@ -171,7 +171,7 @@ typedef struct
 		FILE	   *file;
 		DIR		   *dir;
 	}			desc;
-	TransactionId create_xid;
+	SubTransactionId create_subid;
 } AllocateDesc;

 static int	numAllocatedDescs = 0;
@ -887,7 +887,7 @@ OpenTemporaryFile(bool interXact)
 	if (!interXact)
 	{
 		VfdCache[file].fdstate |= FD_XACT_TEMPORARY;
-		VfdCache[file].create_xid = GetCurrentTransactionId();
+		VfdCache[file].create_subid = GetCurrentSubTransactionId();
 	}

 	return file;
@ -1166,7 +1166,7 @@ TryAgain:

 		desc->kind = AllocateDescFile;
 		desc->desc.file = file;
-		desc->create_xid = GetCurrentTransactionId();
+		desc->create_subid = GetCurrentSubTransactionId();
 		numAllocatedDescs++;
 		return desc->desc.file;
 	}
@ -1281,7 +1281,7 @@ TryAgain:

 		desc->kind = AllocateDescDir;
 		desc->desc.dir = dir;
-		desc->create_xid = GetCurrentTransactionId();
+		desc->create_subid = GetCurrentSubTransactionId();
 		numAllocatedDescs++;
 		return desc->desc.dir;
 	}
@ -1359,10 +1359,11 @@ closeAllVfds(void)
 *
 * Take care of subtransaction commit/abort.  At abort, we close temp files
 * that the subtransaction may have opened.  At commit, we reassign the
- * files that were opened to the parent transaction.
+ * files that were opened to the parent subtransaction.
 */
 void
-AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid)
+AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
+				  SubTransactionId parentSubid)
 {
 	Index		i;

@ -1374,10 +1375,10 @@ AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid)
 			unsigned short fdstate = VfdCache[i].fdstate;

 			if ((fdstate & FD_XACT_TEMPORARY) &&
-				VfdCache[i].create_xid == myXid)
+				VfdCache[i].create_subid == mySubid)
 			{
 				if (isCommit)
-					VfdCache[i].create_xid = parentXid;
+					VfdCache[i].create_subid = parentSubid;
 				else if (VfdCache[i].fileName != NULL)
 					FileClose(i);
 			}
@ -1386,10 +1387,10 @@ AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid)

 	for (i = 0; i < numAllocatedDescs; i++)
 	{
-		if (allocatedDescs[i].create_xid == myXid)
+		if (allocatedDescs[i].create_subid == mySubid)
 		{
 			if (isCommit)
-				allocatedDescs[i].create_xid = parentXid;
+				allocatedDescs[i].create_subid = parentSubid;
 			else
 			{
 				/* have to recheck the item after FreeDesc (ugly) */
--- a/src/backend/storage/large_object/inv_api.c
+++ b/src/backend/storage/large_object/inv_api.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.106 2004/08/29 05:06:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.107 2004/09/16 16:58:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -167,7 +167,7 @@ inv_create(int flags)
 	retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));

 	retval->id = file_oid;
-	retval->xid = GetCurrentTransactionId();
+	retval->subid = GetCurrentSubTransactionId();
 	retval->offset = 0;

 	if (flags & INV_WRITE)
@ -199,7 +199,7 @@ inv_open(Oid lobjId, int flags)
 	retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));

 	retval->id = lobjId;
-	retval->xid = GetCurrentTransactionId();
+	retval->subid = GetCurrentSubTransactionId();
 	retval->offset = 0;

 	if (flags & INV_WRITE)
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.69 2004/08/29 05:06:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.70 2004/09/16 16:58:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -21,6 +21,7 @@
 #include "catalog/catalog.h"
 #include "miscadmin.h"
 #include "storage/lmgr.h"
+#include "storage/sinval.h"
 #include "utils/inval.h"


@ -311,9 +312,6 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
 * Insert a lock showing that the given transaction ID is running ---
 * this is done during xact startup.  The lock can then be used to wait
 * for the transaction to finish.
- *
- * We need no corresponding unlock function, since the lock will always
- * be released implicitly at transaction commit/abort, never any other way.
 */
 void
 XactLockTableInsert(TransactionId xid)
@ -325,19 +323,43 @@ XactLockTableInsert(TransactionId xid)
 	tag.dbId = InvalidOid;		/* xids are globally unique */
 	tag.objId.xid = xid;

-	if (!LockAcquire(LockTableId, &tag, xid,
+	if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
 					 ExclusiveLock, false))
 		elog(ERROR, "LockAcquire failed");
 }

+/*
+ *		XactLockTableDelete
+ *
+ * Delete the lock showing that the given transaction ID is running.
+ * (This is never used for main transaction IDs; those locks are only
+ * released implicitly at transaction end.  But we do use it for subtrans
+ * IDs.)
+ */
+void
+XactLockTableDelete(TransactionId xid)
+{
+	LOCKTAG		tag;
+
+	MemSet(&tag, 0, sizeof(tag));
+	tag.relId = XactLockTableId;
+	tag.dbId = InvalidOid;		/* xids are globally unique */
+	tag.objId.xid = xid;
+
+	LockRelease(LockTableId, &tag, GetTopTransactionId(), ExclusiveLock);
+}
+
 /*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.
 *
- * Note that this does the right thing for subtransactions: if we
- * wait on a subtransaction, we will be awakened as soon as it aborts
- * or its parent commits.
+ * Note that this does the right thing for subtransactions: if we wait on a
+ * subtransaction, we will exit as soon as it aborts or its top parent commits.
+ * It takes some extra work to ensure this, because to save on shared memory
+ * the XID lock of a subtransaction is released when it ends, whether
+ * successfully or unsuccessfully.  So we have to check if it's "still running"
+ * and if so wait for its parent.
 */
 void
 XactLockTableWait(TransactionId xid)
@ -345,18 +367,24 @@ XactLockTableWait(TransactionId xid)
 	LOCKTAG		tag;
 	TransactionId myxid = GetTopTransactionId();

-	Assert(!TransactionIdEquals(xid, myxid));
+	for (;;)
+	{
+		Assert(TransactionIdIsValid(xid));
+		Assert(!TransactionIdEquals(xid, myxid));

-	MemSet(&tag, 0, sizeof(tag));
-	tag.relId = XactLockTableId;
-	tag.dbId = InvalidOid;
-	tag.objId.xid = xid;
+		MemSet(&tag, 0, sizeof(tag));
+		tag.relId = XactLockTableId;
+		tag.dbId = InvalidOid;
+		tag.objId.xid = xid;

-	if (!LockAcquire(LockTableId, &tag, myxid,
-					 ShareLock, false))
-		elog(ERROR, "LockAcquire failed");
+		if (!LockAcquire(LockTableId, &tag, myxid, ShareLock, false))
+			elog(ERROR, "LockAcquire failed");
+		LockRelease(LockTableId, &tag, myxid, ShareLock);

-	LockRelease(LockTableId, &tag, myxid, ShareLock);
+		if (!TransactionIdIsInProgress(xid))
+			break;
+		xid = SubTransGetParent(xid);
+	}

 	/*
 	 * Transaction was committed/aborted/crashed - we have to update
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/xid.c,v 1.5 2004/08/29 04:12:52 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/xid.c,v 1.6 2004/09/16 16:58:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -93,7 +93,7 @@ Datum
 xid_age(PG_FUNCTION_ARGS)
 {
 	TransactionId xid = PG_GETARG_TRANSACTIONID(0);
-	TransactionId now = GetCurrentTransactionId();
+	TransactionId now = GetTopTransactionId();

 	/* Permanent XIDs are always infinitely old */
 	if (!TransactionIdIsNormal(xid))
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.210 2004/08/29 05:06:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.211 2004/09/16 16:58:35 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -836,7 +836,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
 	 */
 	relation->rd_refcnt = 0;
 	relation->rd_isnailed = false;
-	relation->rd_createxact = InvalidTransactionId;
+	relation->rd_createSubid = InvalidSubTransactionId;
 	relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);

 	/*
@ -1287,7 +1287,7 @@ formrdesc(const char *relationName,
 	 * for new or temp relations.
 	 */
 	relation->rd_isnailed = true;
-	relation->rd_createxact = InvalidTransactionId;
+	relation->rd_createSubid = InvalidSubTransactionId;
 	relation->rd_istemp = false;

 	/*
@ -1578,7 +1578,7 @@ RelationClose(Relation relation)

 #ifdef RELCACHE_FORCE_RELEASE
 	if (RelationHasReferenceCountZero(relation) &&
-		!TransactionIdIsValid(relation->rd_createxact))
+		relation->rd_createSubid == InvalidSubTransactionId)
 		RelationClearRelation(relation, false);
 #endif
 }
@ -1736,7 +1736,7 @@ RelationClearRelation(Relation relation, bool rebuild)
 	{
 		/*
 		 * When rebuilding an open relcache entry, must preserve ref count
-		 * and rd_createxact state.  Also attempt to preserve the
+		 * and rd_createSubid state.  Also attempt to preserve the
 		 * tupledesc and rewrite-rule substructures in place.
 		 *
 		 * Note that this process does not touch CurrentResourceOwner; which
@ -1744,7 +1744,7 @@ RelationClearRelation(Relation relation, bool rebuild)
 		 * necessarily belong to that resource owner.
 		 */
 		int			old_refcnt = relation->rd_refcnt;
-		TransactionId old_createxact = relation->rd_createxact;
+		SubTransactionId old_createSubid = relation->rd_createSubid;
 		TupleDesc	old_att = relation->rd_att;
 		RuleLock   *old_rules = relation->rd_rules;
 		MemoryContext old_rulescxt = relation->rd_rulescxt;
@ -1765,7 +1765,7 @@ RelationClearRelation(Relation relation, bool rebuild)
 				 buildinfo.i.info_id);
 		}
 		relation->rd_refcnt = old_refcnt;
-		relation->rd_createxact = old_createxact;
+		relation->rd_createSubid = old_createSubid;
 		if (equalTupleDescs(old_att, relation->rd_att))
 		{
 			/* needn't flush typcache here */
@ -1802,7 +1802,7 @@ RelationFlushRelation(Relation relation)
 {
 	bool		rebuild;

-	if (TransactionIdIsValid(relation->rd_createxact))
+	if (relation->rd_createSubid != InvalidSubTransactionId)
 	{
 		/*
 		 * New relcache entries are always rebuilt, not flushed; else we'd
@ -1948,7 +1948,7 @@ RelationCacheInvalidate(void)
 		}

 		/* Ignore new relations, since they are never SI targets */
-		if (TransactionIdIsValid(relation->rd_createxact))
+		if (relation->rd_createSubid != InvalidSubTransactionId)
 			continue;

 		relcacheInvalsReceived++;
@ -2032,10 +2032,10 @@ AtEOXact_RelationCache(bool isCommit)
 		 * flush, the entry will get deleted anyway by shared-cache-inval
 		 * processing of the aborted pg_class insertion.)
 		 */
-		if (TransactionIdIsValid(relation->rd_createxact))
+		if (relation->rd_createSubid != InvalidSubTransactionId)
 		{
 			if (isCommit)
-				relation->rd_createxact = InvalidTransactionId;
+				relation->rd_createSubid = InvalidSubTransactionId;
 			else
 			{
 				RelationClearRelation(relation, false);
@ -2097,8 +2097,8 @@ AtEOXact_RelationCache(bool isCommit)
 * Note: this must be called *before* processing invalidation messages.
 */
 void
-AtEOSubXact_RelationCache(bool isCommit, TransactionId myXid,
-						  TransactionId parentXid)
+AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
+						  SubTransactionId parentSubid)
 {
 	HASH_SEQ_STATUS status;
 	RelIdCacheEnt *idhentry;
@ -2115,10 +2115,10 @@ AtEOSubXact_RelationCache(bool isCommit, TransactionId myXid,
 		 * During subcommit, mark it as belonging to the parent, instead.
 		 * During subabort, simply delete the relcache entry.
 		 */
-		if (TransactionIdEquals(relation->rd_createxact, myXid))
+		if (relation->rd_createSubid == mySubid)
 		{
 			if (isCommit)
-				relation->rd_createxact = parentXid;
+				relation->rd_createSubid = parentSubid;
 			else
 			{
 				Assert(RelationHasReferenceCountZero(relation));
@ -2182,7 +2182,7 @@ RelationBuildLocalRelation(const char *relname,
 	rel->rd_refcnt = nailit ? 1 : 0;

 	/* it's being created in this transaction */
-	rel->rd_createxact = GetCurrentTransactionId();
+	rel->rd_createSubid = GetCurrentSubTransactionId();

 	/* is it a temporary relation? */
 	rel->rd_istemp = isTempNamespace(relnamespace);
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@ -12,7 +12,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.71 2004/08/29 05:06:51 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.72 2004/09/16 16:58:36 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -192,7 +192,7 @@ CreatePortal(const char *name, bool allowDup, bool dupSilent)

 	/* initialize portal fields that don't start off zero */
 	portal->cleanup = PortalCleanup;
-	portal->createXact = GetCurrentTransactionId();
+	portal->createSubid = GetCurrentSubTransactionId();
 	portal->strategy = PORTAL_MULTI_QUERY;
 	portal->cursorOptions = CURSOR_OPT_NO_SCROLL;
 	portal->atStart = true;
@ -427,7 +427,6 @@ AtCommit_Portals(void)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId xact = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -450,12 +449,9 @@ AtCommit_Portals(void)

 		/*
 		 * Do nothing else to cursors held over from a previous
-		 * transaction. (This test must include checking CURSOR_OPT_HOLD,
-		 * else we will fail to clean up a VACUUM portal if it fails after
-		 * its first sub-transaction.)
+		 * transaction.
 		 */
-		if (portal->createXact != xact &&
-			(portal->cursorOptions & CURSOR_OPT_HOLD))
+		if (portal->createSubid == InvalidSubTransactionId)
 			continue;

 		if ((portal->cursorOptions & CURSOR_OPT_HOLD) &&
@ -479,6 +475,12 @@ AtCommit_Portals(void)
 			 * longer have its own resources.
 			 */
 			portal->resowner = NULL;
+
+			/*
+			 * Having successfully exported the holdable cursor, mark it
+			 * as not belonging to this transaction.
+			 */
+			portal->createSubid = InvalidSubTransactionId;
 		}
 		else
 		{
@ -502,7 +504,6 @@ AtAbort_Portals(void)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId xact = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -515,12 +516,9 @@ AtAbort_Portals(void)

 		/*
 		 * Do nothing else to cursors held over from a previous
-		 * transaction. (This test must include checking CURSOR_OPT_HOLD,
-		 * else we will fail to clean up a VACUUM portal if it fails after
-		 * its first sub-transaction.)
+		 * transaction.
 		 */
-		if (portal->createXact != xact &&
-			(portal->cursorOptions & CURSOR_OPT_HOLD))
+		if (portal->createSubid == InvalidSubTransactionId)
 			continue;

 		/* let portalcmds.c clean up the state it knows about */
@ -548,7 +546,6 @@ AtCleanup_Portals(void)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId xact = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -556,14 +553,8 @@ AtCleanup_Portals(void)
 	{
 		Portal		portal = hentry->portal;

-		/*
-		 * Do nothing else to cursors held over from a previous
-		 * transaction. (This test must include checking CURSOR_OPT_HOLD,
-		 * else we will fail to clean up a VACUUM portal if it fails after
-		 * its first sub-transaction.)
-		 */
-		if (portal->createXact != xact &&
-			(portal->cursorOptions & CURSOR_OPT_HOLD))
+		/* Do nothing to cursors held over from a previous transaction */
+		if (portal->createSubid == InvalidSubTransactionId)
 		{
 			Assert(portal->status != PORTAL_ACTIVE);
 			Assert(portal->resowner == NULL);
@ -579,15 +570,15 @@ AtCleanup_Portals(void)
 * Pre-subcommit processing for portals.
 *
 * Reassign the portals created in the current subtransaction to the parent
- * transaction.
+ * subtransaction.
 */
 void
-AtSubCommit_Portals(TransactionId parentXid,
+AtSubCommit_Portals(SubTransactionId mySubid,
+					SubTransactionId parentSubid,
 					ResourceOwner parentXactOwner)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId curXid = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -595,9 +586,9 @@ AtSubCommit_Portals(TransactionId parentXid,
 	{
 		Portal		portal = hentry->portal;

-		if (portal->createXact == curXid)
+		if (portal->createSubid == mySubid)
 		{
-			portal->createXact = parentXid;
+			portal->createSubid = parentSubid;
 			if (portal->resowner)
 				ResourceOwnerNewParent(portal->resowner, parentXactOwner);
 		}
@ -612,12 +603,12 @@ AtSubCommit_Portals(TransactionId parentXid,
 * in descendants of the subtransaction too.
 */
 void
-AtSubAbort_Portals(TransactionId parentXid,
+AtSubAbort_Portals(SubTransactionId mySubid,
+				   SubTransactionId parentSubid,
 				   ResourceOwner parentXactOwner)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId curXid = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -625,7 +616,7 @@ AtSubAbort_Portals(TransactionId parentXid,
 	{
 		Portal		portal = hentry->portal;

-		if (portal->createXact != curXid)
+		if (portal->createSubid != mySubid)
 			continue;

 		/*
@ -644,7 +635,7 @@ AtSubAbort_Portals(TransactionId parentXid,
 		 */
 		if (portal->status == PORTAL_READY)
 		{
-			portal->createXact = parentXid;
+			portal->createSubid = parentSubid;
 			if (portal->resowner)
 				ResourceOwnerNewParent(portal->resowner, parentXactOwner);
 		}
@ -674,11 +665,10 @@ AtSubAbort_Portals(TransactionId parentXid,
 * we will not drop any that were reassigned to the parent above).
 */
 void
-AtSubCleanup_Portals(void)
+AtSubCleanup_Portals(SubTransactionId mySubid)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
-	TransactionId curXid = GetCurrentTransactionId();

 	hash_seq_init(&status, PortalHashTable);

@ -686,7 +676,7 @@ AtSubCleanup_Portals(void)
 	{
 		Portal		portal = hentry->portal;

-		if (portal->createXact != curXid)
+		if (portal->createSubid != mySubid)
 			continue;

 		/* AtSubAbort_Portals should have fixed these: */
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.72 2004/09/05 23:01:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.73 2004/09/16 16:58:37 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -46,14 +46,21 @@ extern bool XactReadOnly;
 */
 typedef enum
 {
-	XACT_EVENT_ABORT,
 	XACT_EVENT_COMMIT,
-	XACT_EVENT_START_SUB,
-	XACT_EVENT_ABORT_SUB,
-	XACT_EVENT_COMMIT_SUB
+	XACT_EVENT_ABORT
 } XactEvent;

-typedef void (*XactCallback) (XactEvent event, TransactionId parentXid, void *arg);
+typedef void (*XactCallback) (XactEvent event, void *arg);
+
+typedef enum
+{
+	SUBXACT_EVENT_START_SUB,
+	SUBXACT_EVENT_COMMIT_SUB,
+	SUBXACT_EVENT_ABORT_SUB
+} SubXactEvent;
+
+typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
+								 SubTransactionId parentSubid, void *arg);


 /* ----------------
@ -101,6 +108,8 @@ extern bool IsTransactionState(void);
 extern bool IsAbortedTransactionBlockState(void);
 extern TransactionId GetTopTransactionId(void);
 extern TransactionId GetCurrentTransactionId(void);
+extern TransactionId GetCurrentTransactionIdIfAny(void);
+extern SubTransactionId GetCurrentSubTransactionId(void);
 extern CommandId GetCurrentCommandId(void);
 extern AbsoluteTime GetCurrentTransactionStartTime(void);
 extern AbsoluteTime GetCurrentTransactionStartTimeUsec(int *usec);
@ -129,6 +138,8 @@ extern void RequireTransactionChain(void *stmtNode, const char *stmtType);
 extern bool IsInTransactionChain(void *stmtNode);
 extern void RegisterXactCallback(XactCallback callback, void *arg);
 extern void UnregisterXactCallback(XactCallback callback, void *arg);
+extern void RegisterSubXactCallback(SubXactCallback callback, void *arg);
+extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);

 extern void RecordTransactionCommit(void);

--- a/src/include/c.h
+++ b/src/include/c.h
@ -12,7 +12,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/c.h,v 1.171 2004/09/10 15:51:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/c.h,v 1.172 2004/09/16 16:58:38 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -351,7 +351,7 @@ typedef float float4;
 typedef double float8;

 /*
- * Oid, RegProcedure, TransactionId, CommandId, AclId
+ * Oid, RegProcedure, TransactionId, SubTransactionId, CommandId, AclId
 */

 /* typedef Oid is in postgres_ext.h */
@ -365,6 +365,11 @@ typedef regproc RegProcedure;

 typedef uint32 TransactionId;

+typedef uint32 SubTransactionId;
+
+#define InvalidSubTransactionId		((SubTransactionId) 0)
+#define TopSubTransactionId			((SubTransactionId) 1)
+
 typedef uint32 CommandId;

 #define FirstCommandId	((CommandId) 0)
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.33 2004/08/29 05:06:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.34 2004/09/16 16:58:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -91,8 +91,8 @@ extern Oid	FindDefaultConversionProc(int4 for_encoding, int4 to_encoding);
 /* initialization & transaction cleanup code */
 extern void InitializeSearchPath(void);
 extern void AtEOXact_Namespace(bool isCommit);
-extern void AtEOSubXact_Namespace(bool isCommit, TransactionId myXid,
-					  TransactionId parentXid);
+extern void AtEOSubXact_Namespace(bool isCommit, SubTransactionId mySubid,
+								  SubTransactionId parentSubid);

 /* stuff for search_path GUC variable */
 extern char *namespace_search_path;
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/commands/tablecmds.h,v 1.19 2004/08/29 05:06:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/commands/tablecmds.h,v 1.20 2004/09/16 16:58:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -42,9 +42,9 @@ extern void register_on_commit_action(Oid relid, OnCommitAction action);
 extern void remove_on_commit_action(Oid relid);

 extern void PreCommit_on_commit_actions(void);
-extern void AtEOXact_on_commit_actions(bool isCommit, TransactionId xid);
+extern void AtEOXact_on_commit_actions(bool isCommit);
 extern void AtEOSubXact_on_commit_actions(bool isCommit,
-							  TransactionId childXid,
-							  TransactionId parentXid);
+										  SubTransactionId mySubid,
+										  SubTransactionId parentSubid);

 #endif   /* TABLECMDS_H */
--- a/src/include/commands/user.h
+++ b/src/include/commands/user.h
@ -4,7 +4,7 @@
 *	  Commands for manipulating users and groups.
 *
 *
- * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.24 2004/08/29 05:06:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.25 2004/09/16 16:58:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -32,7 +32,8 @@ extern void RenameGroup(const char *oldname, const char *newname);
 extern Datum update_pg_pwd_and_pg_group(PG_FUNCTION_ARGS);

 extern void AtEOXact_UpdatePasswordFile(bool isCommit);
-extern void AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid,
-							   TransactionId parentXid);
+extern void AtEOSubXact_UpdatePasswordFile(bool isCommit,
+										   SubTransactionId mySubid,
+										   SubTransactionId parentSubid);

 #endif   /* USER_H */
--- a/src/include/executor/spi.h
+++ b/src/include/executor/spi.h
@ -2,7 +2,7 @@
 *
 * spi.h
 *
- * $PostgreSQL: pgsql/src/include/executor/spi.h,v 1.48 2004/09/13 20:07:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/executor/spi.h,v 1.49 2004/09/16 16:58:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -126,6 +126,6 @@ extern void SPI_cursor_move(Portal portal, bool forward, int count);
 extern void SPI_cursor_close(Portal portal);

 extern void AtEOXact_SPI(bool isCommit);
-extern void AtEOSubXact_SPI(bool isCommit, TransactionId childXid);
+extern void AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid);

 #endif   /* SPI_H */
--- a/src/include/executor/spi_priv.h
+++ b/src/include/executor/spi_priv.h
@ -6,7 +6,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/executor/spi_priv.h,v 1.20 2004/08/29 04:13:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/spi_priv.h,v 1.21 2004/09/16 16:58:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -23,7 +23,7 @@ typedef struct
 	MemoryContext procCxt;		/* procedure context */
 	MemoryContext execCxt;		/* executor context */
 	MemoryContext savedcxt;
-	TransactionId connectXid;	/* Xid of connecting transaction */
+	SubTransactionId connectSubid;	/* ID of connecting subtransaction */
 } _SPI_connection;

 typedef struct
--- a/src/include/libpq/be-fsstubs.h
+++ b/src/include/libpq/be-fsstubs.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.21 2004/08/29 05:06:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.22 2004/09/16 16:58:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -46,7 +46,7 @@ extern int	lo_write(int fd, char *buf, int len);
 * Cleanup LOs at xact commit/abort
 */
 extern void AtEOXact_LargeObject(bool isCommit);
-extern void AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid,
-						TransactionId parentXid);
+extern void AtEOSubXact_LargeObject(bool isCommit, SubTransactionId mySubid,
+									SubTransactionId parentSubid);

 #endif   /* BE_FSSTUBS_H */
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.48 2004/08/29 05:06:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.49 2004/09/16 16:58:42 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -85,8 +85,8 @@ extern int	BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
 extern void set_max_safe_fds(void);
 extern void closeAllVfds(void);
 extern void AtEOXact_Files(void);
-extern void AtEOSubXact_Files(bool isCommit, TransactionId myXid,
-				  TransactionId parentXid);
+extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
+							  SubTransactionId parentSubid);
 extern void RemovePgTempFiles(void);
 extern int	pg_fsync(int fd);
 extern int	pg_fdatasync(int fd);
--- a/src/include/storage/large_object.h
+++ b/src/include/storage/large_object.h
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.29 2004/08/29 04:13:10 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.30 2004/09/16 16:58:42 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -20,7 +20,7 @@
 * Data about a currently-open large object.
 *
 * id is the logical OID of the large object
- * xid is the transaction Id that opened the LO (or currently owns it)
+ * subid is the subtransaction that opened the LO (or currently owns it)
 * offset is the current seek offset within the LO
 * flags contains some flag bits
 *
@ -32,7 +32,7 @@
 typedef struct LargeObjectDesc
 {
 	Oid			id;				/* LO's identifier */
-	TransactionId xid;			/* owning XID */
+	SubTransactionId subid;		/* owning subtransaction ID */
 	uint32		offset;			/* current seek pointer */
 	int			flags;			/* locking info, etc */

--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.43 2004/08/29 04:13:10 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.44 2004/09/16 16:58:42 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -58,6 +58,7 @@ extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);

 /* Lock an XID (used to wait for a transaction to finish) */
 extern void XactLockTableInsert(TransactionId xid);
+extern void XactLockTableDelete(TransactionId xid);
 extern void XactLockTableWait(TransactionId xid);

 #endif   /* LMGR_H */
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@ -39,7 +39,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.52 2004/08/29 05:06:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.53 2004/09/16 16:58:43 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -106,7 +106,11 @@ typedef struct PortalData
 	MemoryContext heap;			/* subsidiary memory for portal */
 	ResourceOwner resowner;		/* resources owned by portal */
 	void		(*cleanup) (Portal portal);		/* cleanup hook */
-	TransactionId createXact;	/* the xid of the creating xact */
+	SubTransactionId createSubid;	/* the ID of the creating subxact */
+	/*
+	 * if createSubid is InvalidSubTransactionId, the portal is held over
+	 * from a previous transaction
+	 */

 	/* The query or queries the portal will execute */
 	const char *sourceText;		/* text of query, if known (may be NULL) */
@ -181,11 +185,13 @@ extern void EnablePortalManager(void);
 extern void AtCommit_Portals(void);
 extern void AtAbort_Portals(void);
 extern void AtCleanup_Portals(void);
-extern void AtSubCommit_Portals(TransactionId parentXid,
-					ResourceOwner parentXactOwner);
-extern void AtSubAbort_Portals(TransactionId parentXid,
-				   ResourceOwner parentXactOwner);
-extern void AtSubCleanup_Portals(void);
+extern void AtSubCommit_Portals(SubTransactionId mySubid,
+								SubTransactionId parentSubid,
+								ResourceOwner parentXactOwner);
+extern void AtSubAbort_Portals(SubTransactionId mySubid,
+							   SubTransactionId parentSubid,
+							   ResourceOwner parentXactOwner);
+extern void AtSubCleanup_Portals(SubTransactionId mySubid);
 extern Portal CreatePortal(const char *name, bool allowDup, bool dupSilent);
 extern Portal CreateNewPortal(void);
 extern void PortalDrop(Portal portal, bool isTopCommit);
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.79 2004/08/29 05:06:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.80 2004/09/16 16:58:43 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -115,12 +115,12 @@ typedef struct RelationData
 	bool		rd_isvalid;		/* relcache entry is valid */
 	char		rd_indexvalid;	/* state of rd_indexlist: 0 = not valid, 1
 								 * = valid, 2 = temporarily forced */
-	TransactionId rd_createxact;	/* rel was created in current xact */
+	SubTransactionId rd_createSubid;	/* rel was created in current xact */

 	/*
-	 * rd_createxact is the XID of the highest subtransaction the rel has
+	 * rd_createSubid is the ID of the highest subtransaction the rel has
 	 * survived into; or zero if the rel was not created in the current
-	 * transaction.  This should be relied on only for optimization
+	 * top transaction.  This should be relied on only for optimization
 	 * purposes; it is possible for new-ness to be "forgotten" (eg, after
 	 * CLUSTER).
 	 */
@ -241,7 +241,8 @@ typedef Relation *RelationPtr;
 * Beware of multiple eval of argument
 */
 #define RELATION_IS_LOCAL(relation) \
-	((relation)->rd_istemp || TransactionIdIsValid((relation)->rd_createxact))
+	((relation)->rd_istemp || \
+	 (relation)->rd_createSubid != InvalidSubTransactionId)

 /* routines in utils/cache/relcache.c */
 extern void RelationIncrementReferenceCount(Relation rel);
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.45 2004/08/29 05:06:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.46 2004/09/16 16:58:43 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -66,8 +66,8 @@ extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode);
 extern void RelationCacheInvalidate(void);

 extern void AtEOXact_RelationCache(bool isCommit);
-extern void AtEOSubXact_RelationCache(bool isCommit, TransactionId myXid,
-						  TransactionId parentXid);
+extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
+									  SubTransactionId parentSubid);

 /*
 * Routines to help manage rebuilding of relcache init file
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@ -3,7 +3,7 @@
 *			  procedural language
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.119 2004/09/13 20:09:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.120 2004/09/16 16:58:44 tgl Exp $
 *
 *	  This software is copyrighted by Jan Wieck - Hamburg.
 *
@ -4240,52 +4240,38 @@ exec_set_found(PLpgSQL_execstate *estate, bool state)
 }

 /*
- * plpgsql_eoxact --- post-transaction-commit-or-abort cleanup
+ * plpgsql_xact_cb --- post-transaction-commit-or-abort cleanup
 *
 * If a simple_eval_estate was created in the current transaction,
 * it has to be cleaned up, and we have to mark all active PLpgSQL_expr
 * structs that are using it as no longer active.
+ *
+ * XXX Do we need to do anything at subtransaction events?
+ * Maybe subtransactions need to have their own simple_eval_estate?
+ * It would get a lot messier, so for now let's assume we don't need that.
 */
 void
-plpgsql_xact_cb(XactEvent event, TransactionId parentXid, void *arg)
+plpgsql_xact_cb(XactEvent event, void *arg)
 {
 	PLpgSQL_expr *expr;
 	PLpgSQL_expr *enext;

-	switch (event)
+	/* Mark all active exprs as inactive */
+	for (expr = active_simple_exprs; expr; expr = enext)
 	{
-			/*
-			 * Nothing to do at subtransaction events
-			 *
-			 * XXX really?	Maybe subtransactions need to have their own
-			 * simple_eval_estate?	It would get a lot messier, so for now
-			 * let's assume we don't need that.
-			 */
-		case XACT_EVENT_START_SUB:
-		case XACT_EVENT_ABORT_SUB:
-		case XACT_EVENT_COMMIT_SUB:
-			break;
-
-		case XACT_EVENT_ABORT:
-		case XACT_EVENT_COMMIT:
-			/* Mark all active exprs as inactive */
-			for (expr = active_simple_exprs; expr; expr = enext)
-			{
-				enext = expr->expr_simple_next;
-				expr->expr_simple_state = NULL;
-				expr->expr_simple_next = NULL;
-			}
-			active_simple_exprs = NULL;
-
-			/*
-			 * If we are doing a clean transaction shutdown, free the
-			 * EState (so that any remaining resources will be released
-			 * correctly). In an abort, we expect the regular abort
-			 * recovery procedures to release everything of interest.
-			 */
-			if (event == XACT_EVENT_COMMIT && simple_eval_estate)
-				FreeExecutorState(simple_eval_estate);
-			simple_eval_estate = NULL;
-			break;
+		enext = expr->expr_simple_next;
+		expr->expr_simple_state = NULL;
+		expr->expr_simple_next = NULL;
 	}
+	active_simple_exprs = NULL;
+
+	/*
+	 * If we are doing a clean transaction shutdown, free the
+	 * EState (so that any remaining resources will be released
+	 * correctly). In an abort, we expect the regular abort
+	 * recovery procedures to release everything of interest.
+	 */
+	if (event == XACT_EVENT_COMMIT && simple_eval_estate)
+		FreeExecutorState(simple_eval_estate);
+	simple_eval_estate = NULL;
 }
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@ -3,7 +3,7 @@
 *			  procedural language
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.55 2004/09/14 23:46:46 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.56 2004/09/16 16:58:44 tgl Exp $
 *
 *	  This software is copyrighted by Jan Wieck - Hamburg.
 *
@ -708,7 +708,7 @@ extern Datum plpgsql_exec_function(PLpgSQL_function *func,
 					  FunctionCallInfo fcinfo);
 extern HeapTuple plpgsql_exec_trigger(PLpgSQL_function *func,
 					 TriggerData *trigdata);
-extern void plpgsql_xact_cb(XactEvent event, TransactionId parentXid, void *arg);
+extern void plpgsql_xact_cb(XactEvent event, void *arg);

 /* ----------
 * Functions for the dynamic string handling in pl_funcs.c