@@ -47,7 +47,6 @@
 #include "storage/predicate.h"
 #include "storage/procarray.h"
 #include "utils/datum.h"
-#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/spccache.h"
@@ -6023,61 +6022,167 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 }
 
 /*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
- *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited.  Nonetheless we
- * find some places to use it.
- *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either.  So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
- *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple.  Also, tuple->t_self identifies the target tuple.
- *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
+ *
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction.  If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error.  Otherwise, return false after blocking transactions, if any,
+ * have ended.
+ *
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
+ *
+ * One could modify this to return true for tuples with delete in progress,
+ * but we don't: all inplace updaters take a lock that conflicts with DROP,
+ * and if an explicit "DELETE FROM pg_class" is in progress, we'll wait for
+ * it like we would an update.
+ *
+ * Readers of inplace-updated fields expect changes to those fields to be
+ * durable.  For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots.  A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense).  We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer.  In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction.  ROLLBACK then is one case where it's okay to lose
+ * inplace updates.  (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain.  Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid.  Hence, we would need to overwrite the tuples in
+ * order from tail to head.  That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
 */
-void
-heap_inplace_update(Relation relation, HeapTuple tuple)
+bool
+heap_inplace_lock(Relation relation,
+				  HeapTuple oldtup_ptr, Buffer buffer)
 {
-	Buffer		buffer;
-	Page		page;
-	OffsetNumber offnum;
-	ItemId		lp = NULL;
-	HeapTupleHeader htup;
-	uint32		oldlen;
-	uint32		newlen;
+	HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
+	TM_Result	result;
+	bool		ret;
 
-	/*
-	 * For now, we don't allow parallel updates.  Unlike a regular update,
-	 * this should never create a combo CID, so it might be possible to relax
-	 * this restriction, but not without more thought and testing.  It's not
-	 * clear that it would be useful, anyway.
+	Assert(BufferIsValid(buffer));
+
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+	/*----------
+	 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+	 *
+	 * - wait unconditionally
+	 * - no tuple locks
+	 * - don't recheck header after wait: simpler to defer to next iteration
+	 * - don't try to continue even if the updater aborts: likewise
+	 * - no crosscheck
 	 */
-	if (IsInParallelMode())
+	result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+									  buffer);
+	if (result == TM_Invisible)
+	{
+		/* no known way this can happen */
 		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
-				 errmsg("cannot update tuples during a parallel operation")));
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg_internal("attempted to overwrite invisible tuple")));
+	}
+	else if (result == TM_SelfModified)
+	{
+		/*
+		 * CREATE INDEX might reach this if an expression is silly enough to
+		 * call e.g. SELECT ... FROM pg_class FOR SHARE.  C code of other SQL
+		 * statements might get here after a heap_update() of the same row, in
+		 * the absence of an intervening CommandCounterIncrement().
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+	}
+	else if (result == TM_BeingModified)
+	{
+		TransactionId xwait;
+		uint16		infomask;
 
-	INJECTION_POINT("inplace-before-pin");
-	buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
-	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-	page = (Page) BufferGetPage(buffer);
+		xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+		infomask = oldtup.t_data->t_infomask;
 
-	offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
-	if (PageGetMaxOffsetNumber(page) >= offnum)
-		lp = PageGetItemId(page, offnum);
+		if (infomask & HEAP_XMAX_IS_MULTI)
+		{
+			LockTupleMode lockmode = LockTupleNoKeyExclusive;
+			MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+			int			remain;
+			bool		current_is_member;
 
-	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
-		elog(ERROR, "invalid lp");
+			if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+										lockmode, &current_is_member))
+			{
+				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+				ret = false;
+				MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+								relation, &oldtup.t_self, XLTW_Update,
+								&remain);
+			}
+			else
+				ret = true;
+		}
+		else if (TransactionIdIsCurrentTransactionId(xwait))
+			ret = true;
+		else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+			ret = true;
+		else
+		{
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+			ret = false;
+			XactLockTableWait(xwait, relation, &oldtup.t_self,
+							  XLTW_Update);
+		}
+	}
+	else
+	{
+		ret = (result == TM_Ok);
+		if (!ret)
+		{
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		}
+	}
 
-	htup = (HeapTupleHeader) PageGetItem(page, lp);
+	/*
+	 * GetCatalogSnapshot() relies on invalidation messages to know when to
+	 * take a new snapshot.  COMMIT of xwait is responsible for sending the
+	 * invalidation.  We're not acquiring heavyweight locks sufficient to
+	 * block if not yet sent, so we must take a new snapshot to ensure a later
+	 * attempt has a fair chance.  While we don't need this if xwait aborted,
+	 * don't bother optimizing that.
+	 */
+	if (!ret)
+		InvalidateCatalogSnapshot();
+	return ret;
+}
+
+/*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
+ *
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+void
+heap_inplace_update_and_unlock(Relation relation,
+							   HeapTuple oldtup, HeapTuple tuple,
+							   Buffer buffer)
+{
+	HeapTupleHeader htup = oldtup->t_data;
+	uint32		oldlen;
+	uint32		newlen;
 
-	oldlen = ItemIdGetLength(lp) - htup->t_hoff;
+	Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+	oldlen = oldtup->t_len - htup->t_hoff;
 	newlen = tuple->t_len - tuple->t_data->t_hoff;
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
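
For context on how the two functions above are meant to be used together: the caller loops, re-fetching the tuple each time heap_inplace_lock() returns false, then performs the overwrite under the returned buffer lock. Here is a minimal sketch of that retry loop, modeled on the systable_inplace_update_begin()/systable_inplace_update_finish() protocol named in the function comment; example_inplace_update() and fetch_tuple_and_buffer() are hypothetical illustration-only names, not code from this patch.

/*
 * Illustrative sketch only -- not part of the patch.  The hypothetical
 * fetch_tuple_and_buffer() stands in for a catalog scan that copies the
 * current version of the target tuple into *oldtup and returns its
 * pinned (but not yet locked) buffer.
 */
static void
example_inplace_update(Relation relation, HeapTuple newtup)
{
	HeapTupleData oldtup;
	Buffer		buffer;

	for (;;)
	{
		fetch_tuple_and_buffer(relation, newtup, &oldtup, &buffer);

		/* On success, the buffer is now exclusive-locked. */
		if (heap_inplace_lock(relation, &oldtup, buffer))
			break;

		/*
		 * heap_inplace_lock() already released the buffer lock, waited
		 * for the blocking transaction to end, and invalidated the
		 * catalog snapshot, so a fresh fetch can see the successor
		 * tuple.  Drop our pin and retry.
		 */
		ReleaseBuffer(buffer);
	}

	/* newtup->t_self must match oldtup.t_self (asserted by the callee) */
	heap_inplace_update_and_unlock(relation, &oldtup, newtup, buffer);
	ReleaseBuffer(buffer);		/* unlock dropped the lock, not the pin */
}
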
@@ -6089,6 +6194,19 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
 		   (char *) tuple->t_data + tuple->t_data->t_hoff,
 		   newlen);
 
+	/*----------
+	 * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+	 *
+	 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+	 * ["R" is a VACUUM tbl]
+	 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+	 * D: systable_getnext() returns pg_class tuple of tbl
+	 * R: memcpy() into pg_class tuple of tbl
+	 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+	 * [crash]
+	 * [recovery restores datfrozenxid w/o relfrozenxid]
+	 */
+
 	MarkBufferDirty(buffer);
 
 	/* XLOG stuff */
@@ -6109,23 +6227,35 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
-		PageSetLSN(page, recptr);
+		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
 	END_CRIT_SECTION();
 
-	UnlockReleaseBuffer(buffer);
+	heap_inplace_unlock(relation, oldtup, buffer);
 
 	/*
 	 * Send out shared cache inval if necessary.  Note that because we only
 	 * pass the new version of the tuple, this mustn't be used for any
	 * operations that could change catcache lookup keys.  But we aren't
 	 * bothering with index updates either, so that's true a fortiori.
+	 *
+	 * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
 	 */
 	if (!IsBootstrapProcessingMode())
 		CacheInvalidateHeapTuple(relation, tuple, NULL);
 }
+
+/*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+void
+heap_inplace_unlock(Relation relation,
+					HeapTuple oldtup, Buffer buffer)
+{
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
 
 #define FRM_NOOP				0x0001
 #define FRM_INVALIDATE_XMAX		0x0002
 #define FRM_RETURN_IS_XID		0x0004
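
The heap_inplace_lock() comment above names three valid release paths: heap_inplace_update_and_unlock(), heap_inplace_unlock(), or raising an error. A short sketch of the middle path, for a caller that locks the tuple and then decides no overwrite is needed; example_conditional_inplace_update() and new_value_differs() are hypothetical names for illustration, not part of this patch.

/* Illustrative sketch only -- the no-op path: lock, decide not to write, unlock. */
static void
example_conditional_inplace_update(Relation relation,
								   HeapTuple oldtup, HeapTuple newtup,
								   Buffer buffer)
{
	if (!heap_inplace_lock(relation, oldtup, buffer))
		return;					/* a real caller would re-fetch and retry */

	if (new_value_differs(oldtup, newtup))	/* hypothetical check */
		heap_inplace_update_and_unlock(relation, oldtup, newtup, buffer);
	else
		heap_inplace_unlock(relation, oldtup, buffer);	/* nothing to write */
}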