@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / catalog / index . c , v 1.272 2006 / 07 / 31 20 : 09 : 00 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / catalog / index . c , v 1.273 2006 / 08 / 25 04 : 06 : 46 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -34,6 +34,7 @@
# include "catalog/index.h"
# include "catalog/indexing.h"
# include "catalog/pg_constraint.h"
# include "catalog/pg_operator.h"
# include "catalog/pg_opclass.h"
# include "catalog/pg_type.h"
# include "executor/executor.h"
@ -49,8 +50,17 @@
# include "utils/memutils.h"
# include "utils/relcache.h"
# include "utils/syscache.h"
# include "utils/tuplesort.h"
/* state info for validate_index bulkdelete callback */
typedef struct
{
Tuplesortstate * tuplesort ; /* for sorting the index TIDs */
/* statistics (for debug purposes only): */
double htups , itups , tups_inserted ;
} v_i_state ;
/* non-export function prototypes */
static TupleDesc ConstructTupleDescriptor ( Relation heapRelation ,
IndexInfo * indexInfo ,
@ -61,9 +71,16 @@ static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation ( Oid indexoid , Oid heapoid ,
IndexInfo * indexInfo ,
Oid * classOids ,
bool primary ) ;
bool primary ,
bool isvalid ) ;
static void index_update_stats ( Relation rel , bool hasindex , bool isprimary ,
Oid reltoastidxid , double reltuples ) ;
static bool validate_index_callback ( ItemPointer itemptr , void * opaque ) ;
static void validate_index_heapscan ( Relation heapRelation ,
Relation indexRelation ,
IndexInfo * indexInfo ,
Snapshot snapshot ,
v_i_state * state ) ;
static Oid IndexGetRelation ( Oid indexId ) ;
@ -308,6 +325,8 @@ AppendAttributeTuples(Relation indexRelation, int numatts)
/* ----------------------------------------------------------------
* UpdateIndexRelation
*
* Construct and insert a new entry in the pg_index catalog
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static void
@ -315,7 +334,8 @@ UpdateIndexRelation(Oid indexoid,
Oid heapoid ,
IndexInfo * indexInfo ,
Oid * classOids ,
bool primary )
bool primary ,
bool isvalid )
{
int2vector * indkey ;
oidvector * indclass ;
@ -383,6 +403,7 @@ UpdateIndexRelation(Oid indexoid,
values [ Anum_pg_index_indisunique - 1 ] = BoolGetDatum ( indexInfo - > ii_Unique ) ;
values [ Anum_pg_index_indisprimary - 1 ] = BoolGetDatum ( primary ) ;
values [ Anum_pg_index_indisclustered - 1 ] = BoolGetDatum ( false ) ;
values [ Anum_pg_index_indisvalid - 1 ] = BoolGetDatum ( isvalid ) ;
values [ Anum_pg_index_indkey - 1 ] = PointerGetDatum ( indkey ) ;
values [ Anum_pg_index_indclass - 1 ] = PointerGetDatum ( indclass ) ;
values [ Anum_pg_index_indexprs - 1 ] = exprsDatum ;
@ -427,7 +448,10 @@ UpdateIndexRelation(Oid indexoid,
* isconstraint : index is owned by a PRIMARY KEY or UNIQUE constraint
* allow_system_table_mods : allow table to be a system catalog
* skip_build : true to skip the index_build ( ) step for the moment ; caller
* must do it later ( typically via reindex_index ( ) )
* must do it later ( typically via reindex_index ( ) )
* concurrent : if true , do not lock the table against writers . The index
* will be marked " invalid " and the caller must take additional steps
* to fix it up .
*
* Returns OID of the created index .
*/
@ -443,7 +467,8 @@ index_create(Oid heapRelationId,
bool isprimary ,
bool isconstraint ,
bool allow_system_table_mods ,
bool skip_build )
bool skip_build ,
bool concurrent )
{
Relation pg_class ;
Relation heapRelation ;
@ -456,9 +481,12 @@ index_create(Oid heapRelationId,
pg_class = heap_open ( RelationRelationId , RowExclusiveLock ) ;
/*
* Only SELECT . . . FOR UPDATE / SHARE are allowed while doing this
* Only SELECT . . . FOR UPDATE / SHARE are allowed while doing a standard
* index build ; but for concurrent builds we allow INSERT / UPDATE / DELETE
* ( but not VACUUM ) .
*/
heapRelation = heap_open ( heapRelationId , ShareLock ) ;
heapRelation = heap_open ( heapRelationId ,
( concurrent ? ShareUpdateExclusiveLock : ShareLock ) ) ;
/*
* The index will be in the same namespace as its parent table , and is
@ -480,6 +508,16 @@ index_create(Oid heapRelationId,
( errcode ( ERRCODE_FEATURE_NOT_SUPPORTED ) ,
errmsg ( " user-defined indexes on system catalog tables are not supported " ) ) ) ;
/*
* concurrent index build on a system catalog is unsafe because we tend
* to release locks before committing in catalogs
*/
if ( concurrent & &
IsSystemRelation ( heapRelation ) )
ereport ( ERROR ,
( errcode ( ERRCODE_FEATURE_NOT_SUPPORTED ) ,
errmsg ( " concurrent index creation on system catalog tables is not supported " ) ) ) ;
/*
* We cannot allow indexing a shared relation after initdb ( because
* there ' s no way to make the entry in other databases ' pg_class ) .
@ -578,7 +616,7 @@ index_create(Oid heapRelationId,
* - - - - - - - - - - - - - - - -
*/
UpdateIndexRelation ( indexRelationId , heapRelationId , indexInfo ,
classObjectId , isprimary ) ;
classObjectId , isprimary , ! concurrent ) ;
/*
* Register constraint and dependencies for the index .
@ -745,9 +783,8 @@ index_create(Oid heapRelationId,
}
/*
* Close the heap and index ; but we keep the ShareLock on the heap and
* the exclusive lock on the index that we acquired above , until end of
* transaction .
* Close the heap and index ; but we keep the locks that we acquired above
* until end of transaction .
*/
index_close ( indexRelation , NoLock ) ;
heap_close ( heapRelation , NoLock ) ;
@ -895,6 +932,7 @@ BuildIndexInfo(Relation index)
/* other info */
ii - > ii_Unique = indexStruct - > indisunique ;
ii - > ii_Concurrent = false ; /* assume normal case */
return ii ;
}
@ -1327,13 +1365,22 @@ IndexBuildHeapScan(Relation heapRelation,
estate ) ;
/*
* Ok , begin our scan of the base relation . We use SnapshotAny because we
* must retrieve all tuples and do our own time qual checks .
* Prepare for scan of the base relation . In a normal index build ,
* we use SnapshotAny because we must retrieve all tuples and do our own
* time qual checks ( because we have to index RECENTLY_DEAD tuples ) .
* In a concurrent build , we take a regular MVCC snapshot and index
* whatever ' s live according to that . During bootstrap we just use
* SnapshotNow .
*/
if ( IsBootstrapProcessingMode ( ) )
{
snapshot = SnapshotNow ;
OldestXmin = InvalidTransactionId ;
OldestXmin = InvalidTransactionId ; /* not used */
}
else if ( indexInfo - > ii_Concurrent )
{
snapshot = CopySnapshot ( GetTransactionSnapshot ( ) ) ;
OldestXmin = InvalidTransactionId ; /* not used */
}
else
{
@ -1344,8 +1391,8 @@ IndexBuildHeapScan(Relation heapRelation,
scan = heap_beginscan ( heapRelation , /* relation */
snapshot , /* seeself */
0 , /* number of keys */
NULL ) ; /* scan key */
0 , /* number of keys */
NULL ) ; /* scan key */
reltuples = 0 ;
@ -1374,10 +1421,12 @@ IndexBuildHeapScan(Relation heapRelation,
scan - > rs_cbuf ) )
{
case HEAPTUPLE_DEAD :
/* Definitely dead, we can ignore it */
indexIt = false ;
tupleIsAlive = false ;
break ;
case HEAPTUPLE_LIVE :
/* Normal case, index and unique-check it */
indexIt = true ;
tupleIsAlive = true ;
break ;
@ -1388,6 +1437,7 @@ IndexBuildHeapScan(Relation heapRelation,
* anyway to preserve MVCC semantics . ( Pre - existing
* transactions could try to use the index after we
* finish building it , and may need to see such tuples . )
* Exclude it from unique - checking , however .
*/
indexIt = true ;
tupleIsAlive = false ;
@ -1499,6 +1549,309 @@ IndexBuildHeapScan(Relation heapRelation,
}
/*
* validate_index - support code for concurrent index builds
*
* We do a concurrent index build by first building the index normally via
* index_create ( ) , while holding a weak lock that allows concurrent
* insert / update / delete . Also , we index only tuples that are valid
* as of the start of the scan ( see IndexBuildHeapScan ) , whereas a normal
* build takes care to include recently - dead tuples . This is OK because
* we won ' t mark the index valid until all transactions that might be able
* to see those tuples are gone . The reason for doing that is to avoid
* bogus unique - index failures due to concurrent UPDATEs ( we might see
* different versions of the same row as being valid when we pass over them ,
* if we used HeapTupleSatisfiesVacuum ) . This leaves us with an index that
* does not contain any tuples added to the table while we built the index .
*
* Next , we commit the transaction so that the index becomes visible to other
* backends , but it is marked not " indisvalid " to prevent the planner from
* relying on it for indexscans . Then we wait for all transactions that
* could have been modifying the table to terminate . At this point we
* know that any subsequently - started transactions will see the index and
* insert their new tuples into it . We then take a new reference snapshot
* which is passed to validate_index ( ) . Any tuples that are valid according
* to this snap , but are not in the index , must be added to the index .
* ( Any tuples committed live after the snap will be inserted into the
* index by their originating transaction . Any tuples committed dead before
* the snap need not be indexed , because we will wait out all transactions
* that might care about them before we mark the index valid . )
*
* validate_index ( ) works by first gathering all the TIDs currently in the
* index , using a bulkdelete callback that just stores the TIDs and doesn ' t
* ever say " delete it " . ( This should be faster than a plain indexscan ;
* also , not all index AMs support full - index indexscan . ) Then we sort the
* TIDs , and finally scan the table doing a " merge join " against the TID list
* to see which tuples are missing from the index . Thus we will ensure that
* all tuples valid according to the reference snapshot are in the index .
*
* Building a unique index this way is tricky : we might try to insert a
* tuple that is already dead or is in process of being deleted , and we
* mustn ' t have a uniqueness failure against an updated version of the same
* row . We can check the tuple to see if it ' s already dead and tell
* index_insert ( ) not to do the uniqueness check , but that still leaves us
* with a race condition against an in - progress update . To handle that ,
* we expect the index AM to recheck liveness of the to - be - inserted tuple
* before it declares a uniqueness error .
*
* After completing validate_index ( ) , we wait until all transactions that
* were alive at the time of the reference snapshot are gone ; this is
* necessary to be sure there are none left with a serializable snapshot
* older than the reference ( and hence possibly able to see tuples we did
* not index ) . Then we mark the index valid and commit .
*
* Doing two full table scans is a brute - force strategy . We could try to be
* cleverer , eg storing new tuples in a special area of the table ( perhaps
* making the table append - only by setting use_fsm ) . However that would
* add yet more locking issues .
*/
void
validate_index ( Oid heapId , Oid indexId , Snapshot snapshot )
{
Relation heapRelation , indexRelation ;
IndexInfo * indexInfo ;
IndexVacuumInfo ivinfo ;
v_i_state state ;
/* Open and lock the parent heap relation */
heapRelation = heap_open ( heapId , ShareUpdateExclusiveLock ) ;
/* And the target index relation */
indexRelation = index_open ( indexId , RowExclusiveLock ) ;
/*
* Fetch info needed for index_insert . ( You might think this should
* be passed in from DefineIndex , but its copy is long gone due to
* having been built in a previous transaction . )
*/
indexInfo = BuildIndexInfo ( indexRelation ) ;
/* mark build is concurrent just for consistency */
indexInfo - > ii_Concurrent = true ;
/*
* Scan the index and gather up all the TIDs into a tuplesort object .
*/
ivinfo . index = indexRelation ;
ivinfo . vacuum_full = false ;
ivinfo . message_level = DEBUG2 ;
ivinfo . num_heap_tuples = - 1 ;
state . tuplesort = tuplesort_begin_datum ( TIDOID ,
TIDLessOperator ,
maintenance_work_mem ,
false ) ;
state . htups = state . itups = state . tups_inserted = 0 ;
( void ) index_bulk_delete ( & ivinfo , NULL ,
validate_index_callback , ( void * ) & state ) ;
/* Execute the sort */
tuplesort_performsort ( state . tuplesort ) ;
/*
* Now scan the heap and " merge " it with the index
*/
validate_index_heapscan ( heapRelation ,
indexRelation ,
indexInfo ,
snapshot ,
& state ) ;
/* Done with tuplesort object */
tuplesort_end ( state . tuplesort ) ;
elog ( DEBUG2 ,
" validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples " ,
state . htups , state . itups , state . tups_inserted ) ;
/* Close rels, but keep locks */
index_close ( indexRelation , NoLock ) ;
heap_close ( heapRelation , NoLock ) ;
}
/*
* validate_index_callback - bulkdelete callback to collect the index TIDs
*/
static bool
validate_index_callback ( ItemPointer itemptr , void * opaque )
{
v_i_state * state = ( v_i_state * ) opaque ;
tuplesort_putdatum ( state - > tuplesort , PointerGetDatum ( itemptr ) , false ) ;
state - > itups + = 1 ;
return false ; /* never actually delete anything */
}
/*
* validate_index_heapscan - second table scan for concurrent index build
*
* This has much code in common with IndexBuildHeapScan , but it ' s enough
* different that it seems cleaner to have two routines not one .
*/
static void
validate_index_heapscan ( Relation heapRelation ,
Relation indexRelation ,
IndexInfo * indexInfo ,
Snapshot snapshot ,
v_i_state * state )
{
HeapScanDesc scan ;
HeapTuple heapTuple ;
Datum values [ INDEX_MAX_KEYS ] ;
bool isnull [ INDEX_MAX_KEYS ] ;
List * predicate ;
TupleTableSlot * slot ;
EState * estate ;
ExprContext * econtext ;
/* state variables for the merge */
ItemPointer indexcursor = NULL ;
bool tuplesort_empty = false ;
/*
* sanity checks
*/
Assert ( OidIsValid ( indexRelation - > rd_rel - > relam ) ) ;
/*
* Need an EState for evaluation of index expressions and partial - index
* predicates . Also a slot to hold the current tuple .
*/
estate = CreateExecutorState ( ) ;
econtext = GetPerTupleExprContext ( estate ) ;
slot = MakeSingleTupleTableSlot ( RelationGetDescr ( heapRelation ) ) ;
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext - > ecxt_scantuple = slot ;
/* Set up execution state for predicate, if any. */
predicate = ( List * )
ExecPrepareExpr ( ( Expr * ) indexInfo - > ii_Predicate ,
estate ) ;
/*
* Prepare for scan of the base relation . We need just those tuples
* satisfying the passed - in reference snapshot .
*/
scan = heap_beginscan ( heapRelation , /* relation */
snapshot , /* seeself */
0 , /* number of keys */
NULL ) ; /* scan key */
/*
* Scan all tuples matching the snapshot .
*/
while ( ( heapTuple = heap_getnext ( scan , ForwardScanDirection ) ) ! = NULL )
{
ItemPointer heapcursor = & heapTuple - > t_self ;
CHECK_FOR_INTERRUPTS ( ) ;
state - > htups + = 1 ;
/*
* " merge " by skipping through the index tuples until we find or
* pass the current heap tuple .
*/
while ( ! tuplesort_empty & &
( ! indexcursor | |
ItemPointerCompare ( indexcursor , heapcursor ) < 0 ) )
{
Datum ts_val ;
bool ts_isnull ;
if ( indexcursor )
pfree ( indexcursor ) ;
tuplesort_empty = ! tuplesort_getdatum ( state - > tuplesort , true ,
& ts_val , & ts_isnull ) ;
Assert ( tuplesort_empty | | ! ts_isnull ) ;
indexcursor = ( ItemPointer ) DatumGetPointer ( ts_val ) ;
}
if ( tuplesort_empty | |
ItemPointerCompare ( indexcursor , heapcursor ) > 0 )
{
/*
* We ' ve overshot which means this heap tuple is missing from the
* index , so insert it .
*/
bool check_unique ;
MemoryContextReset ( econtext - > ecxt_per_tuple_memory ) ;
/* Set up for predicate or expression evaluation */
ExecStoreTuple ( heapTuple , slot , InvalidBuffer , false ) ;
/*
* In a partial index , discard tuples that don ' t satisfy the
* predicate .
*/
if ( predicate ! = NIL )
{
if ( ! ExecQual ( predicate , econtext , false ) )
continue ;
}
/*
* For the current heap tuple , extract all the attributes we use
* in this index , and note which are null . This also performs
* evaluation of any expressions needed .
*/
FormIndexDatum ( indexInfo ,
slot ,
estate ,
values ,
isnull ) ;
/*
* If the tuple is already committed dead , we still have to
* put it in the index ( because some xacts might be able to
* see it ) , but we might as well suppress uniqueness checking .
* This is just an optimization because the index AM is not
* supposed to raise a uniqueness failure anyway .
*/
if ( indexInfo - > ii_Unique )
{
/* must hold a buffer lock to call HeapTupleSatisfiesNow */
LockBuffer ( scan - > rs_cbuf , BUFFER_LOCK_SHARE ) ;
if ( HeapTupleSatisfiesNow ( heapTuple - > t_data , scan - > rs_cbuf ) )
check_unique = true ;
else
check_unique = false ;
LockBuffer ( scan - > rs_cbuf , BUFFER_LOCK_UNLOCK ) ;
}
else
check_unique = false ;
/*
* You ' d think we should go ahead and build the index tuple here ,
* but some index AMs want to do further processing on the data
* first . So pass the values [ ] and isnull [ ] arrays , instead .
*/
index_insert ( indexRelation ,
values ,
isnull ,
heapcursor ,
heapRelation ,
check_unique ) ;
state - > tups_inserted + = 1 ;
}
}
heap_endscan ( scan ) ;
ExecDropSingleTupleTableSlot ( slot ) ;
FreeExecutorState ( estate ) ;
/* These may have been pointing to the now-gone estate */
indexInfo - > ii_ExpressionsState = NIL ;
indexInfo - > ii_PredicateState = NIL ;
}
/*
* IndexGetRelation : given an index ' s relation OID , get the OID of the
* relation it is an index on . Uses the system cache .
@ -1530,9 +1883,12 @@ void
reindex_index ( Oid indexId )
{
Relation iRel ,
heapRelation ;
heapRelation ,
pg_index ;
Oid heapId ;
bool inplace ;
HeapTuple indexTuple ;
Form_pg_index indexForm ;
/*
* Open and lock the parent heap relation . ShareLock is sufficient since
@ -1600,6 +1956,28 @@ reindex_index(Oid indexId)
PG_END_TRY ( ) ;
ResetReindexProcessing ( ) ;
/*
* If the index is marked invalid ( ie , it ' s from a failed CREATE INDEX
* CONCURRENTLY ) , we can now mark it valid . This allows REINDEX to be
* used to clean up in such cases .
*/
pg_index = heap_open ( IndexRelationId , RowExclusiveLock ) ;
indexTuple = SearchSysCacheCopy ( INDEXRELID ,
ObjectIdGetDatum ( indexId ) ,
0 , 0 , 0 ) ;
if ( ! HeapTupleIsValid ( indexTuple ) )
elog ( ERROR , " cache lookup failed for index %u " , indexId ) ;
indexForm = ( Form_pg_index ) GETSTRUCT ( indexTuple ) ;
if ( ! indexForm - > indisvalid )
{
indexForm - > indisvalid = true ;
simple_heap_update ( pg_index , & indexTuple - > t_self , indexTuple ) ;
CatalogUpdateIndexes ( pg_index , indexTuple ) ;
}
heap_close ( pg_index , RowExclusiveLock ) ;
/* Close rels, but keep locks */
index_close ( iRel , NoLock ) ;
heap_close ( heapRelation , NoLock ) ;