@ -8,7 +8,7 @@
*
*
*
*
* IDENTIFICATION
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / access / hash / hashpage . c , v 1.52 .2 .2 2006 / 11 / 19 21 : 33 : 29 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / access / hash / hashpage . c , v 1.52 .2 .3 2007 / 04 / 19 20 : 24 : 18 tgl Exp $
*
*
* NOTES
* NOTES
* Postgres hash pages look like ordinary relation pages . The opaque
* Postgres hash pages look like ordinary relation pages . The opaque
@ -36,7 +36,8 @@
# include "utils/lsyscache.h"
# include "utils/lsyscache.h"
static BlockNumber _hash_alloc_buckets ( Relation rel , uint32 nblocks ) ;
static bool _hash_alloc_buckets ( Relation rel , BlockNumber firstblock ,
uint32 nblocks ) ;
static void _hash_splitbucket ( Relation rel , Buffer metabuf ,
static void _hash_splitbucket ( Relation rel , Buffer metabuf ,
Bucket obucket , Bucket nbucket ,
Bucket obucket , Bucket nbucket ,
BlockNumber start_oblkno ,
BlockNumber start_oblkno ,
@ -104,14 +105,18 @@ _hash_droplock(Relation rel, BlockNumber whichlock, int access)
* requested buffer and its reference count has been incremented
* requested buffer and its reference count has been incremented
* ( ie , the buffer is " locked and pinned " ) .
* ( ie , the buffer is " locked and pinned " ) .
*
*
* blkno = = P_NEW is allowed , but it is caller ' s responsibility to
* P_NEW is disallowed because this routine should only be used
* ensure that only one process can extend the index at a time .
* to access pages that are known to be before the filesystem EOF .
* Extending the index should be done with _hash_getnewbuf .
*/
*/
Buffer
Buffer
_hash_getbuf ( Relation rel , BlockNumber blkno , int access )
_hash_getbuf ( Relation rel , BlockNumber blkno , int access )
{
{
Buffer buf ;
Buffer buf ;
if ( blkno = = P_NEW )
elog ( ERROR , " hash AM does not use P_NEW " ) ;
buf = ReadBuffer ( rel , blkno ) ;
buf = ReadBuffer ( rel , blkno ) ;
if ( access ! = HASH_NOLOCK )
if ( access ! = HASH_NOLOCK )
@ -121,6 +126,51 @@ _hash_getbuf(Relation rel, BlockNumber blkno, int access)
return buf ;
return buf ;
}
}
/*
 *	_hash_getnewbuf() -- Fetch the buffer for a page being appended to the index.
 *
 *		Takes the same arguments and returns the same kind of result as
 *		_hash_getbuf, but is meant for pages that are being added to the
 *		index and are therefore expected to lie past the logical EOF.
 *		The page may nevertheless already exist physically: an earlier
 *		attempt could have crashed after extending the filesystem EOF but
 *		before the metapage was updated to account for the new page, so
 *		that case is handled as well.
 *
 *		rel		- the hash index relation
 *		blkno	- block number the new page must have; P_NEW is rejected,
 *				  and so is any block beyond the current relation length
 *		access	- buffer lock mode to acquire, or HASH_NOLOCK for none
 *
 *		Returns the pinned (and, unless HASH_NOLOCK, locked) buffer.
 *
 *		The caller must guarantee that only one process extends the index
 *		at any given time.
 *
 *		Every caller is expected to run _hash_pageinit on the page that
 *		comes back.  It is hard to see why access would ever be anything
 *		other than HASH_WRITE.
 */
Buffer
_hash_getnewbuf(Relation rel, BlockNumber blkno, int access)
{
	BlockNumber relblocks = RelationGetNumberOfBlocks(rel);
	Buffer		newbuf;

	if (blkno == P_NEW)
		elog(ERROR, " hash AM does not use P_NEW ");

	/* The requested block may be at most one past the current last block */
	if (blkno > relblocks)
		elog(ERROR, " access to noncontiguous page in hash index \" %s \" ",
			 RelationGetRelationName(rel));

	if (blkno != relblocks)
	{
		/* Page already exists physically; just read it */
		newbuf = ReadBuffer(rel, blkno);
	}
	else
	{
		/* smgr requires P_NEW in order to extend the relation */
		newbuf = ReadBuffer(rel, P_NEW);

		/* Verify that the extension landed on the block we asked for */
		if (BufferGetBlockNumber(newbuf) != blkno)
			elog(ERROR, " unexpected hash relation size: %u, should be %u ",
				 BufferGetBlockNumber(newbuf), blkno);
	}

	if (access != HASH_NOLOCK)
		LockBuffer(newbuf, access);

	/* buffer is pinned and holds the requested lock type */
	return newbuf;
}
/*
/*
* _hash_relbuf ( ) - - release a locked buffer .
* _hash_relbuf ( ) - - release a locked buffer .
*
*
@ -253,12 +303,11 @@ _hash_metapinit(Relation rel)
/*
/*
* We initialize the metapage , the first two bucket pages , and the
* We initialize the metapage , the first two bucket pages , and the
* first bitmap page in sequence , using P_NEW to cause smgrextend ( )
* first bitmap page in sequence , using _hash_getnewbuf to cause
* calls to occur . This ensures that the smgr level has the right
* smgrextend ( ) calls to occur . This ensures that the smgr level
* idea of the physical index length .
* has the right idea of the physical index length .
*/
*/
metabuf = _hash_getbuf ( rel , P_NEW , HASH_WRITE ) ;
metabuf = _hash_getnewbuf ( rel , HASH_METAPAGE , HASH_WRITE ) ;
Assert ( BufferGetBlockNumber ( metabuf ) = = HASH_METAPAGE ) ;
pg = BufferGetPage ( metabuf ) ;
pg = BufferGetPage ( metabuf ) ;
_hash_pageinit ( pg , BufferGetPageSize ( metabuf ) ) ;
_hash_pageinit ( pg , BufferGetPageSize ( metabuf ) ) ;
@ -311,8 +360,7 @@ _hash_metapinit(Relation rel)
*/
*/
for ( i = 0 ; i < = 1 ; i + + )
for ( i = 0 ; i < = 1 ; i + + )
{
{
buf = _hash_getbuf ( rel , P_NEW , HASH_WRITE ) ;
buf = _hash_getnewbuf ( rel , BUCKET_TO_BLKNO ( metap , i ) , HASH_WRITE ) ;
Assert ( BufferGetBlockNumber ( buf ) = = BUCKET_TO_BLKNO ( metap , i ) ) ;
pg = BufferGetPage ( buf ) ;
pg = BufferGetPage ( buf ) ;
_hash_pageinit ( pg , BufferGetPageSize ( buf ) ) ;
_hash_pageinit ( pg , BufferGetPageSize ( buf ) ) ;
pageopaque = ( HashPageOpaque ) PageGetSpecialPointer ( pg ) ;
pageopaque = ( HashPageOpaque ) PageGetSpecialPointer ( pg ) ;
@ -360,7 +408,6 @@ _hash_expandtable(Relation rel, Buffer metabuf)
Bucket old_bucket ;
Bucket old_bucket ;
Bucket new_bucket ;
Bucket new_bucket ;
uint32 spare_ndx ;
uint32 spare_ndx ;
BlockNumber firstblock = InvalidBlockNumber ;
BlockNumber start_oblkno ;
BlockNumber start_oblkno ;
BlockNumber start_nblkno ;
BlockNumber start_nblkno ;
uint32 maxbucket ;
uint32 maxbucket ;
@ -412,39 +459,15 @@ _hash_expandtable(Relation rel, Buffer metabuf)
if ( metap - > hashm_maxbucket > = ( uint32 ) 0x7FFFFFFE )
if ( metap - > hashm_maxbucket > = ( uint32 ) 0x7FFFFFFE )
goto fail ;
goto fail ;
/*
* If the split point is increasing ( hashm_maxbucket ' s log base 2
* increases ) , we need to allocate a new batch of bucket pages .
*/
new_bucket = metap - > hashm_maxbucket + 1 ;
spare_ndx = _hash_log2 ( new_bucket + 1 ) ;
if ( spare_ndx > metap - > hashm_ovflpoint )
{
Assert ( spare_ndx = = metap - > hashm_ovflpoint + 1 ) ;
/*
* The number of buckets in the new splitpoint is equal to the
* total number already in existence , i . e . new_bucket . Currently
* this maps one - to - one to blocks required , but someday we may need
* a more complicated calculation here .
*/
firstblock = _hash_alloc_buckets ( rel , new_bucket ) ;
if ( firstblock = = InvalidBlockNumber )
goto fail ; /* can't split due to BlockNumber overflow */
}
/*
/*
* Determine which bucket is to be split , and attempt to lock the old
* Determine which bucket is to be split , and attempt to lock the old
* bucket . If we can ' t get the lock , give up .
* bucket . If we can ' t get the lock , give up .
*
*
* The lock protects us against other backends , but not against our own
* The lock protects us against other backends , but not against our own
* backend . Must check for active scans separately .
* backend . Must check for active scans separately .
*
* Ideally we would lock the new bucket too before proceeding , but if we
* are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping isn ' t
* correct yet . For simplicity we update the metapage first and then
* lock . This should be okay because no one else should be trying to lock
* the new bucket yet . . .
*/
*/
new_bucket = metap - > hashm_maxbucket + 1 ;
old_bucket = ( new_bucket & metap - > hashm_lowmask ) ;
old_bucket = ( new_bucket & metap - > hashm_lowmask ) ;
start_oblkno = BUCKET_TO_BLKNO ( metap , old_bucket ) ;
start_oblkno = BUCKET_TO_BLKNO ( metap , old_bucket ) ;
@ -455,6 +478,45 @@ _hash_expandtable(Relation rel, Buffer metabuf)
if ( ! _hash_try_getlock ( rel , start_oblkno , HASH_EXCLUSIVE ) )
if ( ! _hash_try_getlock ( rel , start_oblkno , HASH_EXCLUSIVE ) )
goto fail ;
goto fail ;
/*
* Likewise lock the new bucket ( should never fail ) .
*
* Note : it is safe to compute the new bucket ' s blkno here , even though
* we may still need to update the BUCKET_TO_BLKNO mapping . This is
* because the current value of hashm_spares [ hashm_ovflpoint ] correctly
* shows where we are going to put a new splitpoint ' s worth of buckets .
*/
start_nblkno = BUCKET_TO_BLKNO ( metap , new_bucket ) ;
if ( _hash_has_active_scan ( rel , new_bucket ) )
elog ( ERROR , " scan in progress on supposedly new bucket " ) ;
if ( ! _hash_try_getlock ( rel , start_nblkno , HASH_EXCLUSIVE ) )
elog ( ERROR , " could not get lock on supposedly new bucket " ) ;
/*
* If the split point is increasing ( hashm_maxbucket ' s log base 2
* increases ) , we need to allocate a new batch of bucket pages .
*/
spare_ndx = _hash_log2 ( new_bucket + 1 ) ;
if ( spare_ndx > metap - > hashm_ovflpoint )
{
Assert ( spare_ndx = = metap - > hashm_ovflpoint + 1 ) ;
/*
* The number of buckets in the new splitpoint is equal to the
* total number already in existence , i . e . new_bucket . Currently
* this maps one - to - one to blocks required , but someday we may need
* a more complicated calculation here .
*/
if ( ! _hash_alloc_buckets ( rel , start_nblkno , new_bucket ) )
{
/* can't split due to BlockNumber overflow */
_hash_droplock ( rel , start_oblkno , HASH_EXCLUSIVE ) ;
_hash_droplock ( rel , start_nblkno , HASH_EXCLUSIVE ) ;
goto fail ;
}
}
/*
/*
* Okay to proceed with split . Update the metapage bucket mapping info .
* Okay to proceed with split . Update the metapage bucket mapping info .
*
*
@ -487,20 +549,6 @@ _hash_expandtable(Relation rel, Buffer metabuf)
metap - > hashm_ovflpoint = spare_ndx ;
metap - > hashm_ovflpoint = spare_ndx ;
}
}
/* now we can compute the new bucket's primary block number */
start_nblkno = BUCKET_TO_BLKNO ( metap , new_bucket ) ;
/* if we added a splitpoint, should match result of _hash_alloc_buckets */
if ( firstblock ! = InvalidBlockNumber & &
firstblock ! = start_nblkno )
elog ( PANIC , " unexpected hash relation size: %u, should be %u " ,
firstblock , start_nblkno ) ;
Assert ( ! _hash_has_active_scan ( rel , new_bucket ) ) ;
if ( ! _hash_try_getlock ( rel , start_nblkno , HASH_EXCLUSIVE ) )
elog ( PANIC , " could not get lock on supposedly new bucket " ) ;
/* Done mucking with metapage */
/* Done mucking with metapage */
END_CRIT_SECTION ( ) ;
END_CRIT_SECTION ( ) ;
@ -566,23 +614,16 @@ fail:
* for the purpose . OTOH , adding a splitpoint is a very infrequent operation ,
* for the purpose . OTOH , adding a splitpoint is a very infrequent operation ,
* so it may not be worth worrying about .
* so it may not be worth worrying about .
*
*
* Returns the first block number in the new splitpoint ' s range , or
* Returns TRUE if successful , or FALSE if allocation failed due to
* InvalidBlockNumber if allocation failed due to BlockNumber overflow .
* BlockNumber overflow .
*/
*/
static BlockNumber
static bool
_hash_alloc_buckets ( Relation rel , uint32 nblocks )
_hash_alloc_buckets ( Relation rel , BlockNumber firstblock , uint32 nblocks )
{
{
BlockNumber firstblock ;
BlockNumber lastblock ;
BlockNumber lastblock ;
BlockNumber endblock ;
BlockNumber endblock ;
char zerobuf [ BLCKSZ ] ;
char zerobuf [ BLCKSZ ] ;
/*
* Since we hold metapage lock , no one else is either splitting or
* allocating a new page in _hash_getovflpage ( ) ; hence it ' s safe to
* assume that the relation length isn ' t changing under us .
*/
firstblock = RelationGetNumberOfBlocks ( rel ) ;
lastblock = firstblock + nblocks - 1 ;
lastblock = firstblock + nblocks - 1 ;
/*
/*
@ -590,12 +631,12 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
* extend the index anymore .
* extend the index anymore .
*/
*/
if ( lastblock < firstblock | | lastblock = = InvalidBlockNumber )
if ( lastblock < firstblock | | lastblock = = InvalidBlockNumber )
return InvalidBlockNumber ;
return false ;
/* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
MemSet ( zerobuf , 0 , sizeof ( zerobuf ) ) ;
MemSet ( zerobuf , 0 , sizeof ( zerobuf ) ) ;
RelationOpenSmgr ( rel ) ;
/*
/*
* XXX If the extension results in creation of new segment files ,
* XXX If the extension results in creation of new segment files ,
* we have to make sure that each non - last file is correctly filled out to
* we have to make sure that each non - last file is correctly filled out to
@ -612,7 +653,7 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
smgrextend ( rel - > rd_smgr , lastblock , zerobuf , rel - > rd_istemp ) ;
smgrextend ( rel - > rd_smgr , lastblock , zerobuf , rel - > rd_istemp ) ;
return firstblock ;
return true ;
}
}