|
|
|
@ -260,39 +260,12 @@ static long hash_accesses, |
|
|
|
|
hash_expansions; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* access to parts of the hash table, allocated as a single chunk */ |
|
|
|
|
#define HASH_DIRECTORY_PTR(hashp) \ |
|
|
|
|
(((char *) (hashp)->hctl) + sizeof(HASHHDR)) |
|
|
|
|
|
|
|
|
|
#define HASH_SEGMENT_OFFSET(hctl, idx) \ |
|
|
|
|
(sizeof(HASHHDR) + \
|
|
|
|
|
((hctl)->dsize * sizeof(HASHSEGMENT)) + \
|
|
|
|
|
((hctl)->ssize * (idx) * sizeof(HASHBUCKET))) |
|
|
|
|
|
|
|
|
|
#define HASH_SEGMENT_PTR(hashp, idx) \ |
|
|
|
|
((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx))) |
|
|
|
|
|
|
|
|
|
#define HASH_SEGMENT_SIZE(hashp) \ |
|
|
|
|
((hashp)->ssize * sizeof(HASHBUCKET)) |
|
|
|
|
|
|
|
|
|
#define HASH_ELEMENTS_PTR(hashp, nsegs) \ |
|
|
|
|
((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs)) |
|
|
|
|
|
|
|
|
|
/* Each element has a HASHELEMENT header plus user data. */ |
|
|
|
|
#define HASH_ELEMENT_SIZE(hctl) \ |
|
|
|
|
(MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize)) |
|
|
|
|
|
|
|
|
|
#define HASH_ELEMENT_NEXT(hctl, num, ptr) \ |
|
|
|
|
((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl))) |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Private function prototypes |
|
|
|
|
*/ |
|
|
|
|
static void *DynaHashAlloc(Size size); |
|
|
|
|
static HASHSEGMENT seg_alloc(HTAB *hashp); |
|
|
|
|
static HASHELEMENT *element_alloc(HTAB *hashp, int nelem); |
|
|
|
|
static void element_add(HTAB *hashp, HASHELEMENT *firstElement, |
|
|
|
|
int nelem, int freelist_idx); |
|
|
|
|
static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx); |
|
|
|
|
static bool dir_realloc(HTAB *hashp); |
|
|
|
|
static bool expand_table(HTAB *hashp); |
|
|
|
|
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx); |
|
|
|
@ -307,9 +280,6 @@ static int next_pow2_int(long num); |
|
|
|
|
static void register_seq_scan(HTAB *hashp); |
|
|
|
|
static void deregister_seq_scan(HTAB *hashp); |
|
|
|
|
static bool has_seq_scans(HTAB *hashp); |
|
|
|
|
static void compute_buckets_and_segs(long nelem, long num_partitions, |
|
|
|
|
long ssize, |
|
|
|
|
int *nbuckets, int *nsegments); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@ -383,8 +353,6 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
{ |
|
|
|
|
HTAB *hashp; |
|
|
|
|
HASHHDR *hctl; |
|
|
|
|
int nelem_batch; |
|
|
|
|
bool prealloc; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Hash tables now allocate space for key and data, but you have to say |
|
|
|
@ -539,31 +507,9 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
hashp->isshared = false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Choose the number of entries to allocate at a time. */ |
|
|
|
|
nelem_batch = choose_nelem_alloc(info->entrysize); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Decide whether to pre-allocate elements. |
|
|
|
|
* |
|
|
|
|
* For a private hash table, preallocate the requested number of elements |
|
|
|
|
* if it's less than our chosen nelem_alloc. This avoids wasting space if |
|
|
|
|
* the caller correctly estimates a small table size. |
|
|
|
|
* |
|
|
|
|
* For a shared hash table, preallocate the requested number of elements. |
|
|
|
|
* This reduces problems with run-time out-of-shared-memory conditions. |
|
|
|
|
*/ |
|
|
|
|
prealloc = (flags & HASH_SHARED_MEM) || (nelem < nelem_batch); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Allocate the memory needed for hash header, directory, segments and |
|
|
|
|
* elements together. Use pointer arithmetic to arrive at the start of |
|
|
|
|
* each of these structures later. |
|
|
|
|
*/ |
|
|
|
|
if (!hashp->hctl) |
|
|
|
|
{ |
|
|
|
|
Size size = hash_get_size(info, flags, nelem, prealloc); |
|
|
|
|
|
|
|
|
|
hashp->hctl = (HASHHDR *) hashp->alloc(size); |
|
|
|
|
hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR)); |
|
|
|
|
if (!hashp->hctl) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_OUT_OF_MEMORY), |
|
|
|
@ -612,9 +558,6 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
hctl->keysize = info->keysize; |
|
|
|
|
hctl->entrysize = info->entrysize; |
|
|
|
|
|
|
|
|
|
/* remember how many elements to allocate at once */ |
|
|
|
|
hctl->nelem_alloc = nelem_batch; |
|
|
|
|
|
|
|
|
|
/* make local copies of heavily-used constant fields */ |
|
|
|
|
hashp->keysize = hctl->keysize; |
|
|
|
|
hashp->ssize = hctl->ssize; |
|
|
|
@ -624,6 +567,14 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
if (!init_htab(hashp, nelem)) |
|
|
|
|
elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For a shared hash table, preallocate the requested number of elements. |
|
|
|
|
* This reduces problems with run-time out-of-shared-memory conditions. |
|
|
|
|
* |
|
|
|
|
* For a non-shared hash table, preallocate the requested number of |
|
|
|
|
* elements if it's less than our chosen nelem_alloc. This avoids wasting |
|
|
|
|
* space if the caller correctly estimates a small table size. |
|
|
|
|
*/ |
|
|
|
|
if ((flags & HASH_SHARED_MEM) || |
|
|
|
|
nelem < hctl->nelem_alloc) |
|
|
|
|
{ |
|
|
|
@ -631,7 +582,6 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
freelist_partitions, |
|
|
|
|
nelem_alloc, |
|
|
|
|
nelem_alloc_first; |
|
|
|
|
void *ptr = NULL; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If hash table is partitioned, give each freelist an equal share of |
|
|
|
@ -656,28 +606,14 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) |
|
|
|
|
else |
|
|
|
|
nelem_alloc_first = nelem_alloc; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate the offset at which to find the first partition of |
|
|
|
|
* elements. We have to skip space for the header, segments and |
|
|
|
|
* buckets. |
|
|
|
|
*/ |
|
|
|
|
ptr = HASH_ELEMENTS_PTR(hashp, hctl->nsegs); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Assign the correct location of each parition within a pre-allocated |
|
|
|
|
* buffer. |
|
|
|
|
* |
|
|
|
|
* Actual memory allocation happens in ShmemInitHash for shared hash |
|
|
|
|
* tables or earlier in this function for non-shared hash tables. |
|
|
|
|
* |
|
|
|
|
* We just need to split that allocation into per-batch freelists. |
|
|
|
|
*/ |
|
|
|
|
for (i = 0; i < freelist_partitions; i++) |
|
|
|
|
{ |
|
|
|
|
int temp = (i == 0) ? nelem_alloc_first : nelem_alloc; |
|
|
|
|
|
|
|
|
|
element_add(hashp, (HASHELEMENT *) ptr, temp, i); |
|
|
|
|
ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr); |
|
|
|
|
if (!element_alloc(hashp, temp, i)) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_OUT_OF_MEMORY), |
|
|
|
|
errmsg("out of memory"))); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -766,16 +702,29 @@ init_htab(HTAB *hashp, long nelem) |
|
|
|
|
SpinLockInit(&(hctl->freeList[i].mutex)); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We've already calculated these parameters when we calculated how much |
|
|
|
|
* space to allocate in hash_get_size(). Be careful to keep these two |
|
|
|
|
* places in sync, so that we get the same parameters. |
|
|
|
|
* Allocate space for the next greater power of two number of buckets, |
|
|
|
|
* assuming a desired maximum load factor of 1. |
|
|
|
|
*/ |
|
|
|
|
nbuckets = next_pow2_int(nelem); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In a partitioned table, nbuckets must be at least equal to |
|
|
|
|
* num_partitions; were it less, keys with apparently different partition |
|
|
|
|
* numbers would map to the same bucket, breaking partition independence. |
|
|
|
|
* (Normally nbuckets will be much bigger; this is just a safety check.) |
|
|
|
|
*/ |
|
|
|
|
compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize, |
|
|
|
|
&nbuckets, &nsegs); |
|
|
|
|
while (nbuckets < hctl->num_partitions) |
|
|
|
|
nbuckets <<= 1; |
|
|
|
|
|
|
|
|
|
hctl->max_bucket = hctl->low_mask = nbuckets - 1; |
|
|
|
|
hctl->high_mask = (nbuckets << 1) - 1; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Figure number of directory segments needed, round up to a power of 2 |
|
|
|
|
*/ |
|
|
|
|
nsegs = (nbuckets - 1) / hctl->ssize + 1; |
|
|
|
|
nsegs = next_pow2_int(nsegs); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Make sure directory is big enough. If pre-allocated directory is too |
|
|
|
|
* small, choke (caller screwed up). |
|
|
|
@ -788,25 +737,26 @@ init_htab(HTAB *hashp, long nelem) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Assign a directory by making it point to the correct location in the |
|
|
|
|
* pre-allocated buffer. |
|
|
|
|
*/ |
|
|
|
|
/* Allocate a directory */ |
|
|
|
|
if (!(hashp->dir)) |
|
|
|
|
{ |
|
|
|
|
CurrentDynaHashCxt = hashp->hcxt; |
|
|
|
|
hashp->dir = (HASHSEGMENT *) HASH_DIRECTORY_PTR(hashp); |
|
|
|
|
hashp->dir = (HASHSEGMENT *) |
|
|
|
|
hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT)); |
|
|
|
|
if (!hashp->dir) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Assign initial segments, which are also pre-allocated */ |
|
|
|
|
i = 0; |
|
|
|
|
/* Allocate initial segments */ |
|
|
|
|
for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) |
|
|
|
|
{ |
|
|
|
|
*segp = (HASHSEGMENT) HASH_SEGMENT_PTR(hashp, i++); |
|
|
|
|
MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp)); |
|
|
|
|
*segp = seg_alloc(hashp); |
|
|
|
|
if (*segp == NULL) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
Assert(i == nsegs); |
|
|
|
|
/* Choose number of entries to allocate at a time */ |
|
|
|
|
hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize); |
|
|
|
|
|
|
|
|
|
#ifdef HASH_DEBUG |
|
|
|
|
fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n", |
|
|
|
@ -896,75 +846,16 @@ hash_select_dirsize(long num_entries) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* hash_get_size -- determine memory needed for a new dynamic hash table |
|
|
|
|
* |
|
|
|
|
* info: hash table parameters |
|
|
|
|
* flags: bitmask indicating which parameters to take from *info |
|
|
|
|
* nelem: maximum number of elements expected |
|
|
|
|
* prealloc: request preallocation of elements |
|
|
|
|
* |
|
|
|
|
* Compute the required initial memory allocation for a hashtable with the given |
|
|
|
|
* parameters. We need space for the HASHHDR, for the directory, segments and |
|
|
|
|
* preallocated elements. |
|
|
|
|
* |
|
|
|
|
* The hash table may be private or shared. For shared hash tables the directory |
|
|
|
|
* size is non-expansive, and we preallocate all elements (nelem). For private |
|
|
|
|
* hash tables, we preallocate elements only if the expected number of elements |
|
|
|
|
* is small (less than nelem_alloc). |
|
|
|
|
* Compute the required initial memory allocation for a shared-memory |
|
|
|
|
* hashtable with the given parameters. We need space for the HASHHDR |
|
|
|
|
* and for the (non expansible) directory. |
|
|
|
|
*/ |
|
|
|
|
Size |
|
|
|
|
hash_get_size(const HASHCTL *info, int flags, long nelem, bool prealloc) |
|
|
|
|
hash_get_shared_size(HASHCTL *info, int flags) |
|
|
|
|
{ |
|
|
|
|
int nbuckets; |
|
|
|
|
int nsegs; |
|
|
|
|
int num_partitions; |
|
|
|
|
long ssize; |
|
|
|
|
long dsize; |
|
|
|
|
Size elementSize = HASH_ELEMENT_SIZE(info); |
|
|
|
|
long nelem_prealloc = 0; |
|
|
|
|
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING |
|
|
|
|
/* shared hash tables have non-expansive directory */ |
|
|
|
|
if (flags & HASH_SHARED_MEM) |
|
|
|
|
{ |
|
|
|
|
Assert(flags & HASH_DIRSIZE); |
|
|
|
|
Assert(info->dsize == info->max_dsize); |
|
|
|
|
Assert(prealloc); |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* Non-shared hash tables may not specify dir size */ |
|
|
|
|
if (flags & HASH_DIRSIZE) |
|
|
|
|
dsize = info->dsize; |
|
|
|
|
else |
|
|
|
|
dsize = DEF_DIRSIZE; |
|
|
|
|
|
|
|
|
|
if (flags & HASH_SEGMENT) |
|
|
|
|
ssize = info->ssize; |
|
|
|
|
else |
|
|
|
|
ssize = DEF_SEGSIZE; |
|
|
|
|
|
|
|
|
|
if (flags & HASH_PARTITION) |
|
|
|
|
{ |
|
|
|
|
num_partitions = info->num_partitions; |
|
|
|
|
|
|
|
|
|
/* Number of entries should be atleast equal to the freelists */ |
|
|
|
|
if (nelem < NUM_FREELISTS) |
|
|
|
|
nelem = NUM_FREELISTS; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
num_partitions = 0; |
|
|
|
|
|
|
|
|
|
compute_buckets_and_segs(nelem, num_partitions, ssize, |
|
|
|
|
&nbuckets, &nsegs); |
|
|
|
|
|
|
|
|
|
/* initial_elems as false indicates no elements are to be pre-allocated */ |
|
|
|
|
if (prealloc) |
|
|
|
|
nelem_prealloc = nelem; |
|
|
|
|
|
|
|
|
|
return sizeof(HASHHDR) + dsize * sizeof(HASHSEGMENT) |
|
|
|
|
+ sizeof(HASHBUCKET) * ssize * nsegs |
|
|
|
|
+ nelem_prealloc * elementSize; |
|
|
|
|
Assert(flags & HASH_DIRSIZE); |
|
|
|
|
Assert(info->dsize == info->max_dsize); |
|
|
|
|
return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -1394,7 +1285,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx) |
|
|
|
|
* Failing because the needed element is in a different freelist is |
|
|
|
|
* not acceptable. |
|
|
|
|
*/ |
|
|
|
|
if ((newElement = element_alloc(hashp, hctl->nelem_alloc)) == NULL) |
|
|
|
|
if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx)) |
|
|
|
|
{ |
|
|
|
|
int borrow_from_idx; |
|
|
|
|
|
|
|
|
@ -1431,7 +1322,6 @@ get_hash_entry(HTAB *hashp, int freelist_idx) |
|
|
|
|
/* no elements available to borrow either, so out of memory */ |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
element_add(hashp, newElement, hctl->nelem_alloc, freelist_idx); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* remove entry from freelist, bump nentries */ |
|
|
|
@ -1810,43 +1700,29 @@ seg_alloc(HTAB *hashp) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* allocate some new elements |
|
|
|
|
* allocate some new elements and link them into the indicated free list |
|
|
|
|
*/ |
|
|
|
|
static HASHELEMENT * |
|
|
|
|
element_alloc(HTAB *hashp, int nelem) |
|
|
|
|
static bool |
|
|
|
|
element_alloc(HTAB *hashp, int nelem, int freelist_idx) |
|
|
|
|
{ |
|
|
|
|
HASHHDR *hctl = hashp->hctl; |
|
|
|
|
Size elementSize; |
|
|
|
|
HASHELEMENT *firstElement = NULL; |
|
|
|
|
HASHELEMENT *firstElement; |
|
|
|
|
HASHELEMENT *tmpElement; |
|
|
|
|
HASHELEMENT *prevElement; |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
if (hashp->isfixed) |
|
|
|
|
return NULL; |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
/* Each element has a HASHELEMENT header plus user data. */ |
|
|
|
|
elementSize = HASH_ELEMENT_SIZE(hctl); |
|
|
|
|
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); |
|
|
|
|
|
|
|
|
|
CurrentDynaHashCxt = hashp->hcxt; |
|
|
|
|
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize); |
|
|
|
|
|
|
|
|
|
if (!firstElement) |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
return firstElement; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* link the elements allocated by element_alloc into the indicated free list |
|
|
|
|
*/ |
|
|
|
|
static void |
|
|
|
|
element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx) |
|
|
|
|
{ |
|
|
|
|
HASHHDR *hctl = hashp->hctl; |
|
|
|
|
Size elementSize; |
|
|
|
|
HASHELEMENT *tmpElement; |
|
|
|
|
HASHELEMENT *prevElement; |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
/* Each element has a HASHELEMENT header plus user data. */ |
|
|
|
|
elementSize = HASH_ELEMENT_SIZE(hctl); |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
/* prepare to link all the new entries into the freelist */ |
|
|
|
|
prevElement = NULL; |
|
|
|
@ -1868,6 +1744,8 @@ element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx) |
|
|
|
|
|
|
|
|
|
if (IS_PARTITIONED(hctl)) |
|
|
|
|
SpinLockRelease(&hctl->freeList[freelist_idx].mutex); |
|
|
|
|
|
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@ -2079,34 +1957,3 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate the number of buckets and segments to store the given |
|
|
|
|
* number of elements in a hash table. Segments contain buckets which |
|
|
|
|
* in turn contain elements. |
|
|
|
|
*/ |
|
|
|
|
static void |
|
|
|
|
compute_buckets_and_segs(long nelem, long num_partitions, long ssize, |
|
|
|
|
int *nbuckets, int *nsegments) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* Allocate space for the next greater power of two number of buckets, |
|
|
|
|
* assuming a desired maximum load factor of 1. |
|
|
|
|
*/ |
|
|
|
|
*nbuckets = next_pow2_int(nelem); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In a partitioned table, nbuckets must be at least equal to |
|
|
|
|
* num_partitions; were it less, keys with apparently different partition |
|
|
|
|
* numbers would map to the same bucket, breaking partition independence. |
|
|
|
|
* (Normally nbuckets will be much bigger; this is just a safety check.) |
|
|
|
|
*/ |
|
|
|
|
while ((*nbuckets) < num_partitions) |
|
|
|
|
(*nbuckets) <<= 1; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Figure number of directory segments needed, round up to a power of 2 |
|
|
|
|
*/ |
|
|
|
|
*nsegments = ((*nbuckets) - 1) / ssize + 1; |
|
|
|
|
*nsegments = next_pow2_int(*nsegments); |
|
|
|
|
} |
|
|
|
|