I've attached a patch which implements Bob Jenkin's hash function for

PostgreSQL. This hash function replaces the one used by hash indexes and the catalog cache. Hash joins use a different, relatively poor-quality hash function, but I'll fix that later. As suggested by Tom Lane, this patch also changes the size of the fixed hash table used by the catalog cache to be a power-of-2 (instead of a prime: I chose 256 instead of 257). This allows the catcache to lookup hash buckets using a simple bitmask. This should improve the performance of the catalog cache slightly, since the previous method (modulo a prime) was slow. In my tests, this improves the performance of hash indexes by between 4% and 8%; the performance when using btree indexes or seqscans is basically unchanged. Neil Conway <neilconway@rogers.com>
24 years ago · 7ab7467318
parent 5b5cef9abd
commit 7ab7467318
10 changed files with 127 additions and 97 deletions
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.54 2002/03/02 21:39:16 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
 *
 * NOTES
 *	  This file contains only the public interface routines.
@ -165,9 +165,6 @@ hashinsert(PG_FUNCTION_ARGS)
 	char	   *nulls = (char *) PG_GETARG_POINTER(2);
 	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

-#ifdef NOT_USED
-	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
-#endif
 	InsertIndexResult res;
 	HashItem	hitem;
 	IndexTuple	itup;
@ -333,7 +330,6 @@ hashendscan(PG_FUNCTION_ARGS)

 /*
 *	hashmarkpos() -- save current scan position
- *
 */
 Datum
 hashmarkpos(PG_FUNCTION_ARGS)
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.31 2002/02/25 04:06:47 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
 *
 * NOTES
 *	  These functions are stored in pg_amproc.	For each operator class
@ -95,6 +95,8 @@ hashname(PG_FUNCTION_ARGS)
 {
 	char	   *key = NameStr(*PG_GETARG_NAME(0));

+	Assert(strlen(key) <= NAMEDATALEN);
+
 	return hash_any(key, strlen(key));
 }

@ -116,61 +118,89 @@ hashvarlena(PG_FUNCTION_ARGS)
 	return result;
 }

+/* This hash function was written by Bob Jenkins
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
+ * for PostgreSQL by Neil Conway. For more information on this
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html
+ */

 /*
- * hash_any --- compute a hash function for any specified chunk of memory
- *
- * This can be used as the underlying hash function for any pass-by-reference
- * data type in which there are no non-significant bits.
- *
- * (Comment from the original db3 hashing code: )
- *
- * This is INCREDIBLY ugly, but fast.  We break the string up into 8 byte
- * units.  On the first time through the loop we get the 'leftover bytes'
- * (strlen % 8).  On every later iteration, we perform 8 HASHC's so we handle
- * all 8 bytes.  Essentially, this saves us 7 cmp & branch instructions.  If
- * this routine is heavily used enough, it's worth the ugly coding.
- *
- * "OZ's original sdbm hash"
+ * mix -- mix 3 32-bit values reversibly.
+ * For every delta with one or two bits set, and the deltas of all three
+ * high bits or all three low bits, whether the original value of a,b,c
+ * is almost all zero or is uniformly distributed,
+ * - If mix() is run forward or backward, at least 32 bits in a,b,c
+ *   have at least 1/4 probability of changing.
+ * - If mix() is run forward, every bit of c will change between 1/3 and
+ *   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ */
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+ * hash_any() -- hash a variable-length key into a 32-bit value
+ *      k       : the key (the unaligned variable-length array of bytes)
+ *      len     : the length of the key, counting by bytes
+ * Returns a 32-bit value.  Every bit of the key affects every bit of
+ * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions. The best hash table sizes are powers
+ * of 2.  There is no need to do mod a prime (mod is sooo slow!).
+ * If you need less than 32 bits, use a bitmask.
 */
 Datum
-hash_any(const char *keydata, int keylen)
+hash_any(register const char *k, register int keylen)
 {
-	uint32		n;
-	int			loop;
-
-#define HASHC	n = *keydata++ + 65599 * n
+   register Datum a,b,c,len;
+
+   /* Set up the internal state */
+   len = keylen;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   /* Another arbitrary value. If the hash function is called
+    * multiple times, this could be the previously generated
+    * hash value; however, the interface currently doesn't allow
+    * this. AFAIK this isn't a big deal.
+    */
+   c = 3923095;

-	n = 0;
-	if (keylen > 0)
+   /* handle most of the key */
+   while (len >= 12)
   {
-		loop = (keylen + 8 - 1) >> 3;
+      a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
+      b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
+      c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }

-		switch (keylen & (8 - 1))
+   /* handle the last 11 bytes */
+   c += keylen;
+   switch(len)              /* all the case statements fall through */
   {
-			case 0:
-				do
-				{				/* All fall throughs */
-					HASHC;
-			case 7:
-					HASHC;
-			case 6:
-					HASHC;
-			case 5:
-					HASHC;
-			case 4:
-					HASHC;
-			case 3:
-					HASHC;
-			case 2:
-					HASHC;
-			case 1:
-					HASHC;
-				} while (--loop);
-		}
+   case 11: c+=((Datum)k[10]<<24);
+   case 10: c+=((Datum)k[9]<<16);
+   case 9 : c+=((Datum)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((Datum)k[7]<<24);
+   case 7 : b+=((Datum)k[6]<<16);
+   case 6 : b+=((Datum)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((Datum)k[3]<<24);
+   case 3 : a+=((Datum)k[2]<<16);
+   case 2 : a+=((Datum)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
   }
-
-#undef HASHC
-
-	PG_RETURN_UINT32(n);
+   mix(a,b,c);
+   /* report the result */
+   return c;
 }
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.23 2001/10/25 05:49:21 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.24 2002/03/06 20:49:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -49,7 +49,7 @@ _hash_doinsert(Relation rel, HashItem hitem)
 	itup = &(hitem->hash_itup);
 	if ((natts = rel->rd_rel->relnatts) != 1)
 		elog(ERROR, "Hash indices valid for only one index key.");
-	itup_scankey = _hash_mkscankey(rel, itup, metap);
+	itup_scankey = _hash_mkscankey(rel, itup);

 	/*
 	 * find the first page in the bucket chain containing this key and
@ -232,7 +232,7 @@ _hash_pgaddtup(Relation rel,
 			 RelationGetRelationName(rel));

 	/* write the buffer, but hold our lock */
-	_hash_wrtnorelbuf(rel, buf);
+	_hash_wrtnorelbuf(buf);

 	return itup_off;
 }
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.31 2001/10/25 05:49:21 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.32 2002/03/06 20:49:41 momjian Exp $
 *
 * NOTES
 *	  Overflow pages look like ordinary relation pages.
@ -73,11 +73,11 @@ _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
 	ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
 	ovflopaque->hasho_oaddr = oaddr;
 	ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
-	_hash_wrtnorelbuf(rel, ovflbuf);
+	_hash_wrtnorelbuf(ovflbuf);

 	/* logically chain overflow page to previous page */
 	pageopaque->hasho_nextblkno = ovflblkno;
-	_hash_wrtnorelbuf(rel, buf);
+	_hash_wrtnorelbuf(buf);
 	return ovflbuf;
 }

@ -574,7 +574,7 @@ _hash_squeezebucket(Relation rel,
 		 * the "next" ItemId.
 		 */
 		PageIndexTupleDelete(rpage, roffnum);
-		_hash_wrtnorelbuf(rel, rbuf);
+		_hash_wrtnorelbuf(rbuf);

 		/*
 		 * if the "read" page is now empty because of the deletion, free
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.34 2002/01/15 22:14:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.35 2002/03/06 20:49:42 momjian Exp $
 *
 * NOTES
 *	  Postgres hash pages look like ordinary relation pages.  The opaque
@ -151,7 +151,7 @@ _hash_metapinit(Relation rel)
 		elog(ERROR, "Problem with _hash_initbitmap.");

 	/* all done */
-	_hash_wrtnorelbuf(rel, metabuf);
+	_hash_wrtnorelbuf(metabuf);

 	/*
 	 * initialize the first two buckets
@ -260,7 +260,7 @@ _hash_wrtbuf(Relation rel, Buffer buf)
 *		or a reference to the buffer.
 */
 void
-_hash_wrtnorelbuf(Relation rel, Buffer buf)
+_hash_wrtnorelbuf(Buffer buf)
 {
 	BlockNumber blkno;

@ -383,7 +383,7 @@ _hash_pagedel(Relation rel, ItemPointer tid)
 	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

 	PageIndexTupleDelete(page, offno);
-	_hash_wrtnorelbuf(rel, buf);
+	_hash_wrtnorelbuf(buf);

 	if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
 	{
@ -505,7 +505,7 @@ _hash_splitpage(Relation rel,
 	nopaque->hasho_flag = LH_BUCKET_PAGE;
 	nopaque->hasho_oaddr = InvalidOvflAddress;
 	nopaque->hasho_bucket = nbucket;
-	_hash_wrtnorelbuf(rel, nbuf);
+	_hash_wrtnorelbuf(nbuf);

 	/*
 	 * make sure the old bucket isn't empty.  advance 'opage' and friends
@ -628,7 +628,7 @@ _hash_splitpage(Relation rel,
 				== InvalidOffsetNumber)
 				elog(ERROR, "_hash_splitpage: failed to add index item to %s",
 					 RelationGetRelationName(rel));
-			_hash_wrtnorelbuf(rel, nbuf);
+			_hash_wrtnorelbuf(nbuf);

 			/*
 			 * now delete the tuple from the old bucket.  after this
@ -640,7 +640,7 @@ _hash_splitpage(Relation rel,
 			 * instead of calling PageGetMaxOffsetNumber.
 			 */
 			PageIndexTupleDelete(opage, ooffnum);
-			_hash_wrtnorelbuf(rel, obuf);
+			_hash_wrtnorelbuf(obuf);
 			omaxoffnum = OffsetNumberPrev(omaxoffnum);

 			/*
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@ -1,14 +1,14 @@
 /*-------------------------------------------------------------------------
 *
- * btutils.c
- *	  Utility code for Postgres btree implementation.
+ * hashutil.c
+ *	  Utility code for Postgres hash implementation.
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.27 2001/10/06 23:21:43 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.28 2002/03/06 20:49:43 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -21,7 +21,7 @@


 ScanKey
-_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
+_hash_mkscankey(Relation rel, IndexTuple itup)
 {
 	ScanKey		skey;
 	TupleDesc	itupdesc;
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
- *	$Id: nodeHash.c,v 1.60 2001/10/25 05:49:28 momjian Exp $
+ *	$Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -115,8 +115,7 @@ ExecInitHash(Hash *node, EState *estate, Plan *parent)
 	HashState  *hashstate;
 	Plan	   *outerPlan;

-	SO1_printf("ExecInitHash: %s\n",
-			   "initializing hash node");
+	SO_printf("ExecInitHash: initializing hash node\n");

 	/*
 	 * assign the node's execution state
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.91 2002/03/06 06:10:21 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.92 2002/03/06 20:49:45 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -39,7 +39,7 @@
 /*
 * Constants related to size of the catcache.
 *
- * NCCBUCKETS should be prime and must be less than 64K (because
+ * NCCBUCKETS must be a power of two and must be less than 64K (because
 * SharedInvalCatcacheMsg crams hash indexes into a uint16 field).	In
 * practice it should be a lot less, anyway, to avoid chewing up too much
 * space on hash bucket headers.
@ -47,9 +47,16 @@
 * MAXCCTUPLES could be as small as a few hundred, if per-backend memory
 * consumption is at a premium.
 */
-#define NCCBUCKETS 257			/* Hash buckets per CatCache */
+#define NCCBUCKETS 256			/* Hash buckets per CatCache */
 #define MAXCCTUPLES 5000		/* Maximum # of tuples in all caches */

+/*
+ * Given a hash value and the size of the hash table, find the bucket
+ * in which the hash value belongs. Since the hash table must contain
+ * a power-of-2 number of elements, this is a simple bitmask.
+ */
+#define HASH_INDEX(h, sz) ((Index) ((h) & ((sz) - 1)))
+

 /*
 *		variables, macros and other stuff
@ -380,7 +387,7 @@ CatalogCacheIdInvalidate(int cacheId,
 		/*
 		 * inspect the proper hash bucket for matches
 		 */
-		hashIndex = (Index) (hashValue % (uint32) ccp->cc_size);
+		hashIndex = HASH_INDEX(hashValue, ccp->cc_nbuckets);

 		for (elt = DLGetHead(&ccp->cc_bucket[hashIndex]); elt; elt = nextelt)
 		{
@ -490,7 +497,7 @@ ResetCatalogCache(CatCache *cache)
 	int			i;

 	/* Remove each tuple in this cache, or at least mark it dead */
-	for (i = 0; i < cache->cc_size; i++)
+	for (i = 0; i < cache->cc_nbuckets; i++)
 	{
 		Dlelem	   *elt,
 				   *nextelt;
@ -578,7 +585,7 @@ CatalogCacheFlushRelation(Oid relId)
 			continue;			/* nope, leave it alone */

 		/* Yes, scan the tuples and remove those related to relId */
-		for (i = 0; i < cache->cc_size; i++)
+		for (i = 0; i < cache->cc_nbuckets; i++)
 		{
 			Dlelem	   *elt,
 					   *nextelt;
@ -640,7 +647,7 @@ CatalogCacheFlushRelation(Oid relId)
 #define InitCatCache_DEBUG1 \
 do { \
 	elog(DEBUG1, "InitCatCache: rel=%s id=%d nkeys=%d size=%d\n", \
-		cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_size); \
+		cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_nbuckets); \
 } while(0)

 #else
@ -705,7 +712,7 @@ InitCatCache(int id,
 	cp->cc_tupdesc = (TupleDesc) NULL;
 	cp->cc_reloidattr = reloidattr;
 	cp->cc_ntup = 0;
-	cp->cc_size = NCCBUCKETS;
+	cp->cc_nbuckets = NCCBUCKETS;
 	cp->cc_nkeys = nkeys;
 	for (i = 0; i < nkeys; ++i)
 		cp->cc_key[i] = key[i];
@ -985,7 +992,7 @@ SearchCatCache(CatCache *cache,
 	 * find the hash bucket in which to look for the tuple
 	 */
 	hashValue = CatalogCacheComputeHashValue(cache, cur_skey);
-	hashIndex = (Index) (hashValue % (uint32) cache->cc_size);
+	hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);

 	/*
 	 * scan the hash bucket until we find a match or exhaust our tuples
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: hash.h,v 1.43 2002/02/25 04:06:52 momjian Exp $
+ * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $
 *
 * NOTES
 *		modeled after Margo Seltzer's hash implementation for unix.
@ -265,8 +265,7 @@ extern Datum hashoidvector(PG_FUNCTION_ARGS);
 extern Datum hashint2vector(PG_FUNCTION_ARGS);
 extern Datum hashname(PG_FUNCTION_ARGS);
 extern Datum hashvarlena(PG_FUNCTION_ARGS);
-extern Datum hash_any(const char *keydata, int keylen);
-
+extern Datum hash_any(register const char *k, register int keylen);

 /* private routines */

@ -288,7 +287,7 @@ extern void _hash_metapinit(Relation rel);
 extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
 extern void _hash_relbuf(Relation rel, Buffer buf, int access);
 extern void _hash_wrtbuf(Relation rel, Buffer buf);
-extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
+extern void _hash_wrtnorelbuf(Buffer buf);
 extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
 				   int to_access);
 extern void _hash_pageinit(Page page, Size size);
@ -313,8 +312,7 @@ extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir,


 /* hashutil.c */
-extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
-				HashMetaPage metap);
+extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup);
 extern void _hash_freeskey(ScanKey skey);
 extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
 extern HashItem _hash_formitem(IndexTuple itup);
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@ -13,7 +13,7 @@
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: catcache.h,v 1.39 2002/03/03 17:47:56 tgl Exp $
+ * $Id: catcache.h,v 1.40 2002/03/06 20:49:46 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -40,8 +40,8 @@ typedef struct catcache
 	TupleDesc	cc_tupdesc;		/* tuple descriptor (copied from reldesc) */
 	int			cc_reloidattr;	/* AttrNumber of relation OID attr, or 0 */
 	int			cc_ntup;		/* # of tuples currently in this cache */
-	int			cc_size;		/* # of hash buckets in this cache */
-	int			cc_nkeys;		/* number of keys (1..4) */
+	int			cc_nbuckets;	/* # of hash buckets in this cache */
+	int			cc_nkeys;		/* # of keys (1..4) */
 	int			cc_key[4];		/* AttrNumber of each key */
 	PGFunction	cc_hashfunc[4]; /* hash function to use for each key */
 	ScanKeyData cc_skey[4];		/* precomputed key info for heap scans */