@ -40,6 +40,7 @@
int bytea_output = BYTEA_OUTPUT_HEX ;
typedef struct varlena unknown ;
typedef struct varlena string ;
typedef struct
{
@ -67,13 +68,14 @@ typedef struct
int last_returned ; /* Last comparison result (cache) */
bool cache_blob ; /* Does buf2 contain strxfrm() blob, etc? */
bool collate_c ;
bool bpchar ; /* Sorting bpchar, not varchar/text/bytea? */
hyperLogLogState abbr_card ; /* Abbreviated key cardinality state */
hyperLogLogState full_card ; /* Full key cardinality state */
double prop_card ; /* Required cardinality proportion */
# ifdef HAVE_LOCALE_T
pg_locale_t locale ;
# endif
} Text SortSupport;
} String SortSupport;
/*
* This should be large enough that most strings will fit , but small enough
@ -87,12 +89,15 @@ typedef struct
# define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
# define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
static void btsortsupport_worker ( SortSupport ssup , Oid collid ) ;
static int bttextfastcmp_c ( Datum x , Datum y , SortSupport ssup ) ;
static int bttextfastcmp_locale ( Datum x , Datum y , SortSupport ssup ) ;
static int bttextcmp_abbrev ( Datum x , Datum y , SortSupport ssup ) ;
static Datum bttext_abbrev_convert ( Datum original , SortSupport ssup ) ;
static bool bttext_abbrev_abort ( int memtupcount , SortSupport ssup ) ;
# define DatumGetStringP(X) ((string *) PG_DETOAST_DATUM(X))
# define DatumGetStringPP(X) ((string *) PG_DETOAST_DATUM_PACKED(X))
static int varstrfastcmp_c ( Datum x , Datum y , SortSupport ssup ) ;
static int bpcharfastcmp_c ( Datum x , Datum y , SortSupport ssup ) ;
static int varstrfastcmp_locale ( Datum x , Datum y , SortSupport ssup ) ;
static int varstrcmp_abbrev ( Datum x , Datum y , SortSupport ssup ) ;
static Datum varstr_abbrev_convert ( Datum original , SortSupport ssup ) ;
static bool varstr_abbrev_abort ( int memtupcount , SortSupport ssup ) ;
static int32 text_length ( Datum str ) ;
static text * text_catenate ( text * t1 , text * t2 ) ;
static text * text_substring ( Datum str ,
@ -1738,19 +1743,30 @@ bttextsortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo ( ssup - > ssup_cxt ) ;
btsortsupport_worker ( ssup , collid ) ;
/* Use generic string SortSupport */
varstr_sortsupport ( ssup , collid , false ) ;
MemoryContextSwitchTo ( oldcontext ) ;
PG_RETURN_VOID ( ) ;
}
static void
btsortsupport_worker ( SortSupport ssup , Oid collid )
/*
* Generic sortsupport interface for character type ' s operator classes .
* Includes locale support , and support for BpChar semantics ( i . e . removing
* trailing spaces before comparison ) .
*
* Relies on the assumption that text , VarChar , BpChar , and bytea all have the
* same representation . Callers that always use the C collation ( e . g .
* non - collatable type callers like bytea ) may have NUL bytes in their strings ;
* this will not work with any other collation , though .
*/
void
varstr_sortsupport ( SortSupport ssup , Oid collid , bool bpchar )
{
bool abbreviate = ssup - > abbreviate ;
bool collate_c = false ;
TextSortSupport * tss ;
StringSortSupport * s ss;
# ifdef HAVE_LOCALE_T
pg_locale_t locale = 0 ;
@ -1762,20 +1778,25 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
* overhead of a trip through the fmgr layer for every comparison , which
* can be substantial .
*
* Most typically , we ' ll set the comparator to bttextfastcmp_locale , which
* uses strcoll ( ) to perform comparisons . However , if LC_COLLATE = C , we
* can make things quite a bit faster with bttextfastcmp_c , which uses
* memcmp ( ) rather than strcoll ( ) .
* Most typically , we ' ll set the comparator to varstrfastcmp_locale , which
* uses strcoll ( ) to perform comparisons and knows about the special
* requirements of BpChar callers . However , if LC_COLLATE = C , we can make
* things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c ,
* both of which use memcmp ( ) rather than strcoll ( ) .
*
* There is a further exception on Windows . When the database encoding is
* UTF - 8 and we are not using the C collation , complex hacks are required .
* We don ' t currently have a comparator that handles that case , so we fall
* back on the slow method of having the sort code invoke bttextcmp ( ) via
* the fmgr trampoline .
* back on the slow method of having the sort code invoke bttextcmp ( ) ( in
* the case of text ) via the fmgr trampoline .
*/
if ( lc_collate_is_c ( collid ) )
{
ssup - > comparator = bttextfastcmp_c ;
if ( ! bpchar )
ssup - > comparator = varstrfastcmp_c ;
else
ssup - > comparator = bpcharfastcmp_c ;
collate_c = true ;
}
# ifdef WIN32
@ -1784,7 +1805,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
# endif
else
{
ssup - > comparator = bttext fastcmp_locale;
ssup - > comparator = varstr fastcmp_locale;
/*
* We need a collation - sensitive comparison . To make things faster ,
@ -1825,24 +1846,25 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
/*
* If we ' re using abbreviated keys , or if we ' re using a locale - aware
* comparison , we need to initialize a TextSortSupport object . Both cases
* will make use of the temporary buffers we initialize here for scratch
* space , and the abbreviation case requires additional state .
* comparison , we need to initialize a StringSortSupport object . Both
* cases will make use of the temporary buffers we initialize here for
* scratch space ( and to detect requirement for BpChar semantics from
* caller ) , and the abbreviation case requires additional state .
*/
if ( abbreviate | | ! collate_c )
{
t ss = palloc ( sizeof ( Text SortSupport) ) ;
t ss- > buf1 = palloc ( TEXTBUFLEN ) ;
t ss- > buflen1 = TEXTBUFLEN ;
t ss- > buf2 = palloc ( TEXTBUFLEN ) ;
t ss- > buflen2 = TEXTBUFLEN ;
s ss = palloc ( sizeof ( String SortSupport) ) ;
s ss- > buf1 = palloc ( TEXTBUFLEN ) ;
s ss- > buflen1 = TEXTBUFLEN ;
s ss- > buf2 = palloc ( TEXTBUFLEN ) ;
s ss- > buflen2 = TEXTBUFLEN ;
/* Start with invalid values */
t ss- > last_len1 = - 1 ;
t ss- > last_len2 = - 1 ;
s ss- > last_len1 = - 1 ;
s ss- > last_len2 = - 1 ;
/* Initialize */
t ss- > last_returned = 0 ;
s ss- > last_returned = 0 ;
# ifdef HAVE_LOCALE_T
t ss- > locale = locale ;
s ss- > locale = locale ;
# endif
/*
* To avoid somehow confusing a strxfrm ( ) blob and an original string ,
@ -1858,9 +1880,10 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
*
* Arbitrarily initialize cache_blob to true .
*/
tss - > cache_blob = true ;
tss - > collate_c = collate_c ;
ssup - > ssup_extra = tss ;
sss - > cache_blob = true ;
sss - > collate_c = collate_c ;
sss - > bpchar = bpchar ;
ssup - > ssup_extra = sss ;
/*
* If possible , plan to use the abbreviated keys optimization . The
@ -1869,13 +1892,13 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
*/
if ( abbreviate )
{
t ss- > prop_card = 0.20 ;
initHyperLogLog ( & t ss- > abbr_card , 10 ) ;
initHyperLogLog ( & t ss- > full_card , 10 ) ;
s ss- > prop_card = 0.20 ;
initHyperLogLog ( & s ss- > abbr_card , 10 ) ;
initHyperLogLog ( & s ss- > full_card , 10 ) ;
ssup - > abbrev_full_comparator = ssup - > comparator ;
ssup - > comparator = bttext cmp_abbrev;
ssup - > abbrev_converter = bttext _abbrev_convert;
ssup - > abbrev_abort = bttext _abbrev_abort;
ssup - > comparator = varstr cmp_abbrev;
ssup - > abbrev_converter = varstr _abbrev_convert;
ssup - > abbrev_abort = varstr _abbrev_abort;
}
}
}
@ -1884,10 +1907,10 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
* sortsupport comparison func ( for C locale case )
*/
static int
bttext fastcmp_c( Datum x , Datum y , SortSupport ssup )
varstr fastcmp_c( Datum x , Datum y , SortSupport ssup )
{
text * arg1 = DatumGetText PP ( x ) ;
text * arg2 = DatumGetText PP ( y ) ;
string * arg1 = DatumGetString PP ( x ) ;
string * arg2 = DatumGetString PP ( y ) ;
char * a1p ,
* a2p ;
int len1 ,
@ -1913,16 +1936,53 @@ bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
return result ;
}
/*
 * bpcharfastcmp_c
 *
 * sortsupport comparison func (for BpChar C locale case)
 *
 * BpChar hands its sortsupport over to this module; this is the
 * varstr_sortsupport specialization for BpChar, patterned after
 * internal_bpchar_pattern_compare().
 *
 * x, y:	the two datums to compare; each is fetched with DatumGetBpCharPP().
 * ssup:	not consulted here; present because this function is installed as
 *			a SortSupport comparator.
 *
 * Lengths are taken from bpchartruelen() rather than from the raw payload
 * size.  The common prefix is compared bytewise with memcmp(); if it ties,
 * the datum with the smaller bpchartruelen() length sorts first.  Returns a
 * negative, zero, or positive value accordingly.
 */
static int
bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
{
	BpChar	   *lhs = DatumGetBpCharPP(x);
	BpChar	   *rhs = DatumGetBpCharPP(y);
	char	   *lhs_data = VARDATA_ANY(lhs);
	char	   *rhs_data = VARDATA_ANY(rhs);
	int			lhs_len = bpchartruelen(lhs_data, VARSIZE_ANY_EXHDR(lhs));
	int			rhs_len = bpchartruelen(rhs_data, VARSIZE_ANY_EXHDR(rhs));
	int			cmp;

	/* Bytewise comparison over the shared prefix */
	cmp = memcmp(lhs_data, rhs_data, Min(lhs_len, rhs_len));

	/* Equal prefix: the shorter (true-length) value sorts first */
	if (cmp == 0)
	{
		if (lhs_len < rhs_len)
			cmp = -1;
		else if (lhs_len > rhs_len)
			cmp = 1;
	}

	/*
	 * We can't afford to leak memory here.  Free a fetched value only when
	 * its pointer differs from the datum that was passed in.
	 */
	if (PointerGetDatum(lhs) != x)
		pfree(lhs);
	if (PointerGetDatum(rhs) != y)
		pfree(rhs);

	return cmp;
}
/*
* sortsupport comparison func ( for locale case )
*/
static int
bttextfastcmp_locale ( Datum x , Datum y , SortSupport ssup )
varstr fastcmp_locale( Datum x , Datum y , SortSupport ssup )
{
text * arg1 = DatumGetTextPP ( x ) ;
text * arg2 = DatumGetTextPP ( y ) ;
string * arg1 = DatumGetString PP ( x ) ;
string * arg2 = DatumGetString PP ( y ) ;
bool arg1_match ;
TextSortSupport * tss = ( TextSortSupport * ) ssup - > ssup_extra ;
StringSortSupport * sss = ( String SortSupport * ) ssup - > ssup_extra ;
/* working state */
char * a1p ,
@ -1944,41 +2004,56 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* No change in buf1 or buf2 contents , so avoid changing last_len1 or
* last_len2 . Existing contents of buffers might still be used by next
* call .
*
* It ' s fine to allow the comparison of BpChar padding bytes here , even
* though that implies that the memcmp ( ) will usually be performed for
* BpChar callers ( though multibyte characters could still prevent that
* from occurring ) . The memcmp ( ) is still very cheap , and BpChar ' s
* funny semantics have us remove trailing spaces ( not limited to
* padding ) , so we need make no distinction between padding space
* characters and " real " space characters .
*/
result = 0 ;
goto done ;
}
if ( len1 > = tss - > buflen1 )
if ( s ss- > bpchar )
{
pfree ( tss - > buf1 ) ;
tss - > buf len1 = Max ( len1 + 1 , Min ( tss - > buflen1 * 2 , MaxAllocSize ) ) ;
tss - > buf1 = MemoryContextAlloc ( ssup - > ssup_cxt , tss - > buflen1 ) ;
/* Get true number of bytes, ignoring trailing spaces */
len1 = bpchartruelen ( a1p , len1 ) ;
len2 = bpchartruelen ( a2p , len2 ) ;
}
if ( len2 > = tss - > buflen2 )
if ( len1 > = sss - > buflen1 )
{
pfree ( tss - > buf2 ) ;
tss - > buflen2 = Max ( len2 + 1 , Min ( tss - > buflen2 * 2 , MaxAllocSize ) ) ;
tss - > buf2 = MemoryContextAlloc ( ssup - > ssup_cxt , tss - > buflen2 ) ;
pfree ( sss - > buf1 ) ;
sss - > buflen1 = Max ( len1 + 1 , Min ( sss - > buflen1 * 2 , MaxAllocSize ) ) ;
sss - > buf1 = MemoryContextAlloc ( ssup - > ssup_cxt , sss - > buflen1 ) ;
}
if ( len2 > = sss - > buflen2 )
{
pfree ( sss - > buf2 ) ;
sss - > buflen2 = Max ( len2 + 1 , Min ( sss - > buflen2 * 2 , MaxAllocSize ) ) ;
sss - > buf2 = MemoryContextAlloc ( ssup - > ssup_cxt , sss - > buflen2 ) ;
}
/*
* We ' re likely to be asked to compare the same strings repeatedly , and
* memcmp ( ) is so much cheaper than strcoll ( ) that it pays to try to cache
* comparisons , even though in general there is no reason to think that
* that will work out ( every text datum may be unique ) . Caching does not
* that will work out ( every string datum may be unique ) . Caching does not
* slow things down measurably when it doesn ' t work out , and can speed
* things up by rather a lot when it does . In part , this is because the
* memcmp ( ) compares data from cachelines that are needed in L1 cache even
* when the last comparison ' s result cannot be reused .
*/
arg1_match = true ;
if ( len1 ! = t ss- > last_len1 | | memcmp ( t ss- > buf1 , a1p , len1 ) ! = 0 )
if ( len1 ! = s ss- > last_len1 | | memcmp ( s ss- > buf1 , a1p , len1 ) ! = 0 )
{
arg1_match = false ;
memcpy ( t ss- > buf1 , a1p , len1 ) ;
t ss- > buf1 [ len1 ] = ' \0 ' ;
t ss- > last_len1 = len1 ;
memcpy ( s ss- > buf1 , a1p , len1 ) ;
s ss- > buf1 [ len1 ] = ' \0 ' ;
s ss- > last_len1 = len1 ;
}
/*
@ -1987,25 +2062,25 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* it seems ( at least with moderate to low cardinality sets ) , because
* quicksort compares the same pivot against many values .
*/
if ( len2 ! = t ss- > last_len2 | | memcmp ( t ss- > buf2 , a2p , len2 ) ! = 0 )
if ( len2 ! = s ss- > last_len2 | | memcmp ( s ss- > buf2 , a2p , len2 ) ! = 0 )
{
memcpy ( t ss- > buf2 , a2p , len2 ) ;
t ss- > buf2 [ len2 ] = ' \0 ' ;
t ss- > last_len2 = len2 ;
memcpy ( s ss- > buf2 , a2p , len2 ) ;
s ss- > buf2 [ len2 ] = ' \0 ' ;
s ss- > last_len2 = len2 ;
}
else if ( arg1_match & & ! t ss- > cache_blob )
else if ( arg1_match & & ! s ss- > cache_blob )
{
/* Use result cached following last actual strcoll() call */
result = t ss- > last_returned ;
result = s ss- > last_returned ;
goto done ;
}
# ifdef HAVE_LOCALE_T
if ( t ss- > locale )
result = strcoll_l ( t ss- > buf1 , t ss- > buf2 , t ss- > locale ) ;
if ( s ss- > locale )
result = strcoll_l ( s ss- > buf1 , sss - > buf2 , s ss- > locale ) ;
else
# endif
result = strcoll ( t ss- > buf1 , t ss- > buf2 ) ;
result = strcoll ( s ss- > buf1 , s ss- > buf2 ) ;
/*
* In some locales strcoll ( ) can claim that nonidentical strings are
@ -2013,11 +2088,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* follow Perl ' s lead and sort " equal " strings according to strcmp ( ) .
*/
if ( result = = 0 )
result = strcmp ( t ss- > buf1 , t ss- > buf2 ) ;
result = strcmp ( s ss- > buf1 , s ss- > buf2 ) ;
/* Cache result, perhaps saving an expensive strcoll() call next time */
t ss- > cache_blob = false ;
t ss- > last_returned = result ;
s ss- > cache_blob = false ;
s ss- > last_returned = result ;
done :
/* We can't afford to leak memory here. */
if ( PointerGetDatum ( arg1 ) ! = x )
@ -2032,13 +2107,14 @@ done:
* Abbreviated key comparison func
*/
static int
bttext cmp_abbrev( Datum x , Datum y , SortSupport ssup )
varstr cmp_abbrev( Datum x , Datum y , SortSupport ssup )
{
/*
* When 0 is returned , the core system will call bttextfastcmp_c ( ) or
* bttextfastcmp_locale ( ) . Even a strcmp ( ) on two non - truncated strxfrm ( )
* blobs cannot indicate * equality * authoritatively , for the same reason
* that there is a strcoll ( ) tie - breaker call to strcmp ( ) in varstr_cmp ( ) .
* When 0 is returned , the core system will call varstrfastcmp_c ( )
* ( bpcharfastcmp_c ( ) in BpChar case ) or varstrfastcmp_locale ( ) . Even a
* strcmp ( ) on two non - truncated strxfrm ( ) blobs cannot indicate * equality *
* authoritatively , for the same reason that there is a strcoll ( )
* tie - breaker call to strcmp ( ) in varstr_cmp ( ) .
*/
if ( x > y )
return 1 ;
@ -2049,16 +2125,17 @@ bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
}
/*
* Conversion routine for sortsupport . Converts original text to abbreviated
* key representation . Our encoding strategy is simple - - pack the first 8
* bytes of a strxfrm ( ) blob into a Datum ( on little - endian machines , the 8
* bytes are stored in reverse order ) , and treat it as an unsigned integer .
* Conversion routine for sortsupport . Converts original to abbreviated key
* representation . Our encoding strategy is simple - - pack the first 8 bytes
* of a strxfrm ( ) blob into a Datum ( on little - endian machines , the 8 bytes are
* stored in reverse order ) , and treat it as an unsigned integer . When the " C "
* locale is used , or in case of bytea , just memcpy ( ) from original instead .
*/
static Datum
bttext _abbrev_convert( Datum original , SortSupport ssup )
varstr _abbrev_convert( Datum original , SortSupport ssup )
{
TextSortSupport * tss = ( Text SortSupport * ) ssup - > ssup_extra ;
text * authoritative = DatumGetText PP ( original ) ;
StringSortSupport * sss = ( String SortSupport * ) ssup - > ssup_extra ;
string * authoritative = DatumGetString PP ( original ) ;
char * authoritative_data = VARDATA_ANY ( authoritative ) ;
/* working state */
@ -2072,13 +2149,38 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
memset ( pres , 0 , sizeof ( Datum ) ) ;
len = VARSIZE_ANY_EXHDR ( authoritative ) ;
/* Get number of bytes, ignoring trailing spaces */
if ( sss - > bpchar )
len = bpchartruelen ( authoritative_data , len ) ;
/*
* If we ' re using the C collation , use memcpy ( ) , rather than strxfrm ( ) , to
* abbreviate keys . The full comparator for the C locale is always
* memcmp ( ) , and we can ' t risk having this give a different answer .
* Besides , this should be faster , too .
* memcmp ( ) . It would be incorrect to allow bytea callers ( callers that
* always force the C collation - - bytea isn ' t a collatable type , but this
* approach is convenient ) to use strxfrm ( ) . This is because bytea strings
* may contain NUL bytes . Besides , this should be faster , too .
*
* More generally , it ' s okay that bytea callers can have NUL bytes in
* strings because varstrcmp_abbrev ( ) need not make a distinction between
* terminating NUL bytes , and NUL bytes representing actual NULs in the
* authoritative representation . Hopefully a comparison at or past one
* abbreviated key ' s terminating NUL byte will resolve the comparison
* without consulting the authoritative representation ; specifically , some
* later non - NUL byte in the longer string can resolve the comparison
* against a subsequent terminating NUL in the shorter string . There will
* usually be what is effectively a " length-wise " resolution there and
* then .
*
* If that doesn ' t work out - - if all bytes in the longer string positioned
* at or past the offset of the smaller string ' s ( first ) terminating NUL
* are actually representative of NUL bytes in the authoritative binary
* string ( perhaps with some * terminating * NUL bytes towards the end of the
* longer string iff it happens to still be small ) - - then an authoritative
* tie - breaker will happen , and do the right thing : explicitly consider
* string length .
*/
if ( tss - > collate_c )
if ( s ss- > collate_c )
memcpy ( pres , authoritative_data , Min ( len , sizeof ( Datum ) ) ) ;
else
{
@ -2088,50 +2190,50 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* We ' re not using the C collation , so fall back on strxfrm .
*/
/* By convention, we use buffer 1 to store and NUL-terminate text */
if ( len > = t ss- > buflen1 )
/* By convention, we use buffer 1 to store and NUL-terminate */
if ( len > = s ss- > buflen1 )
{
pfree ( t ss- > buf1 ) ;
t ss- > buflen1 = Max ( len + 1 , Min ( t ss- > buflen1 * 2 , MaxAllocSize ) ) ;
t ss- > buf1 = palloc ( t ss- > buflen1 ) ;
pfree ( s ss- > buf1 ) ;
s ss- > buflen1 = Max ( len + 1 , Min ( s ss- > buflen1 * 2 , MaxAllocSize ) ) ;
s ss- > buf1 = palloc ( s ss- > buflen1 ) ;
}
/* Might be able to reuse strxfrm() blob from last call */
if ( t ss- > last_len1 = = len & & t ss- > cache_blob & &
memcmp ( t ss- > buf1 , authoritative_data , len ) = = 0 )
if ( s ss- > last_len1 = = len & & s ss- > cache_blob & &
memcmp ( s ss- > buf1 , authoritative_data , len ) = = 0 )
{
memcpy ( pres , t ss- > buf2 , Min ( sizeof ( Datum ) , t ss- > last_len2 ) ) ;
memcpy ( pres , s ss- > buf2 , Min ( sizeof ( Datum ) , s ss- > last_len2 ) ) ;
/* No change affecting cardinality, so no hashing required */
goto done ;
}
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
memcpy ( t ss- > buf1 , authoritative_data , len ) ;
t ss- > buf1 [ len ] = ' \0 ' ;
t ss- > last_len1 = len ;
memcpy ( s ss- > buf1 , authoritative_data , len ) ;
s ss- > buf1 [ len ] = ' \0 ' ;
s ss- > last_len1 = len ;
for ( ; ; )
{
# ifdef HAVE_LOCALE_T
if ( t ss- > locale )
bsize = strxfrm_l ( t ss- > buf2 , t ss- > buf1 ,
t ss- > buflen2 , t ss- > locale ) ;
if ( s ss- > locale )
bsize = strxfrm_l ( s ss- > buf2 , s ss- > buf1 ,
s ss- > buflen2 , s ss- > locale ) ;
else
# endif
bsize = strxfrm ( t ss- > buf2 , t ss- > buf1 , t ss- > buflen2 ) ;
bsize = strxfrm ( s ss- > buf2 , sss - > buf1 , s ss- > buflen2 ) ;
t ss- > last_len2 = bsize ;
if ( bsize < t ss- > buflen2 )
s ss- > last_len2 = bsize ;
if ( bsize < s ss- > buflen2 )
break ;
/*
* The C standard states that the contents of the buffer is now
* unspecified . Grow buffer , and retry .
*/
pfree ( t ss- > buf2 ) ;
t ss- > buflen2 = Max ( bsize + 1 ,
Min ( t ss- > buflen2 * 2 , MaxAllocSize ) ) ;
t ss- > buf2 = palloc ( t ss- > buflen2 ) ;
pfree ( s ss- > buf2 ) ;
s ss- > buflen2 = Max ( bsize + 1 ,
Min ( s ss- > buflen2 * 2 , MaxAllocSize ) ) ;
s ss- > buf2 = palloc ( s ss- > buflen2 ) ;
}
/*
@ -2139,8 +2241,11 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* strxfrm ( ) blob is itself NUL terminated , leaving no danger of
* misinterpreting any NUL bytes not intended to be interpreted as
* logically representing termination .
*
* ( Actually , even if there were NUL bytes in the blob it would be
* okay . See remarks on bytea case above . )
*/
memcpy ( pres , tss - > buf2 , Min ( sizeof ( Datum ) , bsize ) ) ;
memcpy ( pres , s ss- > buf2 , Min ( sizeof ( Datum ) , bsize ) ) ;
}
/*
@ -2148,7 +2253,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* authoritative keys using HyperLogLog . Used as cheap insurance against
* the worst case , where we do many string transformations for no saving
* in full strcoll ( ) - based comparisons . These statistics are used by
* bttext _abbrev_abort( ) .
* varstr _abbrev_abort( ) .
*
* First , Hash key proper , or a significant fraction of it . Mix in length
* in order to compensate for cases where differences are past
@ -2160,7 +2265,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
if ( len > PG_CACHE_LINE_SIZE )
hash ^ = DatumGetUInt32 ( hash_uint32 ( ( uint32 ) len ) ) ;
addHyperLogLog ( & t ss- > full_card , hash ) ;
addHyperLogLog ( & s ss- > full_card , hash ) ;
/* Hash abbreviated key */
# if SIZEOF_DATUM == 8
@ -2176,15 +2281,15 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
hash = DatumGetUInt32 ( hash_uint32 ( ( uint32 ) res ) ) ;
# endif
addHyperLogLog ( & t ss- > abbr_card , hash ) ;
addHyperLogLog ( & s ss- > abbr_card , hash ) ;
/* Cache result, perhaps saving an expensive strxfrm() call next time */
t ss- > cache_blob = true ;
s ss- > cache_blob = true ;
done :
/*
* Byteswap on little - endian machines .
*
* This is needed so that bttext cmp_abbrev( ) ( an unsigned integer 3 - way
* This is needed so that varstr cmp_abbrev( ) ( an unsigned integer 3 - way
* comparator ) works correctly on all platforms . If we didn ' t do this ,
* the comparator would have to call memcmp ( ) with a pair of pointers to
* the first byte of each abbreviated key , which is slower .
@ -2204,9 +2309,9 @@ done:
* should be aborted , based on its projected effectiveness .
*/
static bool
bttext _abbrev_abort( int memtupcount , SortSupport ssup )
varstr _abbrev_abort( int memtupcount , SortSupport ssup )
{
TextSortSupport * tss = ( Text SortSupport * ) ssup - > ssup_extra ;
StringSortSupport * sss = ( String SortSupport * ) ssup - > ssup_extra ;
double abbrev_distinct ,
key_distinct ;
@ -2216,8 +2321,8 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
if ( memtupcount < 100 )
return false ;
abbrev_distinct = estimateHyperLogLog ( & t ss- > abbr_card ) ;
key_distinct = estimateHyperLogLog ( & t ss- > full_card ) ;
abbrev_distinct = estimateHyperLogLog ( & s ss- > abbr_card ) ;
key_distinct = estimateHyperLogLog ( & s ss- > full_card ) ;
/*
* Clamp cardinality estimates to at least one distinct value . While
@ -2240,10 +2345,10 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
{
double norm_abbrev_card = abbrev_distinct / ( double ) memtupcount ;
elog ( LOG , " bttext _abbrev: abbrev_distinct after %d: %f "
elog ( LOG , " varstr _abbrev: abbrev_distinct after %d: %f "
" (key_distinct: %f, norm_abbrev_card: %f, prop_card: %f) " ,
memtupcount , abbrev_distinct , key_distinct , norm_abbrev_card ,
t ss- > prop_card ) ;
s ss- > prop_card ) ;
}
# endif
@ -2263,7 +2368,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
* abbreviated comparison with a cheap memcmp ( ) - based authoritative
* resolution are equivalent .
*/
if ( abbrev_distinct > key_distinct * t ss- > prop_card )
if ( abbrev_distinct > key_distinct * s ss- > prop_card )
{
/*
* When we have exceeded 10 , 000 tuples , decay required cardinality
@ -2291,7 +2396,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
* apparent it ' s probably not worth aborting .
*/
if ( memtupcount > 10000 )
t ss- > prop_card * = 0.65 ;
s ss- > prop_card * = 0.65 ;
return false ;
}
@ -2309,9 +2414,9 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
*/
# ifdef TRACE_SORT
if ( trace_sort )
elog ( LOG , " bttext _abbrev: aborted abbreviation at %d "
elog ( LOG , " varstr _abbrev: aborted abbreviation at %d "
" (abbrev_distinct: %f, key_distinct: %f, prop_card: %f) " ,
memtupcount , abbrev_distinct , key_distinct , t ss- > prop_card ) ;
memtupcount , abbrev_distinct , key_distinct , s ss- > prop_card ) ;
# endif
return true ;
@ -2345,8 +2450,9 @@ text_smaller(PG_FUNCTION_ARGS)
/*
* The following operators support character - by - character comparison
* of text datums , to allow building indexes suitable for LIKE clauses .
* Note that the regular texteq / textne comparison operators are assumed
* to be compatible with these !
* Note that the regular texteq / textne comparison operators , and regular
* support functions 1 and 2 with " C " collation are assumed to be
* compatible with these !
*/
static int
@ -2451,6 +2557,23 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
}
Datum
bttext_pattern_sortsupport ( PG_FUNCTION_ARGS )
{
SortSupport ssup = ( SortSupport ) PG_GETARG_POINTER ( 0 ) ;
MemoryContext oldcontext ;
oldcontext = MemoryContextSwitchTo ( ssup - > ssup_cxt ) ;
/* Use generic string SortSupport, forcing "C" collation */
varstr_sortsupport ( ssup , C_COLLATION_OID , false ) ;
MemoryContextSwitchTo ( oldcontext ) ;
PG_RETURN_VOID ( ) ;
}
/*-------------------------------------------------------------
* byteaoctetlen
*
@ -3375,6 +3498,22 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32 ( cmp ) ;
}
Datum
bytea_sortsupport ( PG_FUNCTION_ARGS )
{
SortSupport ssup = ( SortSupport ) PG_GETARG_POINTER ( 0 ) ;
MemoryContext oldcontext ;
oldcontext = MemoryContextSwitchTo ( ssup - > ssup_cxt ) ;
/* Use generic string SortSupport, forcing "C" collation */
varstr_sortsupport ( ssup , C_COLLATION_OID , false ) ;
MemoryContextSwitchTo ( oldcontext ) ;
PG_RETURN_VOID ( ) ;
}
/*
* appendStringInfoText
*