@ -28,7 +28,9 @@
# include "utils/builtins.h"
# include "utils/bytea.h"
# include "utils/lsyscache.h"
# include "utils/memutils.h"
# include "utils/pg_locale.h"
# include "utils/sortsupport.h"
/* GUC variable */
@ -50,12 +52,32 @@ typedef struct
int skiptable [ 256 ] ; /* skip distance for given mismatched char */
} TextPositionState ;
typedef struct
{
char * buf1 ; /* 1st string */
char * buf2 ; /* 2nd string */
int buflen1 ;
int buflen2 ;
# ifdef HAVE_LOCALE_T
pg_locale_t locale ;
# endif
} TextSortSupport ;
/*
* This should be large enough that most strings will fit , but small enough
* that we feel comfortable putting it on the stack
*/
# define TEXTBUFLEN 1024
# define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
# define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
# define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
# define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
# define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
static void btsortsupport_worker ( SortSupport ssup , Oid collid ) ;
static int bttextfastcmp_c ( Datum x , Datum y , SortSupport ssup ) ;
static int bttextfastcmp_locale ( Datum x , Datum y , SortSupport ssup ) ;
static int32 text_length ( Datum str ) ;
static text * text_catenate ( text * t1 , text * t2 ) ;
static text * text_substring ( Datum str ,
@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
else
{
# define STACKBUFLEN 1024
char a1buf [ STACKBUFLEN ] ;
char a2buf [ STACKBUFLEN ] ;
char a1buf [ TEXTBUFLEN ] ;
char a2buf [ TEXTBUFLEN ] ;
char * a1p ,
* a2p ;
@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
int a2len ;
int r ;
if ( len1 > = STACK BUFLEN / 2 )
if ( len1 > = TEXT BUFLEN / 2 )
{
a1len = len1 * 2 + 2 ;
a1p = palloc ( a1len ) ;
}
else
{
a1len = STACK BUFLEN;
a1len = TEXT BUFLEN;
a1p = a1buf ;
}
if ( len2 > = STACK BUFLEN / 2 )
if ( len2 > = TEXT BUFLEN / 2 )
{
a2len = len2 * 2 + 2 ;
a2p = palloc ( a2len ) ;
}
else
{
a2len = STACK BUFLEN;
a2len = TEXT BUFLEN;
a2p = a2buf ;
}
@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
# endif /* WIN32 */
if ( len1 > = STACK BUFLEN)
if ( len1 > = TEXT BUFLEN)
a1p = ( char * ) palloc ( len1 + 1 ) ;
else
a1p = a1buf ;
if ( len2 > = STACK BUFLEN)
if ( len2 > = TEXT BUFLEN)
a2p = ( char * ) palloc ( len2 + 1 ) ;
else
a2p = a2buf ;
@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32 ( result ) ;
}
Datum
bttextsortsupport ( PG_FUNCTION_ARGS )
{
SortSupport ssup = ( SortSupport ) PG_GETARG_POINTER ( 0 ) ;
Oid collid = ssup - > ssup_collation ;
MemoryContext oldcontext ;
oldcontext = MemoryContextSwitchTo ( ssup - > ssup_cxt ) ;
btsortsupport_worker ( ssup , collid ) ;
MemoryContextSwitchTo ( oldcontext ) ;
PG_RETURN_VOID ( ) ;
}
static void
btsortsupport_worker ( SortSupport ssup , Oid collid )
{
TextSortSupport * tss ;
/*
* If LC_COLLATE = C , we can make things quite a bit faster by using
* memcmp ( ) rather than strcoll ( ) . To minimize the per - comparison
* overhead , we make this decision just once for the whole sort .
*/
if ( lc_collate_is_c ( collid ) )
{
ssup - > comparator = bttextfastcmp_c ;
return ;
}
/*
* WIN32 requires complex hacks when the database encoding is UTF - 8 ( except
* when using the " C " collation ) . For now , we don ' t optimize that case .
*/
# ifdef WIN32
if ( GetDatabaseEncoding ( ) = = PG_UTF8 )
return ;
# endif
/*
* We may need a collation - sensitive comparison . To make things faster ,
* we ' ll figure out the collation based on the locale id and cache the
* result . Also , since strxfrm ( ) / strcoll ( ) require NUL - terminated inputs ,
* prepare one or two palloc ' d buffers to use as temporary workspace . In
* the ad - hoc comparison case we only use palloc ' d buffers when we need
* more space than we ' re comfortable allocating on the stack , but here we
* can keep the buffers around for the whole sort , so it makes sense to
* allocate them once and use them unconditionally .
*/
tss = palloc ( sizeof ( TextSortSupport ) ) ;
# ifdef HAVE_LOCALE_T
tss - > locale = 0 ;
# endif
if ( collid ! = DEFAULT_COLLATION_OID )
{
if ( ! OidIsValid ( collid ) )
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations , so report it that way .
*/
ereport ( ERROR ,
( errcode ( ERRCODE_INDETERMINATE_COLLATION ) ,
errmsg ( " could not determine which collation to use for string comparison " ) ,
errhint ( " Use the COLLATE clause to set the collation explicitly. " ) ) ) ;
}
# ifdef HAVE_LOCALE_T
tss - > locale = pg_newlocale_from_collation ( collid ) ;
# endif
}
tss - > buf1 = palloc ( TEXTBUFLEN ) ;
tss - > buflen1 = TEXTBUFLEN ;
tss - > buf2 = palloc ( TEXTBUFLEN ) ;
tss - > buflen2 = TEXTBUFLEN ;
ssup - > ssup_extra = tss ;
ssup - > comparator = bttextfastcmp_locale ;
}
/*
* sortsupport comparison func ( for C locale case )
*/
static int
bttextfastcmp_c ( Datum x , Datum y , SortSupport ssup )
{
text * arg1 = DatumGetTextPP ( x ) ;
text * arg2 = DatumGetTextPP ( y ) ;
char * a1p ,
* a2p ;
int len1 ,
len2 ,
result ;
a1p = VARDATA_ANY ( arg1 ) ;
a2p = VARDATA_ANY ( arg2 ) ;
len1 = VARSIZE_ANY_EXHDR ( arg1 ) ;
len2 = VARSIZE_ANY_EXHDR ( arg2 ) ;
result = memcmp ( a1p , a2p , Min ( len1 , len2 ) ) ;
if ( ( result = = 0 ) & & ( len1 ! = len2 ) )
result = ( len1 < len2 ) ? - 1 : 1 ;
/* We can't afford to leak memory here. */
if ( PointerGetDatum ( arg1 ) ! = x )
pfree ( arg1 ) ;
if ( PointerGetDatum ( arg2 ) ! = y )
pfree ( arg2 ) ;
return result ;
}
/*
* sortsupport comparison func ( for locale case )
*/
static int
bttextfastcmp_locale ( Datum x , Datum y , SortSupport ssup )
{
text * arg1 = DatumGetTextPP ( x ) ;
text * arg2 = DatumGetTextPP ( y ) ;
TextSortSupport * tss = ( TextSortSupport * ) ssup - > ssup_extra ;
/* working state */
char * a1p ,
* a2p ;
int len1 ,
len2 ,
result ;
a1p = VARDATA_ANY ( arg1 ) ;
a2p = VARDATA_ANY ( arg2 ) ;
len1 = VARSIZE_ANY_EXHDR ( arg1 ) ;
len2 = VARSIZE_ANY_EXHDR ( arg2 ) ;
if ( len1 > = tss - > buflen1 )
{
pfree ( tss - > buf1 ) ;
tss - > buflen1 = Max ( len1 + 1 , Min ( tss - > buflen1 * 2 , MaxAllocSize ) ) ;
tss - > buf1 = MemoryContextAlloc ( ssup - > ssup_cxt , tss - > buflen1 ) ;
}
if ( len2 > = tss - > buflen2 )
{
pfree ( tss - > buf2 ) ;
tss - > buflen1 = Max ( len2 + 1 , Min ( tss - > buflen2 * 2 , MaxAllocSize ) ) ;
tss - > buf2 = MemoryContextAlloc ( ssup - > ssup_cxt , tss - > buflen2 ) ;
}
memcpy ( tss - > buf1 , a1p , len1 ) ;
tss - > buf1 [ len1 ] = ' \0 ' ;
memcpy ( tss - > buf2 , a2p , len2 ) ;
tss - > buf2 [ len2 ] = ' \0 ' ;
# ifdef HAVE_LOCALE_T
if ( tss - > locale )
result = strcoll_l ( tss - > buf1 , tss - > buf2 , tss - > locale ) ;
else
# endif
result = strcoll ( tss - > buf1 , tss - > buf2 ) ;
/*
* In some locales strcoll ( ) can claim that nonidentical strings are equal .
* Believing that would be bad news for a number of reasons , so we follow
* Perl ' s lead and sort " equal " strings according to strcmp ( ) .
*/
if ( result = = 0 )
result = strcmp ( tss - > buf1 , tss - > buf2 ) ;
/* We can't afford to leak memory here. */
if ( PointerGetDatum ( arg1 ) ! = x )
pfree ( arg1 ) ;
if ( PointerGetDatum ( arg2 ) ! = y )
pfree ( arg2 ) ;
return result ;
}
Datum
text_larger ( PG_FUNCTION_ARGS )