@ -224,6 +224,10 @@ typedef struct
* of aliases to columns of the right input . Thus , positions in the printable
* column alias list are not necessarily one - for - one with varattnos of the
* JOIN , so we need a separate new_colnames [ ] array for printing purposes .
*
* Finally , when dealing with wide tables we risk O ( N ^ 2 ) costs in assigning
* non - duplicate column names . We ameliorate that by using a hash table that
* holds all the strings appearing in colnames , new_colnames , and parentUsing .
*/
typedef struct
{
@ -291,6 +295,15 @@ typedef struct
int * leftattnos ; /* left-child varattnos of join cols, or 0 */
int * rightattnos ; /* right-child varattnos of join cols, or 0 */
List * usingNames ; /* names assigned to merged columns */
/*
* Hash table holding copies of all the strings appearing in this struct ' s
* colnames , new_colnames , and parentUsing . We use a hash table only for
* sufficiently wide relations , and only during the colname - assignment
* functions set_relation_column_names and set_join_column_names ;
* otherwise , names_hash is NULL .
*/
HTAB * names_hash ; /* entries are just strings */
} deparse_columns ;
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
@ -376,6 +389,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
static char * make_colname_unique ( char * colname , deparse_namespace * dpns ,
deparse_columns * colinfo ) ;
static void expand_colnames_array_to ( deparse_columns * colinfo , int n ) ;
static void build_colinfo_names_hash ( deparse_columns * colinfo ) ;
static void add_to_names_hash ( deparse_columns * colinfo , const char * name ) ;
static void destroy_colinfo_names_hash ( deparse_columns * colinfo ) ;
static void identify_join_columns ( JoinExpr * j , RangeTblEntry * jrte ,
deparse_columns * colinfo ) ;
static char * get_rtable_name ( int rtindex , deparse_context * context ) ;
@ -4133,6 +4149,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
*
* parentUsing is a list of all USING aliases assigned in parent joins of
* the current jointree node . ( The passed - in list must not be modified . )
*
* Note that we do not use per - deparse_columns hash tables in this function .
* The number of names that need to be assigned should be small enough that
* we don ' t need to trouble with that .
*/
static void
set_using_names ( deparse_namespace * dpns , Node * jtnode , List * parentUsing )
@ -4408,6 +4428,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colinfo - > new_colnames = ( char * * ) palloc ( ncolumns * sizeof ( char * ) ) ;
colinfo - > is_new_col = ( bool * ) palloc ( ncolumns * sizeof ( bool ) ) ;
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
build_colinfo_names_hash ( colinfo ) ;
/*
* Scan the columns , select a unique alias for each one , and store it in
* colinfo - > colnames and colinfo - > new_colnames . The former array has NULL
@ -4443,6 +4466,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colname = make_colname_unique ( colname , dpns , colinfo ) ;
colinfo - > colnames [ i ] = colname ;
add_to_names_hash ( colinfo , colname ) ;
}
/* Put names of non-dropped columns in new_colnames[] too */
@ -4456,6 +4480,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
changed_any = true ;
}
/* We're now done needing the colinfo's names_hash */
destroy_colinfo_names_hash ( colinfo ) ;
/*
* Set correct length for new_colnames [ ] array . ( Note : if columns have
* been added , colinfo - > num_cols includes them , which is not really quite
@ -4526,6 +4553,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
expand_colnames_array_to ( colinfo , noldcolumns ) ;
Assert ( colinfo - > num_cols = = noldcolumns ) ;
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
build_colinfo_names_hash ( colinfo ) ;
/*
* Scan the join output columns , select an alias for each one , and store
* it in colinfo - > colnames . If there are USING columns , set_using_names ( )
@ -4563,6 +4593,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
if ( rte - > alias = = NULL )
{
colinfo - > colnames [ i ] = real_colname ;
add_to_names_hash ( colinfo , real_colname ) ;
continue ;
}
@ -4579,6 +4610,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colname = make_colname_unique ( colname , dpns , colinfo ) ;
colinfo - > colnames [ i ] = colname ;
add_to_names_hash ( colinfo , colname ) ;
}
/* Remember if any assigned aliases differ from "real" name */
@ -4677,6 +4709,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
}
else
colinfo - > new_colnames [ j ] = child_colname ;
add_to_names_hash ( colinfo , colinfo - > new_colnames [ j ] ) ;
}
colinfo - > is_new_col [ j ] = leftcolinfo - > is_new_col [ jc ] ;
@ -4726,6 +4759,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
}
else
colinfo - > new_colnames [ j ] = child_colname ;
add_to_names_hash ( colinfo , colinfo - > new_colnames [ j ] ) ;
}
colinfo - > is_new_col [ j ] = rightcolinfo - > is_new_col [ jc ] ;
@ -4740,6 +4774,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
Assert ( j = = nnewcolumns ) ;
# endif
/* We're now done needing the colinfo's names_hash */
destroy_colinfo_names_hash ( colinfo ) ;
/*
* For a named join , print column aliases if we changed any from the child
* names . Unnamed joins cannot print aliases .
@ -4762,38 +4799,59 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
int i ;
ListCell * lc ;
/* Check against already-assigned column aliases within RTE */
for ( i = 0 ; i < colinfo - > num_cols ; i + + )
{
char * oldname = colinfo - > colnames [ i ] ;
if ( oldname & & strcmp ( oldname , colname ) = = 0 )
return false ;
}
/*
* If we ' re building a new_colnames array , check that too ( this will b e
* partially but not completely redundant with the previous checks )
* If we have a hash table , consult that instead of linearly scanning the
* colinfo ' s strings .
*/
for ( i = 0 ; i < colinfo - > num_new_cols ; i + + )
if ( colinfo - > names_hash )
{
char * oldname = colinfo - > new_colnames [ i ] ;
if ( oldname & & strcmp ( oldname , colname ) = = 0 )
if ( hash_search ( colinfo - > names_hash ,
colname ,
HASH_FIND ,
NULL ) ! = NULL )
return false ;
}
/* Also check against USING-column names that must be globally unique */
foreach ( lc , dpns - > using_names )
else
{
char * oldname = ( char * ) lfirst ( lc ) ;
/* Check against already-assigned column aliases within RTE */
for ( i = 0 ; i < colinfo - > num_cols ; i + + )
{
char * oldname = colinfo - > colnames [ i ] ;
if ( strcmp ( oldname , colname ) = = 0 )
return false ;
if ( oldname & & strcmp ( oldname , colname ) = = 0 )
return false ;
}
/*
* If we ' re building a new_colnames array , check that too ( this will
* be partially but not completely redundant with the previous checks )
*/
for ( i = 0 ; i < colinfo - > num_new_cols ; i + + )
{
char * oldname = colinfo - > new_colnames [ i ] ;
if ( oldname & & strcmp ( oldname , colname ) = = 0 )
return false ;
}
/*
* Also check against names already assigned for parent - join USING
* cols
*/
foreach ( lc , colinfo - > parentUsing )
{
char * oldname = ( char * ) lfirst ( lc ) ;
if ( strcmp ( oldname , colname ) = = 0 )
return false ;
}
}
/* Also check against names already assigned for parent-join USING cols */
foreach ( lc , colinfo - > parentUsing )
/*
* Also check against USING - column names that must be globally unique .
* These are not hashed , but there should be few of them .
*/
foreach ( lc , dpns - > using_names )
{
char * oldname = ( char * ) lfirst ( lc ) ;
@ -4861,6 +4919,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
}
}
/*
* build_colinfo_names_hash : optionally construct a hash table for colinfo
*/
static void
build_colinfo_names_hash ( deparse_columns * colinfo )
{
HASHCTL hash_ctl ;
int i ;
ListCell * lc ;
/*
* Use a hash table only for RTEs with at least 32 columns . ( The cutoff
* is somewhat arbitrary , but let ' s choose it so that this code does get
* exercised in the regression tests . )
*/
if ( colinfo - > num_cols < 32 )
return ;
/*
* Set up the hash table . The entries are just strings with no other
* payload .
*/
hash_ctl . keysize = NAMEDATALEN ;
hash_ctl . entrysize = NAMEDATALEN ;
hash_ctl . hcxt = CurrentMemoryContext ;
colinfo - > names_hash = hash_create ( " deparse_columns names " ,
colinfo - > num_cols + colinfo - > num_new_cols ,
& hash_ctl ,
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT ) ;
/*
* Preload the hash table with any names already present ( these would have
* come from set_using_names ) .
*/
for ( i = 0 ; i < colinfo - > num_cols ; i + + )
{
char * oldname = colinfo - > colnames [ i ] ;
if ( oldname )
add_to_names_hash ( colinfo , oldname ) ;
}
for ( i = 0 ; i < colinfo - > num_new_cols ; i + + )
{
char * oldname = colinfo - > new_colnames [ i ] ;
if ( oldname )
add_to_names_hash ( colinfo , oldname ) ;
}
foreach ( lc , colinfo - > parentUsing )
{
char * oldname = ( char * ) lfirst ( lc ) ;
add_to_names_hash ( colinfo , oldname ) ;
}
}
/*
* add_to_names_hash : add a string to the names_hash , if we ' re using one
*/
static void
add_to_names_hash ( deparse_columns * colinfo , const char * name )
{
if ( colinfo - > names_hash )
( void ) hash_search ( colinfo - > names_hash ,
name ,
HASH_ENTER ,
NULL ) ;
}
/*
* destroy_colinfo_names_hash : destroy hash table when done with it
*/
static void
destroy_colinfo_names_hash ( deparse_columns * colinfo )
{
if ( colinfo - > names_hash )
{
hash_destroy ( colinfo - > names_hash ) ;
colinfo - > names_hash = NULL ;
}
}
/*
* identify_join_columns : figure out where columns of a join come from
*