@ -32,6 +32,20 @@
# include "optimizer/tlist.h"
# include "utils/lsyscache.h"
/*
* One element of the list passed to query_is_distinct_for_with_collations ( ) .
* Each entry names a subquery output column that the caller needs to be
* distinct over , plus the upper - level equality operator and its input
* collation , so that the subquery ' s own DISTINCT / GROUP BY / set - op clauses can
* be compared for compatibility .
*/
typedef struct DistinctColInfo
{
int colno ; /* subquery output column resno */
Oid opid ; /* upper-level equality operator */
Oid collid ; /* input collation of opid */
} DistinctColInfo ;
/* local functions */
static bool join_is_removable ( PlannerInfo * root , SpecialJoinInfo * sjinfo ) ;
static void remove_rel_from_query ( PlannerInfo * root , int relid ,
@ -40,7 +54,9 @@ static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
static bool rel_supports_distinctness ( PlannerInfo * root , RelOptInfo * rel ) ;
static bool rel_is_distinct_for ( PlannerInfo * root , RelOptInfo * rel ,
List * clause_list ) ;
static Oid distinct_col_search ( int colno , List * colnos , List * opids ) ;
static bool query_is_distinct_for_with_collations ( Query * query ,
List * distinct_cols ) ;
static DistinctColInfo * distinct_col_search ( int colno , List * distinct_cols ) ;
static bool is_innerrel_unique_for ( PlannerInfo * root ,
Relids joinrelids ,
Relids outerrelids ,
@ -660,15 +676,17 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
{
Index relid = rel - > relid ;
Query * subquery = root - > simple_rte_array [ relid ] - > subquery ;
List * colnos = NIL ;
List * opids = NIL ;
List * distinct_cols = NIL ;
ListCell * l ;
/*
* Build the argument lists for query_is_distinct_for : a list of
* output column numbers that the query needs to be distinct over , and
* a list of equality operators that the output columns need to be
* distinct according to .
* Build the argument list for query_is_distinct_for_with_collations :
* a list of DistinctColInfo entries , each holding an output column
* number that the query needs to be distinct over , the equality
* operator that the column needs to be distinct according to , and
* that operator ' s input collation . The collation matters because the
* subquery ' s own DISTINCT / GROUP BY / set - op proves uniqueness under
* its own collation , which need not agree with the operator ' s .
*
* ( XXX we are not considering restriction clauses attached to the
* subquery ; is that worth doing ? )
@ -676,18 +694,18 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
foreach ( l , clause_list )
{
RestrictInfo * rinfo = lfirst_node ( RestrictInfo , l ) ;
Oid op ;
OpExpr * opexpr ;
Var * var ;
DistinctColInfo * dcinfo ;
/*
* Get the equality operator we need uniqueness according to .
* ( This might be a cross - type operator and thus not exactly the
* same operator the subquery would consider ; that ' s all right
* since query_is_distinct_for can resolve such cases . ) The
* caller ' s mergejoinability test should have selected only
* OpExprs .
* The caller ' s mergejoinability test should have selected only
* OpExprs . The operator might be a cross - type operator and thus
* not exactly the same operator the subquery would consider ;
* that ' s all right since query_is_distinct_for_with_collations
* can resolve such cases .
*/
op = castNode ( OpExpr , rinfo - > clause ) - > opno ;
opexpr = castNode ( OpExpr , rinfo - > clause ) ;
/* caller identified the inner side for us */
if ( rinfo - > outer_is_left )
@ -711,11 +729,14 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
var - > varno ! = relid | | var - > varlevelsup ! = 0 )
continue ;
colnos = lappend_int ( colnos , var - > varattno ) ;
opids = lappend_oid ( opids , op ) ;
dcinfo = palloc ( sizeof ( DistinctColInfo ) ) ;
dcinfo - > colno = var - > varattno ;
dcinfo - > opid = opexpr - > opno ;
dcinfo - > collid = opexpr - > inputcollid ;
distinct_cols = lappend ( distinct_cols , dcinfo ) ;
}
if ( query_is_distinct_for ( subquery , colnos , opids ) )
if ( query_is_distinct_for_with_collations ( subquery , distinct_col s) )
return true ;
}
return false ;
@ -753,31 +774,71 @@ query_supports_distinctness(Query *query)
}
/*
* query_is_distinct_for - does query never return duplicates of the
* specified columns ?
* query_is_distinct_for - ABI - preserving wrapper around
* query_is_distinct_for_with_collations ( ) .
*
* query is a not - yet - planned subquery ( in current usage , it ' s always from
* a subquery RTE , which the planner avoids scribbling on ) .
*
* colnos is an integer list of output column numbers ( resno ' s ) . We are
* interested in whether rows consisting of just these columns are certain
* to be distinct . " Distinctness " is defined according to whether the
* corresponding upper - level equality operators listed in opids would think
* the values are distinct . ( Note : the opids entries could be cross - type
* operators , and thus not exactly the equality operators that the subquery
* would use itself . We use equality_ops_are_compatible ( ) to check
* compatibility . That looks at btree or hash opfamily membership , and so
* should give trustworthy answers for all operators that we might need
* to deal with here . )
* The original signature took parallel colnos / opids lists and did not
* consider collations . External callers built against earlier minor
* releases continue to call it with the historical ( collation - blind )
* semantics ; we forward with InvalidOid collations , which makes the
* collation check a no - op ( see collations_agree_on_equality ( ) ) .
*/
bool
query_is_distinct_for ( Query * query , List * colnos , List * opids )
{
ListCell * l ;
Oid opid ;
List * distinct_cols = NIL ;
ListCell * lc1 ;
ListCell * lc2 ;
Assert ( list_length ( colnos ) = = list_length ( opids ) ) ;
forboth ( lc1 , colnos , lc2 , opids )
{
DistinctColInfo * dcinfo = palloc ( sizeof ( DistinctColInfo ) ) ;
dcinfo - > colno = lfirst_int ( lc1 ) ;
dcinfo - > opid = lfirst_oid ( lc2 ) ;
dcinfo - > collid = InvalidOid ;
distinct_cols = lappend ( distinct_cols , dcinfo ) ;
}
return query_is_distinct_for_with_collations ( query , distinct_cols ) ;
}
/*
* query_is_distinct_for_with_collations - does query never return duplicates
* of the specified columns ?
*
* query is a not - yet - planned subquery ( in current usage , it ' s always from
* a subquery RTE , which the planner avoids scribbling on ) .
*
* distinct_cols is a list of DistinctColInfo , one per requested output column .
* Each entry names the subquery output column number we want distinct , the
* upper - level equality operator we ' ll compare values with , and that operator ' s
* input collation . We are interested in whether rows consisting of just these
* columns are certain to be distinct .
*
* " Distinctness " is defined according to whether the corresponding upper - level
* equality operators would think the values are distinct . ( Note : each opid
* could be a cross - type operator , and thus not exactly the equality operator
* that the subquery would use itself . We use equality_ops_are_compatible ( ) to
* check compatibility . That looks at opfamily membership for index AMs that
* have declared that they support consistent equality semantics within an
* opfamily , and so should give trustworthy answers for all operators that we
* might need to deal with here . )
*
* The collid must also agree on equality with the collation the subquery ' s own
* DISTINCT / GROUP BY / set - op uses to deduplicate the column , else the subquery ' s
* distinctness does not carry over to the caller ' s equality semantics . Two
* collations agree on equality if they match or if both are deterministic ( in
* which case both reduce equality to byte - equality ; see CREATE COLLATION ) .
*/
static bool
query_is_distinct_for_with_collations ( Query * query , List * distinct_cols )
{
ListCell * l ;
DistinctColInfo * dcinfo ;
/*
* DISTINCT ( including DISTINCT ON ) guarantees uniqueness if all the
* columns in the DISTINCT clause appear in colnos and operator semantics
@ -792,9 +853,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
TargetEntry * tle = get_sortgroupclause_tle ( sgc ,
query - > targetList ) ;
opid = distinct_col_search ( tle - > resno , colnos , opids ) ;
if ( ! OidIsValid ( opid ) | |
! equality_ops_are_compatible ( opid , sgc - > eqop ) )
dcinfo = distinct_col_search ( tle - > resno , distinct_cols ) ;
if ( dcinfo = = NULL | |
! equality_ops_are_compatible ( dcinfo - > opid , sgc - > eqop ) | |
! collations_agree_on_equality ( dcinfo - > collid ,
exprCollation ( ( Node * ) tle - > expr ) ) )
break ; /* exit early if no match */
}
if ( l = = NULL ) /* had matches for all? */
@ -823,9 +886,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
TargetEntry * tle = get_sortgroupclause_tle ( sgc ,
query - > targetList ) ;
opid = distinct_col_search ( tle - > resno , colnos , opids ) ;
if ( ! OidIsValid ( opid ) | |
! equality_ops_are_compatible ( opid , sgc - > eqop ) )
dcinfo = distinct_col_search ( tle - > resno , distinct_cols ) ;
if ( dcinfo = = NULL | |
! equality_ops_are_compatible ( dcinfo - > opid , sgc - > eqop ) | |
! collations_agree_on_equality ( dcinfo - > collid ,
exprCollation ( ( Node * ) tle - > expr ) ) )
break ; /* exit early if no match */
}
if ( l = = NULL ) /* had matches for all? */
@ -891,9 +956,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
sgc = ( SortGroupClause * ) lfirst ( lg ) ;
lg = lnext ( topop - > groupClauses , lg ) ;
opid = distinct_col_search ( tle - > resno , colnos , opids ) ;
if ( ! OidIsValid ( opid ) | |
! equality_ops_are_compatible ( opid , sgc - > eqop ) )
dcinfo = distinct_col_search ( tle - > resno , distinct_cols ) ;
if ( dcinfo = = NULL | |
! equality_ops_are_compatible ( dcinfo - > opid , sgc - > eqop ) | |
! collations_agree_on_equality ( dcinfo - > collid ,
exprCollation ( ( Node * ) tle - > expr ) ) )
break ; /* exit early if no match */
}
if ( l = = NULL ) /* had matches for all? */
@ -913,24 +980,27 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
}
/*
* distinct_col_search - subroutine for query_is_distinct_for
* distinct_col_search - subroutine for query_is_distinct_for_with_collations
*
* If colno is in colnos , return the corresponding element of opids ,
* else return InvalidOid . ( Ordinarily colnos would not contain duplicates ,
* but if it does , we arbitrarily select the first match . )
* If colno matches the colno field of an entry in distinct_cols , return a
* pointer to that entry ; else return NULL . ( Ordinarily distinct_cols would
* not contain duplicate colnos , but if it does , we arbitrarily select the
* first match . )
*/
static Oid
distinct_col_search ( int colno , List * colnos , List * opid s)
static DistinctColInfo *
distinct_col_search ( int colno , List * distinct_col s)
{
ListCell * lc1 ,
* lc2 ;
ListCell * lc ;
forboth ( lc1 , colnos , lc2 , opid s )
foreach ( lc , distinct_col s )
{
if ( colno = = lfirst_int ( lc1 ) )
return lfirst_oid ( lc2 ) ;
DistinctColInfo * dcinfo = ( DistinctColInfo * ) lfirst ( lc ) ;
if ( dcinfo - > colno = = colno )
return dcinfo ;
}
return InvalidOid ;
return NULL ;
}