@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / optimizer / prep / prepunion . c , v 1.149 2008 / 08 / 02 21 : 32 : 0 0 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / optimizer / prep / prepunion . c , v 1.150 2008 / 08 / 07 01 : 11 : 5 0 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -32,8 +32,12 @@
# include "access/heapam.h"
# include "catalog/namespace.h"
# include "catalog/pg_type.h"
# include "miscadmin.h"
# include "nodes/makefuncs.h"
# include "optimizer/clauses.h"
# include "optimizer/cost.h"
# include "optimizer/pathnode.h"
# include "optimizer/paths.h"
# include "optimizer/plancat.h"
# include "optimizer/planmain.h"
# include "optimizer/planner.h"
@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root,
double tuple_fraction ,
SetOperationStmt * top_union ,
List * refnames_tlist ) ;
static Plan * make_union_unique ( SetOperationStmt * op , Plan * plan ,
PlannerInfo * root , double tuple_fraction ,
List * * sortClauses ) ;
static bool choose_hashed_setop ( PlannerInfo * root , List * groupClauses ,
Plan * input_plan ,
double tuple_fraction , double dNumDistinctRows ,
const char * construct ) ;
static List * generate_setop_tlist ( List * colTypes , int flag ,
Index varno ,
bool hack_constants ,
@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
static List * generate_append_tlist ( List * colTypes , bool flag ,
List * input_plans ,
List * refnames_tlist ) ;
static List * generate_setop_sortlist ( List * targetlist ) ;
static List * generate_setop_grouplist ( SetOperationStmt * op , List * targetlist ) ;
static void expand_inherited_rtentry ( PlannerInfo * root , RangeTblEntry * rte ,
Index rti ) ;
static void make_inh_translation_lists ( Relation oldrelation ,
@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist,
* top level has already been factored into tuple_fraction .
*
* * sortClauses is an output argument : it is set to a list of SortGroupClauses
* representing the result ordering of the topmost set operation .
* representing the result ordering of the topmost set operation . ( This will
* be NIL if the output isn ' t ordered . )
*/
Plan *
plan_set_operations ( PlannerInfo * root , double tuple_fraction ,
@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
/*
* If any of my children are identical UNION nodes ( same op , all - flag , and
* colTypes ) then they can be merged into this node so that we generate
* only one Append and Sort for the lot . Recurse to find such nodes and
* compute their children ' s plans .
* only one Append and unique - ification for the lot . Recurse to find such
* nodes and compute their children ' s plans .
*/
planlist = list_concat ( recurse_union_children ( op - > larg , root ,
tuple_fraction ,
@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
/*
* For UNION ALL , we just need the Append plan . For UNION , need to add
* Sort and Unique nodes to produce unique output .
* node ( s ) to remove duplicates .
*/
if ( ! op - > all )
{
List * sortList ;
sortList = generate_setop_sortlist ( tlist ) ;
if ( sortList )
{
plan = ( Plan * ) make_sort_from_sortclauses ( root , sortList , plan ) ;
plan = ( Plan * ) make_unique ( plan , sortList ) ;
}
* sortClauses = sortList ;
}
if ( op - > all )
* sortClauses = NIL ; /* result of UNION ALL is always unsorted */
else
* sortClauses = NIL ;
plan = make_union_unique ( op , plan , root , tuple_fraction , sortClauses ) ;
return plan ;
}
@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
* rplan ,
* plan ;
List * tlist ,
* sort List,
* group List,
* planlist ,
* child_sortclauses ;
SetOpCmd cmd ;
@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
*/
plan = ( Plan * ) make_append ( planlist , false , tlist ) ;
/*
* Sort the child results , then add a SetOp plan node to generate the
* correct output .
*/
sortList = generate_setop_sortlist ( tlist ) ;
/* Identify the grouping semantics */
groupList = generate_setop_grouplist ( op , tlist ) ;
if ( sortList = = NIL ) /* nothing to sort on? */
/* punt if nothing to group on (can this happen?) */
if ( groupList = = NIL )
{
* sortClauses = NIL ;
return plan ;
}
plan = ( Plan * ) make_sort_from_sortclauses ( root , sortList , plan ) ;
/*
* Decide whether to hash or sort , and add a sort node if needed .
*/
plan = ( Plan * ) make_sort_from_sortclauses ( root , groupList , plan ) ;
/*
* Finally , add a SetOp plan node to generate the correct output .
*/
switch ( op - > op )
{
case SETOP_INTERSECT :
@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
cmd = op - > all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT ;
break ;
default :
elog ( ERROR , " unrecognized set op: %d " ,
( int ) op - > op ) ;
elog ( ERROR , " unrecognized set op: %d " , ( int ) op - > op ) ;
cmd = SETOPCMD_INTERSECT ; /* keep compiler quiet */
break ;
}
plan = ( Plan * ) make_setop ( cmd , plan , sort List, list_length ( op - > colTypes ) + 1 ) ;
plan = ( Plan * ) make_setop ( cmd , plan , group List, list_length ( op - > colTypes ) + 1 ) ;
* sortClauses = sort List;
* sortClauses = group List;
return plan ;
}
@ -465,6 +471,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
& child_sortclauses ) ) ;
}
/*
* Add nodes to the given plan tree to unique - ify the result of a UNION .
*/
static Plan *
make_union_unique ( SetOperationStmt * op , Plan * plan ,
PlannerInfo * root , double tuple_fraction ,
List * * sortClauses )
{
List * groupList ;
double dNumDistinctRows ;
long numDistinctRows ;
/* Identify the grouping semantics */
groupList = generate_setop_grouplist ( op , plan - > targetlist ) ;
/* punt if nothing to group on (can this happen?) */
if ( groupList = = NIL )
{
* sortClauses = NIL ;
return plan ;
}
/*
* XXX for the moment , take the number of distinct groups as being the
* total input size , ie , the worst case . This is too conservative , but
* we don ' t want to risk having the hashtable overrun memory ; also ,
* it ' s not clear how to get a decent estimate of the true size . One
* should note as well the propensity of novices to write UNION rather
* than UNION ALL even when they don ' t expect any duplicates . . .
*/
dNumDistinctRows = plan - > plan_rows ;
/* Also convert to long int --- but 'ware overflow! */
numDistinctRows = ( long ) Min ( dNumDistinctRows , ( double ) LONG_MAX ) ;
/* Decide whether to hash or sort */
if ( choose_hashed_setop ( root , groupList , plan ,
tuple_fraction , dNumDistinctRows ,
" UNION " ) )
{
/* Hashed aggregate plan --- no sort needed */
plan = ( Plan * ) make_agg ( root ,
plan - > targetlist ,
NIL ,
AGG_HASHED ,
list_length ( groupList ) ,
extract_grouping_cols ( groupList ,
plan - > targetlist ) ,
extract_grouping_ops ( groupList ) ,
numDistinctRows ,
0 ,
plan ) ;
/* Hashed aggregation produces randomly-ordered results */
* sortClauses = NIL ;
}
else
{
/* Sort and Unique */
plan = ( Plan * ) make_sort_from_sortclauses ( root , groupList , plan ) ;
plan = ( Plan * ) make_unique ( plan , groupList ) ;
plan - > plan_rows = dNumDistinctRows ;
/* We know the sort order of the result */
* sortClauses = groupList ;
}
return plan ;
}
/*
* choose_hashed_setop - should we use hashing for a set operation ?
*/
static bool
choose_hashed_setop ( PlannerInfo * root , List * groupClauses ,
Plan * input_plan ,
double tuple_fraction , double dNumDistinctRows ,
const char * construct )
{
int numDistinctCols = list_length ( groupClauses ) ;
bool can_sort ;
bool can_hash ;
Size hashentrysize ;
List * needed_pathkeys ;
Path hashed_p ;
Path sorted_p ;
/* Check whether the operators support sorting or hashing */
can_sort = grouping_is_sortable ( groupClauses ) ;
can_hash = grouping_is_hashable ( groupClauses ) ;
if ( can_hash & & can_sort )
{
/* we have a meaningful choice to make, continue ... */
}
else if ( can_hash )
return true ;
else if ( can_sort )
return false ;
else
ereport ( ERROR ,
( errcode ( ERRCODE_FEATURE_NOT_SUPPORTED ) ,
/* translator: %s is UNION, INTERSECT, or EXCEPT */
errmsg ( " could not implement %s " , construct ) ,
errdetail ( " Some of the datatypes only support hashing, while others only support sorting. " ) ) ) ;
/* Prefer sorting when enable_hashagg is off */
if ( ! enable_hashagg )
return false ;
/*
* Don ' t do it if it doesn ' t look like the hashtable will fit into
* work_mem .
*/
hashentrysize = MAXALIGN ( input_plan - > plan_width ) + MAXALIGN ( sizeof ( MinimalTupleData ) ) ;
if ( hashentrysize * dNumDistinctRows > work_mem * 1024L )
return false ;
/*
* See if the estimated cost is no more than doing it the other way .
*
* We need to consider input_plan + hashagg versus input_plan + sort +
* group . Note that the actual result plan might involve a SetOp or
* Unique node , not Agg or Group , but the cost estimates for Agg and Group
* should be close enough for our purposes here .
*
* These path variables are dummies that just hold cost fields ; we don ' t
* make actual Paths for these steps .
*/
cost_agg ( & hashed_p , root , AGG_HASHED , 0 ,
numDistinctCols , dNumDistinctRows ,
input_plan - > startup_cost , input_plan - > total_cost ,
input_plan - > plan_rows ) ;
/*
* Now for the sorted case . Note that the input is * always * unsorted ,
* since it was made by appending unrelated sub - relations together .
*/
sorted_p . startup_cost = input_plan - > startup_cost ;
sorted_p . total_cost = input_plan - > total_cost ;
/* XXX this is more expensive than cost_sort really needs: */
needed_pathkeys = make_pathkeys_for_sortclauses ( root ,
groupClauses ,
input_plan - > targetlist ,
true ) ;
cost_sort ( & sorted_p , root , needed_pathkeys , sorted_p . total_cost ,
input_plan - > plan_rows , input_plan - > plan_width , - 1.0 ) ;
cost_group ( & sorted_p , root , numDistinctCols , dNumDistinctRows ,
sorted_p . startup_cost , sorted_p . total_cost ,
input_plan - > plan_rows ) ;
/*
* Now make the decision using the top - level tuple fraction . First we
* have to convert an absolute count ( LIMIT ) into fractional form .
*/
if ( tuple_fraction > = 1.0 )
tuple_fraction / = dNumDistinctRows ;
if ( compare_fractional_path_costs ( & hashed_p , & sorted_p ,
tuple_fraction ) < 0 )
{
/* Hashed is cheaper, so use it */
return true ;
}
return false ;
}
/*
* Generate targetlist for a set - operation plan node
*
@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag,
}
/*
* generate_setop_sort list
* Build a SortGroupClause list enumerat ing all the non - resjunk
* tlist entries , using default ordering propertie s.
* generate_setop_group list
* Build a SortGroupClause list d efi ning the sort / grouping properties
* of the setop ' s output column s.
*
* For now , we require all the items to be sortable . Eventually we
* should implement hashing setops and allow hash - only datatypes .
* Parse analysis already determined the properties and built a suitable
* list , except that the entries do not have sortgrouprefs set because
* the parser output representation doesn ' t include a tlist for each
* setop . So what we need to do here is copy that list and install
* proper sortgrouprefs into it and into the targetlist .
*/
static List *
generate_setop_sortlist ( List * targetlist )
generate_setop_grouplist ( SetOperationStmt * op , List * targetlist )
{
List * sortlist = NIL ;
ListCell * l ;
List * grouplist = ( List * ) copyObject ( op - > groupClauses ) ;
ListCell * lg ;
ListCell * lt ;
Index refno = 1 ;
foreach ( l , targetlist )
lg = list_head ( grouplist ) ;
foreach ( lt , targetlist )
{
TargetEntry * tle = ( TargetEntry * ) lfirst ( l ) ;
TargetEntry * tle = ( TargetEntry * ) lfirst ( lt ) ;
SortGroupClause * sgc ;
if ( ! tle - > resjunk )
sortlist = addTargetToGroupList ( NULL , tle ,
sortlist , targetlist ,
true , /* XXX fixme someday */
false ) ;
/* tlist shouldn't have any sortgrouprefs yet */
Assert ( tle - > ressortgroupref = = 0 ) ;
if ( tle - > resjunk )
continue ; /* ignore resjunk columns */
/* non-resjunk columns should have grouping clauses */
Assert ( lg ! = NULL ) ;
sgc = ( SortGroupClause * ) lfirst ( lg ) ;
lg = lnext ( lg ) ;
Assert ( sgc - > tleSortGroupRef = = 0 ) ;
/* we could use assignSortGroupRef here, but seems a bit silly */
sgc - > tleSortGroupRef = tle - > ressortgroupref = refno + + ;
}
return sortlist ;
Assert ( lg = = NULL ) ;
return grouplist ;
}