@ -147,10 +147,17 @@ static bool has_indexed_join_quals(NestPath *joinpath);
static double approx_tuple_count ( PlannerInfo * root , JoinPath * path ,
List * quals ) ;
static double calc_joinrel_size_estimate ( PlannerInfo * root ,
RelOptInfo * outer_rel ,
RelOptInfo * inner_rel ,
double outer_rows ,
double inner_rows ,
SpecialJoinInfo * sjinfo ,
List * restrictlist ) ;
static Selectivity get_foreign_key_join_selectivity ( PlannerInfo * root ,
Relids outer_relids ,
Relids inner_relids ,
SpecialJoinInfo * sjinfo ,
List * * restrictlist ) ;
static void set_rel_width ( PlannerInfo * root , RelOptInfo * rel ) ;
static double relation_byte_size ( double tuples , int width ) ;
static double page_size ( double tuples , int width ) ;
@ -3837,6 +3844,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
List * restrictlist )
{
rel - > rows = calc_joinrel_size_estimate ( root ,
outer_rel ,
inner_rel ,
outer_rel - > rows ,
inner_rel - > rows ,
sjinfo ,
@ -3848,8 +3857,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
* Make a size estimate for a parameterized scan of a join relation .
*
* ' rel ' is the joinrel under consideration .
* ' outer_rows ' , ' inner_rows ' are the sizes of the ( probably also
* parameterized ) join inputs under consideration .
* ' outer_path ' , ' inner_path ' are ( probably also parameterized ) Paths that
* produce the relations being joined .
* ' sjinfo ' is any SpecialJoinInfo relevant to this join .
* ' restrict_clauses ' lists the join clauses that need to be applied at the
* join node ( including any movable clauses that were moved down to this join ,
@ -3860,8 +3869,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
*/
double
get_parameterized_joinrel_size ( PlannerInfo * root , RelOptInfo * rel ,
double outer_rows ,
double inner_rows ,
Path * outer_path ,
Path * inner_path ,
SpecialJoinInfo * sjinfo ,
List * restrict_clauses )
{
@ -3877,8 +3886,10 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
* estimate for any pair with the same parameterization .
*/
nrows = calc_joinrel_size_estimate ( root ,
outer_rows ,
inner_rows ,
outer_path - > parent ,
inner_path - > parent ,
outer_path - > rows ,
inner_path - > rows ,
sjinfo ,
restrict_clauses ) ;
/* For safety, make sure result is not more than the base estimate */
@ -3891,15 +3902,22 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
* calc_joinrel_size_estimate
* Workhorse for set_joinrel_size_estimates and
* get_parameterized_joinrel_size .
*
* outer_rel / inner_rel are the relations being joined , but they should be
* assumed to have sizes outer_rows / inner_rows ; those numbers might be less
* than what rel - > rows says , when we are considering parameterized paths .
*/
static double
calc_joinrel_size_estimate ( PlannerInfo * root ,
RelOptInfo * outer_rel ,
RelOptInfo * inner_rel ,
double outer_rows ,
double inner_rows ,
SpecialJoinInfo * sjinfo ,
List * restrictlist )
{
JoinType jointype = sjinfo - > jointype ;
Selectivity fkselec ;
Selectivity jselec ;
Selectivity pselec ;
double nrows ;
@ -3910,6 +3928,22 @@ calc_joinrel_size_estimate(PlannerInfo *root,
* double - counting them because they were not considered in estimating the
* sizes of the component rels .
*
* First , see whether any of the joinclauses can be matched to known FK
* constraints . If so , drop those clauses from the restrictlist , and
* instead estimate their selectivity using FK semantics . ( We do this
* without regard to whether said clauses are local or " pushed down " .
* Probably , an FK - matching clause could never be seen as pushed down at
* an outer join , since it would be strict and hence would be grounds for
* join strength reduction . ) fkselec gets the net selectivity for
* FK - matching clauses , or 1.0 if there are none .
*/
fkselec = get_foreign_key_join_selectivity ( root ,
outer_rel - > relids ,
inner_rel - > relids ,
sjinfo ,
& restrictlist ) ;
/*
* For an outer join , we have to distinguish the selectivity of the join ' s
* own clauses ( JOIN / ON conditions ) from any clauses that were " pushed
* down " . For inner joins we just count them all as joinclauses.
@ -3973,16 +4007,17 @@ calc_joinrel_size_estimate(PlannerInfo *root,
switch ( jointype )
{
case JOIN_INNER :
nrows = outer_rows * inner_rows * jselec ;
nrows = outer_rows * inner_rows * fkselec * jselec ;
/* pselec not used */
break ;
case JOIN_LEFT :
nrows = outer_rows * inner_rows * jselec ;
nrows = outer_rows * inner_rows * fkselec * jselec ;
if ( nrows < outer_rows )
nrows = outer_rows ;
nrows * = pselec ;
break ;
case JOIN_FULL :
nrows = outer_rows * inner_rows * jselec ;
nrows = outer_rows * inner_rows * fkselec * jselec ;
if ( nrows < outer_rows )
nrows = outer_rows ;
if ( nrows < inner_rows )
@ -3990,11 +4025,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
nrows * = pselec ;
break ;
case JOIN_SEMI :
nrows = outer_rows * jselec ;
nrows = outer_rows * fkselec * jselec ;
/* pselec not used */
break ;
case JOIN_ANTI :
nrows = outer_rows * ( 1.0 - jselec ) ;
nrows = outer_rows * ( 1.0 - fkselec * jselec ) ;
nrows * = pselec ;
break ;
default :
@ -4007,6 +4042,224 @@ calc_joinrel_size_estimate(PlannerInfo *root,
return clamp_row_est ( nrows ) ;
}
/*
* get_foreign_key_join_selectivity
* Estimate join selectivity for foreign - key - related clauses .
*
* Remove any clauses that can be matched to FK constraints from * restrictlist ,
* and return a substitute estimate of their selectivity . 1.0 is returned
* when there are no such clauses .
*
* The reason for treating such clauses specially is that we can get better
* estimates this way than by relying on clauselist_selectivity ( ) , especially
* for multi - column FKs where that function ' s assumption that the clauses are
* independent falls down badly . But even with single - column FKs , we may be
* able to get a better answer when the pg_statistic stats are missing or out
* of date .
*/
static Selectivity
get_foreign_key_join_selectivity ( PlannerInfo * root ,
Relids outer_relids ,
Relids inner_relids ,
SpecialJoinInfo * sjinfo ,
List * * restrictlist )
{
Selectivity fkselec = 1.0 ;
JoinType jointype = sjinfo - > jointype ;
List * worklist = * restrictlist ;
ListCell * lc ;
/* Consider each FK constraint that is known to match the query */
foreach ( lc , root - > fkey_list )
{
ForeignKeyOptInfo * fkinfo = ( ForeignKeyOptInfo * ) lfirst ( lc ) ;
bool ref_is_outer ;
List * removedlist ;
ListCell * cell ;
ListCell * prev ;
ListCell * next ;
/*
* This FK is not relevant unless it connects a baserel on one side of
* this join to a baserel on the other side .
*/
if ( bms_is_member ( fkinfo - > con_relid , outer_relids ) & &
bms_is_member ( fkinfo - > ref_relid , inner_relids ) )
ref_is_outer = false ;
else if ( bms_is_member ( fkinfo - > ref_relid , outer_relids ) & &
bms_is_member ( fkinfo - > con_relid , inner_relids ) )
ref_is_outer = true ;
else
continue ;
/*
* Modify the restrictlist by removing clauses that match the FK ( and
* putting them into removedlist instead ) . It seems unsafe to modify
* the originally - passed List structure , so we make a shallow copy the
* first time through .
*/
if ( worklist = = * restrictlist )
worklist = list_copy ( worklist ) ;
removedlist = NIL ;
prev = NULL ;
for ( cell = list_head ( worklist ) ; cell ; cell = next )
{
RestrictInfo * rinfo = ( RestrictInfo * ) lfirst ( cell ) ;
bool remove_it = false ;
int i ;
next = lnext ( cell ) ;
/* Drop this clause if it matches any column of the FK */
for ( i = 0 ; i < fkinfo - > nkeys ; i + + )
{
if ( rinfo - > parent_ec )
{
/*
* EC - derived clauses can only match by EC . It is okay to
* consider any clause derived from the same EC as
* matching the FK : even if equivclass . c chose to generate
* a clause equating some other pair of Vars , it could
* have generated one equating the FK ' s Vars . So for
* purposes of estimation , we can act as though it did so .
*
* Note : checking parent_ec is a bit of a cheat because
* there are EC - derived clauses that don ' t have parent_ec
* set ; but such clauses must compare expressions that
* aren ' t just Vars , so they cannot match the FK anyway .
*/
if ( fkinfo - > eclass [ i ] = = rinfo - > parent_ec )
{
remove_it = true ;
break ;
}
}
else
{
/*
* Otherwise , see if rinfo was previously matched to FK as
* a " loose " clause .
*/
if ( list_member_ptr ( fkinfo - > rinfos [ i ] , rinfo ) )
{
remove_it = true ;
break ;
}
}
}
if ( remove_it )
{
worklist = list_delete_cell ( worklist , cell , prev ) ;
removedlist = lappend ( removedlist , rinfo ) ;
}
else
prev = cell ;
}
/*
* If we failed to remove all the matching clauses we expected to
* find , chicken out and ignore this FK ; applying its selectivity
* might result in double - counting . Put any clauses we did manage to
* remove back into the worklist .
*
* Since the matching clauses are known not outerjoin - delayed , they
* should certainly have appeared in the initial joinclause list . If
* we didn ' t find them , they must have been matched to , and removed
* by , some other FK in a previous iteration of this loop . ( A likely
* case is that two FKs are matched to the same EC ; there will be only
* one EC - derived clause in the initial list , so the first FK will
* consume it . ) Applying both FKs ' selectivity independently risks
* underestimating the join size ; in particular , this would undo one
* of the main things that ECs were invented for , namely to avoid
* double - counting the selectivity of redundant equality conditions .
* Later we might think of a reasonable way to combine the estimates ,
* but for now , just punt , since this is a fairly uncommon situation .
*/
if ( list_length ( removedlist ) ! =
( fkinfo - > nmatched_ec + fkinfo - > nmatched_ri ) )
{
worklist = list_concat ( worklist , removedlist ) ;
continue ;
}
/*
* Finally we get to the payoff : estimate selectivity using the
* knowledge that each referencing row will match exactly one row in
* the referenced table .
*
* XXX that ' s not true in the presence of nulls in the referencing
* column ( s ) , so in principle we should derate the estimate for those .
* However ( 1 ) if there are any strict restriction clauses for the
* referencing column ( s ) elsewhere in the query , derating here would
* be double - counting the null fraction , and ( 2 ) it ' s not very clear
* how to combine null fractions for multiple referencing columns .
*
* In the first branch of the logic below , null derating is done
* implicitly by relying on clause_selectivity ( ) ; in the other two
* paths , we do nothing for now about correcting for nulls .
*
* XXX another point here is that if either side of an FK constraint
* is an inheritance parent , we estimate as though the constraint
* covers all its children as well . This is not an unreasonable
* assumption for a referencing table , ie the user probably applied
* identical constraints to all child tables ( though perhaps we ought
* to check that ) . But it ' s not possible to have done that for a
* referenced table . Fortunately , precisely because that doesn ' t
* work , it is uncommon in practice to have an FK referencing a parent
* table . So , at least for now , disregard inheritance here .
*/
if ( ref_is_outer & & jointype ! = JOIN_INNER )
{
/*
* When the referenced table is on the outer side of a non - inner
* join , knowing that each inner row has exactly one match is not
* as useful as one could wish , since we really need to know the
* fraction of outer rows with a match . Still , we can avoid the
* folly of multiplying the per - column estimates together . Take
* the smallest per - column selectivity , instead . ( This should
* correspond to the FK column with the most nulls . )
*/
Selectivity thisfksel = 1.0 ;
foreach ( cell , removedlist )
{
RestrictInfo * rinfo = ( RestrictInfo * ) lfirst ( cell ) ;
Selectivity csel ;
csel = clause_selectivity ( root , ( Node * ) rinfo ,
0 , jointype , sjinfo ) ;
thisfksel = Min ( thisfksel , csel ) ;
}
fkselec * = thisfksel ;
}
else if ( jointype = = JOIN_SEMI | | jointype = = JOIN_ANTI )
{
/*
* For JOIN_SEMI and JOIN_ANTI , the selectivity is defined as the
* fraction of LHS rows that have matches . If the referenced
* table is on the inner side , that means the selectivity is 1.0
* ( modulo nulls , which we ' re ignoring for now ) . We already
* covered the other case , so no work here .
*/
}
else
{
/*
* Otherwise , selectivity is exactly 1 / referenced - table - size ; but
* guard against tuples = = 0. Note we should use the raw table
* tuple count , not any estimate of its filtered or joined size .
*/
RelOptInfo * ref_rel = find_base_rel ( root , fkinfo - > ref_relid ) ;
double ref_tuples = Max ( ref_rel - > tuples , 1.0 ) ;
fkselec * = 1.0 / ref_tuples ;
}
}
* restrictlist = worklist ;
return fkselec ;
}
/*
* set_subquery_size_estimates
* Set the size estimates for a base relation that is a subquery .