@ -164,10 +164,20 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData * vardata ,
FmgrInfo * opproc , bool isgt , bool iseq ,
Datum constval , Oid consttype ) ;
static double eqjoinsel_inner ( Oid operator ,
VariableStatData * vardata1 , VariableStatData * vardata2 ) ;
static double eqjoinsel_semi ( Oid operator ,
static double eqjoinsel_inner ( Oid opfuncoid ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ) ;
static double eqjoinsel_semi ( Oid opfuncoid ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ,
RelOptInfo * inner_rel ) ;
static bool estimate_multivariate_ndistinct ( PlannerInfo * root ,
RelOptInfo * rel , List * * varinfos , double * ndistinct ) ;
@ -2290,20 +2300,69 @@ eqjoinsel(PG_FUNCTION_ARGS)
# endif
SpecialJoinInfo * sjinfo = ( SpecialJoinInfo * ) PG_GETARG_POINTER ( 4 ) ;
double selec ;
double selec_inner ;
VariableStatData vardata1 ;
VariableStatData vardata2 ;
double nd1 ;
double nd2 ;
bool isdefault1 ;
bool isdefault2 ;
Oid opfuncoid ;
AttStatsSlot sslot1 ;
AttStatsSlot sslot2 ;
Form_pg_statistic stats1 = NULL ;
Form_pg_statistic stats2 = NULL ;
bool have_mcvs1 = false ;
bool have_mcvs2 = false ;
bool join_is_reversed ;
RelOptInfo * inner_rel ;
get_join_variables ( root , args , sjinfo ,
& vardata1 , & vardata2 , & join_is_reversed ) ;
nd1 = get_variable_numdistinct ( & vardata1 , & isdefault1 ) ;
nd2 = get_variable_numdistinct ( & vardata2 , & isdefault2 ) ;
opfuncoid = get_opcode ( operator ) ;
memset ( & sslot1 , 0 , sizeof ( sslot1 ) ) ;
memset ( & sslot2 , 0 , sizeof ( sslot2 ) ) ;
if ( HeapTupleIsValid ( vardata1 . statsTuple ) )
{
/* note we allow use of nullfrac regardless of security check */
stats1 = ( Form_pg_statistic ) GETSTRUCT ( vardata1 . statsTuple ) ;
if ( statistic_proc_security_check ( & vardata1 , opfuncoid ) )
have_mcvs1 = get_attstatsslot ( & sslot1 , vardata1 . statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
if ( HeapTupleIsValid ( vardata2 . statsTuple ) )
{
/* note we allow use of nullfrac regardless of security check */
stats2 = ( Form_pg_statistic ) GETSTRUCT ( vardata2 . statsTuple ) ;
if ( statistic_proc_security_check ( & vardata2 , opfuncoid ) )
have_mcvs2 = get_attstatsslot ( & sslot2 , vardata2 . statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
/* We need to compute the inner-join selectivity in all cases */
selec_inner = eqjoinsel_inner ( opfuncoid ,
& vardata1 , & vardata2 ,
nd1 , nd2 ,
isdefault1 , isdefault2 ,
& sslot1 , & sslot2 ,
stats1 , stats2 ,
have_mcvs1 , have_mcvs2 ) ;
switch ( sjinfo - > jointype )
{
case JOIN_INNER :
case JOIN_LEFT :
case JOIN_FULL :
selec = eqjoinsel_inner ( operator , & vardata1 , & vardata2 ) ;
selec = selec_inner ;
break ;
case JOIN_SEMI :
case JOIN_ANTI :
@ -2317,12 +2376,40 @@ eqjoinsel(PG_FUNCTION_ARGS)
inner_rel = find_join_input_rel ( root , sjinfo - > min_righthand ) ;
if ( ! join_is_reversed )
selec = eqjoinsel_semi ( operator , & vardata1 , & vardata2 ,
selec = eqjoinsel_semi ( opfuncoid ,
& vardata1 , & vardata2 ,
nd1 , nd2 ,
isdefault1 , isdefault2 ,
& sslot1 , & sslot2 ,
stats1 , stats2 ,
have_mcvs1 , have_mcvs2 ,
inner_rel ) ;
else
selec = eqjoinsel_semi ( get_commutator ( operator ) ,
{
Oid commop = get_commutator ( operator ) ;
Oid commopfuncoid = OidIsValid ( commop ) ? get_opcode ( commop ) : InvalidOid ;
selec = eqjoinsel_semi ( commopfuncoid ,
& vardata2 , & vardata1 ,
nd2 , nd1 ,
isdefault2 , isdefault1 ,
& sslot2 , & sslot1 ,
stats2 , stats1 ,
have_mcvs2 , have_mcvs1 ,
inner_rel ) ;
}
/*
* We should never estimate the output of a semijoin to be more
* rows than we estimate for an inner join with the same input
* rels and join condition ; it ' s obviously impossible for that to
* happen . The former estimate is N1 * Ssemi while the latter is
* N1 * N2 * Sinner , so we may clamp Ssemi < = N2 * Sinner . Doing
* this is worthwhile because of the shakier estimation rules we
* use in eqjoinsel_semi , particularly in cases where it has to
* punt entirely .
*/
selec = Min ( selec , inner_rel - > rows * selec_inner ) ;
break ;
default :
/* other values not expected here */
@ -2332,6 +2419,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
break ;
}
free_attstatsslot ( & sslot1 ) ;
free_attstatsslot ( & sslot2 ) ;
ReleaseVariableStats ( vardata1 ) ;
ReleaseVariableStats ( vardata2 ) ;
@ -2347,49 +2437,15 @@ eqjoinsel(PG_FUNCTION_ARGS)
* that it ' s worth trying to distinguish them here .
*/
static double
eqjoinsel_inner ( Oid operator ,
VariableStatData * vardata1 , VariableStatData * vardata2 )
eqjoinsel_inner ( Oid opfuncoid ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 )
{
double selec ;
double nd1 ;
double nd2 ;
bool isdefault1 ;
bool isdefault2 ;
Oid opfuncoid ;
Form_pg_statistic stats1 = NULL ;
Form_pg_statistic stats2 = NULL ;
bool have_mcvs1 = false ;
bool have_mcvs2 = false ;
AttStatsSlot sslot1 ;
AttStatsSlot sslot2 ;
nd1 = get_variable_numdistinct ( vardata1 , & isdefault1 ) ;
nd2 = get_variable_numdistinct ( vardata2 , & isdefault2 ) ;
opfuncoid = get_opcode ( operator ) ;
memset ( & sslot1 , 0 , sizeof ( sslot1 ) ) ;
memset ( & sslot2 , 0 , sizeof ( sslot2 ) ) ;
if ( HeapTupleIsValid ( vardata1 - > statsTuple ) )
{
/* note we allow use of nullfrac regardless of security check */
stats1 = ( Form_pg_statistic ) GETSTRUCT ( vardata1 - > statsTuple ) ;
if ( statistic_proc_security_check ( vardata1 , opfuncoid ) )
have_mcvs1 = get_attstatsslot ( & sslot1 , vardata1 - > statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
if ( HeapTupleIsValid ( vardata2 - > statsTuple ) )
{
/* note we allow use of nullfrac regardless of security check */
stats2 = ( Form_pg_statistic ) GETSTRUCT ( vardata2 - > statsTuple ) ;
if ( statistic_proc_security_check ( vardata2 , opfuncoid ) )
have_mcvs2 = get_attstatsslot ( & sslot2 , vardata2 - > statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
if ( have_mcvs1 & & have_mcvs2 )
{
@ -2423,8 +2479,8 @@ eqjoinsel_inner(Oid operator,
nmatches ;
fmgr_info ( opfuncoid , & eqproc ) ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 . nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( sslot2 . nvalues * sizeof ( bool ) ) ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 - > nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( sslot2 - > nvalues * sizeof ( bool ) ) ;
/*
* Note we assume that each MCV will match at most one member of the
@ -2434,21 +2490,21 @@ eqjoinsel_inner(Oid operator,
*/
matchprodfreq = 0.0 ;
nmatches = 0 ;
for ( i = 0 ; i < sslot1 . nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
int j ;
for ( j = 0 ; j < sslot2 . nvalues ; j + + )
for ( j = 0 ; j < sslot2 - > nvalues ; j + + )
{
if ( hasmatch2 [ j ] )
continue ;
if ( DatumGetBool ( FunctionCall2Coll ( & eqproc ,
DEFAULT_COLLATION_OID ,
sslot1 . values [ i ] ,
sslot2 . values [ j ] ) ) )
sslot1 - > values [ i ] ,
sslot2 - > values [ j ] ) ) )
{
hasmatch1 [ i ] = hasmatch2 [ j ] = true ;
matchprodfreq + = sslot1 . numbers [ i ] * sslot2 . numbers [ j ] ;
matchprodfreq + = sslot1 - > numbers [ i ] * sslot2 - > numbers [ j ] ;
nmatches + + ;
break ;
}
@ -2457,22 +2513,22 @@ eqjoinsel_inner(Oid operator,
CLAMP_PROBABILITY ( matchprodfreq ) ;
/* Sum up frequencies of matched and unmatched MCVs */
matchfreq1 = unmatchfreq1 = 0.0 ;
for ( i = 0 ; i < sslot1 . nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
if ( hasmatch1 [ i ] )
matchfreq1 + = sslot1 . numbers [ i ] ;
matchfreq1 + = sslot1 - > numbers [ i ] ;
else
unmatchfreq1 + = sslot1 . numbers [ i ] ;
unmatchfreq1 + = sslot1 - > numbers [ i ] ;
}
CLAMP_PROBABILITY ( matchfreq1 ) ;
CLAMP_PROBABILITY ( unmatchfreq1 ) ;
matchfreq2 = unmatchfreq2 = 0.0 ;
for ( i = 0 ; i < sslot2 . nvalues ; i + + )
for ( i = 0 ; i < sslot2 - > nvalues ; i + + )
{
if ( hasmatch2 [ i ] )
matchfreq2 + = sslot2 . numbers [ i ] ;
matchfreq2 + = sslot2 - > numbers [ i ] ;
else
unmatchfreq2 + = sslot2 . numbers [ i ] ;
unmatchfreq2 + = sslot2 - > numbers [ i ] ;
}
CLAMP_PROBABILITY ( matchfreq2 ) ;
CLAMP_PROBABILITY ( unmatchfreq2 ) ;
@ -2497,15 +2553,15 @@ eqjoinsel_inner(Oid operator,
* MCVs plus non - MCV values .
*/
totalsel1 = matchprodfreq ;
if ( nd2 > sslot2 . nvalues )
totalsel1 + = unmatchfreq1 * otherfreq2 / ( nd2 - sslot2 . nvalues ) ;
if ( nd2 > sslot2 - > nvalues )
totalsel1 + = unmatchfreq1 * otherfreq2 / ( nd2 - sslot2 - > nvalues ) ;
if ( nd2 > nmatches )
totalsel1 + = otherfreq1 * ( otherfreq2 + unmatchfreq2 ) /
( nd2 - nmatches ) ;
/* Same estimate from the point of view of relation 2. */
totalsel2 = matchprodfreq ;
if ( nd1 > sslot1 . nvalues )
totalsel2 + = unmatchfreq2 * otherfreq1 / ( nd1 - sslot1 . nvalues ) ;
if ( nd1 > sslot1 - > nvalues )
totalsel2 + = unmatchfreq2 * otherfreq1 / ( nd1 - sslot1 - > nvalues ) ;
if ( nd1 > nmatches )
totalsel2 + = otherfreq2 * ( otherfreq1 + unmatchfreq1 ) /
( nd1 - nmatches ) ;
@ -2550,9 +2606,6 @@ eqjoinsel_inner(Oid operator,
selec / = nd2 ;
}
free_attstatsslot ( & sslot1 ) ;
free_attstatsslot ( & sslot2 ) ;
return selec ;
}
@ -2561,32 +2614,19 @@ eqjoinsel_inner(Oid operator,
*
* ( Also used for anti join , which we are supposed to estimate the same way . )
* Caller has ensured that vardata1 is the LHS variable .
* Unlike eqjoinsel_inner , we have to cope with operator being InvalidOid .
* Unlike eqjoinsel_inner , we have to cope with opfuncoid being InvalidOid .
*/
static double
eqjoinsel_semi ( Oid operator ,
eqjoinsel_semi ( Oid opfuncoid ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ,
RelOptInfo * inner_rel )
{
double selec ;
double nd1 ;
double nd2 ;
bool isdefault1 ;
bool isdefault2 ;
Oid opfuncoid ;
Form_pg_statistic stats1 = NULL ;
bool have_mcvs1 = false ;
bool have_mcvs2 = false ;
AttStatsSlot sslot1 ;
AttStatsSlot sslot2 ;
nd1 = get_variable_numdistinct ( vardata1 , & isdefault1 ) ;
nd2 = get_variable_numdistinct ( vardata2 , & isdefault2 ) ;
opfuncoid = OidIsValid ( operator ) ? get_opcode ( operator ) : InvalidOid ;
memset ( & sslot1 , 0 , sizeof ( sslot1 ) ) ;
memset ( & sslot2 , 0 , sizeof ( sslot2 ) ) ;
/*
* We clamp nd2 to be not more than what we estimate the inner relation ' s
@ -2621,26 +2661,7 @@ eqjoinsel_semi(Oid operator,
isdefault2 = false ;
}
if ( HeapTupleIsValid ( vardata1 - > statsTuple ) )
{
/* note we allow use of nullfrac regardless of security check */
stats1 = ( Form_pg_statistic ) GETSTRUCT ( vardata1 - > statsTuple ) ;
if ( statistic_proc_security_check ( vardata1 , opfuncoid ) )
have_mcvs1 = get_attstatsslot ( & sslot1 , vardata1 - > statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
if ( HeapTupleIsValid ( vardata2 - > statsTuple ) & &
statistic_proc_security_check ( vardata2 , opfuncoid ) )
{
have_mcvs2 = get_attstatsslot ( & sslot2 , vardata2 - > statsTuple ,
STATISTIC_KIND_MCV , InvalidOid ,
ATTSTATSSLOT_VALUES ) ;
/* note: currently don't need stanumbers from RHS */
}
if ( have_mcvs1 & & have_mcvs2 & & OidIsValid ( operator ) )
if ( have_mcvs1 & & have_mcvs2 & & OidIsValid ( opfuncoid ) )
{
/*
* We have most - common - value lists for both relations . Run through
@ -2663,15 +2684,15 @@ eqjoinsel_semi(Oid operator,
/*
* The clamping above could have resulted in nd2 being less than
* sslot2 . nvalues ; in which case , we assume that precisely the nd2
* sslot2 - > nvalues ; in which case , we assume that precisely the nd2
* most common values in the relation will appear in the join input ,
* and so compare to only the first nd2 members of the MCV list . Of
* course this is frequently wrong , but it ' s the best bet we can make .
*/
clamped_nvalues2 = Min ( sslot2 . nvalues , nd2 ) ;
clamped_nvalues2 = Min ( sslot2 - > nvalues , nd2 ) ;
fmgr_info ( opfuncoid , & eqproc ) ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 . nvalues * sizeof ( bool ) ) ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 - > nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( clamped_nvalues2 * sizeof ( bool ) ) ;
/*
@ -2681,7 +2702,7 @@ eqjoinsel_semi(Oid operator,
* and because the math wouldn ' t add up . . .
*/
nmatches = 0 ;
for ( i = 0 ; i < sslot1 . nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
int j ;
@ -2691,8 +2712,8 @@ eqjoinsel_semi(Oid operator,
continue ;
if ( DatumGetBool ( FunctionCall2Coll ( & eqproc ,
DEFAULT_COLLATION_OID ,
sslot1 . values [ i ] ,
sslot2 . values [ j ] ) ) )
sslot1 - > values [ i ] ,
sslot2 - > values [ j ] ) ) )
{
hasmatch1 [ i ] = hasmatch2 [ j ] = true ;
nmatches + + ;
@ -2702,10 +2723,10 @@ eqjoinsel_semi(Oid operator,
}
/* Sum up frequencies of matched MCVs */
matchfreq1 = 0.0 ;
for ( i = 0 ; i < sslot1 . nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
if ( hasmatch1 [ i ] )
matchfreq1 + = sslot1 . numbers [ i ] ;
matchfreq1 + = sslot1 - > numbers [ i ] ;
}
CLAMP_PROBABILITY ( matchfreq1 ) ;
pfree ( hasmatch1 ) ;
@ -2760,9 +2781,6 @@ eqjoinsel_semi(Oid operator,
selec = 0.5 * ( 1.0 - nullfrac1 ) ;
}
free_attstatsslot ( & sslot1 ) ;
free_attstatsslot ( & sslot2 ) ;
return selec ;
}