@ -143,26 +143,78 @@
# define DEFAULT_PAGE_CPU_MULTIPLIER 50.0
# define DEFAULT_PAGE_CPU_MULTIPLIER 50.0
/*
* In production builds , switch to hash - based MCV matching when the lists are
* large enough to amortize hash setup cost . ( This threshold is compared to
* the sum of the lengths of the two MCV lists . This is simplistic but seems
* to work well enough . ) In debug builds , we use a smaller threshold so that
* the regression tests cover both paths well .
*/
# ifndef USE_ASSERT_CHECKING
# define EQJOINSEL_MCV_HASH_THRESHOLD 200
# else
# define EQJOINSEL_MCV_HASH_THRESHOLD 20
# endif
/* Entries in the simplehash hash table used by eqjoinsel_find_matches */
typedef struct MCVHashEntry
{
Datum value ; /* the value represented by this entry */
int index ; /* its index in the relevant AttStatsSlot */
uint32 hash ; /* hash code for the Datum */
char status ; /* status code used by simplehash.h */
} MCVHashEntry ;
/* private_data for the simplehash hash table */
typedef struct MCVHashContext
{
FunctionCallInfo equal_fcinfo ; /* the equality join operator */
FunctionCallInfo hash_fcinfo ; /* the hash function to use */
bool op_is_reversed ; /* equality compares hash type to probe type */
bool insert_mode ; /* doing inserts or lookups? */
bool hash_typbyval ; /* typbyval of hashed data type */
int16 hash_typlen ; /* typlen of hashed data type */
} MCVHashContext ;
/* forward reference */
typedef struct MCVHashTable_hash MCVHashTable_hash ;
/* Hooks for plugins to get control when we ask for stats */
/* Hooks for plugins to get control when we ask for stats */
get_relation_stats_hook_type get_relation_stats_hook = NULL ;
get_relation_stats_hook_type get_relation_stats_hook = NULL ;
get_index_stats_hook_type get_index_stats_hook = NULL ;
get_index_stats_hook_type get_index_stats_hook = NULL ;
static double eqsel_internal ( PG_FUNCTION_ARGS , bool negate ) ;
static double eqsel_internal ( PG_FUNCTION_ARGS , bool negate ) ;
static double eqjoinsel_inner ( Oid opfuncoid , Oid collation ,
static double eqjoinsel_inner ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ) ;
bool have_mcvs1 , bool have_mcvs2 ,
static double eqjoinsel_semi ( Oid opfuncoid , Oid collation ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches ) ;
static double eqjoinsel_semi ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
bool op_is_reversed ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ,
bool have_mcvs1 , bool have_mcvs2 ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches ,
RelOptInfo * inner_rel ) ;
RelOptInfo * inner_rel ) ;
static void eqjoinsel_find_matches ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
bool op_is_reversed ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
int nvalues1 , int nvalues2 ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches , double * p_matchprodfreq ) ;
static uint32 hash_mcv ( MCVHashTable_hash * tab , Datum key ) ;
static bool mcvs_equal ( MCVHashTable_hash * tab , Datum key0 , Datum key1 ) ;
static bool estimate_multivariate_ndistinct ( PlannerInfo * root ,
static bool estimate_multivariate_ndistinct ( PlannerInfo * root ,
RelOptInfo * rel , List * * varinfos , double * ndistinct ) ;
RelOptInfo * rel , List * * varinfos , double * ndistinct ) ;
static bool convert_to_scalar ( Datum value , Oid valuetypid , Oid collid ,
static bool convert_to_scalar ( Datum value , Oid valuetypid , Oid collid ,
@ -218,6 +270,20 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static double btcost_correlation ( IndexOptInfo * index ,
static double btcost_correlation ( IndexOptInfo * index ,
VariableStatData * vardata ) ;
VariableStatData * vardata ) ;
/* Define support routines for MCV hash tables */
# define SH_PREFIX MCVHashTable
# define SH_ELEMENT_TYPE MCVHashEntry
# define SH_KEY_TYPE Datum
# define SH_KEY value
# define SH_HASH_KEY(tab,key) hash_mcv(tab, key)
# define SH_EQUAL(tab,key0,key1) mcvs_equal(tab, key0, key1)
# define SH_SCOPE static inline
# define SH_STORE_HASH
# define SH_GET_HASH(tab,ent) (ent)->hash
# define SH_DEFINE
# define SH_DECLARE
# include "lib/simplehash.h"
/*
/*
* eqsel - Selectivity of " = " for any data types .
* eqsel - Selectivity of " = " for any data types .
@ -2304,12 +2370,18 @@ eqjoinsel(PG_FUNCTION_ARGS)
bool isdefault1 ;
bool isdefault1 ;
bool isdefault2 ;
bool isdefault2 ;
Oid opfuncoid ;
Oid opfuncoid ;
FmgrInfo eqproc ;
Oid hashLeft = InvalidOid ;
Oid hashRight = InvalidOid ;
AttStatsSlot sslot1 ;
AttStatsSlot sslot1 ;
AttStatsSlot sslot2 ;
AttStatsSlot sslot2 ;
Form_pg_statistic stats1 = NULL ;
Form_pg_statistic stats1 = NULL ;
Form_pg_statistic stats2 = NULL ;
Form_pg_statistic stats2 = NULL ;
bool have_mcvs1 = false ;
bool have_mcvs1 = false ;
bool have_mcvs2 = false ;
bool have_mcvs2 = false ;
bool * hasmatch1 = NULL ;
bool * hasmatch2 = NULL ;
int nmatches = 0 ;
bool get_mcv_stats ;
bool get_mcv_stats ;
bool join_is_reversed ;
bool join_is_reversed ;
RelOptInfo * inner_rel ;
RelOptInfo * inner_rel ;
@ -2360,14 +2432,34 @@ eqjoinsel(PG_FUNCTION_ARGS)
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ) ;
}
}
/* Prepare info usable by both eqjoinsel_inner and eqjoinsel_semi */
if ( have_mcvs1 & & have_mcvs2 )
{
fmgr_info ( opfuncoid , & eqproc ) ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 . nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( sslot2 . nvalues * sizeof ( bool ) ) ;
/*
* If the MCV lists are long enough to justify hashing , try to look up
* hash functions for the join operator .
*/
if ( ( sslot1 . nvalues + sslot2 . nvalues ) > = EQJOINSEL_MCV_HASH_THRESHOLD )
( void ) get_op_hash_functions ( operator , & hashLeft , & hashRight ) ;
}
else
memset ( & eqproc , 0 , sizeof ( eqproc ) ) ; /* silence uninit-var warnings */
/* We need to compute the inner-join selectivity in all cases */
/* We need to compute the inner-join selectivity in all cases */
selec_inner = eqjoinsel_inner ( opfuncoid , collation ,
selec_inner = eqjoinsel_inner ( & eqproc , collation ,
hashLeft , hashRight ,
& vardata1 , & vardata2 ,
& vardata1 , & vardata2 ,
nd1 , nd2 ,
nd1 , nd2 ,
isdefault1 , isdefault2 ,
isdefault1 , isdefault2 ,
& sslot1 , & sslot2 ,
& sslot1 , & sslot2 ,
stats1 , stats2 ,
stats1 , stats2 ,
have_mcvs1 , have_mcvs2 ) ;
have_mcvs1 , have_mcvs2 ,
hasmatch1 , hasmatch2 ,
& nmatches ) ;
switch ( sjinfo - > jointype )
switch ( sjinfo - > jointype )
{
{
@ -2388,28 +2480,31 @@ eqjoinsel(PG_FUNCTION_ARGS)
inner_rel = find_join_input_rel ( root , sjinfo - > min_righthand ) ;
inner_rel = find_join_input_rel ( root , sjinfo - > min_righthand ) ;
if ( ! join_is_reversed )
if ( ! join_is_reversed )
selec = eqjoinsel_semi ( opfuncoid , collation ,
selec = eqjoinsel_semi ( & eqproc , collation ,
hashLeft , hashRight ,
false ,
& vardata1 , & vardata2 ,
& vardata1 , & vardata2 ,
nd1 , nd2 ,
nd1 , nd2 ,
isdefault1 , isdefault2 ,
isdefault1 , isdefault2 ,
& sslot1 , & sslot2 ,
& sslot1 , & sslot2 ,
stats1 , stats2 ,
stats1 , stats2 ,
have_mcvs1 , have_mcvs2 ,
have_mcvs1 , have_mcvs2 ,
hasmatch1 , hasmatch2 ,
& nmatches ,
inner_rel ) ;
inner_rel ) ;
else
else
{
selec = eqjoinsel_semi ( & eqproc , collation ,
Oid commop = get_commutator ( operator ) ;
hashLeft , hashRight ,
Oid commopfuncoid = OidIsValid ( commop ) ? get_opcode ( commop ) : InvalidOid ;
true ,
selec = eqjoinsel_semi ( commopfuncoid , collation ,
& vardata2 , & vardata1 ,
& vardata2 , & vardata1 ,
nd2 , nd1 ,
nd2 , nd1 ,
isdefault2 , isdefault1 ,
isdefault2 , isdefault1 ,
& sslot2 , & sslot1 ,
& sslot2 , & sslot1 ,
stats2 , stats1 ,
stats2 , stats1 ,
have_mcvs2 , have_mcvs1 ,
have_mcvs2 , have_mcvs1 ,
hasmatch2 , hasmatch1 ,
& nmatches ,
inner_rel ) ;
inner_rel ) ;
}
/*
/*
* We should never estimate the output of a semijoin to be more
* We should never estimate the output of a semijoin to be more
@ -2437,6 +2532,11 @@ eqjoinsel(PG_FUNCTION_ARGS)
ReleaseVariableStats ( vardata1 ) ;
ReleaseVariableStats ( vardata1 ) ;
ReleaseVariableStats ( vardata2 ) ;
ReleaseVariableStats ( vardata2 ) ;
if ( hasmatch1 )
pfree ( hasmatch1 ) ;
if ( hasmatch2 )
pfree ( hasmatch2 ) ;
CLAMP_PROBABILITY ( selec ) ;
CLAMP_PROBABILITY ( selec ) ;
PG_RETURN_FLOAT8 ( ( float8 ) selec ) ;
PG_RETURN_FLOAT8 ( ( float8 ) selec ) ;
@ -2445,17 +2545,24 @@ eqjoinsel(PG_FUNCTION_ARGS)
/*
/*
* eqjoinsel_inner - - - eqjoinsel for normal inner join
* eqjoinsel_inner - - - eqjoinsel for normal inner join
*
*
* In addition to computing the selectivity estimate , this will fill
* hasmatch1 [ ] , hasmatch2 [ ] , and * p_nmatches ( if have_mcvs1 & & have_mcvs2 ) .
* We may be able to re - use that data in eqjoinsel_semi .
*
* We also use this for LEFT / FULL outer joins ; it ' s not presently clear
* We also use this for LEFT / FULL outer joins ; it ' s not presently clear
* that it ' s worth trying to distinguish them here .
* that it ' s worth trying to distinguish them here .
*/
*/
static double
static double
eqjoinsel_inner ( Oid opfuncoid , Oid collation ,
eqjoinsel_inner ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 )
bool have_mcvs1 , bool have_mcvs2 ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches )
{
{
double selec ;
double selec ;
@ -2473,10 +2580,6 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* results " , Technical Report 1018, Computer Science Dept., University
* results " , Technical Report 1018, Computer Science Dept., University
* of Wisconsin , Madison , March 1991 ( available from ftp . cs . wisc . edu ) .
* of Wisconsin , Madison , March 1991 ( available from ftp . cs . wisc . edu ) .
*/
*/
LOCAL_FCINFO ( fcinfo , 2 ) ;
FmgrInfo eqproc ;
bool * hasmatch1 ;
bool * hasmatch2 ;
double nullfrac1 = stats1 - > stanullfrac ;
double nullfrac1 = stats1 - > stanullfrac ;
double nullfrac2 = stats2 - > stanullfrac ;
double nullfrac2 = stats2 - > stanullfrac ;
double matchprodfreq ,
double matchprodfreq ,
@ -2491,55 +2594,17 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
int i ,
int i ,
nmatches ;
nmatches ;
fmgr_info ( opfuncoid , & eqproc ) ;
/* Fill the match arrays */
eqjoinsel_find_matches ( eqproc , collation ,
/*
hashLeft , hashRight ,
* Save a few cycles by setting up the fcinfo struct just once . Using
false ,
* FunctionCallInvoke directly also avoids failure if the eqproc
sslot1 , sslot2 ,
* returns NULL , though really equality functions should never do
sslot1 - > nvalues , sslot2 - > nvalues ,
* that .
hasmatch1 , hasmatch2 ,
*/
p_nmatches , & matchprodfreq ) ;
InitFunctionCallInfoData ( * fcinfo , & eqproc , 2 , collation ,
nmatches = * p_nmatches ;
NULL , NULL ) ;
fcinfo - > args [ 0 ] . isnull = false ;
fcinfo - > args [ 1 ] . isnull = false ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 - > nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( sslot2 - > nvalues * sizeof ( bool ) ) ;
/*
* Note we assume that each MCV will match at most one member of the
* other MCV list . If the operator isn ' t really equality , there could
* be multiple matches - - - but we don ' t look for them , both for speed
* and because the math wouldn ' t add up . . .
*/
matchprodfreq = 0.0 ;
nmatches = 0 ;
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
int j ;
fcinfo - > args [ 0 ] . value = sslot1 - > values [ i ] ;
for ( j = 0 ; j < sslot2 - > nvalues ; j + + )
{
Datum fresult ;
if ( hasmatch2 [ j ] )
continue ;
fcinfo - > args [ 1 ] . value = sslot2 - > values [ j ] ;
fcinfo - > isnull = false ;
fresult = FunctionCallInvoke ( fcinfo ) ;
if ( ! fcinfo - > isnull & & DatumGetBool ( fresult ) )
{
hasmatch1 [ i ] = hasmatch2 [ j ] = true ;
matchprodfreq + = sslot1 - > numbers [ i ] * sslot2 - > numbers [ j ] ;
nmatches + + ;
break ;
}
}
}
CLAMP_PROBABILITY ( matchprodfreq ) ;
CLAMP_PROBABILITY ( matchprodfreq ) ;
/* Sum up frequencies of matched and unmatched MCVs */
/* Sum up frequencies of matched and unmatched MCVs */
matchfreq1 = unmatchfreq1 = 0.0 ;
matchfreq1 = unmatchfreq1 = 0.0 ;
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
@ -2561,8 +2626,6 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
}
}
CLAMP_PROBABILITY ( matchfreq2 ) ;
CLAMP_PROBABILITY ( matchfreq2 ) ;
CLAMP_PROBABILITY ( unmatchfreq2 ) ;
CLAMP_PROBABILITY ( unmatchfreq2 ) ;
pfree ( hasmatch1 ) ;
pfree ( hasmatch2 ) ;
/*
/*
* Compute total frequency of non - null values that are not in the MCV
* Compute total frequency of non - null values that are not in the MCV
@ -2642,17 +2705,24 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* eqjoinsel_semi - - - eqjoinsel for semi join
* eqjoinsel_semi - - - eqjoinsel for semi join
*
*
* ( Also used for anti join , which we are supposed to estimate the same way . )
* ( Also used for anti join , which we are supposed to estimate the same way . )
* Caller has ensured that vardata1 is the LHS variable .
* Caller has ensured that vardata1 is the LHS variable ; however , eqproc
* Unlike eqjoinsel_inner , we have to cope with opfuncoid being InvalidOid .
* is for the original join operator , which might now need to have the inputs
* swapped in order to apply correctly . Also , if have_mcvs1 & & have_mcvs2
* then hasmatch1 [ ] , hasmatch2 [ ] , and * p_nmatches were filled by
* eqjoinsel_inner .
*/
*/
static double
static double
eqjoinsel_semi ( Oid opfuncoid , Oid collation ,
eqjoinsel_semi ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
bool op_is_reversed ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
VariableStatData * vardata1 , VariableStatData * vardata2 ,
double nd1 , double nd2 ,
double nd1 , double nd2 ,
bool isdefault1 , bool isdefault2 ,
bool isdefault1 , bool isdefault2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
Form_pg_statistic stats1 , Form_pg_statistic stats2 ,
bool have_mcvs1 , bool have_mcvs2 ,
bool have_mcvs1 , bool have_mcvs2 ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches ,
RelOptInfo * inner_rel )
RelOptInfo * inner_rel )
{
{
double selec ;
double selec ;
@ -2690,7 +2760,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
isdefault2 = false ;
isdefault2 = false ;
}
}
if ( have_mcvs1 & & have_mcvs2 & & OidIsValid ( opfuncoid ) )
if ( have_mcvs1 & & have_mcvs2 )
{
{
/*
/*
* We have most - common - value lists for both relations . Run through
* We have most - common - value lists for both relations . Run through
@ -2700,12 +2770,9 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* lists . We still have to estimate for the remaining population , but
* lists . We still have to estimate for the remaining population , but
* in a skewed distribution this gives us a big leg up in accuracy .
* in a skewed distribution this gives us a big leg up in accuracy .
*/
*/
LOCAL_FCINFO ( fcinfo , 2 ) ;
FmgrInfo eqproc ;
bool * hasmatch1 ;
bool * hasmatch2 ;
double nullfrac1 = stats1 - > stanullfrac ;
double nullfrac1 = stats1 - > stanullfrac ;
double matchfreq1 ,
double matchprodfreq ,
matchfreq1 ,
uncertainfrac ,
uncertainfrac ,
uncertain ;
uncertain ;
int i ,
int i ,
@ -2721,52 +2788,32 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
*/
*/
clamped_nvalues2 = Min ( sslot2 - > nvalues , nd2 ) ;
clamped_nvalues2 = Min ( sslot2 - > nvalues , nd2 ) ;
fmgr_info ( opfuncoid , & eqproc ) ;
/*
* Save a few cycles by setting up the fcinfo struct just once . Using
* FunctionCallInvoke directly also avoids failure if the eqproc
* returns NULL , though really equality functions should never do
* that .
*/
InitFunctionCallInfoData ( * fcinfo , & eqproc , 2 , collation ,
NULL , NULL ) ;
fcinfo - > args [ 0 ] . isnull = false ;
fcinfo - > args [ 1 ] . isnull = false ;
hasmatch1 = ( bool * ) palloc0 ( sslot1 - > nvalues * sizeof ( bool ) ) ;
hasmatch2 = ( bool * ) palloc0 ( clamped_nvalues2 * sizeof ( bool ) ) ;
/*
/*
* Note we assume that each MCV will match at most one member of the
* If we did not set clamped_nvalues2 to less than sslot2 - > nvalues ,
* other MCV list . If the operator isn ' t really equality , there could
* then the hasmatch1 [ ] and hasmatch2 [ ] match flags computed by
* be multiple matches - - - but we don ' t look for them , both for speed
* eqjoinsel_inner are still perfectly applicable , so we need not
* and because the math wouldn ' t add up . . .
* re - do the matching work . Note that it does not matter if
* op_is_reversed : we ' d get the same answers .
*
* If we did clamp , then a different set of sslot2 values is to be
* compared , so we have to re - do the matching .
*/
*/
nmatches = 0 ;
if ( clamped_nvalues2 ! = sslot2 - > nvalues )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
{
{
int j ;
/* Must re-zero the arrays */
memset ( hasmatch1 , 0 , sslot1 - > nvalues * sizeof ( bool ) ) ;
fcinfo - > args [ 0 ] . value = sslot1 - > values [ i ] ;
memset ( hasmatch2 , 0 , clamped_nvalues2 * sizeof ( bool ) ) ;
/* Re-fill the match arrays */
for ( j = 0 ; j < clamped_nvalues2 ; j + + )
eqjoinsel_find_matches ( eqproc , collation ,
{
hashLeft , hashRight ,
Datum fresult ;
op_is_reversed ,
sslot1 , sslot2 ,
if ( hasmatch2 [ j ] )
sslot1 - > nvalues , clamped_nvalues2 ,
continue ;
hasmatch1 , hasmatch2 ,
fcinfo - > args [ 1 ] . value = sslot2 - > values [ j ] ;
p_nmatches , & matchprodfreq ) ;
fcinfo - > isnull = false ;
fresult = FunctionCallInvoke ( fcinfo ) ;
if ( ! fcinfo - > isnull & & DatumGetBool ( fresult ) )
{
hasmatch1 [ i ] = hasmatch2 [ j ] = true ;
nmatches + + ;
break ;
}
}
}
}
nmatches = * p_nmatches ;
/* Sum up frequencies of matched MCVs */
/* Sum up frequencies of matched MCVs */
matchfreq1 = 0.0 ;
matchfreq1 = 0.0 ;
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
for ( i = 0 ; i < sslot1 - > nvalues ; i + + )
@ -2775,8 +2822,6 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
matchfreq1 + = sslot1 - > numbers [ i ] ;
matchfreq1 + = sslot1 - > numbers [ i ] ;
}
}
CLAMP_PROBABILITY ( matchfreq1 ) ;
CLAMP_PROBABILITY ( matchfreq1 ) ;
pfree ( hasmatch1 ) ;
pfree ( hasmatch2 ) ;
/*
/*
* Now we need to estimate the fraction of relation 1 that has at
* Now we need to estimate the fraction of relation 1 that has at
@ -2830,6 +2875,273 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
return selec ;
return selec ;
}
}
/*
* Identify matching MCVs for eqjoinsel_inner or eqjoinsel_semi .
*
* Inputs :
* eqproc : FmgrInfo for equality function to use ( might be reversed )
* collation : OID of collation to use
* hashLeft , hashRight : OIDs of hash functions associated with equality op ,
* or InvalidOid if we ' re not to use hashing
* op_is_reversed : indicates that eqproc compares right type to left type
* sslot1 , sslot2 : MCV values for the lefthand and righthand inputs
* nvalues1 , nvalues2 : number of values to be considered ( can be less than
* sslotN - > nvalues , but not more )
* Outputs :
* hasmatch1 [ ] , hasmatch2 [ ] : pre - zeroed arrays of lengths nvalues1 , nvalues2 ;
* entries are set to true if that MCV has a match on the other side
* * p_nmatches : receives number of MCV pairs that match
* * p_matchprodfreq : receives sum ( sslot1 - > numbers [ i ] * sslot2 - > numbers [ j ] )
* for matching MCVs
*
* Note that hashLeft is for the eqproc ' s left - hand input type , hashRight
* for its right , regardless of op_is_reversed .
*
* Note we assume that each MCV will match at most one member of the other
* MCV list . If the operator isn ' t really equality , there could be multiple
* matches - - - but we don ' t look for them , both for speed and because the
* math wouldn ' t add up . . .
*/
static void
eqjoinsel_find_matches ( FmgrInfo * eqproc , Oid collation ,
Oid hashLeft , Oid hashRight ,
bool op_is_reversed ,
AttStatsSlot * sslot1 , AttStatsSlot * sslot2 ,
int nvalues1 , int nvalues2 ,
bool * hasmatch1 , bool * hasmatch2 ,
int * p_nmatches , double * p_matchprodfreq )
{
LOCAL_FCINFO ( fcinfo , 2 ) ;
double matchprodfreq = 0.0 ;
int nmatches = 0 ;
/*
* Save a few cycles by setting up the fcinfo struct just once . Using
* FunctionCallInvoke directly also avoids failure if the eqproc returns
* NULL , though really equality functions should never do that .
*/
InitFunctionCallInfoData ( * fcinfo , eqproc , 2 , collation ,
NULL , NULL ) ;
fcinfo - > args [ 0 ] . isnull = false ;
fcinfo - > args [ 1 ] . isnull = false ;
if ( OidIsValid ( hashLeft ) & & OidIsValid ( hashRight ) )
{
/* Use a hash table to speed up the matching */
LOCAL_FCINFO ( hash_fcinfo , 1 ) ;
FmgrInfo hash_proc ;
MCVHashContext hashContext ;
MCVHashTable_hash * hashTable ;
AttStatsSlot * statsProbe ;
AttStatsSlot * statsHash ;
bool * hasMatchProbe ;
bool * hasMatchHash ;
int nvaluesProbe ;
int nvaluesHash ;
/* Make sure we build the hash table on the smaller array. */
if ( sslot1 - > nvalues > = sslot2 - > nvalues )
{
statsProbe = sslot1 ;
statsHash = sslot2 ;
hasMatchProbe = hasmatch1 ;
hasMatchHash = hasmatch2 ;
nvaluesProbe = nvalues1 ;
nvaluesHash = nvalues2 ;
}
else
{
/* We'll have to reverse the direction of use of the operator. */
op_is_reversed = ! op_is_reversed ;
statsProbe = sslot2 ;
statsHash = sslot1 ;
hasMatchProbe = hasmatch2 ;
hasMatchHash = hasmatch1 ;
nvaluesProbe = nvalues2 ;
nvaluesHash = nvalues1 ;
}
/*
* Build the hash table on the smaller array , using the appropriate
* hash function for its data type .
*/
fmgr_info ( op_is_reversed ? hashLeft : hashRight , & hash_proc ) ;
InitFunctionCallInfoData ( * hash_fcinfo , & hash_proc , 1 , collation ,
NULL , NULL ) ;
hash_fcinfo - > args [ 0 ] . isnull = false ;
hashContext . equal_fcinfo = fcinfo ;
hashContext . hash_fcinfo = hash_fcinfo ;
hashContext . op_is_reversed = op_is_reversed ;
hashContext . insert_mode = true ;
get_typlenbyval ( statsHash - > valuetype ,
& hashContext . hash_typlen ,
& hashContext . hash_typbyval ) ;
hashTable = MCVHashTable_create ( CurrentMemoryContext ,
nvaluesHash ,
& hashContext ) ;
for ( int i = 0 ; i < nvaluesHash ; i + + )
{
bool found = false ;
MCVHashEntry * entry = MCVHashTable_insert ( hashTable ,
statsHash - > values [ i ] ,
& found ) ;
/*
* MCVHashTable_insert will only report " found " if the new value
* is equal to some previous one per datum_image_eq ( ) . That
* probably shouldn ' t happen , since we ' re not expecting duplicates
* in the MCV list . If we do find a dup , just ignore it , leaving
* the hash entry ' s index pointing at the first occurrence . That
* matches the behavior that the non - hashed code path would have .
*/
if ( likely ( ! found ) )
entry - > index = i ;
}
/*
* Prepare to probe the hash table . If the probe values are of a
* different data type , then we need to change hash functions . ( This
* code relies on the assumption that since we defined SH_STORE_HASH ,
* simplehash . h will never need to compute hash values for existing
* hash table entries . )
*/
hashContext . insert_mode = false ;
if ( hashLeft ! = hashRight )
{
fmgr_info ( op_is_reversed ? hashRight : hashLeft , & hash_proc ) ;
/* Resetting hash_fcinfo is probably unnecessary, but be safe */
InitFunctionCallInfoData ( * hash_fcinfo , & hash_proc , 1 , collation ,
NULL , NULL ) ;
hash_fcinfo - > args [ 0 ] . isnull = false ;
}
/* Look up each probe value in turn. */
for ( int i = 0 ; i < nvaluesProbe ; i + + )
{
MCVHashEntry * entry = MCVHashTable_lookup ( hashTable ,
statsProbe - > values [ i ] ) ;
/* As in the other code path, skip already-matched hash entries */
if ( entry ! = NULL & & ! hasMatchHash [ entry - > index ] )
{
hasMatchHash [ entry - > index ] = hasMatchProbe [ i ] = true ;
nmatches + + ;
matchprodfreq + = statsHash - > numbers [ entry - > index ] * statsProbe - > numbers [ i ] ;
}
}
MCVHashTable_destroy ( hashTable ) ;
}
else
{
/* We're not to use hashing, so do it the O(N^2) way */
int index1 ,
index2 ;
/* Set up to supply the values in the order the operator expects */
if ( op_is_reversed )
{
index1 = 1 ;
index2 = 0 ;
}
else
{
index1 = 0 ;
index2 = 1 ;
}
for ( int i = 0 ; i < nvalues1 ; i + + )
{
fcinfo - > args [ index1 ] . value = sslot1 - > values [ i ] ;
for ( int j = 0 ; j < nvalues2 ; j + + )
{
Datum fresult ;
if ( hasmatch2 [ j ] )
continue ;
fcinfo - > args [ index2 ] . value = sslot2 - > values [ j ] ;
fcinfo - > isnull = false ;
fresult = FunctionCallInvoke ( fcinfo ) ;
if ( ! fcinfo - > isnull & & DatumGetBool ( fresult ) )
{
hasmatch1 [ i ] = hasmatch2 [ j ] = true ;
matchprodfreq + = sslot1 - > numbers [ i ] * sslot2 - > numbers [ j ] ;
nmatches + + ;
break ;
}
}
}
}
* p_nmatches = nmatches ;
* p_matchprodfreq = matchprodfreq ;
}
/*
* Support functions for the hash tables used by eqjoinsel_find_matches
*/
static uint32
hash_mcv ( MCVHashTable_hash * tab , Datum key )
{
MCVHashContext * context = ( MCVHashContext * ) tab - > private_data ;
FunctionCallInfo fcinfo = context - > hash_fcinfo ;
Datum fresult ;
fcinfo - > args [ 0 ] . value = key ;
fcinfo - > isnull = false ;
fresult = FunctionCallInvoke ( fcinfo ) ;
Assert ( ! fcinfo - > isnull ) ;
return DatumGetUInt32 ( fresult ) ;
}
static bool
mcvs_equal ( MCVHashTable_hash * tab , Datum key0 , Datum key1 )
{
MCVHashContext * context = ( MCVHashContext * ) tab - > private_data ;
if ( context - > insert_mode )
{
/*
* During the insertion step , any comparisons will be between two
* Datums of the hash table ' s data type , so if the given operator is
* cross - type it will be the wrong thing to use . Fortunately , we can
* use datum_image_eq instead . The MCV values should all be distinct
* anyway , so it ' s mostly pro - forma to compare them at all .
*/
return datum_image_eq ( key0 , key1 ,
context - > hash_typbyval , context - > hash_typlen ) ;
}
else
{
FunctionCallInfo fcinfo = context - > equal_fcinfo ;
Datum fresult ;
/*
* Apply the operator the correct way around . Although simplehash . h
* doesn ' t document this explicitly , during lookups key0 is from the
* hash table while key1 is the probe value , so we should compare them
* in that order only if op_is_reversed .
*/
if ( context - > op_is_reversed )
{
fcinfo - > args [ 0 ] . value = key0 ;
fcinfo - > args [ 1 ] . value = key1 ;
}
else
{
fcinfo - > args [ 0 ] . value = key1 ;
fcinfo - > args [ 1 ] . value = key0 ;
}
fcinfo - > isnull = false ;
fresult = FunctionCallInvoke ( fcinfo ) ;
return ( ! fcinfo - > isnull & & DatumGetBool ( fresult ) ) ;
}
}
/*
/*
* neqjoinsel - Join selectivity of " != "
* neqjoinsel - Join selectivity of " != "
*/
*/