@ -6575,6 +6575,20 @@ spgcostestimate(PG_FUNCTION_ARGS)
PG_RETURN_VOID ( ) ;
PG_RETURN_VOID ( ) ;
}
}
/*
* Support routines for gincostestimate
*/
typedef struct
{
bool haveFullScan ;
double partialEntries ;
double exactEntries ;
double searchEntries ;
double arrayScans ;
} GinQualCounts ;
/* Find the index column matching "op"; return its index, or -1 if no match */
/* Find the index column matching "op"; return its index, or -1 if no match */
static int
static int
find_index_column ( Node * op , IndexOptInfo * index )
find_index_column ( Node * op , IndexOptInfo * index )
@ -6590,6 +6604,277 @@ find_index_column(Node *op, IndexOptInfo *index)
return - 1 ;
return - 1 ;
}
}
/*
* Estimate the number of index terms that need to be searched for while
* testing the given GIN query , and increment the counts in * counts
* appropriately . If the query is unsatisfiable , return false .
*/
static bool
gincost_pattern ( IndexOptInfo * index , int indexcol ,
Oid clause_op , Datum query ,
GinQualCounts * counts )
{
Oid extractProcOid ;
int strategy_op ;
Oid lefttype ,
righttype ;
int32 nentries = 0 ;
bool * partial_matches = NULL ;
Pointer * extra_data = NULL ;
bool * nullFlags = NULL ;
int32 searchMode = GIN_SEARCH_MODE_DEFAULT ;
int32 i ;
/*
* Get the operator ' s strategy number and declared input data types
* within the index opfamily . ( We don ' t need the latter , but we use
* get_op_opfamily_properties because it will throw error if it fails
* to find a matching pg_amop entry . )
*/
get_op_opfamily_properties ( clause_op , index - > opfamily [ indexcol ] , false ,
& strategy_op , & lefttype , & righttype ) ;
/*
* GIN always uses the " default " support functions , which are those
* with lefttype = = righttype = = the opclass ' opcintype ( see
* IndexSupportInitialize in relcache . c ) .
*/
extractProcOid = get_opfamily_proc ( index - > opfamily [ indexcol ] ,
index - > opcintype [ indexcol ] ,
index - > opcintype [ indexcol ] ,
GIN_EXTRACTQUERY_PROC ) ;
if ( ! OidIsValid ( extractProcOid ) )
{
/* should not happen; throw same error as index_getprocinfo */
elog ( ERROR , " missing support function %d for attribute %d of index \" %s \" " ,
GIN_EXTRACTQUERY_PROC , indexcol + 1 ,
get_rel_name ( index - > indexoid ) ) ;
}
OidFunctionCall7 ( extractProcOid ,
query ,
PointerGetDatum ( & nentries ) ,
UInt16GetDatum ( strategy_op ) ,
PointerGetDatum ( & partial_matches ) ,
PointerGetDatum ( & extra_data ) ,
PointerGetDatum ( & nullFlags ) ,
PointerGetDatum ( & searchMode ) ) ;
if ( nentries < = 0 & & searchMode = = GIN_SEARCH_MODE_DEFAULT )
{
/* No match is possible */
return false ;
}
for ( i = 0 ; i < nentries ; i + + )
{
/*
* For partial match we haven ' t any information to estimate number of
* matched entries in index , so , we just estimate it as 100
*/
if ( partial_matches & & partial_matches [ i ] )
counts - > partialEntries + = 100 ;
else
counts - > exactEntries + + ;
counts - > searchEntries + + ;
}
if ( searchMode = = GIN_SEARCH_MODE_INCLUDE_EMPTY )
{
/* Treat "include empty" like an exact-match item */
counts - > exactEntries + + ;
counts - > searchEntries + + ;
}
else if ( searchMode ! = GIN_SEARCH_MODE_DEFAULT )
{
/* It's GIN_SEARCH_MODE_ALL */
counts - > haveFullScan = true ;
}
return true ;
}
/*
* Estimate the number of index terms that need to be searched for while
* testing the given GIN index clause , and increment the counts in * counts
* appropriately . If the query is unsatisfiable , return false .
*/
static bool
gincost_opexpr ( IndexOptInfo * index , OpExpr * clause , GinQualCounts * counts )
{
Node * leftop = get_leftop ( ( Expr * ) clause ) ;
Node * rightop = get_rightop ( ( Expr * ) clause ) ;
Oid clause_op = clause - > opno ;
int indexcol ;
Node * operand ;
/* Locate the operand being compared to the index column */
if ( ( indexcol = find_index_column ( leftop , index ) ) > = 0 )
{
operand = rightop ;
}
else if ( ( indexcol = find_index_column ( rightop , index ) ) > = 0 )
{
operand = leftop ;
clause_op = get_commutator ( clause_op ) ;
}
else
{
elog ( ERROR , " could not match index to operand " ) ;
operand = NULL ; /* keep compiler quiet */
}
if ( IsA ( operand , RelabelType ) )
operand = ( Node * ) ( ( RelabelType * ) operand ) - > arg ;
/*
* It ' s impossible to call extractQuery method for unknown operand . So
* unless operand is a Const we can ' t do much ; just assume there will
* be one ordinary search entry from the operand at runtime .
*/
if ( ! IsA ( operand , Const ) )
{
counts - > exactEntries + + ;
counts - > searchEntries + + ;
return true ;
}
/* If Const is null, there can be no matches */
if ( ( ( Const * ) operand ) - > constisnull )
return false ;
/* Otherwise, apply extractQuery and get the actual term counts */
return gincost_pattern ( index , indexcol , clause_op ,
( ( Const * ) operand ) - > constvalue ,
counts ) ;
}
/*
* Estimate the number of index terms that need to be searched for while
* testing the given GIN index clause , and increment the counts in * counts
* appropriately . If the query is unsatisfiable , return false .
*
* A ScalarArrayOpExpr will give rise to N separate indexscans at runtime ,
* each of which involves one value from the RHS array , plus all the
* non - array quals ( if any ) . To model this , we average the counts across
* the RHS elements , and add the averages to the counts in * counts ( which
* correspond to per - indexscan costs ) . We also multiply counts - > arrayScans
* by N , causing gincostestimate to scale up its estimates accordingly .
*/
static bool
gincost_scalararrayopexpr ( IndexOptInfo * index , ScalarArrayOpExpr * clause ,
double numIndexEntries ,
GinQualCounts * counts )
{
Node * leftop = ( Node * ) linitial ( clause - > args ) ;
Node * rightop = ( Node * ) lsecond ( clause - > args ) ;
Oid clause_op = clause - > opno ;
int indexcol ;
ArrayType * arrayval ;
int16 elmlen ;
bool elmbyval ;
char elmalign ;
int numElems ;
Datum * elemValues ;
bool * elemNulls ;
GinQualCounts arraycounts ;
int numPossible = 0 ;
int i ;
Assert ( clause - > useOr ) ;
/* index column must be on the left */
if ( ( indexcol = find_index_column ( leftop , index ) ) < 0 )
elog ( ERROR , " could not match index to operand " ) ;
if ( IsA ( rightop , RelabelType ) )
rightop = ( Node * ) ( ( RelabelType * ) rightop ) - > arg ;
/*
* It ' s impossible to call extractQuery method for unknown operand . So
* unless operand is a Const we can ' t do much ; just assume there will
* be one ordinary search entry from each array entry at runtime , and
* fall back on a probably - bad estimate of the number of array entries .
*/
if ( ! IsA ( rightop , Const ) )
{
counts - > exactEntries + + ;
counts - > searchEntries + + ;
counts - > arrayScans * = estimate_array_length ( rightop ) ;
return true ;
}
/* If Const is null, there can be no matches */
if ( ( ( Const * ) rightop ) - > constisnull )
return false ;
/* Otherwise, extract the array elements and iterate over them */
arrayval = DatumGetArrayTypeP ( ( ( Const * ) rightop ) - > constvalue ) ;
get_typlenbyvalalign ( ARR_ELEMTYPE ( arrayval ) ,
& elmlen , & elmbyval , & elmalign ) ;
deconstruct_array ( arrayval ,
ARR_ELEMTYPE ( arrayval ) ,
elmlen , elmbyval , elmalign ,
& elemValues , & elemNulls , & numElems ) ;
memset ( & arraycounts , 0 , sizeof ( arraycounts ) ) ;
for ( i = 0 ; i < numElems ; i + + )
{
GinQualCounts elemcounts ;
/* NULL can't match anything, so ignore, as the executor will */
if ( elemNulls [ i ] )
continue ;
/* Otherwise, apply extractQuery and get the actual term counts */
memset ( & elemcounts , 0 , sizeof ( elemcounts ) ) ;
if ( gincost_pattern ( index , indexcol , clause_op , elemValues [ i ] ,
& elemcounts ) )
{
/* We ignore array elements that are unsatisfiable patterns */
numPossible + + ;
if ( elemcounts . haveFullScan )
{
/*
* Full index scan will be required . We treat this as if
* every key in the index had been listed in the query ; is
* that reasonable ?
*/
elemcounts . partialEntries = 0 ;
elemcounts . exactEntries = numIndexEntries ;
elemcounts . searchEntries = numIndexEntries ;
}
arraycounts . partialEntries + = elemcounts . partialEntries ;
arraycounts . exactEntries + = elemcounts . exactEntries ;
arraycounts . searchEntries + = elemcounts . searchEntries ;
}
}
if ( numPossible = = 0 )
{
/* No satisfiable patterns in the array */
return false ;
}
/*
* Now add the averages to the global counts . This will give us an
* estimate of the average number of terms searched for in each indexscan ,
* including contributions from both array and non - array quals .
*/
counts - > partialEntries + = arraycounts . partialEntries / numPossible ;
counts - > exactEntries + = arraycounts . exactEntries / numPossible ;
counts - > searchEntries + = arraycounts . searchEntries / numPossible ;
counts - > arrayScans * = numPossible ;
return true ;
}
/*
/*
* GIN has search behavior completely different from other index types
* GIN has search behavior completely different from other index types
*/
*/
@ -6613,17 +6898,15 @@ gincostestimate(PG_FUNCTION_ARGS)
numDataPages ,
numDataPages ,
numPendingPages ,
numPendingPages ,
numEntries ;
numEntries ;
bool haveFullScan = false ;
GinQualCounts counts ;
double partialEntriesInQuals = 0.0 ;
bool matchPossible ;
double searchEntriesInQuals = 0.0 ;
double exactEntriesInQuals = 0.0 ;
double entryPagesFetched ,
double entryPagesFetched ,
dataPagesFetched ,
dataPagesFetched ,
dataPagesFetchedBySel ;
dataPagesFetchedBySel ;
double qual_op_cost ,
double qual_op_cost ,
qual_arg_cost ,
qual_arg_cost ,
spc_random_page_cost ,
spc_random_page_cost ,
num _scans;
outer _scans;
QualCost index_qual_cost ;
QualCost index_qual_cost ;
Relation indexRel ;
Relation indexRel ;
GinStatsData ginStats ;
GinStatsData ginStats ;
@ -6709,199 +6992,113 @@ gincostestimate(PG_FUNCTION_ARGS)
/*
/*
* Examine quals to estimate number of search entries & partial matches
* Examine quals to estimate number of search entries & partial matches
*/
*/
memset ( & counts , 0 , sizeof ( counts ) ) ;
counts . arrayScans = 1 ;
matchPossible = true ;
foreach ( l , indexQuals )
foreach ( l , indexQuals )
{
{
RestrictInfo * rinfo = ( RestrictInfo * ) lfirst ( l ) ;
RestrictInfo * rinfo = ( RestrictInfo * ) lfirst ( l ) ;
Expr * clause ;
Expr * clause ;
Node * leftop ,
* rightop ,
* operand ;
Oid extractProcOid ;
Oid clause_op ;
int strategy_op ;
Oid lefttype ,
righttype ;
int32 nentries = 0 ;
bool * partial_matches = NULL ;
Pointer * extra_data = NULL ;
bool * nullFlags = NULL ;
int32 searchMode = GIN_SEARCH_MODE_DEFAULT ;
int indexcol ;
Assert ( IsA ( rinfo , RestrictInfo ) ) ;
Assert ( IsA ( rinfo , RestrictInfo ) ) ;
clause = rinfo - > clause ;
clause = rinfo - > clause ;
Assert ( IsA ( clause , OpExpr ) ) ;
if ( IsA ( clause , OpExpr ) )
leftop = get_leftop ( clause ) ;
rightop = get_rightop ( clause ) ;
clause_op = ( ( OpExpr * ) clause ) - > opno ;
if ( ( indexcol = find_index_column ( leftop , index ) ) > = 0 )
{
operand = rightop ;
}
else if ( ( indexcol = find_index_column ( rightop , index ) ) > = 0 )
{
operand = leftop ;
clause_op = get_commutator ( clause_op ) ;
}
else
{
elog ( ERROR , " could not match index to operand " ) ;
operand = NULL ; /* keep compiler quiet */
}
if ( IsA ( operand , RelabelType ) )
operand = ( Node * ) ( ( RelabelType * ) operand ) - > arg ;
/*
* It ' s impossible to call extractQuery method for unknown operand . So
* unless operand is a Const we can ' t do much ; just assume there will
* be one ordinary search entry from the operand at runtime .
*/
if ( ! IsA ( operand , Const ) )
{
searchEntriesInQuals + + ;
continue ;
}
/* If Const is null, there can be no matches */
if ( ( ( Const * ) operand ) - > constisnull )
{
* indexStartupCost = 0 ;
* indexTotalCost = 0 ;
* indexSelectivity = 0 ;
PG_RETURN_VOID ( ) ;
}
/*
* Get the operator ' s strategy number and declared input data types
* within the index opfamily . ( We don ' t need the latter , but we use
* get_op_opfamily_properties because it will throw error if it fails
* to find a matching pg_amop entry . )
*/
get_op_opfamily_properties ( clause_op , index - > opfamily [ indexcol ] , false ,
& strategy_op , & lefttype , & righttype ) ;
/*
* GIN always uses the " default " support functions , which are those
* with lefttype = = righttype = = the opclass ' opcintype ( see
* IndexSupportInitialize in relcache . c ) .
*/
extractProcOid = get_opfamily_proc ( index - > opfamily [ indexcol ] ,
index - > opcintype [ indexcol ] ,
index - > opcintype [ indexcol ] ,
GIN_EXTRACTQUERY_PROC ) ;
if ( ! OidIsValid ( extractProcOid ) )
{
{
/* should not happen; throw same error as index_getprocinfo */
matchPossible = gincost_opexpr ( index ,
elog ( ERROR , " missing support function %d for attribute %d of index \" %s \" " ,
( OpExpr * ) clause ,
GIN_EXTRACTQUERY_PROC , indexcol + 1 ,
& counts ) ;
get_rel_name ( index - > indexoid ) ) ;
if ( ! matchPossible )
break ;
}
}
else if ( IsA ( clause , ScalarArrayOpExpr ) )
OidFunctionCall7 ( extractProcOid ,
( ( Const * ) operand ) - > constvalue ,
PointerGetDatum ( & nentries ) ,
UInt16GetDatum ( strategy_op ) ,
PointerGetDatum ( & partial_matches ) ,
PointerGetDatum ( & extra_data ) ,
PointerGetDatum ( & nullFlags ) ,
PointerGetDatum ( & searchMode ) ) ;
if ( nentries < = 0 & & searchMode = = GIN_SEARCH_MODE_DEFAULT )
{
{
/* No match is possible */
matchPossible = gincost_scalararrayopexpr ( index ,
* indexStartupCost = 0 ;
( ScalarArrayOpExpr * ) clause ,
* indexTotalCost = 0 ;
numEntries ,
* indexSelectivity = 0 ;
& counts ) ;
PG_RETURN_VOID ( ) ;
if ( ! matchPossible )
break ;
}
}
else
else
{
{
int32 i ;
/* shouldn't be anything else for a GIN index */
elog ( ERROR , " unsupported GIN indexqual type: %d " ,
for ( i = 0 ; i < nentries ; i + + )
( int ) nodeTag ( clause ) ) ;
{
/*
* For partial match we haven ' t any information to estimate
* number of matched entries in index , so , we just estimate it
* as 100
*/
if ( partial_matches & & partial_matches [ i ] )
partialEntriesInQuals + = 100 ;
else
exactEntriesInQuals + + ;
searchEntriesInQuals + + ;
}
}
}
}
if ( searchMode = = GIN_SEARCH_MODE_INCLUDE_EMPTY )
/* Fall out if there were any provably-unsatisfiable quals */
{
if ( ! matchPossible )
/* Treat "include empty" like an exact-match item */
{
exactEntriesInQuals + + ;
* indexStartupCost = 0 ;
searchEntriesInQuals + + ;
* indexTotalCost = 0 ;
}
* indexSelectivity = 0 ;
else if ( searchMode ! = GIN_SEARCH_MODE_DEFAULT )
PG_RETURN_VOID ( ) ;
{
/* It's GIN_SEARCH_MODE_ALL */
haveFullScan = true ;
}
}
}
if ( haveFullScan | | indexQuals = = NIL )
if ( counts . haveFullScan | | indexQuals = = NIL )
{
{
/*
/*
* Full index scan will be required . We treat this as if every key in
* Full index scan will be required . We treat this as if every key in
* the index had been listed in the query ; is that reasonable ?
* the index had been listed in the query ; is that reasonable ?
*/
*/
searchEntriesInQuals = numEntries ;
counts . partialEntries = 0 ;
counts . exactEntries = numEntries ;
counts . searchEntries = numEntries ;
}
}
/* Will we have more than one iteration of a nestloop scan? */
/* Will we have more than one iteration of a nestloop scan? */
if ( outer_rel ! = NULL & & outer_rel - > rows > 1 )
if ( outer_rel ! = NULL & & outer_rel - > rows > 1 )
num _scans = outer_rel - > rows ;
outer_scans = outer_rel - > rows ;
else
else
num _scans = 1 ;
outer_scans = 1 ;
/*
/*
* cost to begin scan , first of all , pay attention to pending list .
* Compute cost to begin scan , first of all , pay attention to pending list .
*/
*/
entryPagesFetched = numPendingPages ;
entryPagesFetched = numPendingPages ;
/*
/*
* Estimate number of entry pages read . We need to do
* Estimate number of entry pages read . We need to do
* searchEntriesInQual s searches . Use a power function as it should be ,
* counts . searchEntries searches . Use a power function as it should be ,
* but tuples on leaf pages usually is much greater . Here we include all
* but tuples on leaf pages usually is much greater . Here we include all
* searches in entry tree , including search of first entry in partial
* searches in entry tree , including search of first entry in partial
* match algorithm
* match algorithm
*/
*/
entryPagesFetched + = ceil ( searchEntriesInQual s * rint ( pow ( numEntryPages , 0.15 ) ) ) ;
entryPagesFetched + = ceil ( counts . searchEntries * rint ( pow ( numEntryPages , 0.15 ) ) ) ;
/*
/*
* Add an estimate of entry pages read by partial match algorithm . It ' s a
* Add an estimate of entry pages read by partial match algorithm . It ' s a
* scan over leaf pages in entry tree . We haven ' t any useful stats here ,
* scan over leaf pages in entry tree . We haven ' t any useful stats here ,
* so estimate it as proportion .
* so estimate it as proportion .
*/
*/
entryPagesFetched + = ceil ( numEntryPages * partialEntriesInQual s / numEntries ) ;
entryPagesFetched + = ceil ( numEntryPages * counts . partialEntries / numEntries ) ;
/*
/*
* Partial match algorithm reads all data pages before doing actual scan ,
* Partial match algorithm reads all data pages before doing actual scan ,
* so it ' s a startup cost . Again , we havn ' t any useful stats here , so ,
* so it ' s a startup cost . Again , we have n ' t any useful stats here , so ,
* estimate it as proportion
* estimate it as proportion
*/
*/
dataPagesFetched = ceil ( numDataPages * partialEntriesInQual s / numEntries ) ;
dataPagesFetched = ceil ( numDataPages * counts . partialEntries / numEntries ) ;
/* calculate cache effects */
/*
if ( num_scans > 1 | | searchEntriesInQuals > 1 )
* Calculate cache effects if more than one scan due to nestloops or array
* quals . The result is pro - rated per nestloop scan , but the array qual
* factor shouldn ' t be pro - rated ( compare genericcostestimate ) .
*/
if ( outer_scans > 1 | | counts . arrayScans > 1 )
{
{
entryPagesFetched * = outer_scans * counts . arrayScans ;
entryPagesFetched = index_pages_fetched ( entryPagesFetched ,
entryPagesFetched = index_pages_fetched ( entryPagesFetched ,
( BlockNumber ) numEntryPages ,
( BlockNumber ) numEntryPages ,
numEntryPages , root ) ;
numEntryPages , root ) ;
entryPagesFetched / = outer_scans ;
dataPagesFetched * = outer_scans * counts . arrayScans ;
dataPagesFetched = index_pages_fetched ( dataPagesFetched ,
dataPagesFetched = index_pages_fetched ( dataPagesFetched ,
( BlockNumber ) numDataPages ,
( BlockNumber ) numDataPages ,
numDataPages , root ) ;
numDataPages , root ) ;
dataPagesFetched / = outer_scans ;
}
}
/*
/*
@ -6910,8 +7107,12 @@ gincostestimate(PG_FUNCTION_ARGS)
*/
*/
* indexStartupCost = ( entryPagesFetched + dataPagesFetched ) * spc_random_page_cost ;
* indexStartupCost = ( entryPagesFetched + dataPagesFetched ) * spc_random_page_cost ;
/* cost to scan data pages for each exact (non-partial) matched entry */
/*
dataPagesFetched = ceil ( numDataPages * exactEntriesInQuals / numEntries ) ;
* Now we compute the number of data pages fetched while the scan proceeds .
*/
/* data pages scanned for each exact (non-partial) matched entry */
dataPagesFetched = ceil ( numDataPages * counts . exactEntries / numEntries ) ;
/*
/*
* Estimate number of data pages read , using selectivity estimation and
* Estimate number of data pages read , using selectivity estimation and
@ -6932,10 +7133,17 @@ gincostestimate(PG_FUNCTION_ARGS)
dataPagesFetched = dataPagesFetchedBySel ;
dataPagesFetched = dataPagesFetchedBySel ;
}
}
if ( num_scans > 1 )
/* Account for cache effects, the same as above */
if ( outer_scans > 1 | | counts . arrayScans > 1 )
{
dataPagesFetched * = outer_scans * counts . arrayScans ;
dataPagesFetched = index_pages_fetched ( dataPagesFetched ,
dataPagesFetched = index_pages_fetched ( dataPagesFetched ,
( BlockNumber ) numDataPages ,
( BlockNumber ) numDataPages ,
numDataPages , root ) ;
numDataPages , root ) ;
dataPagesFetched / = outer_scans ;
}
/* And apply random_page_cost as the cost per page */
* indexTotalCost = * indexStartupCost +
* indexTotalCost = * indexStartupCost +
dataPagesFetched * spc_random_page_cost ;
dataPagesFetched * spc_random_page_cost ;