@ -8,7 +8,7 @@
*
*
*
*
* IDENTIFICATION
* IDENTIFICATION
* $ Header : / cvsroot / pgsql / src / backend / optimizer / prep / prepqual . c , v 1.21 2000 / 01 / 26 05 : 56 : 39 momjian Exp $
* $ Header : / cvsroot / pgsql / src / backend / optimizer / prep / prepqual . c , v 1.22 2000 / 01 / 28 03 : 22 : 36 tgl Exp $
*
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
*/
@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
static Expr * and_normalize ( List * andlist ) ;
static Expr * and_normalize ( List * andlist ) ;
static Expr * qual_cleanup ( Expr * qual ) ;
static Expr * qual_cleanup ( Expr * qual ) ;
static List * remove_duplicates ( List * list ) ;
static List * remove_duplicates ( List * list ) ;
static int count_bool_nodes ( Expr * qual ) ;
static void count_bool_nodes ( Expr * qual , double * nodes ,
double * cnfnodes , double * dnfnodes ) ;
/*****************************************************************************
/*****************************************************************************
*
*
@ -84,12 +85,12 @@ static int count_bool_nodes(Expr *qual);
List *
List *
canonicalize_qual ( Expr * qual , bool removeAndFlag )
canonicalize_qual ( Expr * qual , bool removeAndFlag )
{
{
Expr * newqual ,
Expr * newqual ;
* cnfqual ,
double nodes ,
* dnfqual ;
cnfnodes ,
int qualcnt ,
dnfnodes ;
cnfcnt ,
bool cnfok ,
dnfcnt ;
dnfok ;
if ( qual = = NULL )
if ( qual = = NULL )
return NIL ;
return NIL ;
@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
* This improvement is always worthwhile , so do it unconditionally .
* This improvement is always worthwhile , so do it unconditionally .
*/
*/
qual = flatten_andors ( qual ) ;
qual = flatten_andors ( qual ) ;
/* Push down NOTs. We do this only in the top-level boolean
/* Push down NOTs. We do this only in the top-level boolean
* expression , without examining arguments of operators / functions .
* expression , without examining arguments of operators / functions .
* Even so , it might not be a win if we are unable to find negators
* Even so , it might not be a win if we are unable to find negators
* for all the operators involved ; so we keep the flattened - but - not -
* for all the operators involved ; perhap s we should compare before -
* NOT - pushed qual as the reference point for comparsions .
* and - after tree sizes ?
*/
*/
newqual = find_nots ( qual ) ;
newqual = find_nots ( qual ) ;
/*
* Generate both CNF and DNF forms from newqual .
*/
/* Normalize into conjunctive normal form, and clean up the result. */
cnfqual = qual_cleanup ( find_ors ( newqual ) ) ;
/* Likewise for DNF */
dnfqual = qual_cleanup ( find_ands ( newqual ) ) ;
/*
/*
* Now , choose whether to return qual , cnfqual , or dnfqual .
* Choose whether to convert to CNF , or DNF , or leave well enough alone .
*
*
* First heuristic is to forget about either CNF or DNF if it shows
* We make an approximate estimate of the number of bottom - level nodes
* that will appear in the CNF and DNF forms of the query .
*/
count_bool_nodes ( newqual , & nodes , & cnfnodes , & dnfnodes ) ;
/*
* First heuristic is to forget about * both * normal forms if there are
* a huge number of terms in the qual clause . This would only happen
* with machine - generated queries , presumably ; and most likely such
* a query is already in either CNF or DNF .
*/
cnfok = dnfok = true ;
if ( nodes > = 500.0 )
cnfok = dnfok = false ;
/*
* Second heuristic is to forget about either CNF or DNF if it shows
* unreasonable growth compared to the original form of the qual ,
* unreasonable growth compared to the original form of the qual ,
* where we define " unreasonable " a tad arbitrarily as 4 x more
* where we define " unreasonable " a tad arbitrarily as 4 x more
* operators .
* operators .
*/
*/
qualcnt = count_bool_nodes ( qual ) ;
if ( cnfnodes > = 4.0 * nodes )
cnfcnt = count_bool_nodes ( cnfqual ) ;
cnfok = false ;
dnfcnt = count_bool_nodes ( dnfqual ) ;
if ( dnfnodes > = 4.0 * nodes )
if ( cnfcnt > = 4 * qualcnt )
dnfok = false ;
cnfqual = NULL ; /* mark CNF not usable */
if ( dnfcnt > = 4 * qualcnt )
dnfqual = NULL ; /* mark DNF not usable */
/*
/*
* Second heuristic is to prefer DNF if only one relation is mentioned
* Third heuristic is to prefer DNF if top level is already an OR ,
* and it is smaller than the CNF representation .
* and only one relation is mentioned , and DNF is no larger than
* the CNF representation . ( Pretty shaky ; can we improve on this ? )
*/
*/
if ( dnfqual & & dnfcnt < cnfcnt & & NumRelids ( ( Node * ) dnfqual ) = = 1 )
if ( dnfok & & dnfnodes < = cnfnodes & & or_clause ( ( Node * ) newqual ) & &
cnfqual = NULL ;
NumRelids ( ( Node * ) newqual ) = = 1 )
cnfok = false ;
/*
/*
* Otherwise , we prefer CNF .
* Otherwise , we prefer CNF .
*
*
* XXX obviously , these rules could be improved upon .
* XXX obviously , these rules could be improved upon .
*/
*/
if ( cnfok )
/* pick preferred survivor */
{
if ( cnfqual )
/* Normalize into conjunctive normal form, and clean up the result. */
newqual = cnfqual ;
newqual = qual_cleanup ( find_ors ( newqual ) ) ;
else if ( dnfqual )
}
newqual = dnfqual ;
else if ( dnfok )
else
{
newqual = qual ;
/* Normalize into disjunctive normal form, and clean up the result. */
newqual = qual_cleanup ( find_ands ( newqual ) ) ;
}
/* Convert to implicit-AND list if requested */
/* Convert to implicit-AND list if requested */
if ( removeAndFlag )
if ( removeAndFlag )
@ -828,27 +836,72 @@ remove_duplicates(List *list)
/*
/*
* count_bool_nodes
* count_bool_nodes
* Support for heuristics in canonicalize_qual ( ) : count the
* Support for heuristics in canonicalize_qual ( ) : count the
* number of nodes in the top level AND / OR / NOT part of a qual tree
* number of nodes that are inputs to the top level AND / OR / NOT
* part of a qual tree , and estimate how many nodes will appear
* in the CNF ' ified or DNF ' ified equivalent of the expression .
*
* This is just an approximate calculation ; it doesn ' t deal with NOTs
* very well , and of course it cannot detect possible simplifications
* from eliminating duplicate subclauses . The idea is just to cheaply
* determine whether CNF will be markedly worse than DNF or vice versa .
*
* The counts / estimates are represented as doubles to avoid risk of overflow .
*/
*/
static int
static void
count_bool_nodes ( Expr * qual )
count_bool_nodes ( Expr * qual ,
double * nodes ,
double * cnfnodes ,
double * dnfnodes )
{
{
if ( qual = = NULL )
List * temp ;
return 0 ;
double subnodes , subcnfnodes , subdnfnodes ;
if ( and_clause ( ( Node * ) qual ) | |
if ( and_clause ( ( Node * ) qual ) )
or_clause ( ( Node * ) qual ) )
{
{
int sum = 1 ; /* for the and/or itself */
* nodes = * cnfnodes = 0.0 ;
List * temp ;
* dnfnodes = 1.0 ; /* DNF nodes will be product of sub-counts */
foreach ( temp , qual - > args )
foreach ( temp , qual - > args )
sum + = count_bool_nodes ( lfirst ( temp ) ) ;
{
count_bool_nodes ( lfirst ( temp ) ,
& subnodes , & subcnfnodes , & subdnfnodes ) ;
* nodes + = subnodes ;
* cnfnodes + = subcnfnodes ;
* dnfnodes * = subdnfnodes ;
}
/* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
* simple ones with count 1. Make sure dnfnodes isn ' t too small .
*/
if ( * dnfnodes < * cnfnodes )
* dnfnodes = * cnfnodes ;
}
else if ( or_clause ( ( Node * ) qual ) )
{
* nodes = * dnfnodes = 0.0 ;
* cnfnodes = 1.0 ; /* CNF nodes will be product of sub-counts */
return sum ;
foreach ( temp , qual - > args )
{
count_bool_nodes ( lfirst ( temp ) ,
& subnodes , & subcnfnodes , & subdnfnodes ) ;
* nodes + = subnodes ;
* cnfnodes * = subcnfnodes ;
* dnfnodes + = subdnfnodes ;
}
/* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
* simple ones with count 1. Make sure cnfnodes isn ' t too small .
*/
if ( * cnfnodes < * dnfnodes )
* cnfnodes = * dnfnodes ;
}
}
else if ( not_clause ( ( Node * ) qual ) )
else if ( not_clause ( ( Node * ) qual ) )
return count_bool_nodes ( get_notclausearg ( qual ) ) + 1 ;
{
count_bool_nodes ( get_notclausearg ( qual ) ,
nodes , cnfnodes , dnfnodes ) ;
}
else
else
return 1 ; /* anything else counts 1 for my purposes */
{
/* anything else counts 1 for my purposes */
* nodes = * cnfnodes = * dnfnodes = 1.0 ;
}
}
}