You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
postgres/src/backend/optimizer/plan/subselect.c

739 lines
21 KiB

/*-------------------------------------------------------------------------
*
* subselect.c
* Planning routines for subselects and parameters.
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.62 2003/01/09 20:50:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/params.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
27 years ago
#include "optimizer/planner.h"
#include "optimizer/subselect.h"
#include "parser/parsetree.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "utils/syscache.h"
Index PlannerQueryLevel; /* level of current query */
List *PlannerInitPlan; /* init subplans for current query */
List *PlannerParamVar; /* to get Var from Param->paramid */
int PlannerPlanId = 0; /* to assign unique ID to subquery plans */
/*--------------------
* PlannerParamVar is a list of Var nodes, wherein the n'th entry
* (n counts from 0) corresponds to Param->paramid = n. The Var nodes
* are ordinary except for one thing: their varlevelsup field does NOT
* have the usual interpretation of "subplan levels out from current".
* Instead, it contains the absolute plan level, with the outermost
* plan being level 1 and nested plans having higher level numbers.
* This nonstandardness is useful because we don't have to run around
* and update the list elements when we enter or exit a subplan
* recursion level. But we must pay attention not to confuse this
* meaning with the normal meaning of varlevelsup.
*
* We also need to create Param slots that don't correspond to any outer Var.
* For these, we set varno = 0 and varlevelsup = 0, so that they can't
* accidentally match an outer Var.
*--------------------
*/
typedef struct finalize_primnode_results
{
List *paramids; /* List of PARAM_EXEC paramids found */
} finalize_primnode_results;
static List *convert_sublink_opers(List *operlist, List *lefthand,
List *targetlist, List **setParams);
static Node *replace_correlation_vars_mutator(Node *node, void *context);
static Node *process_sublinks_mutator(Node *node, void *context);
static bool finalize_primnode(Node *node, finalize_primnode_results *results);
/*
* Create a new entry in the PlannerParamVar list, and return its index.
*
* var contains the data to use, except for varlevelsup which
* is set from the absolute level value given by varlevel. NOTE that
* the passed var is scribbled on and placed directly into the list!
* Generally, caller should have just created or copied it.
*/
static int
new_param(Var *var, Index varlevel)
{
var->varlevelsup = varlevel;
PlannerParamVar = lappend(PlannerParamVar, var);
return length(PlannerParamVar) - 1;
}
/*
* Generate a Param node to replace the given Var,
* which is expected to have varlevelsup > 0 (ie, it is not local).
*/
static Param *
replace_var(Var *var)
{
List *ppv;
Param *retval;
Index varlevel;
int i;
Assert(var->varlevelsup > 0 && var->varlevelsup < PlannerQueryLevel);
varlevel = PlannerQueryLevel - var->varlevelsup;
/*
* If there's already a PlannerParamVar entry for this same Var, just
* use it. NOTE: in sufficiently complex querytrees, it is possible
* for the same varno/varlevel to refer to different RTEs in different
* parts of the parsetree, so that different fields might end up
* sharing the same Param number. As long as we check the vartype as
* well, I believe that this sort of aliasing will cause no trouble.
* The correct field should get stored into the Param slot at
* execution in each part of the tree.
*/
i = 0;
foreach(ppv, PlannerParamVar)
{
Var *pvar = lfirst(ppv);
if (pvar->varno == var->varno &&
pvar->varattno == var->varattno &&
pvar->varlevelsup == varlevel &&
pvar->vartype == var->vartype)
break;
i++;
}
if (!ppv)
{
/* Nope, so make a new one */
i = new_param((Var *) copyObject(var), varlevel);
}
retval = makeNode(Param);
retval->paramkind = PARAM_EXEC;
retval->paramid = (AttrNumber) i;
retval->paramtype = var->vartype;
return retval;
}
/*
* Generate a new Param node that will not conflict with any other.
*/
static Param *
generate_new_param(Oid paramtype, int32 paramtypmod)
{
Var *var = makeVar(0, 0, paramtype, paramtypmod, 0);
Param *retval = makeNode(Param);
retval->paramkind = PARAM_EXEC;
retval->paramid = (AttrNumber) new_param(var, 0);
retval->paramtype = paramtype;
return retval;
}
/*
* Convert a bare SubLink (as created by the parser) into a SubPlan.
*
* We are given the raw SubLink and the already-processed lefthand argument
* list (use this instead of the SubLink's own field).
*
* The result is whatever we need to substitute in place of the SubLink
* node in the executable expression. This will be either the SubPlan
* node (if we have to do the subplan as a subplan), or a Param node
* representing the result of an InitPlan, or possibly an AND or OR tree
* containing InitPlan Param nodes.
*/
static Node *
make_subplan(SubLink *slink, List *lefthand)
{
SubPlan *node = makeNode(SubPlan);
Query *subquery = (Query *) (slink->subselect);
double tuple_fraction;
Plan *plan;
List *lst;
Node *result;
/*
* Copy the source Query node. This is a quick and dirty kluge to
* resolve the fact that the parser can generate trees with multiple
* links to the same sub-Query node, but the planner wants to scribble
* on the Query. Try to clean this up when we do querytree redesign...
*/
subquery = (Query *) copyObject(subquery);
/*
* For an EXISTS subplan, tell lower-level planner to expect that only
* the first tuple will be retrieved. For ALL and ANY subplans, we
* will be able to stop evaluating if the test condition fails, so
* very often not all the tuples will be retrieved; for lack of a
* better idea, specify 50% retrieval. For EXPR and MULTIEXPR
* subplans, use default behavior (we're only expecting one row out,
* anyway).
*
* NOTE: if you change these numbers, also change cost_qual_eval_walker()
* in path/costsize.c.
*
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to
* materialize its result below. In that case it would've been better
* to specify full retrieval. At present, however, we can only detect
* correlation or lack of it after we've made the subplan :-(. Perhaps
* detection of correlation should be done as a separate step.
* Meanwhile, we don't want to be too optimistic about the percentage
* of tuples retrieved, for fear of selecting a plan that's bad for
* the materialization case.
*/
if (slink->subLinkType == EXISTS_SUBLINK)
tuple_fraction = 1.0; /* just like a LIMIT 1 */
else if (slink->subLinkType == ALL_SUBLINK ||
slink->subLinkType == ANY_SUBLINK)
tuple_fraction = 0.5; /* 50% */
else
tuple_fraction = -1.0; /* default behavior */
/*
* Generate the plan for the subquery.
*/
node->plan = plan = subquery_planner(subquery, tuple_fraction);
node->plan_id = PlannerPlanId++; /* Assign unique ID to this
* SubPlan */
node->rtable = subquery->rtable;
/*
* Fill in other fields of the SubPlan node.
*/
node->subLinkType = slink->subLinkType;
node->useOr = slink->useOr;
node->oper = NIL;
node->setParam = NIL;
node->parParam = NIL;
node->args = NIL;
/*
* Make parParam list of params that current query level will pass to
* this child plan.
*/
foreach(lst, plan->extParam)
{
int paramid = lfirsti(lst);
Var *var = nth(paramid, PlannerParamVar);
/* note varlevelsup is absolute level number */
if (var->varlevelsup == PlannerQueryLevel)
node->parParam = lappendi(node->parParam, paramid);
}
/*
* Un-correlated or undirect correlated plans of EXISTS, EXPR, or
* MULTIEXPR types can be used as initPlans. For EXISTS or EXPR, we
* just produce a Param referring to the result of evaluating the
* initPlan. For MULTIEXPR, we must build an AND or OR-clause of the
* individual comparison operators, using the appropriate lefthand
* side expressions and Params for the initPlan's target items.
*/
if (node->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK)
{
Param *prm;
prm = generate_new_param(BOOLOID, -1);
node->setParam = lappendi(node->setParam, prm->paramid);
PlannerInitPlan = lappend(PlannerInitPlan, node);
result = (Node *) prm;
}
else if (node->parParam == NIL && slink->subLinkType == EXPR_SUBLINK)
{
TargetEntry *te = lfirst(plan->targetlist);
Param *prm;
prm = generate_new_param(te->resdom->restype, te->resdom->restypmod);
node->setParam = lappendi(node->setParam, prm->paramid);
PlannerInitPlan = lappend(PlannerInitPlan, node);
result = (Node *) prm;
}
else if (node->parParam == NIL && slink->subLinkType == MULTIEXPR_SUBLINK)
{
List *oper;
/* Convert the oper list, but don't put it into the SubPlan node */
oper = convert_sublink_opers(slink->oper,
lefthand,
plan->targetlist,
&node->setParam);
PlannerInitPlan = lappend(PlannerInitPlan, node);
if (length(oper) > 1)
result = (Node *) (node->useOr ? make_orclause(oper) :
make_andclause(oper));
else
result = (Node *) lfirst(oper);
}
else
{
List *args;
/*
* We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types
* to initPlans, even when they are uncorrelated or undirect
* correlated, because we need to scan the output of the subplan
* for each outer tuple. However, we have the option to tack a
* MATERIAL node onto the top of an uncorrelated/undirect
* correlated subplan, which lets us do the work of evaluating the
* subplan only once. We do this if the subplan's top plan node
* is anything more complicated than a plain sequential scan, and
* we do it even for seqscan if the qual appears selective enough
* to eliminate many tuples.
*
* XXX It's pretty ugly to be inserting a MATERIAL node at this
* point. Since subquery_planner has already run SS_finalize_plan
* on the subplan tree, we have to kluge up parameter lists for
* the MATERIAL node. Possibly this could be fixed by postponing
* SS_finalize_plan processing until setrefs.c is run.
*/
if (node->parParam == NIL)
{
bool use_material;
switch (nodeTag(plan))
{
case T_SeqScan:
if (plan->initPlan)
use_material = true;
else
{
Selectivity qualsel;
qualsel = clauselist_selectivity(subquery,
plan->qual,
0);
/* Is 10% selectivity a good threshold?? */
use_material = qualsel < 0.10;
}
break;
case T_Material:
case T_FunctionScan:
case T_Sort:
/*
* Don't add another Material node if there's one
* already, nor if the top node is any other type that
* materializes its output anyway.
*/
use_material = false;
break;
default:
use_material = true;
break;
}
if (use_material)
{
23 years ago
Plan *matplan;
Path matpath; /* dummy for result of cost_material */
matplan = (Plan *) make_material(plan->targetlist, plan);
/* need to calculate costs */
cost_material(&matpath,
plan->total_cost,
plan->plan_rows,
plan->plan_width);
matplan->startup_cost = matpath.startup_cost;
matplan->total_cost = matpath.total_cost;
/* parameter kluge --- see comments above */
matplan->extParam = listCopy(plan->extParam);
matplan->locParam = listCopy(plan->locParam);
node->plan = plan = matplan;
}
}
/* Convert the SubLink's oper list into executable form */
node->oper = convert_sublink_opers(slink->oper,
lefthand,
plan->targetlist,
NULL);
/*
* Make node->args from parParam.
*/
args = NIL;
foreach(lst, node->parParam)
{
Var *var = nth(lfirsti(lst), PlannerParamVar);
var = (Var *) copyObject(var);
/*
* Must fix absolute-level varlevelsup from the
* PlannerParamVar entry. But since var is at current subplan
* level, this is easy:
*/
var->varlevelsup = 0;
args = lappend(args, var);
}
node->args = args;
result = (Node *) node;
}
return result;
}
/*
* convert_sublink_opers: convert a SubLink's oper list from the
* parser/rewriter format into the executor's format.
*
* The oper list is initially a list of OpExpr nodes with NIL args. We
* convert it to a list of actually executable expressions, in which the
* specified operators are applied to corresponding elements of the
* lefthand list and Params representing the results of the subplan.
*
* If setParams is not NULL, the paramids of the Params created are added
* to the *setParams list.
*/
static List *
convert_sublink_opers(List *operlist, List *lefthand,
List *targetlist, List **setParams)
{
List *newoper = NIL;
List *leftlist = lefthand;
List *lst;
foreach(lst, operlist)
{
OpExpr *oper = (OpExpr *) lfirst(lst);
Node *leftop = lfirst(leftlist);
TargetEntry *te = lfirst(targetlist);
Param *prm;
Operator tup;
Form_pg_operator opform;
Node *left,
*right;
/* Make the Param node representing the subplan's result */
prm = generate_new_param(te->resdom->restype,
te->resdom->restypmod);
/* Record its ID if needed */
if (setParams)
*setParams = lappendi(*setParams, prm->paramid);
/* Look up the operator to check its declared input types */
Assert(IsA(oper, OpExpr));
tup = SearchSysCache(OPEROID,
ObjectIdGetDatum(oper->opno),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for operator %u", oper->opno);
opform = (Form_pg_operator) GETSTRUCT(tup);
/*
* Make the expression node.
*
* Note: we use make_operand in case runtime type conversion
* function calls must be inserted for this operator!
*/
left = make_operand(leftop, exprType(leftop), opform->oprleft);
right = make_operand((Node *) prm, prm->paramtype, opform->oprright);
newoper = lappend(newoper,
make_opclause(oper->opno,
oper->opresulttype,
oper->opretset,
(Expr *) left,
(Expr *) right));
ReleaseSysCache(tup);
leftlist = lnext(leftlist);
targetlist = lnext(targetlist);
}
return newoper;
}
/*
* Replace correlation vars (uplevel vars) with Params.
*/
Node *
SS_replace_correlation_vars(Node *expr)
{
/* No setup needed for tree walk, so away we go */
return replace_correlation_vars_mutator(expr, NULL);
}
static Node *
replace_correlation_vars_mutator(Node *node, void *context)
{
if (node == NULL)
return NULL;
if (IsA(node, Var))
{
if (((Var *) node)->varlevelsup > 0)
return (Node *) replace_var((Var *) node);
}
return expression_tree_mutator(node,
replace_correlation_vars_mutator,
context);
}
/*
* Expand SubLinks to SubPlans in the given expression.
*/
Node *
SS_process_sublinks(Node *expr)
{
/* No setup needed for tree walk, so away we go */
return process_sublinks_mutator(expr, NULL);
}
static Node *
process_sublinks_mutator(Node *node, void *context)
{
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
List *lefthand;
/*
* First, recursively process the lefthand-side expressions, if any.
*/
lefthand = (List *)
process_sublinks_mutator((Node *) sublink->lefthand, context);
/*
* Now build the SubPlan node and make the expr to return.
*/
return make_subplan(sublink, lefthand);
}
/*
* Note that we will never see a SubPlan expression in the input
* (since this is the very routine that creates 'em to begin with). So
* the code in expression_tree_mutator() that might do inappropriate
* things with SubPlans or SubLinks will not be exercised.
*/
Assert(!is_subplan(node));
return expression_tree_mutator(node,
process_sublinks_mutator,
context);
}
/*
* SS_finalize_plan - do final sublink processing for a completed Plan.
*
* This recursively computes and sets the extParam and locParam lists
* for every Plan node in the given tree.
*/
List *
SS_finalize_plan(Plan *plan, List *rtable)
{
List *extParam = NIL;
List *locParam = NIL;
finalize_primnode_results results;
List *lst;
if (plan == NULL)
return NIL;
results.paramids = NIL; /* initialize list to NIL */
/*
* When we call finalize_primnode, results.paramids lists are
* automatically merged together. But when recursing to self, we have
* to do it the hard way. We want the paramids list to include params
* in subplans as well as at this level.
*/
/* Find params in targetlist and qual */
finalize_primnode((Node *) plan->targetlist, &results);
finalize_primnode((Node *) plan->qual, &results);
/* Check additional node-type-specific fields */
switch (nodeTag(plan))
{
case T_Result:
finalize_primnode(((Result *) plan)->resconstantqual,
&results);
break;
case T_IndexScan:
finalize_primnode((Node *) ((IndexScan *) plan)->indxqual,
&results);
/*
* we need not look at indxqualorig, since it will have the
* same param references as indxqual.
*/
break;
case T_TidScan:
finalize_primnode((Node *) ((TidScan *) plan)->tideval,
&results);
break;
case T_SubqueryScan:
/*
* In a SubqueryScan, SS_finalize_plan has already been run on
* the subplan by the inner invocation of subquery_planner, so
* there's no need to do it again. Instead, just pull out the
* subplan's extParams list, which represents the params it
* needs from my level and higher levels.
*/
results.paramids = set_unioni(results.paramids,
((SubqueryScan *) plan)->subplan->extParam);
break;
case T_FunctionScan:
{
RangeTblEntry *rte;
rte = rt_fetch(((FunctionScan *) plan)->scan.scanrelid,
rtable);
Assert(rte->rtekind == RTE_FUNCTION);
finalize_primnode(rte->funcexpr, &results);
}
break;
case T_Append:
foreach(lst, ((Append *) plan)->appendplans)
results.paramids = set_unioni(results.paramids,
23 years ago
SS_finalize_plan((Plan *) lfirst(lst),
rtable));
break;
case T_NestLoop:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&results);
break;
case T_MergeJoin:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&results);
finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
&results);
break;
case T_HashJoin:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&results);
finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
&results);
break;
case T_Hash:
finalize_primnode((Node *) ((Hash *) plan)->hashkeys,
&results);
break;
case T_Agg:
case T_SeqScan:
case T_Material:
case T_Sort:
case T_Unique:
case T_SetOp:
case T_Limit:
case T_Group:
break;
default:
elog(ERROR, "SS_finalize_plan: node %d unsupported",
nodeTag(plan));
}
/* Process left and right child plans, if any */
results.paramids = set_unioni(results.paramids,
SS_finalize_plan(plan->lefttree,
rtable));
results.paramids = set_unioni(results.paramids,
SS_finalize_plan(plan->righttree,
rtable));
/* Now we have all the paramids */
foreach(lst, results.paramids)
{
int paramid = lfirsti(lst);
Var *var = nth(paramid, PlannerParamVar);
/* note varlevelsup is absolute level number */
if (var->varlevelsup < PlannerQueryLevel)
extParam = lappendi(extParam, paramid);
else if (var->varlevelsup > PlannerQueryLevel)
elog(ERROR, "SS_finalize_plan: plan shouldn't reference subplan's variable");
else
{
Assert(var->varno == 0 && var->varattno == 0);
locParam = lappendi(locParam, paramid);
}
}
plan->extParam = extParam;
plan->locParam = locParam;
return results.paramids;
}
/*
* finalize_primnode: build lists of params appearing
* in the given expression tree. NOTE: items are added to list passed in,
* so caller must initialize list to NIL before first call!
*/
static bool
finalize_primnode(Node *node, finalize_primnode_results *results)
{
if (node == NULL)
return false;
if (IsA(node, Param))
{
if (((Param *) node)->paramkind == PARAM_EXEC)
{
int paramid = (int) ((Param *) node)->paramid;
if (!intMember(paramid, results->paramids))
results->paramids = lconsi(paramid, results->paramids);
}
return false; /* no more to do here */
}
if (is_subplan(node))
{
SubPlan *subplan = (SubPlan *) node;
List *lst;
/* Check extParam list for params to add to paramids */
foreach(lst, subplan->plan->extParam)
{
int paramid = lfirsti(lst);
Var *var = nth(paramid, PlannerParamVar);
/* note varlevelsup is absolute level number */
if (var->varlevelsup < PlannerQueryLevel &&
!intMember(paramid, results->paramids))
results->paramids = lconsi(paramid, results->paramids);
}
/* fall through to recurse into subplan args */
}
return expression_tree_walker(node, finalize_primnode,
(void *) results);
}