|
|
|
|
/*-------------------------------------------------------------------------
 *
 * var.c
 *	  Var node manipulation routines
 *
 * Note: for most purposes, PlaceHolderVar is considered a Var too,
 * even if its contained expression is variable-free.  Also, CurrentOfExpr
 * is treated as a Var for purposes of determining whether an expression
 * contains variables.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/util/var.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
|
|
#include "access/sysattr.h"
|
|
|
|
|
#include "nodes/nodeFuncs.h"
|
|
|
|
|
#include "optimizer/optimizer.h"
|
|
|
|
|
#include "optimizer/prep.h"
|
|
|
|
|
#include "parser/parsetree.h"
|
|
|
|
|
#include "rewrite/rewriteManip.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
Relids varnos;
|
|
|
|
|
int sublevels_up;
|
|
|
|
|
} pull_varnos_context;
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
Bitmapset *varattnos;
|
|
|
|
|
Index varno;
|
|
|
|
|
} pull_varattnos_context;
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
List *vars;
|
|
|
|
|
int sublevels_up;
|
|
|
|
|
} pull_vars_context;
|
|
|
|
|
|
|
|
|
|
/* Working state for locate_var_of_level_walker */
typedef struct
{
	int			var_location;	/* parse location of the matching Var, or -1
								 * if none has been found */
	int			sublevels_up;	/* query level being searched for, relative
								 * to the node being visited */
} locate_var_of_level_context;
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
List *varlist;
|
|
|
|
|
int flags;
|
|
|
|
|
} pull_var_clause_context;
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
Query *query; /* outer Query */
|
|
|
|
|
int sublevels_up;
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
9 years ago
|
|
|
bool possible_sublink; /* could aliases include a SubLink? */
|
|
|
|
|
bool inserted_sublink; /* have we inserted a SubLink? */
|
|
|
|
|
} flatten_join_alias_vars_context;
|
|
|
|
|
|
|
|
|
|
static bool pull_varnos_walker(Node *node,
|
|
|
|
|
pull_varnos_context *context);
|
|
|
|
|
static bool pull_varattnos_walker(Node *node, pull_varattnos_context *context);
|
|
|
|
|
static bool pull_vars_walker(Node *node, pull_vars_context *context);
|
|
|
|
|
static bool contain_var_clause_walker(Node *node, void *context);
|
|
|
|
|
static bool contain_vars_of_level_walker(Node *node, int *sublevels_up);
|
|
|
|
|
static bool locate_var_of_level_walker(Node *node,
|
|
|
|
|
locate_var_of_level_context *context);
|
|
|
|
|
static bool pull_var_clause_walker(Node *node,
|
|
|
|
|
pull_var_clause_context *context);
|
|
|
|
|
static Node *flatten_join_alias_vars_mutator(Node *node,
|
|
|
|
|
flatten_join_alias_vars_context *context);
|
|
|
|
|
static Relids alias_relid_set(Query *query, Relids relids);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pull_varnos
|
|
|
|
|
* Create a set of all the distinct varnos present in a parsetree.
|
|
|
|
|
* Only varnos that reference level-zero rtable entries are considered.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: this is used on not-yet-planned expressions. It may therefore find
|
|
|
|
|
* bare SubLinks, and if so it needs to recurse into them to look for uplevel
|
|
|
|
|
* references to the desired rtable level! But when we find a completed
|
|
|
|
|
* SubPlan, we only need to look at the parameters passed to the subplan.
|
|
|
|
|
*/
|
|
|
|
|
Relids
|
|
|
|
|
pull_varnos(Node *node)
|
|
|
|
|
{
|
|
|
|
|
pull_varnos_context context;
|
|
|
|
|
|
|
|
|
|
context.varnos = NULL;
|
|
|
|
|
context.sublevels_up = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Must be prepared to start with a Query or a bare expression tree; if
|
|
|
|
|
* it's a Query, we don't want to increment sublevels_up.
|
|
|
|
|
*/
|
|
|
|
|
query_or_expression_tree_walker(node,
|
|
|
|
|
pull_varnos_walker,
|
|
|
|
|
(void *) &context,
|
|
|
|
|
0);
|
|
|
|
|
|
|
|
|
|
return context.varnos;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pull_varnos_of_level
|
|
|
|
|
* Create a set of all the distinct varnos present in a parsetree.
|
|
|
|
|
* Only Vars of the specified level are considered.
|
|
|
|
|
*/
|
|
|
|
|
Relids
|
|
|
|
|
pull_varnos_of_level(Node *node, int levelsup)
|
|
|
|
|
{
|
|
|
|
|
pull_varnos_context context;
|
|
|
|
|
|
|
|
|
|
context.varnos = NULL;
|
|
|
|
|
context.sublevels_up = levelsup;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Must be prepared to start with a Query or a bare expression tree; if
|
|
|
|
|
* it's a Query, we don't want to increment sublevels_up.
|
|
|
|
|
*/
|
|
|
|
|
query_or_expression_tree_walker(node,
|
|
|
|
|
pull_varnos_walker,
|
|
|
|
|
(void *) &context,
|
|
|
|
|
0);
|
|
|
|
|
|
|
|
|
|
return context.varnos;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pull_varnos_walker(Node *node, pull_varnos_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
|
|
|
|
|
|
if (var->varlevelsup == context->sublevels_up)
|
|
|
|
|
context->varnos = bms_add_member(context->varnos, var->varno);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, CurrentOfExpr))
|
|
|
|
|
{
|
|
|
|
|
CurrentOfExpr *cexpr = (CurrentOfExpr *) node;
|
|
|
|
|
|
|
|
|
|
if (context->sublevels_up == 0)
|
|
|
|
|
context->varnos = bms_add_member(context->varnos, cexpr->cvarno);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* A PlaceHolderVar acts as a variable of its syntactic scope, or
|
|
|
|
|
* lower than that if it references only a subset of the rels in its
|
|
|
|
|
* syntactic scope. It might also contain lateral references, but we
|
|
|
|
|
* should ignore such references when computing the set of varnos in
|
|
|
|
|
* an expression tree. Also, if the PHV contains no variables within
|
|
|
|
|
* its syntactic scope, it will be forced to be evaluated exactly at
|
|
|
|
|
* the syntactic scope, so take that as the relid set.
|
|
|
|
|
*/
|
|
|
|
|
PlaceHolderVar *phv = (PlaceHolderVar *) node;
|
|
|
|
|
pull_varnos_context subcontext;
|
|
|
|
|
|
|
|
|
|
subcontext.varnos = NULL;
|
|
|
|
|
subcontext.sublevels_up = context->sublevels_up;
|
|
|
|
|
(void) pull_varnos_walker((Node *) phv->phexpr, &subcontext);
|
|
|
|
|
if (phv->phlevelsup == context->sublevels_up)
|
|
|
|
|
{
|
|
|
|
|
subcontext.varnos = bms_int_members(subcontext.varnos,
|
|
|
|
|
phv->phrels);
|
|
|
|
|
if (bms_is_empty(subcontext.varnos))
|
|
|
|
|
context->varnos = bms_add_members(context->varnos,
|
|
|
|
|
phv->phrels);
|
|
|
|
|
}
|
|
|
|
|
context->varnos = bms_join(context->varnos, subcontext.varnos);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, Query))
|
|
|
|
|
{
|
|
|
|
|
/* Recurse into RTE subquery or not-yet-planned sublink subquery */
|
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
|
|
context->sublevels_up++;
|
|
|
|
|
result = query_tree_walker((Query *) node, pull_varnos_walker,
|
|
|
|
|
(void *) context, 0);
|
|
|
|
|
context->sublevels_up--;
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node, pull_varnos_walker,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pull_varattnos
|
|
|
|
|
* Find all the distinct attribute numbers present in an expression tree,
|
|
|
|
|
* and add them to the initial contents of *varattnos.
|
|
|
|
|
* Only Vars of the given varno and rtable level zero are considered.
|
|
|
|
|
*
|
|
|
|
|
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
|
|
|
|
|
* we can include system attributes (e.g., OID) in the bitmap representation.
|
|
|
|
|
*
|
|
|
|
|
* Currently, this does not support unplanned subqueries; that is not needed
|
|
|
|
|
* for current uses. It will handle already-planned SubPlan nodes, though,
|
|
|
|
|
* looking into only the "testexpr" and the "args" list. (The subplan cannot
|
|
|
|
|
* contain any other references to Vars of the current level.)
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pull_varattnos(Node *node, Index varno, Bitmapset **varattnos)
|
|
|
|
|
{
|
|
|
|
|
pull_varattnos_context context;
|
|
|
|
|
|
|
|
|
|
context.varattnos = *varattnos;
|
|
|
|
|
context.varno = varno;
|
|
|
|
|
|
|
|
|
|
(void) pull_varattnos_walker(node, &context);
|
|
|
|
|
|
|
|
|
|
*varattnos = context.varattnos;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pull_varattnos_walker(Node *node, pull_varattnos_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
|
|
|
|
|
|
if (var->varno == context->varno && var->varlevelsup == 0)
|
|
|
|
|
context->varattnos =
|
|
|
|
|
bms_add_member(context->varattnos,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
9 years ago
|
|
|
var->varattno - FirstLowInvalidHeapAttributeNumber);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Should not find an unplanned subquery */
|
|
|
|
|
Assert(!IsA(node, Query));
|
|
|
|
|
|
|
|
|
|
return expression_tree_walker(node, pull_varattnos_walker,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pull_vars_of_level
|
|
|
|
|
* Create a list of all Vars (and PlaceHolderVars) referencing the
|
|
|
|
|
* specified query level in the given parsetree.
|
|
|
|
|
*
|
|
|
|
|
* Caution: the Vars are not copied, only linked into the list.
|
|
|
|
|
*/
|
|
|
|
|
List *
|
|
|
|
|
pull_vars_of_level(Node *node, int levelsup)
|
|
|
|
|
{
|
|
|
|
|
pull_vars_context context;
|
|
|
|
|
|
|
|
|
|
context.vars = NIL;
|
|
|
|
|
context.sublevels_up = levelsup;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Must be prepared to start with a Query or a bare expression tree; if
|
|
|
|
|
* it's a Query, we don't want to increment sublevels_up.
|
|
|
|
|
*/
|
|
|
|
|
query_or_expression_tree_walker(node,
|
|
|
|
|
pull_vars_walker,
|
|
|
|
|
(void *) &context,
|
|
|
|
|
0);
|
|
|
|
|
|
|
|
|
|
return context.vars;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pull_vars_walker(Node *node, pull_vars_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
|
|
|
|
|
|
if (var->varlevelsup == context->sublevels_up)
|
|
|
|
|
context->vars = lappend(context->vars, var);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
|
|
|
|
PlaceHolderVar *phv = (PlaceHolderVar *) node;
|
|
|
|
|
|
|
|
|
|
if (phv->phlevelsup == context->sublevels_up)
|
|
|
|
|
context->vars = lappend(context->vars, phv);
|
|
|
|
|
/* we don't want to look into the contained expression */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, Query))
|
|
|
|
|
{
|
|
|
|
|
/* Recurse into RTE subquery or not-yet-planned sublink subquery */
|
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
|
|
context->sublevels_up++;
|
|
|
|
|
result = query_tree_walker((Query *) node, pull_vars_walker,
|
|
|
|
|
(void *) context, 0);
|
|
|
|
|
context->sublevels_up--;
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node, pull_vars_walker,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* contain_var_clause
|
|
|
|
|
* Recursively scan a clause to discover whether it contains any Var nodes
|
|
|
|
|
* (of the current query level).
|
|
|
|
|
*
|
|
|
|
|
* Returns true if any varnode found.
|
|
|
|
|
*
|
|
|
|
|
* Does not examine subqueries, therefore must only be used after reduction
|
|
|
|
|
* of sublinks to subplans!
|
|
|
|
|
*/
|
|
|
|
|
bool
|
|
|
|
|
contain_var_clause(Node *node)
|
|
|
|
|
{
|
|
|
|
|
return contain_var_clause_walker(node, NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
contain_var_clause_walker(Node *node, void *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
if (((Var *) node)->varlevelsup == 0)
|
|
|
|
|
return true; /* abort the tree traversal and return true */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, CurrentOfExpr))
|
|
|
|
|
return true;
|
|
|
|
|
if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
|
|
|
|
if (((PlaceHolderVar *) node)->phlevelsup == 0)
|
|
|
|
|
return true; /* abort the tree traversal and return true */
|
|
|
|
|
/* else fall through to check the contained expr */
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node, contain_var_clause_walker, context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* contain_vars_of_level
|
|
|
|
|
* Recursively scan a clause to discover whether it contains any Var nodes
|
|
|
|
|
* of the specified query level.
|
|
|
|
|
*
|
|
|
|
|
* Returns true if any such Var found.
|
|
|
|
|
*
|
|
|
|
|
* Will recurse into sublinks. Also, may be invoked directly on a Query.
|
|
|
|
|
*/
|
|
|
|
|
bool
|
|
|
|
|
contain_vars_of_level(Node *node, int levelsup)
|
|
|
|
|
{
|
|
|
|
|
int sublevels_up = levelsup;
|
|
|
|
|
|
|
|
|
|
return query_or_expression_tree_walker(node,
|
|
|
|
|
contain_vars_of_level_walker,
|
|
|
|
|
(void *) &sublevels_up,
|
|
|
|
|
0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
contain_vars_of_level_walker(Node *node, int *sublevels_up)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
if (((Var *) node)->varlevelsup == *sublevels_up)
|
|
|
|
|
return true; /* abort tree traversal and return true */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, CurrentOfExpr))
|
|
|
|
|
{
|
|
|
|
|
if (*sublevels_up == 0)
|
|
|
|
|
return true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
|
|
|
|
if (((PlaceHolderVar *) node)->phlevelsup == *sublevels_up)
|
|
|
|
|
return true; /* abort the tree traversal and return true */
|
|
|
|
|
/* else fall through to check the contained expr */
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, Query))
|
|
|
|
|
{
|
|
|
|
|
/* Recurse into subselects */
|
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
|
|
(*sublevels_up)++;
|
|
|
|
|
result = query_tree_walker((Query *) node,
|
|
|
|
|
contain_vars_of_level_walker,
|
|
|
|
|
(void *) sublevels_up,
|
|
|
|
|
0);
|
|
|
|
|
(*sublevels_up)--;
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node,
|
|
|
|
|
contain_vars_of_level_walker,
|
|
|
|
|
(void *) sublevels_up);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* locate_var_of_level
|
|
|
|
|
* Find the parse location of any Var of the specified query level.
|
|
|
|
|
*
|
|
|
|
|
* Returns -1 if no such Var is in the querytree, or if they all have
|
|
|
|
|
* unknown parse location. (The former case is probably caller error,
|
|
|
|
|
* but we don't bother to distinguish it from the latter case.)
|
|
|
|
|
*
|
|
|
|
|
* Will recurse into sublinks. Also, may be invoked directly on a Query.
|
|
|
|
|
*
|
|
|
|
|
* Note: it might seem appropriate to merge this functionality into
|
|
|
|
|
* contain_vars_of_level, but that would complicate that function's API.
|
|
|
|
|
* Currently, the only uses of this function are for error reporting,
|
|
|
|
|
* and so shaving cycles probably isn't very important.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
locate_var_of_level(Node *node, int levelsup)
|
|
|
|
|
{
|
|
|
|
|
locate_var_of_level_context context;
|
|
|
|
|
|
|
|
|
|
context.var_location = -1; /* in case we find nothing */
|
|
|
|
|
context.sublevels_up = levelsup;
|
|
|
|
|
|
|
|
|
|
(void) query_or_expression_tree_walker(node,
|
|
|
|
|
locate_var_of_level_walker,
|
|
|
|
|
(void *) &context,
|
|
|
|
|
0);
|
|
|
|
|
|
|
|
|
|
return context.var_location;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
locate_var_of_level_walker(Node *node,
|
|
|
|
|
locate_var_of_level_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
|
|
|
|
|
|
if (var->varlevelsup == context->sublevels_up &&
|
|
|
|
|
var->location >= 0)
|
|
|
|
|
{
|
|
|
|
|
context->var_location = var->location;
|
|
|
|
|
return true; /* abort tree traversal and return true */
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, CurrentOfExpr))
|
|
|
|
|
{
|
|
|
|
|
/* since CurrentOfExpr doesn't carry location, nothing we can do */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
/* No extra code needed for PlaceHolderVar; just look in contained expr */
|
|
|
|
|
if (IsA(node, Query))
|
|
|
|
|
{
|
|
|
|
|
/* Recurse into subselects */
|
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
|
|
context->sublevels_up++;
|
|
|
|
|
result = query_tree_walker((Query *) node,
|
|
|
|
|
locate_var_of_level_walker,
|
|
|
|
|
(void *) context,
|
|
|
|
|
0);
|
|
|
|
|
context->sublevels_up--;
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node,
|
|
|
|
|
locate_var_of_level_walker,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pull_var_clause
|
|
|
|
|
* Recursively pulls all Var nodes from an expression clause.
|
|
|
|
|
*
|
|
|
|
|
* Aggrefs are handled according to these bits in 'flags':
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
* PVC_INCLUDE_AGGREGATES include Aggrefs in output list
|
|
|
|
|
* PVC_RECURSE_AGGREGATES recurse into Aggref arguments
|
|
|
|
|
* neither flag throw error if Aggref found
|
|
|
|
|
* Vars within an Aggref's expression are included in the result only
|
|
|
|
|
* when PVC_RECURSE_AGGREGATES is specified.
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
*
|
|
|
|
|
* WindowFuncs are handled according to these bits in 'flags':
|
|
|
|
|
* PVC_INCLUDE_WINDOWFUNCS include WindowFuncs in output list
|
|
|
|
|
* PVC_RECURSE_WINDOWFUNCS recurse into WindowFunc arguments
|
|
|
|
|
* neither flag throw error if WindowFunc found
|
|
|
|
|
* Vars within a WindowFunc's expression are included in the result only
|
|
|
|
|
* when PVC_RECURSE_WINDOWFUNCS is specified.
|
|
|
|
|
*
|
|
|
|
|
* PlaceHolderVars are handled according to these bits in 'flags':
|
|
|
|
|
* PVC_INCLUDE_PLACEHOLDERS include PlaceHolderVars in output list
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
* PVC_RECURSE_PLACEHOLDERS recurse into PlaceHolderVar arguments
|
|
|
|
|
* neither flag throw error if PlaceHolderVar found
|
|
|
|
|
* Vars within a PHV's expression are included in the result only
|
|
|
|
|
* when PVC_RECURSE_PLACEHOLDERS is specified.
|
|
|
|
|
*
|
|
|
|
|
* GroupingFuncs are treated mostly like Aggrefs, and so do not need
|
|
|
|
|
* their own flag bits.
|
|
|
|
|
*
|
|
|
|
|
* CurrentOfExpr nodes are ignored in all cases.
|
|
|
|
|
*
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
* Upper-level vars (with varlevelsup > 0) should not be seen here,
|
|
|
|
|
* likewise for upper-level Aggrefs and PlaceHolderVars.
|
|
|
|
|
*
|
|
|
|
|
* Returns list of nodes found. Note the nodes themselves are not
|
|
|
|
|
* copied, only referenced.
|
|
|
|
|
*
|
|
|
|
|
* Does not examine subqueries, therefore must only be used after reduction
|
|
|
|
|
* of sublinks to subplans!
|
|
|
|
|
*/
|
|
|
|
|
List *
|
|
|
|
|
pull_var_clause(Node *node, int flags)
|
|
|
|
|
{
|
|
|
|
|
pull_var_clause_context context;
|
|
|
|
|
|
|
|
|
|
/* Assert that caller has not specified inconsistent flags */
|
|
|
|
|
Assert((flags & (PVC_INCLUDE_AGGREGATES | PVC_RECURSE_AGGREGATES))
|
|
|
|
|
!= (PVC_INCLUDE_AGGREGATES | PVC_RECURSE_AGGREGATES));
|
|
|
|
|
Assert((flags & (PVC_INCLUDE_WINDOWFUNCS | PVC_RECURSE_WINDOWFUNCS))
|
|
|
|
|
!= (PVC_INCLUDE_WINDOWFUNCS | PVC_RECURSE_WINDOWFUNCS));
|
|
|
|
|
Assert((flags & (PVC_INCLUDE_PLACEHOLDERS | PVC_RECURSE_PLACEHOLDERS))
|
|
|
|
|
!= (PVC_INCLUDE_PLACEHOLDERS | PVC_RECURSE_PLACEHOLDERS));
|
|
|
|
|
|
|
|
|
|
context.varlist = NIL;
|
|
|
|
|
context.flags = flags;
|
|
|
|
|
|
|
|
|
|
pull_var_clause_walker(node, &context);
|
|
|
|
|
return context.varlist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pull_var_clause_walker(Node *node, pull_var_clause_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return false;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
if (((Var *) node)->varlevelsup != 0)
|
|
|
|
|
elog(ERROR, "Upper-level Var found where not expected");
|
|
|
|
|
context->varlist = lappend(context->varlist, node);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
else if (IsA(node, Aggref))
|
|
|
|
|
{
|
|
|
|
|
if (((Aggref *) node)->agglevelsup != 0)
|
|
|
|
|
elog(ERROR, "Upper-level Aggref found where not expected");
|
|
|
|
|
if (context->flags & PVC_INCLUDE_AGGREGATES)
|
|
|
|
|
{
|
|
|
|
|
context->varlist = lappend(context->varlist, node);
|
|
|
|
|
/* we do NOT descend into the contained expression */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
else if (context->flags & PVC_RECURSE_AGGREGATES)
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
{
|
|
|
|
|
/* fall through to recurse into the aggregate's arguments */
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
elog(ERROR, "Aggref found where not expected");
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
}
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
11 years ago
|
|
|
else if (IsA(node, GroupingFunc))
|
|
|
|
|
{
|
|
|
|
|
if (((GroupingFunc *) node)->agglevelsup != 0)
|
|
|
|
|
elog(ERROR, "Upper-level GROUPING found where not expected");
|
|
|
|
|
if (context->flags & PVC_INCLUDE_AGGREGATES)
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
11 years ago
|
|
|
{
|
|
|
|
|
context->varlist = lappend(context->varlist, node);
|
|
|
|
|
/* we do NOT descend into the contained expression */
|
|
|
|
|
return false;
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
11 years ago
|
|
|
}
|
|
|
|
|
else if (context->flags & PVC_RECURSE_AGGREGATES)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* We do NOT descend into the contained expression, even if the
|
|
|
|
|
* caller asked for it, because we never actually evaluate it -
|
|
|
|
|
* the result is driven entirely off the associated GROUP BY
|
|
|
|
|
* clause, so we never need to extract the actual Vars here.
|
|
|
|
|
*/
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
elog(ERROR, "GROUPING found where not expected");
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
11 years ago
|
|
|
}
|
|
|
|
|
else if (IsA(node, WindowFunc))
|
|
|
|
|
{
|
|
|
|
|
/* WindowFuncs have no levelsup field to check ... */
|
|
|
|
|
if (context->flags & PVC_INCLUDE_WINDOWFUNCS)
|
|
|
|
|
{
|
|
|
|
|
context->varlist = lappend(context->varlist, node);
|
|
|
|
|
/* we do NOT descend into the contained expressions */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
else if (context->flags & PVC_RECURSE_WINDOWFUNCS)
|
|
|
|
|
{
|
|
|
|
|
/* fall through to recurse into the windowfunc's arguments */
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
elog(ERROR, "WindowFunc found where not expected");
|
|
|
|
|
}
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
else if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
15 years ago
|
|
|
if (((PlaceHolderVar *) node)->phlevelsup != 0)
|
|
|
|
|
elog(ERROR, "Upper-level PlaceHolderVar found where not expected");
|
|
|
|
|
if (context->flags & PVC_INCLUDE_PLACEHOLDERS)
|
|
|
|
|
{
|
|
|
|
|
context->varlist = lappend(context->varlist, node);
|
|
|
|
|
/* we do NOT descend into the contained expression */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
else if (context->flags & PVC_RECURSE_PLACEHOLDERS)
|
|
|
|
|
{
|
|
|
|
|
/* fall through to recurse into the placeholder's expression */
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
elog(ERROR, "PlaceHolderVar found where not expected");
|
|
|
|
|
}
|
|
|
|
|
return expression_tree_walker(node, pull_var_clause_walker,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* flatten_join_alias_vars
|
|
|
|
|
* Replace Vars that reference JOIN outputs with references to the original
|
|
|
|
|
* relation variables instead. This allows quals involving such vars to be
|
|
|
|
|
* pushed down. Whole-row Vars that reference JOIN relations are expanded
|
|
|
|
|
* into RowExpr constructs that name the individual output Vars. This
|
|
|
|
|
* is necessary since we will not scan the JOIN as a base relation, which
|
|
|
|
|
* is the only way that the executor can directly handle whole-row Vars.
|
|
|
|
|
*
|
|
|
|
|
* This also adjusts relid sets found in some expression node types to
|
|
|
|
|
* substitute the contained base rels for any join relid.
|
|
|
|
|
*
|
|
|
|
|
* If a JOIN contains sub-selects that have been flattened, its join alias
|
|
|
|
|
* entries might now be arbitrary expressions, not just Vars. This affects
|
|
|
|
|
* this function in one important way: we might find ourselves inserting
|
|
|
|
|
* SubLink expressions into subqueries, and we must make sure that their
|
|
|
|
|
* Query.hasSubLinks fields get set to true if so. If there are any
|
|
|
|
|
* SubLinks in the join alias lists, the outer Query should already have
|
|
|
|
|
* hasSubLinks = true, so this is only relevant to un-flattened subqueries.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: this is used on not-yet-planned expressions. We do not expect it
|
|
|
|
|
* to be applied directly to the whole Query, so if we see a Query to start
|
|
|
|
|
* with, we do want to increment sublevels_up (this occurs for LATERAL
|
|
|
|
|
* subqueries).
|
|
|
|
|
*/
|
|
|
|
|
Node *
|
|
|
|
|
flatten_join_alias_vars(Query *query, Node *node)
|
|
|
|
|
{
|
|
|
|
|
flatten_join_alias_vars_context context;
|
|
|
|
|
|
|
|
|
|
context.query = query;
|
|
|
|
|
context.sublevels_up = 0;
|
|
|
|
|
/* flag whether join aliases could possibly contain SubLinks */
|
|
|
|
|
context.possible_sublink = query->hasSubLinks;
|
|
|
|
|
/* if hasSubLinks is already true, no need to work hard */
|
|
|
|
|
context.inserted_sublink = query->hasSubLinks;
|
|
|
|
|
|
|
|
|
|
return flatten_join_alias_vars_mutator(node, &context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static Node *
|
|
|
|
|
flatten_join_alias_vars_mutator(Node *node,
|
|
|
|
|
flatten_join_alias_vars_context *context)
|
|
|
|
|
{
|
|
|
|
|
if (node == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
|
{
|
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
|
RangeTblEntry *rte;
|
|
|
|
|
Node *newvar;
|
|
|
|
|
|
|
|
|
|
/* No change unless Var belongs to a JOIN of the target level */
|
|
|
|
|
if (var->varlevelsup != context->sublevels_up)
|
|
|
|
|
return node; /* no need to copy, really */
|
|
|
|
|
rte = rt_fetch(var->varno, context->query->rtable);
|
|
|
|
|
if (rte->rtekind != RTE_JOIN)
|
|
|
|
|
return node;
|
|
|
|
|
if (var->varattno == InvalidAttrNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Must expand whole-row reference */
|
|
|
|
|
RowExpr *rowexpr;
|
|
|
|
|
List *fields = NIL;
|
|
|
|
|
List *colnames = NIL;
|
|
|
|
|
AttrNumber attnum;
|
|
|
|
|
ListCell *lv;
|
|
|
|
|
ListCell *ln;
|
|
|
|
|
|
|
|
|
|
attnum = 0;
|
|
|
|
|
Assert(list_length(rte->joinaliasvars) == list_length(rte->eref->colnames));
|
|
|
|
|
forboth(lv, rte->joinaliasvars, ln, rte->eref->colnames)
|
|
|
|
|
{
|
|
|
|
|
newvar = (Node *) lfirst(lv);
|
|
|
|
|
attnum++;
|
|
|
|
|
/* Ignore dropped columns */
|
Change post-rewriter representation of dropped columns in joinaliasvars.
It's possible to drop a column from an input table of a JOIN clause in a
view, if that column is nowhere actually referenced in the view. But it
will still be there in the JOIN clause's joinaliasvars list. We used to
replace such entries with NULL Const nodes, which is handy for generation
of RowExpr expansion of a whole-row reference to the view. The trouble
with that is that it can't be distinguished from the situation after
subquery pull-up of a constant subquery output expression below the JOIN.
Instead, replace such joinaliasvars with null pointers (empty expression
trees), which can't be confused with pulled-up expressions. expandRTE()
still emits the old convention, though, for convenience of RowExpr
generation and to reduce the risk of breaking extension code.
In HEAD and 9.3, this patch also fixes a problem with some new code in
ruleutils.c that was failing to cope with implicitly-casted joinaliasvars
entries, as per recent report from Feike Steenbergen. That oversight was
because of an inadequate description of the data structure in parsenodes.h,
which I've now corrected. There were some pre-existing oversights of the
same ilk elsewhere, which I believe are now all fixed.
13 years ago
|
|
|
if (newvar == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
newvar = copyObject(newvar);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If we are expanding an alias carried down from an upper
|
|
|
|
|
* query, must adjust its varlevelsup fields.
|
|
|
|
|
*/
|
|
|
|
|
if (context->sublevels_up != 0)
|
|
|
|
|
IncrementVarSublevelsUp(newvar, context->sublevels_up, 0);
|
|
|
|
|
/* Preserve original Var's location, if possible */
|
|
|
|
|
if (IsA(newvar, Var))
|
|
|
|
|
((Var *) newvar)->location = var->location;
|
|
|
|
|
/* Recurse in case join input is itself a join */
|
|
|
|
|
/* (also takes care of setting inserted_sublink if needed) */
|
|
|
|
|
newvar = flatten_join_alias_vars_mutator(newvar, context);
|
|
|
|
|
fields = lappend(fields, newvar);
|
|
|
|
|
/* We need the names of non-dropped columns, too */
|
|
|
|
|
colnames = lappend(colnames, copyObject((Node *) lfirst(ln)));
|
|
|
|
|
}
|
|
|
|
|
rowexpr = makeNode(RowExpr);
|
|
|
|
|
rowexpr->args = fields;
|
|
|
|
|
rowexpr->row_typeid = var->vartype;
|
|
|
|
|
rowexpr->row_format = COERCE_IMPLICIT_CAST;
|
|
|
|
|
rowexpr->colnames = colnames;
|
|
|
|
|
rowexpr->location = var->location;
|
|
|
|
|
|
|
|
|
|
return (Node *) rowexpr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Expand join alias reference */
|
|
|
|
|
Assert(var->varattno > 0);
|
|
|
|
|
newvar = (Node *) list_nth(rte->joinaliasvars, var->varattno - 1);
|
Change post-rewriter representation of dropped columns in joinaliasvars.
It's possible to drop a column from an input table of a JOIN clause in a
view, if that column is nowhere actually referenced in the view. But it
will still be there in the JOIN clause's joinaliasvars list. We used to
replace such entries with NULL Const nodes, which is handy for generation
of RowExpr expansion of a whole-row reference to the view. The trouble
with that is that it can't be distinguished from the situation after
subquery pull-up of a constant subquery output expression below the JOIN.
Instead, replace such joinaliasvars with null pointers (empty expression
trees), which can't be confused with pulled-up expressions. expandRTE()
still emits the old convention, though, for convenience of RowExpr
generation and to reduce the risk of breaking extension code.
In HEAD and 9.3, this patch also fixes a problem with some new code in
ruleutils.c that was failing to cope with implicitly-casted joinaliasvars
entries, as per recent report from Feike Steenbergen. That oversight was
because of an inadequate description of the data structure in parsenodes.h,
which I've now corrected. There were some pre-existing oversights of the
same ilk elsewhere, which I believe are now all fixed.
13 years ago
|
|
|
Assert(newvar != NULL);
|
|
|
|
|
newvar = copyObject(newvar);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If we are expanding an alias carried down from an upper query, must
|
|
|
|
|
* adjust its varlevelsup fields.
|
|
|
|
|
*/
|
|
|
|
|
if (context->sublevels_up != 0)
|
|
|
|
|
IncrementVarSublevelsUp(newvar, context->sublevels_up, 0);
|
|
|
|
|
|
|
|
|
|
/* Preserve original Var's location, if possible */
|
|
|
|
|
if (IsA(newvar, Var))
|
|
|
|
|
((Var *) newvar)->location = var->location;
|
|
|
|
|
|
|
|
|
|
/* Recurse in case join input is itself a join */
|
|
|
|
|
newvar = flatten_join_alias_vars_mutator(newvar, context);
|
|
|
|
|
|
|
|
|
|
/* Detect if we are adding a sublink to query */
|
|
|
|
|
if (context->possible_sublink && !context->inserted_sublink)
|
|
|
|
|
context->inserted_sublink = checkExprHasSubLink(newvar);
|
|
|
|
|
|
|
|
|
|
return newvar;
|
|
|
|
|
}
|
|
|
|
|
if (IsA(node, PlaceHolderVar))
|
|
|
|
|
{
|
|
|
|
|
/* Copy the PlaceHolderVar node with correct mutation of subnodes */
|
|
|
|
|
PlaceHolderVar *phv;
|
|
|
|
|
|
|
|
|
|
phv = (PlaceHolderVar *) expression_tree_mutator(node,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
9 years ago
|
|
|
flatten_join_alias_vars_mutator,
|
|
|
|
|
(void *) context);
|
|
|
|
|
/* now fix PlaceHolderVar's relid sets */
|
|
|
|
|
if (phv->phlevelsup == context->sublevels_up)
|
|
|
|
|
{
|
|
|
|
|
phv->phrels = alias_relid_set(context->query,
|
|
|
|
|
phv->phrels);
|
|
|
|
|
}
|
|
|
|
|
return (Node *) phv;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (IsA(node, Query))
|
|
|
|
|
{
|
|
|
|
|
/* Recurse into RTE subquery or not-yet-planned sublink subquery */
|
|
|
|
|
Query *newnode;
|
|
|
|
|
bool save_inserted_sublink;
|
|
|
|
|
|
|
|
|
|
context->sublevels_up++;
|
|
|
|
|
save_inserted_sublink = context->inserted_sublink;
|
|
|
|
|
context->inserted_sublink = ((Query *) node)->hasSubLinks;
|
|
|
|
|
newnode = query_tree_mutator((Query *) node,
|
|
|
|
|
flatten_join_alias_vars_mutator,
|
|
|
|
|
(void *) context,
|
|
|
|
|
QTW_IGNORE_JOINALIASES);
|
|
|
|
|
newnode->hasSubLinks |= context->inserted_sublink;
|
|
|
|
|
context->inserted_sublink = save_inserted_sublink;
|
|
|
|
|
context->sublevels_up--;
|
|
|
|
|
return (Node *) newnode;
|
|
|
|
|
}
|
|
|
|
|
/* Already-planned tree not supported */
|
|
|
|
|
Assert(!IsA(node, SubPlan));
|
|
|
|
|
/* Shouldn't need to handle these planner auxiliary nodes here */
|
|
|
|
|
Assert(!IsA(node, SpecialJoinInfo));
|
|
|
|
|
Assert(!IsA(node, PlaceHolderInfo));
|
|
|
|
|
Assert(!IsA(node, MinMaxAggInfo));
|
|
|
|
|
|
|
|
|
|
return expression_tree_mutator(node, flatten_join_alias_vars_mutator,
|
|
|
|
|
(void *) context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* alias_relid_set: in a set of RT indexes, replace joins by their
|
|
|
|
|
* underlying base relids
|
|
|
|
|
*/
|
|
|
|
|
static Relids
|
|
|
|
|
alias_relid_set(Query *query, Relids relids)
|
|
|
|
|
{
|
|
|
|
|
Relids result = NULL;
|
|
|
|
|
int rtindex;
|
|
|
|
|
|
|
|
|
|
rtindex = -1;
|
|
|
|
|
while ((rtindex = bms_next_member(relids, rtindex)) >= 0)
|
|
|
|
|
{
|
|
|
|
|
RangeTblEntry *rte = rt_fetch(rtindex, query->rtable);
|
|
|
|
|
|
|
|
|
|
if (rte->rtekind == RTE_JOIN)
|
|
|
|
|
result = bms_join(result, get_relids_for_join(query, rtindex));
|
|
|
|
|
else
|
|
|
|
|
result = bms_add_member(result, rtindex);
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|