/*-------------------------------------------------------------------------
 *
 * analyzejoins.c
 *	  Routines for simplifying joins after initial query analysis
 *
 * While we do a great deal of join simplification in prep/prepjointree.c,
 * certain optimizations cannot be performed at that stage for lack of
 * detailed information about the query.  The routines here are invoked
 * after initsplan.c has done its work, and can do additional join removal
 * and simplification steps based on the information extracted.  The penalty
 * is that we have to work harder to clean up after ourselves when we modify
 * the query, since the derived data structures have to be updated too.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/plan/analyzejoins.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/joininfo.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
#include "utils/lsyscache.h"

/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
								  Relids joinrelids);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
								List *clause_list);
static Oid	distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
								   Relids joinrelids,
								   Relids outerrelids,
								   RelOptInfo *innerrel,
								   JoinType jointype,
								   List *restrictlist);


/*
 * remove_useless_joins
 *		Check for relations that don't actually need to be joined at all,
 *		and remove them from the query.
 *
 * We are passed the current joinlist and return the updated list.  Other
 * data structures that have to be updated are accessible via "root".
 */
List *
remove_useless_joins(PlannerInfo *root, List *joinlist)
{
	ListCell   *lc;

	/*
	 * We are only interested in relations that are left-joined to, so we can
	 * scan the join_info_list to find them easily.
	 */
restart:
	foreach(lc, root->join_info_list)
	{
		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
		Relids		joinrelids;
		int			innerrelid;
		int			nremoved;

		/* Skip if not removable */
		if (!join_is_removable(root, sjinfo))
			continue;

		/*
		 * Currently, join_is_removable can only succeed when the sjinfo's
		 * righthand is a single baserel.  Remove that rel from the query and
		 * joinlist.
		 */
		innerrelid = bms_singleton_member(sjinfo->min_righthand);
		/* Compute the relid set for the join we are considering */
		joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
		if (sjinfo->ojrelid != 0)
			joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);

		remove_rel_from_query(root, innerrelid, sjinfo->ojrelid, joinrelids);

		/* We verify that exactly one reference gets removed from joinlist */
		nremoved = 0;
		joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
		if (nremoved != 1)
			elog(ERROR, "failed to find relation %d in joinlist", innerrelid);

		/*
		 * We can delete this SpecialJoinInfo from the list too, since it's no
		 * longer of interest.  (Since we'll restart the foreach loop
		 * immediately, we don't bother with foreach_delete_current.)
		 */
		root->join_info_list = list_delete_cell(root->join_info_list, lc);

		/*
		 * Restart the scan.  This is necessary to ensure we find all
		 * removable joins independently of ordering of the join_info_list
		 * (note that removal of attr_needed bits may make a join appear
		 * removable that did not before).
		 */
		goto restart;
	}

	return joinlist;
}

/*
 * clause_sides_match_join
 *	  Determine whether a join clause is of the right form to use in this join.
 *
 * We already know that the clause is a binary opclause referencing only the
 * rels in the current join.  The point here is to check whether it has the
 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
 * rather than mixing outer and inner vars on either side.  If it matches,
 * we set the transient flag outer_is_left to identify which side is which.
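 *
 * For example, with outer relids {a} and inner relids {b}, a clause
 * "a.x = b.y" matches with outer_is_left = true, "b.y = a.x" matches with
 * outer_is_left = false, and a clause mixing both rels on one side, such
 * as "a.x + b.y = 0", does not match at all.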
 */
static inline bool
clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
						Relids innerrelids)
{
	if (bms_is_subset(rinfo->left_relids, outerrelids) &&
		bms_is_subset(rinfo->right_relids, innerrelids))
	{
		/* lefthand side is outer */
		rinfo->outer_is_left = true;
		return true;
	}
	else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
			 bms_is_subset(rinfo->right_relids, outerrelids))
	{
		/* righthand side is outer */
		rinfo->outer_is_left = false;
		return true;
	}
	return false;				/* no good for these input relations */
}

/*
 * join_is_removable
 *	  Check whether we need not perform this special join at all, because
 *	  it will just duplicate its left input.
 *
 * This is true for a left join for which the join condition cannot match
 * more than one inner-side row.  (There are other possibly interesting
 * cases, but we don't have the infrastructure to prove them.)  We also
 * have to check that the inner side doesn't generate any variables needed
 * above the join.
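 *
 * The canonical removable case is, for instance,
 *		SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id
 * where b.id is provably unique and no column of b is referenced above the
 * join: each "a" row then joins to at most one "b" row, so the join result
 * is just the rows of "a" and the join to "b" can be dropped.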
 */
static bool
join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
	int			innerrelid;
	RelOptInfo *innerrel;
	Relids		joinrelids;
	List	   *clause_list = NIL;
	ListCell   *l;
	int			attroff;

	/*
	 * Must be a left join to a single baserel, else we aren't going to be
	 * able to do anything with it.
	 */
	if (sjinfo->jointype != JOIN_LEFT)
		return false;

	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
		return false;

	innerrel = find_base_rel(root, innerrelid);

	/*
	 * Before we go to the effort of checking whether any innerrel variables
	 * are needed above the join, make a quick check to eliminate cases in
	 * which we will surely be unable to prove uniqueness of the innerrel.
	 */
	if (!rel_supports_distinctness(root, innerrel))
		return false;

	/* Compute the relid set for the join we are considering */
	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
	if (sjinfo->ojrelid != 0)
		joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);

	/*
	 * We can't remove the join if any inner-rel attributes are used above the
	 * join.
	 *
	 * Note that this test only detects use of inner-rel attributes in higher
	 * join conditions and the target list.  There might be such attributes in
	 * pushed-down conditions at this join, too.  We check that case below.
	 *
	 * As a micro-optimization, it seems better to start with max_attr and
	 * count down rather than starting with min_attr and counting up, on the
	 * theory that the system attributes are somewhat less likely to be wanted
	 * and should be tested last.
	 */
	for (attroff = innerrel->max_attr - innerrel->min_attr;
		 attroff >= 0;
		 attroff--)
	{
		if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
			return false;
	}

	/*
	 * Similarly check that the inner rel isn't needed by any PlaceHolderVars
	 * that will be used above the join.  We only need to fail if such a PHV
	 * actually references some inner-rel attributes; but the correct check
	 * for that is relatively expensive, so we first check against ph_eval_at,
	 * which must mention the inner rel if the PHV uses any inner-rel attrs as
	 * non-lateral references.  Note that if the PHV's syntactic scope is just
	 * the inner rel, we can't drop the rel even if the PHV is variable-free.
	 */
	foreach(l, root->placeholder_list)
	{
		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);

		if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
			return false;		/* it references innerrel laterally */
		if (bms_is_subset(phinfo->ph_needed, joinrelids))
			continue;			/* PHV is not used above the join */
		if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
			continue;			/* it definitely doesn't reference innerrel */
		if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
			return false;		/* there isn't any other place to eval PHV */
		if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
						innerrel->relids))
			return false;		/* it does reference innerrel */
	}

	/*
	 * Search for mergejoinable clauses that constrain the inner rel against
	 * either the outer rel or a pseudoconstant.  If an operator is
	 * mergejoinable then it behaves like equality for some btree opclass, so
	 * it's what we want.  The mergejoinability test also eliminates clauses
	 * containing volatile functions, which we couldn't depend on.
	 */
	foreach(l, innerrel->joininfo)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
		/*
		 * If the current join commutes with some other outer join(s) via
		 * outer join identity 3, there will be multiple clones of its join
		 * clauses in the joininfo list.  We want to consider only the
		 * has_clone form of such clauses.  Processing more than one form
		 * would be wasteful, and also some of the others would confuse the
		 * RINFO_IS_PUSHED_DOWN test below.
		 */
		if (restrictinfo->is_clone)
			continue;			/* ignore it */

		/*
		 * If it's not a join clause for this outer join, we can't use it.
		 * Note that if the clause is pushed-down, then it is logically from
		 * above the outer join, even if it references no other rels (it might
		 * be from WHERE, for example).
		 */
		if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
		{
			/*
			 * If such a clause actually references the inner rel then join
			 * removal has to be disallowed.  We have to check this despite
			 * the previous attr_needed checks because of the possibility of
			 * pushed-down clauses referencing the rel.
			 */
			if (bms_is_member(innerrelid, restrictinfo->clause_relids))
				return false;
			continue;			/* else, ignore; not useful here */
		}

		/* Ignore if it's not a mergejoinable clause */
		if (!restrictinfo->can_join ||
			restrictinfo->mergeopfamilies == NIL)
			continue;			/* not mergejoinable */

		/*
		 * Check if clause has the form "outer op inner" or "inner op outer",
		 * and if so mark which side is inner.
		 */
		if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
									 innerrel->relids))
			continue;			/* no good for these input relations */

		/* OK, add to list */
		clause_list = lappend(clause_list, restrictinfo);
	}

	/*
	 * Now that we have the relevant equality join clauses, try to prove the
	 * innerrel distinct.
	 */
	if (rel_is_distinct_for(root, innerrel, clause_list))
		return true;

	/*
	 * Some day it would be nice to check for other methods of establishing
	 * distinctness.
	 */
	return false;
}

/*
 * Remove the target relid from the planner's data structures, having
 * determined that there is no need to include it in the query.
 *
 * We are not terribly thorough here.  We must make sure that the rel is
 * no longer treated as a baserel, and that attributes of other baserels
 * are no longer marked as being needed at joins involving this rel.
 * Also, join quals involving the rel have to be removed from the joininfo
 * lists, but only if they belong to the outer join identified by ojrelid
 * and joinrelids.
 */
static void
remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
					  Relids joinrelids)
{
	RelOptInfo *rel = find_base_rel(root, relid);
	List	   *joininfos;
	Index		rti;
	ListCell   *l;

	/*
	 * Mark the rel as "dead" to show it is no longer part of the join tree.
	 * (Removing it from the baserel array altogether seems too risky.)
	 */
	rel->reloptkind = RELOPT_DEADREL;

	/*
	 * Remove references to the rel from other baserels' attr_needed arrays.
	 */
	for (rti = 1; rti < root->simple_rel_array_size; rti++)
	{
		RelOptInfo *otherrel = root->simple_rel_array[rti];
		int			attroff;

		/* there may be empty slots corresponding to non-baserel RTEs */
		if (otherrel == NULL)
			continue;

		Assert(otherrel->relid == rti); /* sanity check on array */

		/* no point in processing target rel itself */
		if (otherrel == rel)
			continue;

		for (attroff = otherrel->max_attr - otherrel->min_attr;
			 attroff >= 0;
			 attroff--)
		{
			otherrel->attr_needed[attroff] =
				bms_del_member(otherrel->attr_needed[attroff], relid);
			otherrel->attr_needed[attroff] =
				bms_del_member(otherrel->attr_needed[attroff], ojrelid);
		}
	}
	/*
	 * Update all_baserels and related relid sets.
	 */
	root->all_baserels = bms_del_member(root->all_baserels, relid);
	root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
	root->all_query_rels = bms_del_member(root->all_query_rels, relid);
	root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);

	/*
	 * Likewise remove references from SpecialJoinInfo data structures.
	 *
	 * This is relevant in case the outer join we're deleting is nested inside
	 * other outer joins: the upper joins' relid sets have to be adjusted. The
	 * RHS of the target outer join will be made empty here, but that's OK
	 * since caller will delete that SpecialJoinInfo entirely.
	 */
	foreach(l, root->join_info_list)
	{
		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);

		sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
		sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
		sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
		sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
|
        sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, ojrelid);
        sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, ojrelid);
        sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, ojrelid);
        sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, ojrelid);
        /* relid cannot appear in these fields, but ojrelid can: */
        sjinfo->commute_above_l = bms_del_member(sjinfo->commute_above_l, ojrelid);
        sjinfo->commute_above_r = bms_del_member(sjinfo->commute_above_r, ojrelid);
        sjinfo->commute_below = bms_del_member(sjinfo->commute_below, ojrelid);
    }

    /*
     * Likewise remove references from PlaceHolderVar data structures,
     * removing any no-longer-needed placeholders entirely.
     *
     * Removal is a bit trickier than it might seem: we can remove PHVs that
     * are used at the target rel and/or in the join qual, but not those that
     * are used at join partner rels or above the join.  It's not that easy to
     * distinguish PHVs used at partner rels from those used in the join qual,
     * since they will both have ph_needed sets that are subsets of
     * joinrelids.  However, a PHV used at a partner rel could not have the
     * target rel in ph_eval_at, so we check that while deciding whether to
     * remove or just update the PHV.  There is no corresponding test in
     * join_is_removable because it doesn't need to distinguish those cases.
     */
    foreach(l, root->placeholder_list)
    {
        PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);

        Assert(!bms_is_member(relid, phinfo->ph_lateral));
        if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
            bms_is_member(relid, phinfo->ph_eval_at))
        {
            root->placeholder_list = foreach_delete_current(root->placeholder_list,
                                                            l);
            root->placeholder_array[phinfo->phid] = NULL;
        }
        else
        {
            phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
            phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
            Assert(!bms_is_empty(phinfo->ph_eval_at));
            phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
            phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid);
        }
    }

    /*
     * Remove any joinquals referencing the rel from the joininfo lists.
     *
     * In some cases, a joinqual has to be put back after deleting its
     * reference to the target rel.  This can occur for pseudoconstant and
     * outerjoin-delayed quals, which can get marked as requiring the rel in
     * order to force them to be evaluated at or above the join.  We can't
     * just discard them, though.  Only quals that logically belonged to the
     * outer join being discarded should be removed from the query.
     *
     * We must make a copy of the rel's old joininfo list before starting the
     * loop, because otherwise remove_join_clause_from_rels would destroy the
     * list while we're scanning it.
     */
    joininfos = list_copy(rel->joininfo);
    foreach(l, joininfos)
    {
        RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);

        remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);

        /*
         * If the qual lists ojrelid in its required_relids, it must have come
         * from above the outer join we're removing (so we need to keep it);
         * if it does not, then it didn't and we can discard it.
         */
        if (bms_is_member(ojrelid, rinfo->required_relids))
        {
            /* Recheck that qual doesn't actually reference the target rel */
            Assert(!bms_is_member(relid, rinfo->clause_relids));

            /*
             * The required_relids probably aren't shared with anything else,
             * but let's copy them just to be sure.
             */
            rinfo->required_relids = bms_copy(rinfo->required_relids);
            rinfo->required_relids = bms_del_member(rinfo->required_relids,
                                                    relid);
            rinfo->required_relids = bms_del_member(rinfo->required_relids,
                                                    ojrelid);
            distribute_restrictinfo_to_rels(root, rinfo);
        }
    }

    /*
     * There may be references to the rel in root->fkey_list, but if so,
     * match_foreign_keys_to_quals() will get rid of them.
     */
}

/*
 * Remove any occurrences of the target relid from a joinlist structure.
 *
 * It's easiest to build a whole new list structure, so we handle it that
 * way.  Efficiency is not a big deal here.
 *
 * *nremoved is incremented by the number of occurrences removed (there
 * should be exactly one, but the caller checks that).
 */
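/*
 * For context (illustrative note, assuming standard joinlist conventions):
 * a joinlist is a List of RangeTblRef nodes, possibly containing nested
 * sub-Lists for join subproblems that deconstruct_jointree chose not to
 * flatten (for example, when from_collapse_limit or join_collapse_limit
 * would otherwise be exceeded).  The recursion below mirrors that shape.
 */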
static List *
remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
{
    List       *result = NIL;
    ListCell   *jl;

    foreach(jl, joinlist)
    {
        Node       *jlnode = (Node *) lfirst(jl);

        if (IsA(jlnode, RangeTblRef))
        {
            int         varno = ((RangeTblRef *) jlnode)->rtindex;

            if (varno == relid)
                (*nremoved)++;
            else
                result = lappend(result, jlnode);
        }
        else if (IsA(jlnode, List))
        {
            /* Recurse to handle subproblem */
            List       *sublist;

            sublist = remove_rel_from_joinlist((List *) jlnode,
                                               relid, nremoved);
            /* Avoid including empty sub-lists in the result */
            if (sublist)
                result = lappend(result, sublist);
        }
        else
        {
            elog(ERROR, "unrecognized joinlist node type: %d",
                 (int) nodeTag(jlnode));
        }
    }

    return result;
}

/*
 * reduce_unique_semijoins
 *      Check for semijoins that can be simplified to plain inner joins
 *      because the inner relation is provably unique for the join clauses.
 *
 * Ideally this would happen during reduce_outer_joins, but we don't have
 * enough information at that point.
 *
 * To perform the strength reduction when applicable, we need only delete
 * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
 * bother fixing the join type attributed to it in the query jointree,
 * since that won't be consulted again.)
 */
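/*
 * Illustrative example (table and column names hypothetical): a query such as
 *      SELECT * FROM foo WHERE foo.x IN (SELECT bar.id FROM bar)
 * is planned as a semijoin of foo to bar.  If bar.id is covered by a unique
 * index, bar is provably unique for the join clause, so the semijoin can be
 * treated as a plain inner join, giving the join search more freedom.
 */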
void
reduce_unique_semijoins(PlannerInfo *root)
{
    ListCell   *lc;

    /*
     * Scan the join_info_list to find semijoins.
     */
    foreach(lc, root->join_info_list)
    {
        SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
        int         innerrelid;
        RelOptInfo *innerrel;
        Relids      joinrelids;
        List       *restrictlist;

        /*
         * Must be a semijoin to a single baserel, else we aren't going to be
         * able to do anything with it.
         */
        if (sjinfo->jointype != JOIN_SEMI)
            continue;

        if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
            continue;

        innerrel = find_base_rel(root, innerrelid);

        /*
         * Before we trouble to run generate_join_implied_equalities, make a
         * quick check to eliminate cases in which we will surely be unable to
         * prove uniqueness of the innerrel.
         */
        if (!rel_supports_distinctness(root, innerrel))
            continue;

        /* Compute the relid set for the join we are considering */
        joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
        Assert(sjinfo->ojrelid == 0);   /* SEMI joins don't have RT indexes */

        /*
         * Since we're only considering a single-rel RHS, any join clauses it
         * has must be clauses linking it to the semijoin's min_lefthand.  We
         * can also consider EC-derived join clauses.
         */
        restrictlist =
            list_concat(generate_join_implied_equalities(root,
                                                         joinrelids,
                                                         sjinfo->min_lefthand,
                                                         innerrel),
                        innerrel->joininfo);

        /* Test whether the innerrel is unique for those clauses. */
        if (!innerrel_is_unique(root,
                                joinrelids, sjinfo->min_lefthand, innerrel,
                                JOIN_SEMI, restrictlist, true))
            continue;

        /* OK, remove the SpecialJoinInfo from the list. */
        root->join_info_list = foreach_delete_current(root->join_info_list, lc);
    }
}

/*
 * rel_supports_distinctness
 *      Could the relation possibly be proven distinct on some set of columns?
 *
 * This is effectively a pre-checking function for rel_is_distinct_for().
 * It must return true if rel_is_distinct_for() could possibly return true
 * with this rel, but it should not expend a lot of cycles.  The idea is
 * that callers can avoid doing possibly-expensive processing to compute
 * rel_is_distinct_for()'s argument lists if the call could not possibly
 * succeed.
 */
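/*
 * For example (illustrative): a baserel scanned from a table with a
 * non-deferrable PRIMARY KEY or UNIQUE constraint passes this check, since
 * such constraints are backed by immediately-enforced unique indexes.
 */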
static bool
rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
{
    /* We only know about baserels ... */
    if (rel->reloptkind != RELOPT_BASEREL)
        return false;
    if (rel->rtekind == RTE_RELATION)
    {
        /*
         * For a plain relation, we only know how to prove uniqueness by
         * reference to unique indexes.  Make sure there's at least one
         * suitable unique index.  It must be immediately enforced, and if
         * it's a partial index, it must match the query.  (Keep these
         * conditions in sync with relation_has_unique_index_for!)
         */
        ListCell   *lc;

        foreach(lc, rel->indexlist)
        {
            IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);

            if (ind->unique && ind->immediate &&
                (ind->indpred == NIL || ind->predOK))
                return true;
        }
    }
    else if (rel->rtekind == RTE_SUBQUERY)
    {
        Query      *subquery = root->simple_rte_array[rel->relid]->subquery;

        /* Check if the subquery has any qualities that support distinctness */
        if (query_supports_distinctness(subquery))
            return true;
    }
    /* We have no proof rules for any other rtekinds. */
    return false;
}

/*
 * rel_is_distinct_for
 *      Does the relation return only distinct rows according to clause_list?
 *
 * clause_list is a list of join restriction clauses involving this rel and
 * some other one.  Return true if no two rows emitted by this rel could
 * possibly join to the same row of the other rel.
 *
 * The caller must have already determined that each condition is a
 * mergejoinable equality with an expression in this relation on one side, and
 * an expression not involving this relation on the other.  The transient
 * outer_is_left flag is used to identify which side references this relation:
 * left side if outer_is_left is false, right side if it is true.
 *
 * Note that the passed-in clause_list may be destructively modified!  This
 * is OK for current uses, because the clause_list is built by the caller for
 * the sole purpose of passing to this function.
 */
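/*
 * For example (illustrative, hypothetical column names): given a clause_list
 * containing only "a.x = b.id", and a unique index on b.id, rel b is distinct
 * for the list: no row of a can join to more than one row of b.
 */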
static bool
rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
{
    /*
     * We could skip a couple of tests here if we assume all callers checked
     * rel_supports_distinctness first, but it doesn't seem worth taking any
     * risk for.
     */
    if (rel->reloptkind != RELOPT_BASEREL)
        return false;
    if (rel->rtekind == RTE_RELATION)
    {
        /*
         * Examine the indexes to see if we have a matching unique index.
         * relation_has_unique_index_for automatically adds any usable
         * restriction clauses for the rel, so we needn't do that here.
         */
        if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
            return true;
    }
    else if (rel->rtekind == RTE_SUBQUERY)
    {
        Index       relid = rel->relid;
        Query      *subquery = root->simple_rte_array[relid]->subquery;
        List       *colnos = NIL;
        List       *opids = NIL;
        ListCell   *l;

        /*
         * Build the argument lists for query_is_distinct_for: a list of
         * output column numbers that the query needs to be distinct over, and
         * a list of equality operators that the output columns need to be
         * distinct according to.
         *
         * (XXX we are not considering restriction clauses attached to the
         * subquery; is that worth doing?)
         */
        foreach(l, clause_list)
        {
			RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
			Oid			op;
			Var		   *var;

			/*
			 * Get the equality operator we need uniqueness according to.
			 * (This might be a cross-type operator and thus not exactly the
			 * same operator the subquery would consider; that's all right
			 * since query_is_distinct_for can resolve such cases.)  The
			 * caller's mergejoinability test should have selected only
			 * OpExprs.
			 */
			op = castNode(OpExpr, rinfo->clause)->opno;

			/* caller identified the inner side for us */
			if (rinfo->outer_is_left)
				var = (Var *) get_rightop(rinfo->clause);
			else
				var = (Var *) get_leftop(rinfo->clause);

			/*
			 * We may ignore any RelabelType node above the operand.  (There
			 * won't be more than one, since eval_const_expressions() has been
			 * applied already.)
			 */
			if (var && IsA(var, RelabelType))
				var = (Var *) ((RelabelType *) var)->arg;

			/*
			 * If inner side isn't a Var referencing a subquery output column,
			 * this clause doesn't help us.
			 */
			if (!var || !IsA(var, Var) ||
				var->varno != relid || var->varlevelsup != 0)
				continue;

			colnos = lappend_int(colnos, var->varattno);
			opids = lappend_oid(opids, op);
		}

		if (query_is_distinct_for(subquery, colnos, opids))
			return true;
	}
	return false;
}
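
/*
 * Illustrative example (a hypothetical schema, not drawn from the code
 * above): given
 *
 *		SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
 *
 * a clause_list containing just the RestrictInfo for "a.b_id = b.id" lets
 * rel_is_distinct_for() return true for b whenever b.id has a unique
 * index, since no two rows of b could then join to the same row of a.
 * That is the core fact that join removal and unique-join detection rely
 * on; the same conclusion is reached for a subquery rel whose output is
 * provably distinct on the joined column.
 */
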
/*
 * query_supports_distinctness - could the query possibly be proven distinct
 *		on some set of output columns?
 *
 * This is effectively a pre-checking function for query_is_distinct_for().
 * It must return true if query_is_distinct_for() could possibly return true
 * with this query, but it should not expend a lot of cycles.  The idea is
 * that callers can avoid doing possibly-expensive processing to compute
 * query_is_distinct_for()'s argument lists if the call could not possibly
 * succeed.
 */
bool
query_supports_distinctness(Query *query)
{
	/* SRFs break distinctness except with DISTINCT, see below */
	if (query->hasTargetSRFs && query->distinctClause == NIL)
		return false;

	/* check for features we can prove distinctness with */
	if (query->distinctClause != NIL ||
		query->groupClause != NIL ||
		query->groupingSets != NIL ||
		query->hasAggs ||
		query->havingQual ||
		query->setOperations)
		return true;

	return false;
}
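
/*
 * A rough sketch of the intended calling pattern (details elided):
 *
 *		if (query_supports_distinctness(subquery))
 *		{
 *			... build colnos and opids from the join clauses ...
 *			if (query_is_distinct_for(subquery, colnos, opids))
 *				... subquery is provably unique on those columns ...
 *		}
 *
 * For instance, a plain "SELECT x FROM t" fails this pre-check (no
 * DISTINCT, GROUP BY, aggregation, HAVING, or set operation), so the
 * caller can skip building the argument lists entirely.
 */
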
/*
 * query_is_distinct_for - does query never return duplicates of the
 *		specified columns?
 *
 * query is a not-yet-planned subquery (in current usage, it's always from
 * a subquery RTE, which the planner avoids scribbling on).
 *
 * colnos is an integer list of output column numbers (resno's).  We are
 * interested in whether rows consisting of just these columns are certain
 * to be distinct.  "Distinctness" is defined according to whether the
 * corresponding upper-level equality operators listed in opids would think
 * the values are distinct.  (Note: the opids entries could be cross-type
 * operators, and thus not exactly the equality operators that the subquery
 * would use itself.  We use equality_ops_are_compatible() to check
 * compatibility.  That looks at btree or hash opfamily membership, and so
 * should give trustworthy answers for all operators that we might need
 * to deal with here.)
 */
bool
query_is_distinct_for(Query *query, List *colnos, List *opids)
{
	ListCell   *l;
	Oid			opid;

	Assert(list_length(colnos) == list_length(opids));

	/*
	 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
	 * columns in the DISTINCT clause appear in colnos and operator semantics
	 * match.  This is true even if there are SRFs in the DISTINCT columns or
	 * elsewhere in the tlist.
	 */
	if (query->distinctClause)
	{
		foreach(l, query->distinctClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
			TargetEntry *tle = get_sortgroupclause_tle(sgc,
													   query->targetList);

			opid = distinct_col_search(tle->resno, colnos, opids);
			if (!OidIsValid(opid) ||
				!equality_ops_are_compatible(opid, sgc->eqop))
				break;			/* exit early if no match */
		}
		if (l == NULL)			/* had matches for all? */
			return true;
	}

	/*
	 * Otherwise, a set-returning function in the query's targetlist can
	 * result in returning duplicate rows, despite any grouping that might
	 * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
	 * columns, it would be safe because they'd be expanded before grouping.
	 * But it doesn't currently seem worth the effort to check for that.)
	 */
	if (query->hasTargetSRFs)
		return false;

	/*
	 * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
	 * the grouped columns appear in colnos and operator semantics match.
	 */
	if (query->groupClause && !query->groupingSets)
	{
		foreach(l, query->groupClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
			TargetEntry *tle = get_sortgroupclause_tle(sgc,
													   query->targetList);

			opid = distinct_col_search(tle->resno, colnos, opids);
			if (!OidIsValid(opid) ||
				!equality_ops_are_compatible(opid, sgc->eqop))
				break;			/* exit early if no match */
		}
		if (l == NULL)			/* had matches for all? */
			return true;
	}
	else if (query->groupingSets)
	{
		/*
		 * If we have grouping sets with expressions, we probably don't have
		 * uniqueness and analysis would be hard.  Punt.
		 */
		if (query->groupClause)
			return false;

		/*
		 * If we have no groupClause (therefore no grouping expressions), we
		 * might have one or many empty grouping sets.  If there's just one,
		 * then we're returning only one row and are certainly unique.  But
		 * otherwise, we know we're certainly not unique.
		 */
		if (list_length(query->groupingSets) == 1 &&
			((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
			return true;
		else
			return false;
	}
	else
	{
		/*
		 * If we have no GROUP BY, but do have aggregates or HAVING, then the
		 * result is at most one row so it's surely unique, for any operators.
		 */
		if (query->hasAggs || query->havingQual)
			return true;
	}

	/*
	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
	 * except with ALL.
	 */
	if (query->setOperations)
	{
		SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);

		Assert(topop->op != SETOP_NONE);

		if (!topop->all)
		{
			ListCell   *lg;

			/* We're good if all the nonjunk output columns are in colnos */
			lg = list_head(topop->groupClauses);
			foreach(l, query->targetList)
			{
				TargetEntry *tle = (TargetEntry *) lfirst(l);
				SortGroupClause *sgc;

				if (tle->resjunk)
					continue;	/* ignore resjunk columns */

				/* non-resjunk columns should have grouping clauses */
				Assert(lg != NULL);
				sgc = (SortGroupClause *) lfirst(lg);
				lg = lnext(topop->groupClauses, lg);

				opid = distinct_col_search(tle->resno, colnos, opids);
				if (!OidIsValid(opid) ||
					!equality_ops_are_compatible(opid, sgc->eqop))
					break;		/* exit early if no match */
			}
			if (l == NULL)		/* had matches for all? */
				return true;
		}
	}

	/*
	 * XXX Are there any other cases in which we can easily see the result
	 * must be distinct?
	 *
	 * If you do add more smarts to this function, be sure to update
	 * query_supports_distinctness() to match.
	 */

	return false;
}
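
/*
 * As a concrete illustration (hypothetical table t): for the subquery
 *
 *		SELECT DISTINCT x, y FROM t
 *
 * query_is_distinct_for() returns true when colnos covers both output
 * columns, say (1, 2), with compatible equality operators, since the
 * DISTINCT step de-duplicates exactly those columns.  With colnos = (1)
 * alone it returns false: two output rows could share x while differing
 * in y.  A "GROUP BY x, y" subquery is handled the same way, provided it
 * uses no grouping sets and has no set-returning functions in its
 * targetlist.
 */
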
/*
 * distinct_col_search - subroutine for query_is_distinct_for
 *
 * If colno is in colnos, return the corresponding element of opids,
 * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
 * but if it does, we arbitrarily select the first match.)
 */
static Oid
distinct_col_search(int colno, List *colnos, List *opids)
{
	ListCell   *lc1,
			   *lc2;

	forboth(lc1, colnos, lc2, opids)
	{
		if (colno == lfirst_int(lc1))
			return lfirst_oid(lc2);
	}
	return InvalidOid;
}
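
/*
 * For example, with colnos = (1, 3) and opids = (O1, O3) -- O1 and O3
 * standing in for arbitrary equality-operator OIDs -- a search for colno 3
 * returns O3, while a search for colno 2 returns InvalidOid.
 */
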
/*
 * innerrel_is_unique
 *	  Check if the innerrel provably contains at most one tuple matching any
 *	  tuple from the outerrel, based on join clauses in the 'restrictlist'.
 *
 * We need an actual RelOptInfo for the innerrel, but it's sufficient to
 * identify the outerrel by its Relids.  This asymmetry supports use of this
 * function before joinrels have been built.  (The caller is expected to
 * also supply the joinrelids, just to save recalculating that.)
 *
 * The proof must be made based only on clauses that will be "joinquals"
 * rather than "otherquals" at execution.  For an inner join there's no
 * difference; but if the join is outer, we must ignore pushed-down quals,
 * as those will become "otherquals".  Note that this means the answer might
 * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
 * answer without regard to that, callers must take care not to call this
 * with jointypes that would be classified differently by IS_OUTER_JOIN().
 *
 * The actual proof is undertaken by is_innerrel_unique_for(); this function
 * is a frontend that is mainly concerned with caching the answers.
 * In particular, the force_cache argument allows overriding the internal
 * heuristic about whether to cache negative answers; it should be "true"
 * if making an inquiry that is not part of the normal bottom-up join search
 * sequence.
 */
bool
innerrel_is_unique(PlannerInfo *root,
				   Relids joinrelids,
				   Relids outerrelids,
				   RelOptInfo *innerrel,
				   JoinType jointype,
				   List *restrictlist,
				   bool force_cache)
{
	MemoryContext old_context;
	ListCell   *lc;

	/* Certainly can't prove uniqueness when there are no joinclauses */
	if (restrictlist == NIL)
		return false;

	/*
	 * Make a quick check to eliminate cases in which we will surely be unable
	 * to prove uniqueness of the innerrel.
	 */
	if (!rel_supports_distinctness(root, innerrel))
		return false;

	/*
	 * Query the cache to see if we've managed to prove that innerrel is
	 * unique for any subset of this outerrel.  We don't need an exact match,
	 * as extra outerrels can't make the innerrel any less unique (or more
	 * formally, the restrictlist for a join to a superset outerrel must be a
	 * superset of the conditions we successfully used before).
	 */
	foreach(lc, innerrel->unique_for_rels)
	{
		Relids		unique_for_rels = (Relids) lfirst(lc);

		if (bms_is_subset(unique_for_rels, outerrelids))
			return true;		/* Success! */
	}

	/*
	 * Conversely, we may have already determined that this outerrel, or some
	 * superset thereof, cannot prove this innerrel to be unique.
	 */
	foreach(lc, innerrel->non_unique_for_rels)
	{
		Relids		unique_for_rels = (Relids) lfirst(lc);

		if (bms_is_subset(outerrelids, unique_for_rels))
			return false;
	}

	/* No cached information, so try to make the proof. */
	if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
							   jointype, restrictlist))
	{
		/*
		 * Cache the positive result for future probes, being sure to keep it
		 * in the planner_cxt even if we are working in GEQO.
		 *
		 * Note: one might consider trying to isolate the minimal subset of
		 * the outerrels that proved the innerrel unique.  But it's not worth
		 * the trouble, because the planner builds up joinrels incrementally
		 * and so we'll see the minimally sufficient outerrels before any
		 * supersets of them anyway.
		 */
		old_context = MemoryContextSwitchTo(root->planner_cxt);
		innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
											bms_copy(outerrelids));
		MemoryContextSwitchTo(old_context);

		return true;			/* Success! */
	}
	else
	{
		/*
		 * None of the join conditions for outerrel proved innerrel unique, so
		 * we can safely reject this outerrel or any subset of it in future
		 * checks.
		 *
		 * However, in normal planning mode, caching this knowledge is totally
		 * pointless; it won't be queried again, because we build up joinrels
		 * from smaller to larger.  It is useful in GEQO mode, where the
		 * knowledge can be carried across successive planning attempts; and
		 * it's likely to be useful when using join-search plugins, too.  Hence
		 * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
		 * but it seems reasonable.)
		 *
		 * Also, allow callers to override that heuristic and force caching;
		 * that's useful for reduce_unique_semijoins, which calls here before
		 * the normal join search starts.
		 */
		if (force_cache || root->join_search_private)
		{
			old_context = MemoryContextSwitchTo(root->planner_cxt);
			innerrel->non_unique_for_rels =
				lappend(innerrel->non_unique_for_rels,
						bms_copy(outerrelids));
			MemoryContextSwitchTo(old_context);
		}

		return false;
	}
}
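
/*
 * A sketch of the caching behavior, using hypothetical outer relid sets:
 * once innerrel has been proved unique against outerrelids {A}, a later
 * probe with outerrelids {A, B} succeeds immediately via unique_for_rels,
 * because the larger join's restrictlist can only contain more conditions.
 * Conversely, a cached failure for {A, B} in non_unique_for_rels also
 * answers a later probe for just {A}, since a subset of the outer rels can
 * only supply a subset of the join clauses.
 */
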
/*
 * is_innerrel_unique_for
 *	  Check if the innerrel provably contains at most one tuple matching any
 *	  tuple from the outerrel, based on join clauses in the 'restrictlist'.
 */
static bool
is_innerrel_unique_for(PlannerInfo *root,
					   Relids joinrelids,
					   Relids outerrelids,
					   RelOptInfo *innerrel,
					   JoinType jointype,
					   List *restrictlist)
{
	List	   *clause_list = NIL;
	ListCell   *lc;

	/*
	 * Search for mergejoinable clauses that constrain the inner rel against
	 * the outer rel.  If an operator is mergejoinable then it behaves like
	 * equality for some btree opclass, so it's what we want.  The
	 * mergejoinability test also eliminates clauses containing volatile
	 * functions, which we couldn't depend on.
	 */
	foreach(lc, restrictlist)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);

		/*
		 * As noted above, if it's a pushed-down clause and we're at an outer
		 * join, we can't use it.
		 */
		if (IS_OUTER_JOIN(jointype) &&
			RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
			continue;

		/* Ignore if it's not a mergejoinable clause */
		if (!restrictinfo->can_join ||
			restrictinfo->mergeopfamilies == NIL)
			continue;			/* not mergejoinable */

		/*
		 * Check if clause has the form "outer op inner" or "inner op outer",
		 * and if so mark which side is inner.
		 */
		if (!clause_sides_match_join(restrictinfo, outerrelids,
									 innerrel->relids))
			continue;			/* no good for these input relations */

		/* OK, add to list */
		clause_list = lappend(clause_list, restrictinfo);
	}

	/* Let rel_is_distinct_for() do the hard work */
	return rel_is_distinct_for(root, innerrel, clause_list);
}
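
/*
 * For example, given join clauses "outer.x = inner.y AND outer.a > inner.b"
 * (hypothetical column names), only "outer.x = inner.y" survives the
 * filtering above: the inequality is not mergejoinable, so it cannot help
 * prove uniqueness.  Clauses that would be pushed-down "otherquals" at an
 * outer join, or that don't place the two input rels on opposite sides of
 * the operator, are likewise skipped.  The surviving clause_list is then
 * handed to rel_is_distinct_for() to look for a matching unique index or
 * provably distinct subquery output.
 */
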