mirror of https://github.com/postgres/postgres
Current tree: fdf218f1d5

Branches:
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
REL_18_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master

Tags:

PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_22
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_19
REL_14_2
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_14
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_10
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_6
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_BETA1
REL_18_BETA2
REL_18_BETA3
REL_18_RC1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
547 commits at fdf218f1d52b2c93145f16b28b8374703ae4ef19, newest first. Each entry shows the commit SHA1, subject, message body, and age.

fdf218f1d5
Restrict accesses to non-system views and foreign tables during pg_dump.
When pg_dump retrieves the list of database objects and performs the data dump, there was possibility that objects are replaced with others of the same name, such as views, and access them. This vulnerability could result in code execution with superuser privileges during the pg_dump process. This issue can arise when dumping data of sequences, foreign tables (only 13 or later), or tables registered with a WHERE clause in the extension configuration table. To address this, pg_dump now utilizes the newly introduced restrict_nonsystem_relation_kind GUC parameter to restrict the accesses to non-system views and foreign tables during the dump process. This new GUC parameter is added to back branches too, but these changes do not require cluster recreation. Back-patch to all supported branches. Reviewed-by: Noah Misch Security: CVE-2024-7348 Backpatch-through: 12 |
1 year ago |
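To make the fix above concrete, here is a minimal sketch of the new setting as the commit describes it; pg_dump applies it automatically during a dump, the view name below is purely hypothetical, and 'view' / 'foreign-table' are assumed to be the accepted value names.

```sql
-- Restrict this session's access to non-system views and foreign tables,
-- as pg_dump now does internally while dumping (assumed value names).
SET restrict_nonsystem_relation_kind = 'view, foreign-table';

-- While the restriction is active, reading a non-system view or foreign table
-- is rejected, closing the object-replacement window described above.
SELECT * FROM some_user_view;   -- hypothetical name; fails with the setting in place
```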

7d2c7f08d9
Fix query pullup issue with WindowClause runCondition
1 year ago |

0294df2f1f
Add support for MERGE ... WHEN NOT MATCHED BY SOURCE.
This allows MERGE commands to include WHEN NOT MATCHED BY SOURCE actions, which operate on rows that exist in the target relation, but not in the data source. These actions can execute UPDATE, DELETE, or DO NOTHING sub-commands. This is in contrast to already-supported WHEN NOT MATCHED actions, which operate on rows that exist in the data source, but not in the target relation. To make this distinction clearer, such actions may now be written as WHEN NOT MATCHED BY TARGET. Writing WHEN NOT MATCHED without specifying BY SOURCE or BY TARGET is equivalent to writing WHEN NOT MATCHED BY TARGET. Dean Rasheed, reviewed by Alvaro Herrera, Ted Yu and Vik Fearing. Discussion: https://postgr.es/m/CAEZATCWqnKGc57Y_JanUBHQXNKcXd7r=0R4NEZUVwP+syRkWbA@mail.gmail.com |
1 year ago |
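As a hedged illustration of the syntax added by the commit above (table and column names are hypothetical):

```sql
MERGE INTO target AS t
USING source AS s ON t.id = s.id
WHEN MATCHED THEN
    UPDATE SET balance = s.balance
WHEN NOT MATCHED BY TARGET THEN    -- row exists only in source (same as plain WHEN NOT MATCHED)
    INSERT (id, balance) VALUES (s.id, s.balance)
WHEN NOT MATCHED BY SOURCE THEN    -- row exists only in target: the new capability
    DELETE;
```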

b7e2121ab7
Postpone reparameterization of paths until create_plan().
When considering nestloop paths for individual partitions within
a partitionwise join, if the inner path is parameterized, it is
parameterized by the topmost parent of the outer rel, not the
corresponding outer rel itself. Therefore, we need to translate the
parameterization so that the inner path is parameterized by the
corresponding outer rel.
Up to now, we did this while generating join paths. However, that's
problematic because we must also translate some expressions that are
shared across all paths for a relation, such as restriction clauses
(kept in the RelOptInfo and/or IndexOptInfo) and TableSampleClauses
(kept in the RangeTblEntry). The existing code fails to translate
these at all, leading to wrong answers, odd failures such as
"variable not found in subplan target list", or executor crashes.
But we can't modify them during path generation, because that would
break things if we end up choosing some non-partitioned-join path.
So this patch postpones reparameterization of the inner path until
createplan.c, where it is safe to modify the referenced RangeTblEntry,
RelOptInfo or IndexOptInfo, because we have made a final choice of which
Path to use. We do still have to check during path generation that
the reparameterization will be possible. So we introduce a new
function path_is_reparameterizable_by_child() to detect that.
The duplication between path_is_reparameterizable_by_child() and
reparameterize_path_by_child() is a bit annoying, but there seems
no other good answer. A small benefit is that we can avoid building
useless reparameterized trees in cases where a non-partitioned join
is ultimately chosen. Also, reparameterize_path_by_child() can now
be allowed to scribble on the input paths, saving a few cycles.
This fix repairs the same problems previously addressed in the
back branches by commits
2 years ago |

2cca95e175
Improve NestLoopParam generation for lateral subqueries
It was possible in cases where we had a LATERAL joined subquery that when the same Var is mentioned in both the lateral references and in the outer Vars of the scan clauses that the given Var wouldn't be assigned to the same NestLoopParam. This could cause issues in Memoize as the cache key would reference the Var for the scan clauses but when the parameter for the lateral references changed some code in Memoize would see that some other parameter had changed that's not part of the cache key and end up purging the entire cache as a result, thinking the cache had become stale. This could result in a Nested Loop -> Memoize plan being quite inefficient as, in the worst case, the cache purging could result in never getting a cache hit. In no cases could this problem lead to incorrect query results. Here we switch the order of operations so that we create NestLoopParam for the lateral references first before doing replace_nestloop_params(). replace_nestloop_params() will find and reuse the existing NestLoopParam in cases where the Var exists in both locations. Author: Richard Guo Reviewed-by: Tom Lane, David Rowley Discussion: https://postgr.es/m/CAMbWs48XHJEK1Q1CzAQ7L9sTANTs9W1cepXu8%3DKc0quUL%2Btg4Q%40mail.gmail.com |
2 years ago |

29275b1d17
Update copyright for 2024
Reported-by: Michael Paquier Discussion: https://postgr.es/m/ZZKTDPxBBMt3C0J9@paquier.xyz Backpatch-through: 12 |
2 years ago |

9e9931d2bf
Re-allow FDWs and custom scan providers to replace joins with pseudoconstant quals.
This was disabled in commit
2 years ago |

e08d74ca13
Allow plan nodes with initPlans to be considered parallel-safe.
If the plan itself is parallel-safe, and the initPlans are too, there's no reason anymore to prevent the plan from being marked parallel-safe. That restriction (dating to commit |
2 years ago |

c65102006b
Remove redundant PARTITION BY columns from WindowClauses
Here we adjust the query planner to have it remove items from a window
clause's PARTITION BY clause in cases where the pathkey for a column in
the PARTITION BY clause is redundant.
Doing this allows the optimization added in
2 years ago |
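A small sketch of the kind of window clause the commit above can now simplify; the WHERE clause pins column a to a constant, making it redundant in the PARTITION BY (names are illustrative):

```sql
-- With a fixed by the WHERE clause, PARTITION BY a, b can be reduced to PARTITION BY b,
-- which may avoid an extra sort or allow an existing ordering to be reused.
SELECT a, b, c,
       row_number() OVER (PARTITION BY a, b ORDER BY c) AS rn
FROM t
WHERE a = 1;
```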

5472743d9e
Revert "Move PartitionPruneInfo out of plan nodes into PlannedStmt"
This reverts commit
2 years ago |

9bfd2822b3
Enable use of Memoize atop an Append that came from UNION ALL.
create_append_path() would only apply get_baserel_parampathinfo when the path is for a partitioned table, but it's also potentially useful for paths for UNION ALL appendrels. Specifically, that supports building a Memoize path atop this one. While we're in the vicinity, delete some dead code in create_merge_append_plan(): there's no need for it to support parameterized MergeAppend paths, and it doesn't look like that is going to change anytime soon. It'll be easy enough to undo this when/if it becomes useful. Richard Guo Discussion: https://postgr.es/m/CAMbWs4_ABSu4PWG2rE1q10tJugEXHWgru3U8dAgkoFvgrb6aEA@mail.gmail.com |
3 years ago |
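One query shape that can now benefit, sketched under the assumption that the planner picks a parameterized nested loop (table names are hypothetical and the actual plan still depends on costs):

```sql
-- The lateral UNION ALL subquery is an appendrel whose scans are parameterized by o.x;
-- after this commit a Memoize node may be placed between the Nested Loop and the Append.
SELECT *
FROM outer_tab o
JOIN LATERAL (
    SELECT x, payload FROM part_a WHERE part_a.x = o.x
    UNION ALL
    SELECT x, payload FROM part_b WHERE part_b.x = o.x
) u ON true;
```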

c6c3b3bc3d
Remove gratuitous assumptions about what make_modifytable can see.
For no clearly good reason, make_modifytable assumed that it could not reach its get-the-FDW-info-the-hard-way path in MERGE. It's currently possible to demonstrate that assertion failing, which seems to be due to an upstream planner bug; but there's no good reason to do it like this at all. Let's apply the principle of separation of concerns and make the MERGE check separately, after getting or not getting the fdwroutine pointer. Per report from Alexander Lakhin. No test case, since I think the potential test condition will go away soon. Discussion: https://postgr.es/m/36bee393-b351-16ac-93b2-d46d83637e45@gmail.com |
3 years ago |

e9aaf06328
Remove dead NoMovementScanDirection code
Here remove some dead code from heapgettup() and heapgettup_pagemode() which was trying to support NoMovementScanDirection scans. This code can never be reached as standard_ExecutorRun() never calls ExecutePlan with NoMovementScanDirection. Additionally, plans which were scanning an unordered index would use NoMovementScanDirection rather than ForwardScanDirection. There was no real need for this, so here we adjust this so we use ForwardScanDirection for unordered index scans. A comment in pathnodes.h claimed that NoMovementScanDirection was used for PathKey reasons, but if that was true, it no longer is, per code in build_index_paths(). This does change the non-text format of the EXPLAIN output so that unordered index scans now have a "Forward" scan direction rather than "NoMovement". The text format of EXPLAIN has not changed. Author: Melanie Plageman Reviewed-by: Tom Lane, David Rowley Discussion: https://postgr.es/m/CAAKRu_bvkhka0CZQun28KTqhuUh5ZqY=_T8QEqZqOL02rpi2bw@mail.gmail.com |
3 years ago |

3bef56e116
Invent "join domains" to replace the below_outer_join hack.
EquivalenceClasses are now understood as applying within a "join domain", which is a set of inner-joined relations (possibly underneath an outer join). We no longer need to treat an EC from below an outer join as a second-class citizen. I have hopes of eventually being able to treat outer-join clauses via EquivalenceClasses, by means of only applying deductions within the EC's join domain. There are still problems in the way of that, though, so for now the reconsider_outer_join_clause logic is still here. I haven't been able to get rid of RestrictInfo.is_pushed_down either, but I wonder if that could be recast using JoinDomains. I had to hack one test case in postgres_fdw.sql to make it still test what it was meant to, because postgres_fdw is inconsistent about how it deals with quals containing non-shippable expressions; see https://postgr.es/m/1691374.1671659838@sss.pgh.pa.us. That should be improved, but I don't think it's within the scope of this patch series. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us |
3 years ago |

2489d76c49
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us |
3 years ago |

8d83a5d0a2
Remove redundant grouping and DISTINCT columns.
Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us |
3 years ago |
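The commit message's own example, written out with illustrative names:

```sql
-- x = y makes one of the two grouping columns provably redundant,
-- so the planner now effectively groups by just one of them.
SELECT x, y, count(*)
FROM t
WHERE x = y
GROUP BY x, y;
```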

c8e1ba736b
Update copyright for 2023
Backpatch-through: 11 |
3 years ago |

ec38694894
Move PartitionPruneInfo out of plan nodes into PlannedStmt
The planner will now add a given PartitioPruneInfo to PlannedStmt.partPruneInfos instead of directly to the Append/MergeAppend plan node. What gets set instead in the latter is an index field which points to the list element of PlannedStmt.partPruneInfos containing the PartitioPruneInfo belonging to the plan node. A later commit will make AcquireExecutorLocks() do the initial partition pruning to determine a minimal set of partitions to be locked when validating a plan tree and it will need to consult the PartitioPruneInfos referenced therein to do so. It would be better for the PartitioPruneInfos to be accessible directly than requiring a walk of the plan tree to find them, which is easier when it can be done by simply iterating over PlannedStmt.partPruneInfos. Author: Amit Langote <amitlangote09@gmail.com> Discussion: https://postgr.es/m/CA+HiwqFGkMSge6TgC9KQzde0ohpAycLQuV7ooitEEpbKB0O_mg@mail.gmail.com |
3 years ago |

599b33b949
Stop accessing checkAsUser via RTE in some cases
A future commit will move the checkAsUser field from RangeTblEntry to a new node that, unlike RTEs, will only be created for tables mentioned in the query but not for the inheritance child relations added to the query by the planner. So, checkAsUser value for a given child relation will have to be obtained by referring to that for its ancestor mentioned in the query. In preparation, it seems better to expand the use of RelOptInfo.userid during planning in place of rte->checkAsUser so that there will be fewer places to adjust for the above change. Given that the child-to-ancestor mapping is not available during the execution of a given "child" ForeignScan node, add a checkAsUser field to ForeignScan to carry the child relation's RelOptInfo.userid. Author: Amit Langote <amitlangote09@gmail.com> Discussion: https://postgr.es/m/CA+HiwqGFCs2uq7VRKi7g+FFKbP6Ea_2_HkgZb2HPhUfaAKT3ng@mail.gmail.com |
3 years ago |

cba4e78f35
Disallow MERGE cleanly for foreign partitions
While directly targetting a foreign table with MERGE was already expressly forbidden, we failed to catch the case of a partitioned table that has a foreign table as a partition; and the result if you try is an incomprehensible error. Fix that by adding a specific check. Backpatch to 15. Reported-by: Tatsuhiro Nakamori <bt22nakamorit@oss.nttdata.com> Discussion: https://postgr.es/m/bt22nakamorit@oss.nttdata.com |
3 years ago |
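A hypothetical sketch of the case the commit above now rejects cleanly (postgres_fdw setup omitted; all names are invented):

```sql
-- measurements is a partitioned table with a foreign table as one of its partitions.
-- MERGE targeting it now fails with a clear error instead of the previous
-- incomprehensible one; directly targeting a foreign table was already forbidden.
MERGE INTO measurements AS m
USING staging AS s ON m.id = s.id
WHEN MATCHED THEN UPDATE SET val = s.val
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
```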

a601366a46
Harmonize more parameter names in bulk.
Make sure that function declarations use names that exactly match the corresponding names from function definitions in optimizer, parser, utility, libpq, and "commands" code, as well as in remaining library code. Do the same for all code related to frontend programs (with the exception of pg_dump/pg_dumpall related code). Like other recent commits that cleaned up function parameter names, this commit was written with help from clang-tidy. Later commits will handle ecpg and pg_dump/pg_dumpall. Author: Peter Geoghegan <pg@bowt.ie> Reviewed-By: David Rowley <dgrowleyml@gmail.com> Discussion: https://postgr.es/m/CAH2-WznJt9CMM9KJTMjJh_zbL5hD9oX44qdJ4aqZtjFi-zA3Tg@mail.gmail.com |
3 years ago |

b3ff6c742f
Use an explicit state flag to control PlaceHolderInfo creation.
Up to now, callers of find_placeholder_info() were required to pass a flag indicating if it's OK to make a new PlaceHolderInfo. That'd be fine if the callers had free choice, but they do not. Once we begin deconstruct_jointree() it's no longer OK to make more PHIs; while callers before that always want to create a PHI if it's not there already. So there's no freedom of action, only the opportunity to cause bugs by creating PHIs too late. Let's get rid of that in favor of adding a state flag PlannerInfo.placeholdersFrozen, which we can set at the point where it's no longer OK to make more PHIs. This patch also simplifies a couple of call sites that were using complicated logic to avoid calling find_placeholder_info() as much as possible. Now that that lookup is O(1) thanks to the previous commit, the extra bitmap manipulations are probably a net negative. Discussion: https://postgr.es/m/1405792.1660677844@sss.pgh.pa.us |
3 years ago |

efd0c16bec
Avoid using list_length() to test for empty list.
The standard way to check for list emptiness is to compare the List pointer to NIL; our list code goes out of its way to ensure that that is the only representation of an empty list. (An acceptable alternative is a plain boolean test for non-null pointer, but explicit mention of NIL is usually preferable.) Various places didn't get that memo and expressed the condition with list_length(), which might not be so bad except that there were such a variety of ways to check it exactly: equal to zero, less than or equal to zero, less than one, yadda yadda. In the name of code readability, let's standardize all those spellings as "list == NIL" or "list != NIL". (There's probably some microscopic efficiency gain too, though few of these look to be at all performance-critical.) A very small number of cases were left as-is because they seemed more consistent with other adjacent list_length tests that way. Peter Smith, with bikeshedding from a number of us Discussion: https://postgr.es/m/CAHut+PtQYe+ENX5KrONMfugf0q6NHg4hR5dAhqEXEc2eefFeig@mail.gmail.com |
3 years ago |

c23e3e6beb
Use list_copy_head() instead of list_truncate(list_copy(...), ...)
Truncating off the end of a freshly copied List is not a very efficient
way of copying the first N elements of a List.
In many of the cases that are updated here, the pattern was only being
used to remove the final element of a List. That's about the best case
for it, but there were many instances where the truncate was trimming the
List down much further.
3 years ago |

a916cb9d5a
Avoid overflow hazard when clamping group counts to "long int".
Several places in the planner tried to clamp a double value to fit in a "long" by doing (long) Min(x, (double) LONG_MAX); This is subtly incorrect, because it casts LONG_MAX to double and potentially back again. If long is 64 bits then the double value is inexact, and the platform might round it up to LONG_MAX+1 resulting in an overflow and an undesirably negative output. While it's not hard to rewrite the expression into a safe form, let's put it into a common function to reduce the risk of someone doing it wrong in future. In principle this is a bug fix, but since the problem could only manifest with group count estimates exceeding 2^63, it seems unlikely that anyone has actually hit this or will do so anytime soon. We're fixing it mainly to satisfy fuzzer-type tools. That being the case, a HEAD-only fix seems sufficient. Andrey Lepikhov Discussion: https://postgr.es/m/ebbc2efb-7ef9-bf2f-1ada-d6ec48f70e58@postgrespro.ru |
3 years ago |

23e7b38bfe
Pre-beta mechanical code beautification.
Run pgindent, pgperltidy, and reformat-dat-files. I manually fixed a couple of comments that pgindent uglified. |
3 years ago |

5c854e7a2c
Disable asynchronous execution if using gating Result nodes.
mark_async_capable_plan(), which is called from create_append_plan() to
determine whether subplans are async-capable, failed to take into
account that the given subplan created from a given subpath might
include a gating Result node if the subpath is a SubqueryScanPath or
ForeignPath, causing a segmentation fault there when the subplan created
from a SubqueryScanPath includes the Result node, or causing
ExecAsyncRequest() to throw an error about an unrecognized node type
when the subplan created from a ForeignPath includes the Result node,
because in the latter case the Result node was unintentionally
considered as async-capable, but we don't currently support executing
Result nodes asynchronously. Fix by modifying mark_async_capable_plan()
to disable asynchronous execution in such cases. Also, adjust code in
the ProjectionPath case in mark_async_capable_plan(), for consistency
with other cases, and adjust/improve comments there.
is_async_capable_path() added in commit
3 years ago |

92e7a53752
Remove inadequate assertion check in CTE inlining.
inline_cte() expected to find exactly as many references to the target CTE as its cterefcount indicates. While that should be accurate for the tree as emitted by the parser, there are some optimizations that occur upstream of here that could falsify it, notably removal of unused subquery output expressions. Trying to make the accounting 100% accurate seems expensive and doomed to future breakage. It's not really worth it, because all this code is protecting is downstream assumptions that every referenced CTE has a plan. Let's convert those assertions to regular test-and-elog just in case there's some actual problem, and then drop the failing assertion. Per report from Tomas Vondra (thanks also to Richard Guo for analysis). Back-patch to v12 where the faulty code came in. Discussion: https://postgr.es/m/29196a1e-ed47-c7ca-9be2-b1c636816183@enterprisedb.com |
3 years ago |

9d9c02ccd1
Teach planner and executor about monotonic window funcs
Window functions such as row_number() always return a value higher than the previously returned value for tuples in any given window partition. Traditionally queries such as; SELECT * FROM ( SELECT *, row_number() over (order by c) rn FROM t ) t WHERE rn <= 10; were executed fairly inefficiently. Neither the query planner nor the executor knew that once rn made it to 11 that nothing further would match the outer query's WHERE clause. It would blindly continue until all tuples were exhausted from the subquery. Here we implement means to make the above execute more efficiently. This is done by way of adding a pg_proc.prosupport function to various of the built-in window functions and adding supporting code to allow the support function to inform the planner if the window function is monotonically increasing, monotonically decreasing, both or neither. The planner is then able to make use of that information and possibly allow the executor to short-circuit execution by way of adding a "run condition" to the WindowAgg to allow it to determine if some of its execution work can be skipped. This "run condition" is not like a normal filter. These run conditions are only built using quals comparing values to monotonic window functions. For monotonic increasing functions, quals making use of the btree operators for <, <= and = can be used (assuming the window function column is on the left). You can see here that once such a condition becomes false that a monotonic increasing function could never make it subsequently true again. For monotonically decreasing functions the >, >= and = btree operators for the given type can be used for run conditions. The best-case situation for this is when there is a single WindowAgg node without a PARTITION BY clause. Here when the run condition becomes false the WindowAgg node can simply return NULL. No more tuples will ever match the run condition. It's a little more complex when there is a PARTITION BY clause. In this case, we cannot return NULL as we must still process other partitions. To speed this case up we pull tuples from the outer plan to check if they're from the same partition and simply discard them if they are. When we find a tuple belonging to another partition we start processing as normal again until the run condition becomes false or we run out of tuples to process. When there are multiple WindowAgg nodes to evaluate then this complicates the situation. For intermediate WindowAggs we must ensure we always return all tuples to the calling node. Any filtering done could lead to incorrect results in WindowAgg nodes above. For all intermediate nodes, we can still save some work when the run condition becomes false. We've no need to evaluate the WindowFuncs anymore. Other WindowAgg nodes cannot reference the value of these and these tuples will not appear in the final result anyway. The savings here are small in comparison to what can be saved in the top-level WingowAgg, but still worthwhile. Intermediate WindowAgg nodes never filter out tuples, but here we change WindowAgg so that the top-level WindowAgg filters out tuples that don't match the intermediate WindowAgg node's run condition. Such filters appear in the "Filter" clause in EXPLAIN for the top-level WindowAgg node. Here we add prosupport functions to allow the above to work for; row_number(), rank(), dense_rank(), count(*) and count(expr). It appears technically possible to do the same for min() and max(), however, it seems unlikely to be useful enough, so that's not done here. 
Bump catversion Author: David Rowley Reviewed-by: Andy Fan, Zhihong Yu Discussion: https://postgr.es/m/CAApHDvqvp3At8++yF8ij06sdcoo1S_b2YoaT9D4Nf+MObzsrLQ@mail.gmail.com |
3 years ago |
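The top-N pattern from the commit message, written out; once rn passes 10, the new run condition lets the WindowAgg stop pulling rows instead of exhausting the subquery (names are illustrative):

```sql
SELECT *
FROM (
    SELECT *, row_number() OVER (ORDER BY c) AS rn
    FROM t
) sub
WHERE rn <= 10;
```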

c2bb02bc2e
Allow asynchronous execution in more cases.
In commit
3 years ago |

7103ebb7aa
Add support for MERGE SQL command
MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targetting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. Author: Pavan Deolasee <pavan.deolasee@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Author: Amit Langote <amitlangote09@gmail.com> Author: Simon Riggs <simon.riggs@enterprisedb.com> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Andres Freund <andres@anarazel.de> (earlier versions) Reviewed-by: Peter Geoghegan <pg@bowt.ie> (earlier versions) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier versions) Reviewed-by: Japin Li <japinli@hotmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql |
4 years ago |
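The commit message's own example, reformatted for readability:

```sql
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
    UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
    DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
    INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
    DO NOTHING;
```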

e5691cc917
Don't use_physical_tlist for an IOS with non-returnable columns.
createplan.c tries to save a runtime projection step by specifying
a scan plan node's output as being exactly the table's columns, or
index's columns in the case of an index-only scan, if there is not a
reason to do otherwise. This logic did not previously pay attention
to whether an index's columns are returnable. That worked, sort of
accidentally, until commit
4 years ago |

27b77ecf9f
Update copyright for 2022
Backpatch-through: 10 |
4 years ago |

9a3ddeb519
Fix index-only scan plans, take 2.
Commit
4 years ago |

4ace456776
Fix index-only scan plans when not all index columns can be returned.
If an index has both returnable and non-returnable columns, and one of the non-returnable columns is an expression using a Var that is in a returnable column, then a query returning that expression could result in an index-only scan plan that attempts to read the non-returnable column, instead of recomputing the expression from the returnable column as intended. To fix, redefine the "indextlist" list of an IndexOnlyScan plan node as containing null Consts in place of any non-returnable columns. This solves the problem by preventing setrefs.c from falsely matching to such entries. The executor is happy since it only cares about the exposed types of the entries, and ruleutils.c doesn't care because a correct plan won't reference those entries. I considered some other ways to prevent setrefs.c from doing the wrong thing, but this way seems good since (a) it allows a very localized fix, (b) it makes the indextlist structure more compact in many cases, and (c) the indextlist is now a more faithful representation of what the index AM will actually produce, viz. nulls for any non-returnable columns. This is easier to hit since we introduced included columns, but it's possible to construct failing examples without that, as per the added regression test. Hence, back-patch to all supported branches. Per bug #17350 from Louis Jachiet. Discussion: https://postgr.es/m/17350-b5bdcf476e5badbb@postgresql.org |
4 years ago |

411137a429
Flush Memoize cache when non-key parameters change, take 2
It's possible that a subplan below a Memoize node contains a parameter from above the Memoize node. If this parameter changes then cache entries may become out-dated due to the new parameter value. Previously Memoize was mistakenly not aware of this. We fix this here by flushing the cache whenever a parameter that's not part of the cache key changes. Bug: #17213 Reported by: Elvis Pranskevichus Author: David Rowley Discussion: https://postgr.es/m/17213-988ed34b225a2862@postgresql.org Backpatch-through: 14, where Memoize was added |
4 years ago |

dad20ad470
Revert "Flush Memoize cache when non-key parameters change"
This reverts commit
4 years ago |

1050048a31
Flush Memoize cache when non-key parameters change
It's possible that a subplan below a Memoize node contains a parameter from above the Memoize node. If this parameter changes then cache entries may become out-dated due to the new parameter value. Previously Memoize was mistakenly not aware of this. We fix this here by flushing the cache whenever a parameter that's not part of the cache key changes. Bug: #17213 Reported by: Elvis Pranskevichus Author: David Rowley Discussion: https://postgr.es/m/17213-988ed34b225a2862@postgresql.org Backpatch-through: 14, where Memoize was added |
4 years ago |

e502150f7d
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key types to determine if the current set of parameters were the same as some previously cached set. Certain types such as floating points where -0.0 and +0.0 differ in their binary representation but are classed as equal by the hash equality operator may cause problems as unless the join uses the same operator it's possible that whichever join operator is being used would be able to distinguish the two values. In which case we may accidentally return in the incorrect rows out of the cache. To fix this here we add a binary mode to Memoize to allow it to the current set of parameters to previously cached values by comparing bit-by-bit rather than logically using the hash equality operator. This binary mode is always used for LATERAL joins and it's used for normal joins when any of the join operators are not hashable. Reported-by: Tom Lane Author: David Rowley Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us Backpatch-through: 14, where Memoize was added |
4 years ago |
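A small demonstration of the hazard the commit above addresses:

```sql
-- IEEE 754 comparison (and hence the hash equality operator) treats -0.0 and +0.0 as
-- equal, even though their binary representations differ:
SELECT float8 '-0.0' = float8 '0.0';   -- true
-- Memoize's new binary mode compares the raw parameter bytes instead; per the commit,
-- it is always used for LATERAL joins and whenever a join operator is not hashable.
```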

e3ec3c00d8
Remove arbitrary 64K-or-so limit on rangetable size.
Up to now the size of a query's rangetable has been limited by the constants INNER_VAR et al, which mustn't be equal to any real rangetable index. 65000 doubtless seemed like enough for anybody, and it still is orders of magnitude larger than the number of joins we can realistically handle. However, we need a rangetable entry for each child partition that is (or might be) processed by a query. Queries with a few thousand partitions are getting more realistic, so that the day when that limit becomes a problem is in sight, even if it's not here yet. Hence, let's raise the limit. Rather than just increase the values of INNER_VAR et al, this patch adopts the approach of making them small negative values, so that rangetables could theoretically become as long as INT_MAX. The bulk of the patch is concerned with changing Var.varno and some related variables from "Index" (unsigned int) to plain "int". This is basically cosmetic, with little actual effect other than to help debuggers print their values nicely. As such, I've only bothered with changing places that could actually see INNER_VAR et al, which the parser and most of the planner don't. We do have to be careful in places that are performing less/greater comparisons on varnos, but there are very few such places, other than the IS_SPECIAL_VARNO macro itself. A notable side effect of this patch is that while it used to be possible to add INNER_VAR et al to a Bitmapset, that will now draw an error. I don't see any likelihood that it wouldn't be a bug to include these fake varnos in a bitmapset of real varnos, so I think this is all to the good. Although this touches outfuncs/readfuncs, I don't think a catversion bump is required, since stored rules would never contain Vars with these fake varnos. Andrey Lepikhov and Tom Lane, after a suggestion by Peter Eisentraut Discussion: https://postgr.es/m/43c7f2f5-1e27-27aa-8c65-c91859d15190@postgrespro.ru |
4 years ago |

18fea737b5
Change NestPath node to contain JoinPath node
This makes the structure of all JoinPath-derived nodes the same, independent of whether they have additional fields. Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce@enterprisedb.com |
4 years ago |

2226b4189b
Change SeqScan node to contain Scan node
This makes the structure of all Scan-derived nodes the same, independent of whether they have additional fields. Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce@enterprisedb.com |
4 years ago |

83f4fcc655
Change the name of the Result Cache node to Memoize
"Result Cache" was never a great name for this node, but nobody managed to come up with another name that anyone liked enough. That was until David Johnston mentioned "Node Memoization", which Tom Lane revised to just "Memoize". People seem to like "Memoize", so let's do the rename. Reviewed-by: Justin Pryzby Discussion: https://postgr.es/m/20210708165145.GG1176@momjian.us Backpatch-through: 14, where Result Cache was introduced |
4 years ago |

955b3e0f92
Allow CustomScan providers to say whether they support projections.
Previously, all CustomScan providers had to support projections, but there may be cases where this is inconvenient. Add a flag bit to say if it's supported. Important item for the release notes: this is non-backwards-compatible since the default is now to assume that CustomScan providers can't project, instead of assuming that they can. It's fail-soft, but could result in visible performance penalties due to adding unnecessary Result nodes. Sven Klemm, reviewed by Aleksander Alekseev; some cosmetic fiddling by me. Discussion: https://postgr.es/m/CAMCrgp1kyakOz6c8aKhNDJXjhQ1dEjEnp+6KNT3KxPrjNtsrDg@mail.gmail.com |
4 years ago |

6ee41a301e
Fix mis-planning of repeated application of a projection.
create_projection_plan contains a hidden assumption (here made
explicit by an Assert) that a projection-capable Path will yield a
projection-capable Plan. Unfortunately, that assumption is violated
only a few lines away, by create_projection_plan itself. This means
that two stacked ProjectionPaths can yield an outcome where we try to
jam the upper path's tlist into a non-projection-capable child node,
resulting in an invalid plan.
There isn't any good reason to have stacked ProjectionPaths; indeed the
whole concept is faulty, since the set of Vars/Aggs/etc needed by the
upper one wouldn't necessarily be available in the output of the lower
one, nor could the lower one create such values if they weren't
available from its input. Hence, we can fix this by adjusting
create_projection_path to strip any top-level ProjectionPath from the
subpath it's given. (This amounts to saying "oh, we changed our
minds about what we need to project here".)
The test case added here only fails in v13 and HEAD; before that, we
don't attempt to shove the Sort into the parallel part of the plan,
for reasons that aren't entirely clear to me. However, all the
directly-related code looks generally the same as far back as v11,
where the hazard was introduced (by
4 years ago |

def5b065ff
Initial pgindent and pgperltidy run for v14.
Also "make reformat-dat-files". The only change worthy of note is that pgindent messed up the formatting of launcher.c's struct LogicalRepWorkerId, which led me to notice that that struct wasn't used at all anymore, so I just took it out. |
4 years ago |

049e1e2edb
Fix mishandling of resjunk columns in ON CONFLICT ... UPDATE tlists.
It's unusual to have any resjunk columns in an ON CONFLICT ... UPDATE list, but it can happen when MULTIEXPR_SUBLINK SubPlans are present. If it happens, the ON CONFLICT UPDATE code path would end up storing tuples that include the values of the extra resjunk columns. That's fairly harmless in the short run, but if new columns are added to the table then the values would become accessible, possibly leading to malfunctions if they don't match the datatypes of the new columns. This had escaped notice through a confluence of missing sanity checks, including * There's no cross-check that a tuple presented to heap_insert or heap_update matches the table rowtype. While it's difficult to check that fully at reasonable cost, we can easily add assertions that there aren't too many columns. * The output-column-assignment cases in execExprInterp.c lacked any sanity checks on the output column numbers, which seems like an oversight considering there are plenty of assertion checks on input column numbers. Add assertions there too. * We failed to apply nodeModifyTable's ExecCheckPlanOutput() to the ON CONFLICT UPDATE tlist. That wouldn't have caught this specific error, since that function is chartered to ignore resjunk columns; but it sure seems like a bad omission now that we've seen this bug. In HEAD, the right way to fix this is to make the processing of ON CONFLICT UPDATE tlists work the same as regular UPDATE tlists now do, that is don't add "SET x = x" entries, and use ExecBuildUpdateProjection to evaluate the tlist and combine it with old values of the not-set columns. This adds a little complication to ExecBuildUpdateProjection, but allows removal of a comparable amount of now-dead code from the planner. In the back branches, the most expedient solution seems to be to (a) use an output slot for the ON CONFLICT UPDATE projection that actually matches the target table, and then (b) invent a variant of ExecBuildProjectionInfo that can be told to not store values resulting from resjunk columns, so it doesn't try to store into nonexistent columns of the output slot. (We can't simply ignore the resjunk columns altogether; they have to be evaluated for MULTIEXPR_SUBLINK to work.) This works back to v10. In 9.6, projections work much differently and we can't cheaply give them such an option. The 9.6 version of this patch works by inserting a JunkFilter when it's necessary to get rid of resjunk columns. In addition, v11 and up have the reverse problem when trying to perform ON CONFLICT UPDATE on a partitioned table. Through a further oversight, adjust_partition_tlist() discarded resjunk columns when re-ordering the ON CONFLICT UPDATE tlist to match a partition. This accidentally prevented the storing-bogus-tuples problem, but at the cost that MULTIEXPR_SUBLINK cases didn't work, typically crashing if more than one row has to be updated. Fix by preserving resjunk columns in that routine. (I failed to resist the temptation to add more assertions there too, and to do some minor code beautification.) Per report from Andres Freund. Back-patch to all supported branches. Security: CVE-2021-32028 |
4 years ago |

3753982441
Fix planner failure in some cases of sorting by an aggregate.
An oversight introduced by the incremental-sort patches caused "could not find pathkey item to sort" errors in some situations where a sort key involves an aggregate or window function. The basic problem here is that find_em_expr_usable_for_sorting_rel isn't properly modeling what prepare_sort_from_pathkeys will do later. Rather than hoping we can keep those functions in sync, let's refactor so that they actually share the code for identifying a suitable sort expression. With this refactoring, tlist.c's tlist_member_ignore_relabel is unused. I removed it in HEAD but left it in place in v13, in case any extensions are using it. Per report from Luc Vlaming. Back-patch to v13 where the problem arose. James Coleman and Tom Lane Discussion: https://postgr.es/m/91f3ec99-85a4-fa55-ea74-33f85a5c651f@swarm64.com |
4 years ago |

9eacee2e62
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. 
Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com |
5 years ago |
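As the message notes, the feature ships behind a GUC; a one-line sketch of opting out (the node was later renamed to Memoize per commit 83f4fcc655 above, and the setting is now enable_memoize):

```sql
-- Fall back to pre-Result-Cache plan shapes, e.g. if distinct estimates prove unreliable.
SET enable_resultcache = off;
```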

28b3e3905c
Revert b6002a796
This removes "Add Result Cache executor node". It seems that something weird is going on with the tracking of cache hits and misses as highlighted by many buildfarm animals. It's not yet clear what the problem is as other parts of the plan indicate that the cache did work correctly, it's just the hits and misses that were being reported as 0. This is especially a bad time to have the buildfarm so broken, so reverting before too many more animals go red. Discussion: https://postgr.es/m/CAApHDvq_hydhfovm4=izgWs+C5HqEeRScjMbOgbpC-jRAeK3Yw@mail.gmail.com |
5 years ago |