mirror of https://github.com/postgres/postgres
Tag:
Branch:
Tree:
a6bc330192
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master
PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_2
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_BETA1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
${ noResults }
446 Commits (a6bc3301925e1a8ad1f58da629b9dd55bc4b8d9c)
Author | SHA1 | Message | Date |
---|---|---|---|
![]() |
a6bc330192 |
Add read support for some missing raw parse nodes
The node types A_Const, Constraint, and A_Expr had custom output functions, but no read functions were implemented so far. The A_Expr output format had to be tweaked a bit to make it easier to parse. Be a bit more cautious about applying strncmp to unterminated strings. Also error out if an unrecognized enum value is found in each case, instead of just printing a placeholder value. That was maybe ok for debugging but won't work if we want to have robust round-tripping. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://www.postgresql.org/message-id/flat/4159834.1657405226@sss.pgh.pa.us |
3 years ago |
![]() |
2d04277121 |
Make serialization of Nodes' scalar-array fields more robust.
When the ability to print variable-length-array fields was first added to outfuncs.c, there was no corresponding read capability, as it was used only for debug dumps of planner-internal Nodes. Not a lot of thought seems to have been put into the output format: it's just the space-separated array elements and nothing else. Later such fields appeared in Plan nodes, and still later we grew read support so that Plans could be transferred to parallel workers, but the original text format wasn't rethought. It seems inadequate to me because (a) no cross-check is possible that we got the right number of array entries, (b) we can't tell the difference between a NULL pointer and a zero-length array, and (c) except for WRITE_INDEX_ARRAY, we'd crash if a non-zero length is specified when the pointer is NULL, a situation that can arise in some fields that we currently conveniently avoid printing. Since we're currently in a campaign to make the Node infrastructure generally more it-just-works-without-thinking-about-it, now seems like a good time to improve this. Let's adopt a format similar to that used for Lists, that is "<>" for a NULL pointer or "(item item item)" otherwise. Also retool the code to not have so many copies of the identical logic. I bumped catversion out of an abundance of caution, although I think that we don't use any such array fields in Nodes that can get into the catalogs. Discussion: https://postgr.es/m/1528424.1658272135@sss.pgh.pa.us |
3 years ago |
![]() |
ff33a8c887 |
Remove artificial restrictions on which node types have out/read funcs.
The initial version of gen_node_support.pl manually excluded most utility statement node types from having out/read support, and also some raw-parse-tree-only node types. That was mostly to keep the output comparable to the old hand-maintained code. We'd like to have out/read support for utility statements, for debugging purposes and so that they can be included in new-style SQL functions; so it's time to lift that restriction. Most if not all of the previously-excluded raw-parse-tree-only node types can appear in expression subtrees of utility statements, so they have to be handled too. We don't quite have full read support yet; certain custom_read_write node types need to have their handwritten read functions implemented before that will work. Doing this allows us to drop the previous hack in _outQuery to not dump the utilityStmt field in most cases, which means we no longer need manually-maintained out/read functions for Query, so get rid of those in favor of auto-generating them. Fix a couple of omissions in gen_node_support.pl that are exposed through having to handle more node types. catversion bump forced because somebody was sloppy about the field order in the manually-maintained Query out/read functions. (Committers should note that almost all changes in parsenodes.h are now grounds for a catversion bump.) |
3 years ago |
![]() |
3cd0ac9878 |
Doc: rearrange high-level commentary about node support coverage.
copyfuncs.c and friends no longer seem like great places to put
high-level remarks about what's covered and what isn't. Move that
material to backend/nodes/README and other more-prominent places.
Add back (versions of) some remarks that disappeared in
|
3 years ago |
![]() |
2be87f092a |
Remove code sections obsoleted by node support automation
This removes the code sections that were ifdef'ed out by
|
3 years ago |
![]() |
964d01ae90 |
Automatically generate node support functions
Add a script to automatically generate the node support functions (copy, equal, out, and read, as well as the node tags enum) from the struct definitions. For each of the four node support files, it creates two include files, e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main file. All the scaffolding of the main file stays in place. I have tried to mostly make the coverage of the output match what is currently there. For example, one could now do out/read coverage of utility statement nodes, but I have manually excluded those for now. The reason is mainly that it's easier to diff the before and after, and adding a bunch of stuff like this might require a separate analysis and review. Subtyping (TidScan -> Scan) is supported. For the hard cases, you can just write a manual function and exclude generating one. For the not so hard cases, there is a way of annotating struct fields to get special behaviors. For example, pg_node_attr(equal_ignore) has the field ignored in equal functions. (In this patch, I have only ifdef'ed out the code to could be removed, mainly so that it won't constantly have merge conflicts. It will be deleted in a separate patch. All the code comments that are worth keeping from those sections have already been moved to the header files where the structs are defined.) Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com |
3 years ago |
![]() |
251154bebe |
Remove T_Join and T_Plan
These are abstract node types that don't need to have a node tag defined. Discussion: https://www.postgresql.org/message-id/2592455.1657140387%40sss.pgh.pa.us |
3 years ago |
![]() |
55b8ac8172 |
Fix wrong field order in _readMergeWhenClause().
We hadn't noticed this because it's dead code: there is no situation where we read raw parse trees from text format. So maybe the right fix is to remove the function altogether, but I'll forbear for now; it's not the only dead code in readfuncs.c, I think. Noted while comparing existing code to the results of Peter's auto-generation script. |
3 years ago |
![]() |
3ab9a63cb6 |
Rename JsonIsPredicate.value_type, fix JSON backend/nodes/ infrastructure.
I started out with the intention to rename value_type to item_type to avoid a collision with a typedef name that appears on some platforms. Along the way, I noticed that the adjacent field "format" was not being correctly handled by the backend/nodes/ infrastructure functions: copyfuncs.c erroneously treated it as a scalar, while equalfuncs, outfuncs, and readfuncs omitted handling it at all. This looks like it might be cosmetic at the moment because the field is always NULL after parse analysis; but that's likely a bug in itself, and the code's certainly not very future-proof. Let's fix it while we can still do so without forcing an initdb on beta testers. Further study found a few other inconsistencies in the backend/nodes/ infrastructure for the recently-added JSON node types, so fix those too. catversion bumped because of potential change in stored rules. Discussion: https://postgr.es/m/526703.1652385613@sss.pgh.pa.us |
3 years ago |
![]() |
51e8179405 |
Make node output prefix match node structure name
as done in
|
3 years ago |
![]() |
9d9c02ccd1 |
Teach planner and executor about monotonic window funcs
Window functions such as row_number() always return a value higher than the previously returned value for tuples in any given window partition. Traditionally queries such as; SELECT * FROM ( SELECT *, row_number() over (order by c) rn FROM t ) t WHERE rn <= 10; were executed fairly inefficiently. Neither the query planner nor the executor knew that once rn made it to 11 that nothing further would match the outer query's WHERE clause. It would blindly continue until all tuples were exhausted from the subquery. Here we implement means to make the above execute more efficiently. This is done by way of adding a pg_proc.prosupport function to various of the built-in window functions and adding supporting code to allow the support function to inform the planner if the window function is monotonically increasing, monotonically decreasing, both or neither. The planner is then able to make use of that information and possibly allow the executor to short-circuit execution by way of adding a "run condition" to the WindowAgg to allow it to determine if some of its execution work can be skipped. This "run condition" is not like a normal filter. These run conditions are only built using quals comparing values to monotonic window functions. For monotonic increasing functions, quals making use of the btree operators for <, <= and = can be used (assuming the window function column is on the left). You can see here that once such a condition becomes false that a monotonic increasing function could never make it subsequently true again. For monotonically decreasing functions the >, >= and = btree operators for the given type can be used for run conditions. The best-case situation for this is when there is a single WindowAgg node without a PARTITION BY clause. Here when the run condition becomes false the WindowAgg node can simply return NULL. No more tuples will ever match the run condition. It's a little more complex when there is a PARTITION BY clause. In this case, we cannot return NULL as we must still process other partitions. To speed this case up we pull tuples from the outer plan to check if they're from the same partition and simply discard them if they are. When we find a tuple belonging to another partition we start processing as normal again until the run condition becomes false or we run out of tuples to process. When there are multiple WindowAgg nodes to evaluate then this complicates the situation. For intermediate WindowAggs we must ensure we always return all tuples to the calling node. Any filtering done could lead to incorrect results in WindowAgg nodes above. For all intermediate nodes, we can still save some work when the run condition becomes false. We've no need to evaluate the WindowFuncs anymore. Other WindowAgg nodes cannot reference the value of these and these tuples will not appear in the final result anyway. The savings here are small in comparison to what can be saved in the top-level WingowAgg, but still worthwhile. Intermediate WindowAgg nodes never filter out tuples, but here we change WindowAgg so that the top-level WindowAgg filters out tuples that don't match the intermediate WindowAgg node's run condition. Such filters appear in the "Filter" clause in EXPLAIN for the top-level WindowAgg node. Here we add prosupport functions to allow the above to work for; row_number(), rank(), dense_rank(), count(*) and count(expr). It appears technically possible to do the same for min() and max(), however, it seems unlikely to be useful enough, so that's not done here. Bump catversion Author: David Rowley Reviewed-by: Andy Fan, Zhihong Yu Discussion: https://postgr.es/m/CAApHDvqvp3At8++yF8ij06sdcoo1S_b2YoaT9D4Nf+MObzsrLQ@mail.gmail.com |
3 years ago |
![]() |
c2bb02bc2e |
Allow asynchronous execution in more cases.
In commit
|
3 years ago |
![]() |
fadb48b00e |
PLAN clauses for JSON_TABLE
These clauses allow the user to specify how data from nested paths are joined, allowing considerable freedom in shaping the tabular output of JSON_TABLE. PLAN DEFAULT allows the user to specify the global strategies when dealing with sibling or child nested paths. The is often sufficient to achieve the necessary goal, and is considerably simpler than the full PLAN clause, which allows the user to specify the strategy to be used for each named nested path. Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zhihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/7e2cb85d-24cf-4abb-30a5-1a33715959bd@postgrespro.ru |
3 years ago |
![]() |
4e34747c88 |
JSON_TABLE
This feature allows jsonb data to be treated as a table and thus used in a FROM clause like other tabular data. Data can be selected from the jsonb using jsonpath expressions, and hoisted out of nested structures in the jsonb to form multiple rows, more or less like an outer join. Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zhihong Yu (whose name I previously misspelled), Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/7e2cb85d-24cf-4abb-30a5-1a33715959bd@postgrespro.ru |
3 years ago |
![]() |
1a36bc9dba |
SQL/JSON query functions
This introduces the SQL/JSON functions for querying JSON data using jsonpath expressions. The functions are: JSON_EXISTS() JSON_QUERY() JSON_VALUE() All of these functions only operate on jsonb. The workaround for now is to cast the argument to jsonb. JSON_EXISTS() tests if the jsonpath expression applied to the jsonb value yields any values. JSON_VALUE() must return a single value, and an error occurs if it tries to return multiple values. JSON_QUERY() must return a json object or array, and there are various WRAPPER options for handling scalar or multi-value results. Both these functions have options for handling EMPTY and ERROR conditions. Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru |
3 years ago |
![]() |
33a377608f |
IS JSON predicate
This patch intrdocuces the SQL standard IS JSON predicate. It operates on text and bytea values representing JSON as well as on the json and jsonb types. Each test has an IS and IS NOT variant. The tests are: IS JSON [VALUE] IS JSON ARRAY IS JSON OBJECT IS JSON SCALAR IS JSON WITH | WITHOUT UNIQUE KEYS These are mostly self-explanatory, but note that IS JSON WITHOUT UNIQUE KEYS is true whenever IS JSON is true, and IS JSON WITH UNIQUE KEYS is true whenever IS JSON is true except it IS JSON OBJECT is true and there are duplicate keys (which is never the case when applied to jsonb values). Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru |
3 years ago |
![]() |
7103ebb7aa
|
Add support for MERGE SQL command
MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targetting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. Author: Pavan Deolasee <pavan.deolasee@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Author: Amit Langote <amitlangote09@gmail.com> Author: Simon Riggs <simon.riggs@enterprisedb.com> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Andres Freund <andres@anarazel.de> (earlier versions) Reviewed-by: Peter Geoghegan <pg@bowt.ie> (earlier versions) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier versions) Reviewed-by: Japin Li <japinli@hotmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql |
3 years ago |
![]() |
f4fb45d15c |
SQL/JSON constructors
This patch introduces the SQL/JSON standard constructors for JSON: JSON() JSON_ARRAY() JSON_ARRAYAGG() JSON_OBJECT() JSON_OBJECTAGG() For the most part these functions provide facilities that mimic existing json/jsonb functions. However, they also offer some useful additional functionality. In addition to text input, the JSON() function accepts bytea input, which it will decode and constuct a json value from. The other functions provide useful options for handling duplicate keys and null values. This series of patches will be followed by a consolidated documentation patch. Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru |
3 years ago |
![]() |
f79b803dcc |
Common SQL/JSON clauses
This introduces some of the building blocks used by the SQL/JSON constructor and query functions. Specifically, it provides node executor and grammar support for the FORMAT JSON [ENCODING foo] clause, and values decorated with it, and for the RETURNING clause. The following SQL/JSON patches will leverage these. Nikita Glukhov (who probably deserves an award for perseverance). Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru |
3 years ago |
![]() |
1460fc5942 |
Revert "Common SQL/JSON clauses"
This reverts commit
|
3 years ago |
![]() |
865fe4d5df |
Common SQL/JSON clauses
This introduces some of the building blocks used by the SQL/JSON constructor and query functions. Specifically, it provides node executor and grammar support for the FORMAT JSON [ENCODING foo] clause, and values decorated with it, and for the RETURNING clause. The following SQL/JSON patches will leverage these. Nikita Glukhov (who probably deserves an award for perseverance). Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup. Erik Rijkers, Zihong Yu and Himanshu Upadhyaya. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru |
3 years ago |
![]() |
27b77ecf9f |
Update copyright for 2022
Backpatch-through: 10 |
4 years ago |
![]() |
9a3ddeb519 |
Fix index-only scan plans, take 2.
Commit |
4 years ago |
![]() |
4965f75484 |
Remove unused include
"utils/builtins.h" was used for pg_strtouint64(), added by |
4 years ago |
![]() |
2f4fd1a73b |
Remove unused include
"fmgr.h" was used for load_external_function(), added by |
4 years ago |
![]() |
3c6f8c011f |
Simplify the general-purpose 64-bit integer parsing APIs
pg_strtouint64() is a wrapper around strtoull/strtoul/_strtoui64, but it seems no longer necessary to have this indirection. msvc/Solution.pm claims HAVE_STRTOULL, so the "MSVC only" part seems unnecessary. Also, we have code in c.h to substitute alternatives for strtoull() if not found, and that would appear to cover all currently supported platforms, so having a further fallback in pg_strtouint64() seems unnecessary. Therefore, we could remove pg_strtouint64(), and use strtoull() directly in all call sites. However, it seems useful to keep a separate notation for parsing exactly 64-bit integers, matching the type definition int64/uint64. For that, add new macros strtoi64() and strtou64() in c.h as thin wrappers around strtol()/strtoul() or strtoll()/stroull(). This makes these functions available everywhere instead of just in the server code, and it makes the function naming notably different from the pg_strtointNN() functions in numutils.c, which have a different API. Discussion: https://www.postgresql.org/message-id/flat/a3df47c9-b1b4-29f2-7e91-427baf8b75a3%40enterprisedb.com |
4 years ago |
![]() |
411137a429 |
Flush Memoize cache when non-key parameters change, take 2
It's possible that a subplan below a Memoize node contains a parameter from above the Memoize node. If this parameter changes then cache entries may become out-dated due to the new parameter value. Previously Memoize was mistakenly not aware of this. We fix this here by flushing the cache whenever a parameter that's not part of the cache key changes. Bug: #17213 Reported by: Elvis Pranskevichus Author: David Rowley Discussion: https://postgr.es/m/17213-988ed34b225a2862@postgresql.org Backpatch-through: 14, where Memoize was added |
4 years ago |
![]() |
e502150f7d |
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key types to determine if the current set of parameters were the same as some previously cached set. Certain types such as floating points where -0.0 and +0.0 differ in their binary representation but are classed as equal by the hash equality operator may cause problems as unless the join uses the same operator it's possible that whichever join operator is being used would be able to distinguish the two values. In which case we may accidentally return in the incorrect rows out of the cache. To fix this here we add a binary mode to Memoize to allow it to the current set of parameters to previously cached values by comparing bit-by-bit rather than logically using the hash equality operator. This binary mode is always used for LATERAL joins and it's used for normal joins when any of the join operators are not hashable. Reported-by: Tom Lane Author: David Rowley Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us Backpatch-through: 14, where Memoize was added |
4 years ago |
![]() |
e3ec3c00d8 |
Remove arbitrary 64K-or-so limit on rangetable size.
Up to now the size of a query's rangetable has been limited by the constants INNER_VAR et al, which mustn't be equal to any real rangetable index. 65000 doubtless seemed like enough for anybody, and it still is orders of magnitude larger than the number of joins we can realistically handle. However, we need a rangetable entry for each child partition that is (or might be) processed by a query. Queries with a few thousand partitions are getting more realistic, so that the day when that limit becomes a problem is in sight, even if it's not here yet. Hence, let's raise the limit. Rather than just increase the values of INNER_VAR et al, this patch adopts the approach of making them small negative values, so that rangetables could theoretically become as long as INT_MAX. The bulk of the patch is concerned with changing Var.varno and some related variables from "Index" (unsigned int) to plain "int". This is basically cosmetic, with little actual effect other than to help debuggers print their values nicely. As such, I've only bothered with changing places that could actually see INNER_VAR et al, which the parser and most of the planner don't. We do have to be careful in places that are performing less/greater comparisons on varnos, but there are very few such places, other than the IS_SPECIAL_VARNO macro itself. A notable side effect of this patch is that while it used to be possible to add INNER_VAR et al to a Bitmapset, that will now draw an error. I don't see any likelihood that it wouldn't be a bug to include these fake varnos in a bitmapset of real varnos, so I think this is all to the good. Although this touches outfuncs/readfuncs, I don't think a catversion bump is required, since stored rules would never contain Vars with these fake varnos. Andrey Lepikhov and Tom Lane, after a suggestion by Peter Eisentraut Discussion: https://postgr.es/m/43c7f2f5-1e27-27aa-8c65-c91859d15190@postgrespro.ru |
4 years ago |
![]() |
e581360696 |
Make node output prefix match node structure name
In most cases, the prefix string in a node output is the upper case of the node structure name, e.g., MergeAppend -> MERGEAPPEND. There were a few exceptions that for either no apparent reason or perhaps minor aesthetic reasons deviated from this. In order to simplify this and perhaps allow automatic generation without having to deal with exception cases, make them all match. Discussion: https://www.postgresql.org/message-id/c091e5cd-45f8-69ee-6a9b-de86912cc7e7@enterprisedb.com |
4 years ago |
![]() |
2226b4189b |
Change SeqScan node to contain Scan node
This makes the structure of all Scan-derived nodes the same, independent of whether they have additional fields. Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce@enterprisedb.com |
4 years ago |
![]() |
83f4fcc655 |
Change the name of the Result Cache node to Memoize
"Result Cache" was never a great name for this node, but nobody managed to come up with another name that anyone liked enough. That was until David Johnston mentioned "Node Memoization", which Tom Lane revised to just "Memoize". People seem to like "Memoize", so let's do the rename. Reviewed-by: Justin Pryzby Discussion: https://postgr.es/m/20210708165145.GG1176@momjian.us Backpatch-through: 14, where Result Cache was introduced |
4 years ago |
![]() |
29f45e299e |
Use a hash table to speed up NOT IN(values)
Similar to
|
4 years ago |
![]() |
a2dee328bb |
Standardize nodes/*funcs.c cosmetics for ForeignScan.resultRelation.
catversion bump due to readfuncs.c field order change. |
4 years ago |
![]() |
049e1e2edb |
Fix mishandling of resjunk columns in ON CONFLICT ... UPDATE tlists.
It's unusual to have any resjunk columns in an ON CONFLICT ... UPDATE list, but it can happen when MULTIEXPR_SUBLINK SubPlans are present. If it happens, the ON CONFLICT UPDATE code path would end up storing tuples that include the values of the extra resjunk columns. That's fairly harmless in the short run, but if new columns are added to the table then the values would become accessible, possibly leading to malfunctions if they don't match the datatypes of the new columns. This had escaped notice through a confluence of missing sanity checks, including * There's no cross-check that a tuple presented to heap_insert or heap_update matches the table rowtype. While it's difficult to check that fully at reasonable cost, we can easily add assertions that there aren't too many columns. * The output-column-assignment cases in execExprInterp.c lacked any sanity checks on the output column numbers, which seems like an oversight considering there are plenty of assertion checks on input column numbers. Add assertions there too. * We failed to apply nodeModifyTable's ExecCheckPlanOutput() to the ON CONFLICT UPDATE tlist. That wouldn't have caught this specific error, since that function is chartered to ignore resjunk columns; but it sure seems like a bad omission now that we've seen this bug. In HEAD, the right way to fix this is to make the processing of ON CONFLICT UPDATE tlists work the same as regular UPDATE tlists now do, that is don't add "SET x = x" entries, and use ExecBuildUpdateProjection to evaluate the tlist and combine it with old values of the not-set columns. This adds a little complication to ExecBuildUpdateProjection, but allows removal of a comparable amount of now-dead code from the planner. In the back branches, the most expedient solution seems to be to (a) use an output slot for the ON CONFLICT UPDATE projection that actually matches the target table, and then (b) invent a variant of ExecBuildProjectionInfo that can be told to not store values resulting from resjunk columns, so it doesn't try to store into nonexistent columns of the output slot. (We can't simply ignore the resjunk columns altogether; they have to be evaluated for MULTIEXPR_SUBLINK to work.) This works back to v10. In 9.6, projections work much differently and we can't cheaply give them such an option. The 9.6 version of this patch works by inserting a JunkFilter when it's necessary to get rid of resjunk columns. In addition, v11 and up have the reverse problem when trying to perform ON CONFLICT UPDATE on a partitioned table. Through a further oversight, adjust_partition_tlist() discarded resjunk columns when re-ordering the ON CONFLICT UPDATE tlist to match a partition. This accidentally prevented the storing-bogus-tuples problem, but at the cost that MULTIEXPR_SUBLINK cases didn't work, typically crashing if more than one row has to be updated. Fix by preserving resjunk columns in that routine. (I failed to resist the temptation to add more assertions there too, and to do some minor code beautification.) Per report from Andres Freund. Back-patch to all supported branches. Security: CVE-2021-32028 |
4 years ago |
![]() |
50e17ad281 |
Speedup ScalarArrayOpExpr evaluation
ScalarArrayOpExprs with "useOr=true" and a set of Consts on the righthand side have traditionally been evaluated by using a linear search over the array. When these arrays contain large numbers of elements then this linear search could become a significant part of execution time. Here we add a new method of evaluating ScalarArrayOpExpr expressions to allow them to be evaluated by first building a hash table containing each element, then on subsequent evaluations, we just probe that hash table to determine if there is a match. The planner is in charge of determining when this optimization is possible and it enables it by setting hashfuncid in the ScalarArrayOpExpr. The executor will only perform the hash table evaluation when the hashfuncid is set. This means that not all cases are optimized. For example CHECK constraints containing an IN clause won't go through the planner, so won't get the hashfuncid set. We could maybe do something about that at some later date. The reason we're not doing it now is from fear that we may slow down cases where the expression is evaluated only once. Those cases can be common, for example, a single row INSERT to a table with a CHECK constraint containing an IN clause. In the planner, we enable this when there are suitable hash functions for the ScalarArrayOpExpr's operator and only when there is at least MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements in the array. The threshold is currently set to 9. Author: James Coleman, David Rowley Reviewed-by: David Rowley, Tomas Vondra, Heikki Linnakangas Discussion: https://postgr.es/m/CAAaqYe8x62+=wn0zvNKCj55tPpg-JBHzhZFFc6ANovdqFw7-dA@mail.gmail.com |
4 years ago |
![]() |
e717a9a18b |
SQL-standard function body
This adds support for writing CREATE FUNCTION and CREATE PROCEDURE statements for language SQL with a function body that conforms to the SQL standard and is portable to other implementations. Instead of the PostgreSQL-specific AS $$ string literal $$ syntax, this allows writing out the SQL statements making up the body unquoted, either as a single statement: CREATE FUNCTION add(a integer, b integer) RETURNS integer LANGUAGE SQL RETURN a + b; or as a block CREATE PROCEDURE insert_data(a integer, b integer) LANGUAGE SQL BEGIN ATOMIC INSERT INTO tbl VALUES (a); INSERT INTO tbl VALUES (b); END; The function body is parsed at function definition time and stored as expression nodes in a new pg_proc column prosqlbody. So at run time, no further parsing is required. However, this form does not support polymorphic arguments, because there is no more parse analysis done at call time. Dependencies between the function and the objects it uses are fully tracked. A new RETURN statement is introduced. This can only be used inside function bodies. Internally, it is treated much like a SELECT statement. psql needs some new intelligence to keep track of function body boundaries so that it doesn't send off statements when it sees semicolons that are inside a function body. Tested-by: Jaime Casanova <jcasanov@systemguards.com.ec> Reviewed-by: Julien Rouhaud <rjuju123@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/1c11f1eb-f00c-43b7-799d-2d44132c02d7@2ndquadrant.com |
4 years ago |
![]() |
9eacee2e62 |
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com |
4 years ago |
![]() |
28b3e3905c |
Revert b6002a796
This removes "Add Result Cache executor node". It seems that something weird is going on with the tracking of cache hits and misses as highlighted by many buildfarm animals. It's not yet clear what the problem is as other parts of the plan indicate that the cache did work correctly, it's just the hits and misses that were being reported as 0. This is especially a bad time to have the buildfarm so broken, so reverting before too many more animals go red. Discussion: https://postgr.es/m/CAApHDvq_hydhfovm4=izgWs+C5HqEeRScjMbOgbpC-jRAeK3Yw@mail.gmail.com |
4 years ago |
![]() |
b6002a796d |
Add Result Cache executor node
Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com |
4 years ago |
![]() |
86dc90056d |
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com |
4 years ago |
![]() |
055fee7eb4 |
Allow an alias to be attached to a JOIN ... USING
This allows something like SELECT ... FROM t1 JOIN t2 USING (a, b, c) AS x where x has the columns a, b, c and unlike a regular alias it does not hide the range variables of the tables being joined t1 and t2. Per SQL:2016 feature F404 "Range variable for common column names". Reviewed-by: Vik Fearing <vik.fearing@2ndquadrant.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://www.postgresql.org/message-id/flat/454638cf-d563-ab76-a585-2564428062af@2ndquadrant.com |
4 years ago |
![]() |
27e1f14563 |
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com |
4 years ago |
![]() |
26acb54a13 |
Revert "Enable parallel SELECT for "INSERT INTO ... SELECT ..."."
To allow inserts in parallel-mode this feature has to ensure that all the constraints, triggers, etc. are parallel-safe for the partition hierarchy which is costly and we need to find a better way to do that. Additionally, we could have used existing cached information in some cases like indexes, domains, etc. to determine the parallel-safety. List of commits reverted, in reverse chronological order: |
4 years ago |
![]() |
be45be9c33 |
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are duplicate. This is especially common with CUBE and ROLLUP clauses. For example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to GROUP BY GROUPING SETS ( (a, b, c), (a, b, c), (a, b, c), (a, b), (a, b), (a, b), (a), (a), (a), (c, a), (c, a), (c, a), (c), (b, c), (b), () ) Some of the grouping sets are calculated multiple times, which is mostly unnecessary. This commit implements a new GROUP BY DISTINCT feature, as defined in the SQL standard, which eliminates the duplicate sets. Author: Vik Fearing Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org |
4 years ago |
![]() |
05c8482f7f |
Enable parallel SELECT for "INSERT INTO ... SELECT ...".
Parallel SELECT can't be utilized for INSERT in the following cases: - INSERT statement uses the ON CONFLICT DO UPDATE clause - Target table has a parallel-unsafe: trigger, index expression or predicate, column default expression or check constraint - Target table has a parallel-unsafe domain constraint on any column - Target table is a partitioned table with a parallel-unsafe partition key expression or support function The planner is updated to perform additional parallel-safety checks for the cases listed above, for determining whether it is safe to run INSERT in parallel-mode with an underlying parallel SELECT. The planner will consider using parallel SELECT for "INSERT INTO ... SELECT ...", provided nothing unsafe is found from the additional parallel-safety checks, or from the existing parallel-safety checks for SELECT. While checking parallel-safety, we need to check it for all the partitions on the table which can be costly especially when we decide not to use a parallel plan. So, in a separate patch, we will introduce a GUC and or a reloption to enable/disable parallelism for Insert statements. Prior to entering parallel-mode for the execution of INSERT with parallel SELECT, a TransactionId is acquired and assigned to the current transaction state. This is necessary to prevent the INSERT from attempting to assign the TransactionId whilst in parallel-mode, which is not allowed. This approach has a disadvantage in that if the underlying SELECT does not return any rows, then the TransactionId is not used, however that shouldn't happen in practice in many cases. Author: Greg Nancarrow, Amit Langote, Amit Kapila Reviewed-by: Amit Langote, Hou Zhijie, Takayuki Tsunakawa, Antonin Houska, Bharath Rupireddy, Dilip Kumar, Vignesh C, Zhihong Yu, Amit Kapila Tested-by: Tang, Haiying Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com Discussion: https://postgr.es/m/CAJcOf-fAdj=nDKMsRhQzndm-O13NY4dL6xGcEvdX5Xvbbi0V7g@mail.gmail.com |
4 years ago |
![]() |
977b2c0853 |
Add missing TidRangeScan readfunc
Mistakenly forgotten in
|
4 years ago |
![]() |
3696a600e2 |
SEARCH and CYCLE clauses
This adds the SQL standard feature that adds the SEARCH and CYCLE clauses to recursive queries to be able to do produce breadth- or depth-first search orders and detect cycles. These clauses can be rewritten into queries using existing syntax, and that is what this patch does in the rewriter. Reviewed-by: Vik Fearing <vik@postgresfriends.org> Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/db80ceee-6f97-9b4a-8ee8-3ba0c58e5be2@2ndquadrant.com |
4 years ago |
![]() |
ca3b37487b |
Update copyright for 2021
Backpatch-through: 9.5 |
5 years ago |
![]() |
c7aba7c14e |
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any data type can be subscripted, if it provides a handler function to define what that means. Traditional variable-length (varlena) arrays all use array_subscript_handler(), while the existing fixed-length types that support subscripting use raw_array_subscript_handler(). It's expected that other types that want to use subscripting notation will define their own handlers. (This patch provides no such new features, though; it only lays the foundation for them.) To do this, move the parser's semantic processing of subscripts (including coercion to whatever data type is required) into a method callback supplied by the handler. On the execution side, replace the ExecEvalSubscriptingRef* layer of functions with direct calls to callback-supplied execution routines. (Thus, essentially no new run-time overhead should be caused by this patch. Indeed, there is room to remove some overhead by supplying specialized execution routines. This patch does a little bit in that line, but more could be done.) Additional work is required here and there to remove formerly hard-wired assumptions about the result type, collation, etc of a SubscriptingRef expression node; and to remove assumptions that the subscript values must be integers. One useful side-effect of this is that we now have a less squishy mechanism for identifying whether a data type is a "true" array: instead of wiring in weird rules about typlen, we can look to see if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this to be bulletproof, we have to forbid user-defined types from using that handler directly; but there seems no good reason for them to do so. This patch also removes assumptions that the number of subscripts is limited to MAXDIM (6), or indeed has any hard-wired limit. That limit still applies to types handled by array_subscript_handler or raw_array_subscript_handler, but to discourage other dependencies on this constant, I've moved it from c.h to utils/array.h. Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov, Peter Eisentraut, Pavel Stehule Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com |
5 years ago |