mirror of https://github.com/postgres/postgres
Tag:
Branch:
Tree:
17b6f39f19
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
REL_18_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master
PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_22
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_19
REL_14_2
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_14
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_10
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_6
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_BETA1
REL_18_BETA2
REL_18_BETA3
REL_18_RC1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
${ noResults }
209 Commits (17b6f39f19b1a54baac36f38b49067b0ce9fd7b0)
Author | SHA1 | Message | Date |
---|---|---|---|
![]() |
17b6f39f19 |
Fix indentation in ExecParallelHashIncreaseNumBatches()
Backpatch-through: 12 |
2 years ago |
![]() |
60de25c6ef |
Fix oversized memory allocation in Parallel Hash Join
During the calculations of the maximum for the number of buckets, take into account that later we round that to the next power of 2. Reported-by: Karen Talarico Bug: #16925 Discussion: https://postgr.es/m/16925-ec96d83529d0d629%40postgresql.org Author: Thomas Munro, Andrei Lepikhov, Alexander Korotkov Reviewed-by: Alena Rybakina Backpatch-through: 12 |
2 years ago |
![]() |
6e94d62e3f |
Fix race in parallel hash join batch cleanup, take II.
With unlucky timing and parallel_leader_participation=off (not the
default), PHJ could attempt to access per-batch shared state just as it
was being freed. There was code intended to prevent that by checking
for a cleared pointer, but it was racy. Fix, by introducing an extra
barrier phase. The new phase PHJ_BUILD_RUNNING means that it's safe to
access the per-batch state to find a batch to help with, and
PHJ_BUILD_DONE means that it is too late. The last to detach will free
the array of per-batch state as before, but now it will also atomically
advance the phase, so that late attachers can avoid the hazard. This
mirrors the way per-batch hash tables are freed (see phases
PHJ_BATCH_PROBING and PHJ_BATCH_DONE).
An earlier attempt to fix this (commit
|
3 years ago |
![]() |
4878ea717c |
Avoid misbehavior when hash_table_bytes < bucket_size.
It's possible to reach this case when work_mem is very small and tupsize
is (relatively) very large. In that case ExecChooseHashTableSize would
get an assertion failure, or with asserts off it'd compute nbuckets = 0,
which'd likely cause misbehavior later (I've not checked). To fix,
clamp the number of buckets to be at least 1.
This is due to faulty conversion of old my_log2() coding in
|
3 years ago |
![]() |
2b8f3f5a7c |
Get rid of artificial restriction on hash table sizes on Windows.
The point of introducing the hash_mem_multiplier GUC was to let users reproduce the old behavior of hash aggregation, i.e. that it could use more than work_mem at need. However, the implementation failed to get the job done on Win64, where work_mem is clamped to 2GB to protect various places that calculate memory sizes using "long int". As written, the same clamp was applied to hash_mem. This resulted in severe performance regressions for queries requiring a bit more than 2GB for hash aggregation, as they now spill to disk and there's no way to stop that. Getting rid of the work_mem restriction seems like a good idea, but it's a big job and could not conceivably be back-patched. However, there's only a fairly small number of places that are concerned with the hash_mem value, and it turns out to be possible to remove the restriction there without too much code churn or any ABI breaks. So, let's do that for now to fix the regression, and leave the larger task for another day. This patch does introduce a bit more infrastructure that should help with the larger task, namely pg_bitutils.h support for working with size_t values. Per gripe from Laurent Hasson. Back-patch to v13 where the behavior change came in. Discussion: https://postgr.es/m/997817.1627074924@sss.pgh.pa.us Discussion: https://postgr.es/m/MN2PR15MB25601E80A9B6D1BA6F592B1985E39@MN2PR15MB2560.namprd15.prod.outlook.com |
4 years ago |
![]() |
69739ec317 |
Revert "Fix race in Parallel Hash Join batch cleanup."
This reverts commit
|
5 years ago |
![]() |
4e0f0995e9 |
Fix race in Parallel Hash Join batch cleanup.
With very unlucky timing and parallel_leader_participation off, PHJ could attempt to access per-batch state just as it was being freed. There was code intended to prevent that by checking for a cleared pointer, but it was buggy. Fix, by introducing an extra barrier phase. The new phase PHJ_BUILD_RUNNING means that it's safe to access the per-batch state to find a batch to help with, and PHJ_BUILD_DONE means that it is too late. The last to detach will free the array of per-batch state as before, but now it will also atomically advance the phase at the same time, so that late attachers can avoid the hazard, without the data race. This mirrors the way per-batch hash tables are freed (see phases PHJ_BATCH_PROBING and PHJ_BATCH_DONE). Revealed by a one-off build farm failure, where BarrierAttach() failed a sanity check assertion, because the memory had been clobbered by dsa_free(). Back-patch to 11, where the code arrived. Reported-by: Michael Paquier <michael@paquier.xyz> Discussion: https://postgr.es/m/20200929061142.GA29096%40paquier.xyz |
5 years ago |
![]() |
78530c8e7a |
Add hash_mem_multiplier GUC.
Add a GUC that acts as a multiplier on work_mem. It gets applied when
sizing executor node hash tables that were previously size constrained
using work_mem alone.
The new GUC can be used to preferentially give hash-based nodes more
memory than the generic work_mem limit. It is intended to enable admin
tuning of the executor's memory usage. Overall system throughput and
system responsiveness can be improved by giving hash-based executor
nodes more memory (especially over sort-based alternatives, which are
often much less sensitive to being memory constrained).
The default value for hash_mem_multiplier is 1.0, which is also the
minimum valid value. This means that hash-based nodes continue to apply
work_mem in the traditional way by default.
hash_mem_multiplier is generally useful. However, it is being added now
due to concerns about hash aggregate performance stability for users
that upgrade to Postgres 13 (which added disk-based hash aggregation in
commit
|
5 years ago |
![]() |
3048898e73 |
Mop-up for wait event naming issues.
Synchronize the event names for parallel hash join waits with other event names, by getting rid of the slashes and dropping "-ing" suffixes. Rename ClogGroupUpdate to XactGroupUpdate, to match the new SLRU name. Move the ProcSignalBarrier event to the IPC category; it doesn't belong under IO. Also a bit more wordsmithing in the wait event documentation tables. Discussion: https://postgr.es/m/4505.1589640417@sss.pgh.pa.us |
5 years ago |
![]() |
17cc133f01
|
Dial back -Wimplicit-fallthrough to level 3
The additional pain from level 4 is excessive for the gain. Also revert all the source annotation changes to their original wordings, to avoid back-patching pain. Discussion: https://postgr.es/m/31166.1589378554@sss.pgh.pa.us |
5 years ago |
![]() |
3e9744465d
|
Add -Wimplicit-fallthrough to CFLAGS and CXXFLAGS
Use it at level 4, a bit more restrictive than the default level, and tweak our commanding comments to FALLTHROUGH. (However, leave zic.c alone, since it's external code; to avoid the warnings that would appear there, change CFLAGS for that file in the Makefile.) Author: Julien Rouhaud <rjuju123@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/20200412081825.qyo5vwwco3fv4gdo@nol Discussion: https://postgr.es/m/flat/E1fDenm-0000C8-IJ@gemulon.postgresql.org |
5 years ago |
![]() |
969f9d0b4b |
Make EXPLAIN report maximum hashtable usage across multiple rescans.
Before discarding the old hash table in ExecReScanHashJoin, capture its statistics, ensuring that we report the maximum hashtable size across repeated rescans of the hash input relation. We can repurpose the existing code for reporting hashtable size in parallel workers to help with this, making the patch pretty small. This also ensures that if rescans happen within parallel workers, we get the correct maximums across all instances. Konstantin Knizhnik and Tom Lane, per diagnosis by Thomas Munro of a trouble report from Alvaro Herrera. Discussion: https://postgr.es/m/20200323165059.GA24950@alvherre.pgsql |
6 years ago |
![]() |
02a2e8b442 |
Modify additional power 2 calculations to use new helper functions
2nd pass of modifying various places which obtain the next power
of 2 of a number and make them use the new functions added in
|
6 years ago |
![]() |
7559d8ebfa |
Update copyrights for 2020
Backpatch-through: update all files in master, backpatch legal files through 9.4 |
6 years ago |
![]() |
e69d644547 |
Rotate instead of shifting hash join batch number.
Our algorithm for choosing batch numbers turned out not to work effectively for multi-billion key inner relations. We would use more hash bits than we have, and effectively concentrate all tuples into a smaller number of batches than we intended. While ideally we should switch to wider hashes, for now, change the algorithm to one that effectively gives up bits from the bucket number when we don't have enough bits. That means we'll finish up with longer bucket chains than would be ideal, but that's better than having batches that don't fit in work_mem and can't be divided. Batch-patch to all supported releases. Author: Thomas Munro Reviewed-by: Tom Lane, thanks also to Tomas Vondra, Alvaro Herrera, Andres Freund for testing and discussion Reported-by: James Coleman Discussion: https://postgr.es/m/16104-dc11ed911f1ab9df%40postgresql.org |
6 years ago |
![]() |
14aec03502 |
Make the order of the header file includes consistent in backend modules.
Similar to commits |
6 years ago |
![]() |
2abd7ae9b2 |
Fix representation of hash keys in Hash/HashJoin nodes.
In
|
6 years ago |
![]() |
3412030205 |
Fix more typos and inconsistencies in the tree
Author: Alexander Lakhin Discussion: https://postgr.es/m/0a5419ea-1452-a4e6-72ff-545b1a5a8076@gmail.com |
6 years ago |
![]() |
8255c7a5ee |
Phase 2 pgindent run for v12.
Switch to 2.1 version of pg_bsd_indent. This formats multiline function declarations "correctly", that is with additional lines of parameter declarations indented to match where the first line's left parenthesis is. Discussion: https://postgr.es/m/CAEepm=0P3FeTXRcU5B2W3jv3PgRVZ-kGUXLGfd42FFhUROO3ug@mail.gmail.com |
6 years ago |
![]() |
5e1963fb76 |
Collations with nondeterministic comparison
This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com |
7 years ago |
![]() |
97c39498e5 |
Update copyright for 2019
Backpatch-through: certain files through 9.4 |
7 years ago |
![]() |
1a0586de36 |
Introduce notion of different types of slots (without implementing them).
Upcoming work intends to allow pluggable ways to introduce new ways of storing table data. Accessing those table access methods from the executor requires TupleTableSlots to be carry tuples in the native format of such storage methods; otherwise there'll be a significant conversion overhead. Different access methods will require different data to store tuples efficiently (just like virtual, minimal, heap already require fields in TupleTableSlot). To allow that without requiring additional pointer indirections, we want to have different structs (embedding TupleTableSlot) for different types of slots. Thus different types of slots are needed, which requires adapting creators of slots. The slot that most efficiently can represent a type of tuple in an executor node will often depend on the type of slot a child node uses. Therefore we need to track the type of slot is returned by nodes, so parent slots can create slots based on that. Relatedly, JIT compilation of tuple deforming needs to know which type of slot a certain expression refers to, so it can create an appropriate deforming function for the type of tuple in the slot. But not all nodes will only return one type of slot, e.g. an append node will potentially return different types of slots for each of its subplans. Therefore add function that allows to query the type of a node's result slot, and whether it'll always be the same type (whether it's fixed). This can be queried using ExecGetResultSlotOps(). The scan, result, inner, outer type of slots are automatically inferred from ExecInitScanTupleSlot(), ExecInitResultSlot(), left/right subtrees respectively. If that's not correct for a node, that can be overwritten using new fields in PlanState. This commit does not introduce the actually abstracted implementation of different kind of TupleTableSlots, that will be left for a followup commit. The different types of slots introduced will, for now, still use the same backing implementation. While this already partially invalidates the big comment in tuptable.h, it seems to make more sense to update it later, when the different TupleTableSlot implementations actually exist. Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de |
7 years ago |
![]() |
763f2edd92 |
Rejigger materializing and fetching a HeapTuple from a slot.
Previously materializing a slot always returned a HeapTuple. As current work aims to reduce the reliance on HeapTuples (so other storage systems can work efficiently), that needs to change. Thus split the tasks of materializing a slot (i.e. making it independent from the underlying storage / other memory contexts) from fetching a HeapTuple from the slot. For brevity, allow to fetch a HeapTuple from a slot and materializing the slot at the same time, controlled by a parameter. For now some callers of ExecFetchSlotHeapTuple, with materialize = true, expect that changes to the heap tuple will be reflected in the underlying slot. Those places will be adapted in due course, so while not pretty, that's OK for now. Also rename ExecFetchSlotTuple to ExecFetchSlotHeapTupleDatum and ExecFetchSlotTupleDatum to ExecFetchSlotHeapTupleDatum, as it's likely that future storage methods will need similar methods. There already is ExecFetchSlotMinimalTuple, so the new names make the naming scheme more coherent. Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de |
7 years ago |
![]() |
1ef6bd2954 |
Don't require return slots for nodes without projection.
In a lot of nodes the return slot is not required. That can either be because the node doesn't do any projection (say an Append node), or because the node does perform projections but the projection is optimized away because the projection would yield an identical row. Slots aren't that small, especially for wide rows, so it's worthwhile to avoid creating them. It's not possible to just skip creating the slot - it's currently used to determine the tuple descriptor returned by ExecGetResultType(). So separate the determination of the result type from the slot creation. The work previously done internally ExecInitResultTupleSlotTL() can now also be done separately with ExecInitResultTypeTL() and ExecInitResultSlot(). That way nodes that aren't guaranteed to need a result slot, can use ExecInitResultTypeTL() to determine the result type of the node, and ExecAssignScanProjectionInfo() (via ExecConditionalAssignProjectionInfo()) determines that a result slot is needed, it is created with ExecInitResultSlot(). Besides the advantage of avoiding to create slots that then are unused, this is necessary preparation for later patches around tuple table slot abstraction. In particular separating the return descriptor and slot is a prerequisite to allow JITing of tuple deforming with knowledge of the underlying tuple format, and to avoid unnecessarily creating JITed tuple deforming for virtual slots. This commit removes a redundant argument from ExecInitResultTupleSlotTL(). While this commit touches a lot of the relevant lines anyway, it'd normally still not worthwhile to cause breakage, except that aforementioned later commits will touch *all* ExecInitResultTupleSlotTL() callers anyway (but fits worse thematically). Author: Andres Freund Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de |
7 years ago |
![]() |
86a2218eb0 |
Limit Parallel Hash's bucket array to MaxAllocSize.
Make sure that we don't exceed MaxAllocSize when increasing the number of buckets. Perhaps later we'll remove that limit and use DSA_ALLOC_HUGE, but for now just prevent further increases like the non-parallel code. This change avoids the error from bug report #15225. Author: Thomas Munro Reviewed-By: Tom Lane Reported-by: Frits Jalvingh Discussion: https://postgr.es/m/152802081668.26724.16985037679312485972%40wrigleys.postgresql.org |
7 years ago |
![]() |
9e17bdb8a5 |
Fix query-lifespan memory leakage in repeatedly executed hash joins.
ExecHashTableCreate allocated some memory that wasn't freed by ExecHashTableDestroy, specifically the per-hash-key function information. That's not a huge amount of data, but if one runs a query that repeats a hash join enough times, it builds up. Fix by arranging for the data in question to be kept in the hashtable's hashCxt instead of leaving it "loose" in the query-lifespan executor context. (This ensures that we'll also clean up anything that the hash functions allocate in fn_mcxt.) Per report from Amit Khandekar. It's been like this forever, so back-patch to all supported branches. Discussion: https://postgr.es/m/CAJ3gD9cFofAWGvcxLOxDHC=B0hjtW8yGmUsF2hdGh97CM38=7g@mail.gmail.com |
8 years ago |
![]() |
ad7dbee368 |
Allow tupleslots to have a fixed tupledesc, use in executor nodes.
The reason for doing so is that it will allow expression evaluation to optimize based on the underlying tupledesc. In particular it will allow to JIT tuple deforming together with the expression itself. For that expression initialization needs to be moved after the relevant slots are initialized - mostly unproblematic, except in the case of nodeWorktablescan.c. After doing so there's no need for ExecAssignResultType() and ExecAssignResultTypeFromTL() anymore, as all former callers have been converted to create a slot with a fixed descriptor. When creating a slot with a fixed descriptor, tts_values/isnull can be allocated together with the main slot, reducing allocation overhead and increasing cache density a bit. Author: Andres Freund Discussion: https://postgr.es/m/20171206093717.vqdxe5icqttpxs3p@alap3.anarazel.de |
8 years ago |
![]() |
05d0f13f07 |
Skip setting up shared instrumentation for Hash node if not needed.
We don't need to set up the shared space for hash join instrumentation data
if instrumentation hasn't been requested. Let's follow the example of the
similar Sort node code and save a few cycles by skipping that when we can.
This reverts commit
|
8 years ago |
![]() |
d59ff4ab31 |
Fix another instance of unsafe coding for shm_toc_lookup failure.
One or another author of commit
|
8 years ago |
![]() |
c12693d8f3 |
Introduce ExecQualAndReset() helper.
It's a common task to evaluate a qual and reset the corresponding expression context. Currently that requires storing the result of the qual eval, resetting the context, and then reacting on the result. As that's awkward several places only reset the context next time through a node. That's not great, so introduce a helper that evaluates and resets. It's a bit ugly that it currently uses MemoryContextReset() instead of ResetExprContext(), but that seems easier than reordering all of executor.h. Author: Andres Freund Discussion: https://postgr.es/m/20180109222544.f7loxrunqh3xjl5f@alap3.anarazel.de |
8 years ago |
![]() |
6fcde24063 |
Fix some minor errors in new PHJ code.
Correct ExecParallelHashTuplePrealloc's estimate of whether the space_allowed limit is exceeded. Be more consistent about tuples that are exactly HASH_CHUNK_THRESHOLD in size (they're "small", not "large"). Neither of these things explain the current buildfarm unhappiness, but they're still bugs. Thomas Munro, per gripe by me Discussion: https://postgr.es/m/CAEepm=34PDuR69kfYVhmZPgMdy8pSA-MYbpesEN1SR+2oj3Y+w@mail.gmail.com |
8 years ago |
![]() |
9d4649ca49 |
Update copyright for 2018
Backpatch-through: certain files through 9.3 |
8 years ago |
![]() |
5dc692f78d |
Ensure proper alignment of tuples in HashMemoryChunkData buffers.
The previous coding relied (without any documentation) on the data[] member of HashMemoryChunkData being at a MAXALIGN'ed offset. If it was not, the tuples would not be maxaligned either, leading to failures on alignment-picky machines. While there seems to be no live bug on any platform we support, this is clearly pretty fragile: any addition to or rearrangement of the fields in HashMemoryChunkData could break it. Let's remove the hazard by getting rid of the data[] member and instead using pointer arithmetic with an explicitly maxalign'ed offset. Discussion: https://postgr.es/m/14483.1514938129@sss.pgh.pa.us |
8 years ago |
![]() |
93ea78b17c |
Fix EXPLAIN ANALYZE output for Parallel Hash.
In a race case, EXPLAIN ANALYZE could fail to display correct nbatch and size information. Refactor so that participants report only on batches they worked on rather than trying to report on all of them, and teach explain.c to consider the HashInstrumentation object from all participants instead of picking the first one it can find. This should fix an occasional build farm failure in the "join" regression test. Author: Thomas Munro Reviewed-By: Andres Freund Discussion: https://postgr.es/m/30219.1514428346%40sss.pgh.pa.us |
8 years ago |
![]() |
f83040c62a |
Fix rare assertion failure in parallel hash join.
When a backend runs out of inner tuples to hash, it should detach from
grow_batch_barrier only after it has flushed all batches to disk and
merged counters, not before. Otherwise a concurrent backend in
ExecParallelHashIncreaseNumBatches() could stop waiting for this
backend and try to read tuples before they have been written. This
commit reorders those operations and should fix the assertion failures
seen occasionally on the build farm since commit
|
8 years ago |
![]() |
4e2970f880 |
Fix assert with side effects in the new PHJ code.
Instead of asserting the assert just set the value to what it was supposed to test... Per coverity. |
8 years ago |
![]() |
1804284042 |
Add parallel-aware hash joins.
Introduce parallel-aware hash joins that appear in EXPLAIN plans as Parallel Hash Join with Parallel Hash. While hash joins could already appear in parallel queries, they were previously always parallel-oblivious and had a partial subplan only on the outer side, meaning that the work of the inner subplan was duplicated in every worker. After this commit, the planner will consider using a partial subplan on the inner side too, using the Parallel Hash node to divide the work over the available CPU cores and combine its results in shared memory. If the join needs to be split into multiple batches in order to respect work_mem, then workers process different batches as much as possible and then work together on the remaining batches. The advantages of a parallel-aware hash join over a parallel-oblivious hash join used in a parallel query are that it: * avoids wasting memory on duplicated hash tables * avoids wasting disk space on duplicated batch files * divides the work of building the hash table over the CPUs One disadvantage is that there is some communication between the participating CPUs which might outweigh the benefits of parallelism in the case of small hash tables. This is avoided by the planner's existing reluctance to supply partial plans for small scans, but it may be necessary to estimate synchronization costs in future if that situation changes. Another is that outer batch 0 must be written to disk if multiple batches are required. A potential future advantage of parallel-aware hash joins is that right and full outer joins could be supported, since there is a single set of matched bits for each hashtable, but that is not yet implemented. A new GUC enable_parallel_hash is defined to control the feature, defaulting to on. Author: Thomas Munro Reviewed-By: Andres Freund, Robert Haas Tested-By: Rafia Sabih, Prabhat Sahu Discussion: https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com https://postgr.es/m/CAEepm=37HKyJ4U6XOLi=JgfSHM3o6B-GaeO-6hkOmneTDkH+Uw@mail.gmail.com |
8 years ago |
![]() |
8526bcb2df |
Try again to fix accumulation of parallel worker instrumentation.
When a Gather or Gather Merge node is started and stopped multiple
times, accumulate instrumentation data only once, at the end, instead
of after each execution, to avoid recording inflated totals.
Commit
|
8 years ago |
![]() |
5bcf389ecf |
Fix EXPLAIN ANALYZE of hash join when the leader doesn't participate.
If a hash join appears in a parallel query, there may be no hash table
available for explain.c to inspect even though a hash table may have
been built in other processes. This could happen either because
parallel_leader_participation was set to off or because the leader
happened to hit the end of the outer relation immediately (even though
the complete relation is not empty) and decided not to build the hash
table.
Commit
|
8 years ago |
![]() |
2eb4a831e5 |
Change TRUE/FALSE to true/false
The lower case spellings are C and C++ standard and are used in most parts of the PostgreSQL sources. The upper case spellings are only used in some files/modules. So standardize on the standard spellings. The APIs for ICU, Perl, and Windows define their own TRUE and FALSE, so those are left as is when using those APIs. In code comments, we use the lower-case spelling for the C concepts and keep the upper-case spelling for the SQL concepts. Reviewed-by: Michael Paquier <michael.paquier@gmail.com> |
8 years ago |
![]() |
cc9f08b6b8 |
Move ExecProcNode from dispatch to function pointer based model.
This allows us to add stack-depth checks the first time an executor
node is called, and skip that overhead on following
calls. Additionally it yields a nice speedup.
While it'd probably have been a good idea to have that check all
along, it has become more important after the new expression
evaluation framework in
|
8 years ago |
![]() |
d47cfef711 |
Move interrupt checking from ExecProcNode() to executor nodes.
In a followup commit ExecProcNode(), and especially the large switch it contains, will largely be replaced by a function pointer directly to the correct node. The node functions will then get invoked by a thin inline function wrapper. To avoid having to include miscadmin.h in headers - CHECK_FOR_INTERRUPTS() - move the interrupt checks into the individual executor routines. While looking through all executor nodes, I noticed a number of arguably missing interrupt checks, add these too. Author: Andres Freund, Tom Lane Reviewed-By: Tom Lane Discussion: https://postgr.es/m/22833.1490390175@sss.pgh.pa.us |
8 years ago |
![]() |
382ceffdf7 |
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they flow past the right margin. By default, BSD indent lines up statement continuation lines that are within parentheses so that they start just to the right of the preceding left parenthesis. However, traditionally, if that resulted in the continuation line extending to the right of the desired right margin, then indent would push it left just far enough to not overrun the margin, if it could do so without making the continuation line start to the left of the current statement indent. That makes for a weird mix of indentations unless one has been completely rigid about never violating the 80-column limit. This behavior has been pretty universally panned by Postgres developers. Hence, disable it with indent's new -lpl switch, so that parenthesized lines are always lined up with the preceding left paren. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us |
8 years ago |
![]() |
c7b8998ebb |
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit
|
8 years ago |
![]() |
9aab83fc50 |
Redesign get_attstatsslot()/free_attstatsslot() for more safety and speed.
The mess cleaned up in commit
|
8 years ago |
![]() |
b8d7f053c5 |
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de |
9 years ago |
![]() |
f2ec57dee9 |
Make sure that hash join's bulk-tuple-transfer loops are interruptible.
The loops in ExecHashJoinNewBatch(), ExecHashIncreaseNumBatches(), and ExecHashRemoveNextSkewBucket() are all capable of iterating over many tuples without ever doing a CHECK_FOR_INTERRUPTS, so that the backend might fail to respond to SIGINT or SIGTERM for an unreasonably long time. Fix that. In the case of ExecHashJoinNewBatch(), it seems useful to put the added CHECK_FOR_INTERRUPTS into ExecHashJoinGetSavedTuple() rather than directly in the loop, because that will also ensure that both principal code paths through ExecHashJoinOuterGetTuple() will do a CHECK_FOR_INTERRUPTS, which seems like a good idea to avoid surprises. Back-patch to all supported branches. Tom Lane and Thomas Munro Discussion: https://postgr.es/m/6044.1487121720@sss.pgh.pa.us |
9 years ago |
![]() |
ea15e18677 |
Remove obsoleted code relating to targetlist SRF evaluation.
Since
|
9 years ago |
![]() |
1d25779284 |
Update copyright via script for 2017
|
9 years ago |
![]() |
ea268cdc9a |
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls had typos in the context-sizing parameters. While none of these led to especially significant problems, they did create minor inefficiencies, and it's now clear that expecting people to copy-and-paste those calls accurately is not a great idea. Let's reduce the risk of future errors by introducing single macros that encapsulate the common use-cases. Three such macros are enough to cover all but two special-purpose contexts; those two calls can be left as-is, I think. While this patch doesn't in itself improve matters for third-party extensions, it doesn't break anything for them either, and they can gradually adopt the simplified notation over time. In passing, change TopMemoryContext to use the default allocation parameters. Formerly it could only be extended 8K at a time. That was probably reasonable when this code was written; but nowadays we create many more contexts than we did then, so that it's not unusual to have a couple hundred K in TopMemoryContext, even without considering various dubious code that sticks other things there. There seems no good reason not to let it use growing blocks like most other contexts. Back-patch to 9.6, mostly because that's still close enough to HEAD that it's easy to do so, and keeping the branches in sync can be expected to avoid some future back-patching pain. The bugs fixed by these changes don't seem to be significant enough to justify fixing them further back. Discussion: <21072.1472321324@sss.pgh.pa.us> |
9 years ago |