mirror of https://github.com/postgres/postgres
Tag:
Branch:
Tree:
e6bdfd9700
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
REL_18_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master
PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_22
REL_13_23
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_19
REL_14_2
REL_14_20
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_14
REL_15_15
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_10
REL_16_11
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_6
REL_17_7
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_0
REL_18_1
REL_18_BETA1
REL_18_BETA2
REL_18_BETA3
REL_18_RC1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
${ noResults }
15397 Commits (e6bdfd9700ebfc7df811c97c2fc46d7e94e329a2)
| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
c30f54ad73 |
Detect POLLHUP/POLLRDHUP while running queries.
Provide a new GUC check_client_connection_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default. For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write. In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes. Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru> Author: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp> Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Maksim Milyutin <milyutinma@gmail.com> Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru |
5 years ago |
|
|
174edbe9f9 |
Clarify documentation of RESET ROLE
Command-line options, or previous "ALTER (ROLE|DATABASE) ... SET ROLE ..." commands, can change the value of the default role for a session. In the presence of one of these, RESET ROLE will change the current user identifier to the default role rather than the session user identifier. Fix the documentation to reflect this reality. Backpatch to all supported versions. Author: Nathan Bossart Reviewed-By: Laurenz Albe, David G. Johnston, Joe Conway Reported by: Nathan Bossart Discussion: https://postgr.es/m/flat/925134DB-8212-4F60-8AB1-B1231D750CB4%40amazon.com Backpatch-through: 9.6 |
5 years ago |
|
|
b1be3074ac |
postgres_fdw: Add option to control whether to keep connections open.
This commit adds a new option keep_connections that controls whether postgres_fdw keeps the connections to the foreign server open so that the subsequent queries can re-use them. This option can only be specified for a foreign server. The default is on. If set to off, all connections to the foreign server will be discarded at the end of transaction. Closed connections will be re-established when they are necessary by future queries using a foreign table. This option is useful, for example, when users want to prevent the connections from eating up the foreign servers connections capacity. Author: Bharath Rupireddy Reviewed-by: Alexey Kondratov, Vignesh C, Fujii Masao Discussion: https://postgr.es/m/CALj2ACVvrp5=AVp2PupEm+nAC8S4buqR3fJMmaCoc7ftT0aD2A@mail.gmail.com |
5 years ago |
|
|
6fb66c268d |
doc: Clarify how to generate backup files with non-exclusive backups
The current instructions describing how to write the backup_label and tablespace_map files are confusing. For example, opening a file in text mode on Windows and copy-pasting the file's contents would result in a failure at recovery because of the extra CRLF characters generated. The documentation was not stating that clearly, and per discussion this is not considered as a supported scenario. This commit extends a bit the documentation to mention that it may be required to open the file in binary mode before writing its data. Reported-by: Wang Shenhao Author: David Steele Reviewed-by: Andrew Dunstan, Magnus Hagander Discussion: https://postgr.es/m/8373f61426074f2cb6be92e02f838389@G08CNEXMBPEKD06.g08.fujitsu.local Backpatch-through: 9.6 |
5 years ago |
|
|
2bda93f813 |
doc: mention that intervening major releases can be skipped
Also mention that you should read the intervening major releases notes. This change was also applied to the website. Discussion: https://postgr.es/m/20210330144949.GA8259@momjian.us Backpatch-through: 9.6 |
5 years ago |
|
|
9eacee2e62 |
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com |
5 years ago |
|
|
c9c41c7a33 |
Rename Default Roles to Predefined Roles
The term 'default roles' wasn't quite apt as these roles aren't able to be modified or removed after installation, so rename them to be 'Predefined Roles' instead, adding an entry into the newly added Obsolete Appendix to help users of current releases find the new documentation. Bruce Momjian and Stephen Frost Discussion: https://postgr.es/m/157742545062.1149.11052653770497832538%40wrigleys.postgresql.org and https://www.postgresql.org/message-id/20201120211304.GG16415@tamriel.snowman.net |
5 years ago |
|
|
ea1b99a661 |
Add 'noError' argument to encoding conversion functions.
With the 'noError' argument, you can try to convert a buffer without knowing the character boundaries beforehand. The functions now need to return the number of input bytes successfully converted. This is is a backwards-incompatible change, if you have created a custom encoding conversion with CREATE CONVERSION. This adds a check to pg_upgrade for that, refusing the upgrade if there are any user-defined encoding conversions. Custom conversions are very rare, there are no commonly used extensions that I know of that uses that feature. No other objects can depend on conversions, so if you do have one, you can fairly easily drop it before upgrading, and recreate it after the upgrade with an updated version. Add regression tests for built-in encoding conversions. This doesn't cover every conversion, but it covers all the internal functions in conv.c that are used to implement the conversions. Reviewed-by: John Naylor Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01%40iki.fi |
5 years ago |
|
|
ffd3391ea9 |
doc: Clarify use of ACCESS EXCLUSIVE lock in various sections
Some sections of the documentation used "exclusive lock" to describe that an ACCESS EXCLUSIVE lock is taken during a given operation. This can be confusing to the reader as ACCESS SHARE is allowed with an EXCLUSIVE lock is used, but that would not be the case with what is described on those parts of the documentation. Author: Greg Rychlewski Discussion: https://postgr.es/m/CAKemG7VptD=7fNWckFMsMVZL_zzvgDO6v2yVmQ+ZiBfc_06kCQ@mail.gmail.com Backpatch-through: 9.6 |
5 years ago |
|
|
4778826532 |
Ensure to send a prepare after we detect concurrent abort during decoding.
It is possible that while decoding a prepared transaction, it gets aborted concurrently via a ROLLBACK PREPARED command. In that case, we were skipping all the changes and directly sending Rollback Prepared when we find the same in WAL. However, the downstream has no idea of the GID of such a transaction. So, ensure to send prepare even when a concurrent abort is detected. Author: Ajin Cherian Reviewed-by: Markus Wanner, Amit Kapila Discussion: https://postgr.es/m/f82133c6-6055-b400-7922-97dae9f2b50b@enterprisedb.com |
5 years ago |
|
|
28b3e3905c |
Revert b6002a796
This removes "Add Result Cache executor node". It seems that something weird is going on with the tracking of cache hits and misses as highlighted by many buildfarm animals. It's not yet clear what the problem is as other parts of the plan indicate that the cache did work correctly, it's just the hits and misses that were being reported as 0. This is especially a bad time to have the buildfarm so broken, so reverting before too many more animals go red. Discussion: https://postgr.es/m/CAApHDvq_hydhfovm4=izgWs+C5HqEeRScjMbOgbpC-jRAeK3Yw@mail.gmail.com |
5 years ago |
|
|
b6002a796d |
Add Result Cache executor node
Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com |
5 years ago |
|
|
3b0c647bbf |
Add a docs section for obsoleted and renamed functions and settings
The new appendix groups information on renamed or removed settings, commands, etc into an out-of-the-way part of the docs. The original id elements are retained in each subsection to ensure that the same filenames are produced for HTML docs. This prevents /current/ links on the web from breaking, and allows users of the web docs to follow links from old version pages to info on the changes in the new version. Prior to this change, a link to /current/ for renamed sections like the recovery.conf docs would just 404. Similarly if someone searched for recovery.conf they would find the pg11 docs, but there would be no /12/ or /current/ link, so they couldn't easily find out that it was removed in pg12 or how to adapt. Index entries are also added so that there's a breadcrumb trail for users to follow when they know the old name, but not what we changed it to. So a user who is trying to find out how to set standby_mode in PostgreSQL 12+, or where pg_resetxlog went, now has more chance of finding that information. Craig Ringer and Stephen Frost Reviewed-by: Euler Taveira Discussion: https://postgr.es/m/CAGRY4nzPNOyYQ_1-pWYToUVqQ0ThqP5jdURnJMZPm539fdizOg%40mail.gmail.com Backpatch-through: 10 |
5 years ago |
|
|
86dc90056d |
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com |
5 years ago |
|
|
055fee7eb4 |
Allow an alias to be attached to a JOIN ... USING
This allows something like
SELECT ... FROM t1 JOIN t2 USING (a, b, c) AS x
where x has the columns a, b, c and unlike a regular alias it does not
hide the range variables of the tables being joined t1 and t2.
Per SQL:2016 feature F404 "Range variable for common column names".
Reviewed-by: Vik Fearing <vik.fearing@2ndquadrant.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/454638cf-d563-ab76-a585-2564428062af@2ndquadrant.com
|
5 years ago |
|
|
27e1f14563 |
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com |
5 years ago |
|
|
91c5a8caaa |
Add errhint_plural() function and make use of it
Similar to existing errmsg_plural() and errdetail_plural(). Some errhint() calls hadn't received the proper plural treatment yet. |
5 years ago |
|
|
287d2a97c1 |
doc: Remove Cyrillic from unistr example
Not supported by PDF build right now, so let's do without it. |
5 years ago |
|
|
9f45631766 |
Doc: Use consistent terminology for tablesync slots.
At some places in the docs, we refer to them as tablesync slots and at other places as table synchronization slots. For consistency, we refer to them as table synchronization slots at all places. Author: Peter Smith Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/CAHut+PvzYNKCeZ=kKBDkh3dw-r=2D3fk=nNc9SXSW=CZGk69xg@mail.gmail.com |
5 years ago |
|
|
6568cef26e |
Add support for --extension in pg_dump
When specified, only extensions matching the given pattern are included in dumps. Similarly to --table and --schema, when --strict-names is used, a perfect match is required. Also, like the two other options, this new option offers no guarantee that dependent objects have been dumped, so a restore may fail on a clean database. Tests are added in test_pg_dump/, checking after a set of positive and negative cases, with or without an extension's contents added to the dump generated. Author: Guillaume Lelarge Reviewed-by: David Fetter, Tom Lane, Michael Paquier, Asif Rehman, Julien Rouhaud Discussion: https://postgr.es/m/CAECtzeXOt4cnMU5+XMZzxBPJ_wu76pNy6HZKPRBL-j7yj1E4+g@mail.gmail.com |
5 years ago |
|
|
198b3716db
|
Improve PQtrace() output format
Transform the PQtrace output format from its ancient (and mostly useless) byte-level output format to a logical-message-level output, making it much more usable. This implementation allows the printing code to be written (as it indeed was) by looking at the protocol documentation, which gives more confidence that the three (docs, trace code and actual code) actually match. Author: 岩田 彩 (Aya Iwata) <iwata.aya@fujitsu.com> Reviewed-by: 綱川 貴之 (Takayuki Tsunakawa) <tsunakawa.takay@fujitsu.com> Reviewed-by: Kirk Jamison <k.jamison@fujitsu.com> Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: 黒田 隼人 (Hayato Kuroda) <kuroda.hayato@fujitsu.com> Reviewed-by: "Nagaura, Ryohei" <nagaura.ryohei@jp.fujitsu.com> Reviewed-by: Ryo Matsumura <matsumura.ryo@fujitsu.com> Reviewed-by: Greg Nancarrow <gregn4422@gmail.com> Reviewed-by: Jim Doty <jdoty@pivotal.io> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/71E660EB361DF14299875B198D4CE5423DE3FBA4@g01jpexmbkw25 |
5 years ago |
|
|
f64ea6dc5c |
Add a xid argument to the filter_prepare callback for output plugins.
Along with gid, this provides a different way to identify the transaction. The users that use xid in some way to prepare the transactions can use it to filter prepare transactions. The later commands COMMIT PREPARED or ROLLBACK PREPARED carries both identifiers, providing an output plugin the choice of what to use. Author: Markus Wanner Reviewed-by: Vignesh C, Amit Kapila Discussion: https://postgr.es/m/ee280000-7355-c4dc-e47b-2436e7be959c@enterprisedb.com |
5 years ago |
|
|
6d7a6feac4 |
Allow matching the DN of a client certificate for authentication
Currently we only recognize the Common Name (CN) of a certificate's subject to be matched against the user name. Thus certificates with subjects '/OU=eng/CN=fred' and '/OU=sales/CN=fred' will have the same connection rights. This patch provides an option to match the whole Distinguished Name (DN) instead of just the CN. On any hba line using client certificate identity, there is an option 'clientname' which can have values of 'DN' or 'CN'. The default is 'CN', the current procedure. The DN is matched against the RFC2253 formatted DN, which looks like 'CN=fred,OU=eng'. This facility of probably best used in conjunction with an ident map. Discussion: https://postgr.es/m/92e70110-9273-d93c-5913-0bccb6562740@dunslane.net Reviewed-By: Michael Paquier, Daniel Gustafsson, Jacob Champion |
5 years ago |
|
|
f37fec837c |
Add unistr function
This allows decoding a string with Unicode escape sequences. It is similar to Unicode escape strings, but offers some more flexibility. Author: Pavel Stehule <pavel.stehule@gmail.com> Reviewed-by: Asif Rehman <asifr.rehman@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/CAFj8pRA5GnKT+gDVwbVRH2ep451H_myBt+NTz8RkYUARE9+qOQ@mail.gmail.com |
5 years ago |
|
|
b64654d6c4 |
doc: Define TLS as an acronym
Commit
|
5 years ago |
|
|
a4d75c86bf |
Extended statistics on expressions
Allow defining extended statistics on expressions, not just just on
simple column references. With this commit, expressions are supported
by all existing extended statistics kinds, improving the same types of
estimates. A simple example may look like this:
CREATE TABLE t (a int);
CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t;
ANALYZE t;
The collected statistics are useful e.g. to estimate queries with those
expressions in WHERE or GROUP BY clauses:
SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0;
SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20);
This introduces new internal statistics kind 'e' (expressions) which is
built automatically when the statistics object definition includes any
expressions. This represents single-expression statistics, as if there
was an expression index (but without the index maintenance overhead).
The statistics is stored in pg_statistics_ext_data as an array of
composite types, which is possible thanks to
|
5 years ago |
|
|
a14a0118a1 |
Add "pg_database_owner" default role.
Membership consists, implicitly, of the current database owner. Expect use in template databases. Once pg_database_owner has rights within a template, each owner of a database instantiated from that template will exercise those rights. Reviewed by John Naylor. Discussion: https://postgr.es/m/20201228043148.GA1053024@rfd.leadboat.com |
5 years ago |
|
|
ab596105b5 |
BRIN minmax-multi indexes
Adds BRIN opclasses similar to the existing minmax, except that instead of summarizing the page range into a single [min,max] range, the summary consists of multiple ranges and/or points, allowing gaps. This allows more efficient handling of data with poor correlation to physical location within the table and/or outlier values, for which the regular minmax opclassed tend to work poorly. It's possible to specify the number of values kept for each page range, either as a single point or an interval boundary. CREATE TABLE t (a int); CREATE INDEX ON t USING brin (a int4_minmax_multi_ops(values_per_range=16)); When building the summary, the values are combined into intervals with the goal to minimize the "covering" (sum of interval lengths), using a support procedure computing distance between two values. Bump catversion, due to various catalog changes. Author: Tomas Vondra <tomas.vondra@postgresql.org> Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com> Reviewed-by: Sokolov Yura <y.sokolov@postgrespro.ru> Reviewed-by: John Naylor <john.naylor@enterprisedb.com> Discussion: https://postgr.es/m/c1138ead-7668-f0e1-0638-c3be3237e812@2ndquadrant.com Discussion: https://postgr.es/m/5d78b774-7e9c-c94e-12cf-fef51cc89b1a%402ndquadrant.com |
5 years ago |
|
|
77b88cd1bb |
BRIN bloom indexes
Adds a BRIN opclass using a Bloom filter to summarize the range. Indexes
using the new opclasses allow only equality queries (similar to hash
indexes), but that works fine for data like UUID, MAC addresses etc. for
which range queries are not very common. This also means the indexes
work for data that is not well correlated to physical location within
the table, or perhaps even entirely random (which is a common issue with
existing BRIN minmax opclasses).
It's possible to specify opclass parameters with the usual Bloom filter
parameters, i.e. the desired false-positive rate and the expected number
of distinct values per page range.
CREATE TABLE t (a int);
CREATE INDEX ON t
USING brin (a int4_bloom_ops(false_positive_rate = 0.05,
n_distinct_per_range = 100));
The opclasses do not operate on the indexed values directly, but compute
a 32-bit hash first, and the Bloom filter is built on the hash value.
Collisions should not be a huge issue though, as the number of distinct
values in a page ranges is usually fairly small.
Bump catversion, due to various catalog changes.
Author: Tomas Vondra <tomas.vondra@postgresql.org>
Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Reviewed-by: Sokolov Yura <y.sokolov@postgrespro.ru>
Reviewed-by: Nico Williams <nico@cryptonector.com>
Reviewed-by: John Naylor <john.naylor@enterprisedb.com>
Discussion: https://postgr.es/m/c1138ead-7668-f0e1-0638-c3be3237e812@2ndquadrant.com
Discussion: https://postgr.es/m/5d78b774-7e9c-c94e-12cf-fef51cc89b1a%402ndquadrant.com
|
5 years ago |
|
|
a681e3c107 |
Support the old signature of BRIN consistent function
Commit |
5 years ago |
|
|
71f4c8c6f7
|
ALTER TABLE ... DETACH PARTITION ... CONCURRENTLY
Allow a partition be detached from its partitioned table without blocking concurrent queries, by running in two transactions and only requiring ShareUpdateExclusive in the partitioned table. Because it runs in two transactions, it cannot be used in a transaction block. This is the main reason to use dedicated syntax: so that users can choose to use the original mode if they need it. But also, it doesn't work when a default partition exists (because an exclusive lock would still need to be obtained on it, in order to change its partition constraint.) In case the second transaction is cancelled or a crash occurs, there's ALTER TABLE .. DETACH PARTITION .. FINALIZE, which executes the final steps. The main trick to make this work is the addition of column pg_inherits.inhdetachpending, initially false; can only be set true in the first part of this command. Once that is committed, concurrent transactions that use a PartitionDirectory will include or ignore partitions so marked: in optimizer they are ignored if the row is marked committed for the snapshot; in executor they are always included. As a result, and because of the way PartitionDirectory caches partition descriptors, queries that were planned before the detach will see the rows in the detached partition and queries that are planned after the detach, won't. A CHECK constraint is created that duplicates the partition constraint. This is probably not strictly necessary, and some users will prefer to remove it afterwards, but if the partition is re-attached to a partitioned table, the constraint needn't be rechecked. Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Amit Langote <amitlangote09@gmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Discussion: https://postgr.es/m/20200803234854.GA24158@alvherre.pgsql |
5 years ago |
|
|
650d623530
|
Document lock obtained during partition detach
On partition detach, we acquire a SHARE lock on all tables that
reference the partitioned table that we're detaching a partition from,
but failed to document this fact. My oversight in commit
|
5 years ago |
|
|
ad8305a43d |
Add DECLARE STATEMENT command to ECPG
This command declares a SQL identifier for a SQL statement to be used in other embedded SQL statements. The identifier is linked to a connection. Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Shawn Wang <shawn.wang.pg@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/TY2PR01MB24438A52DB04E71D0E501452F5630@TY2PR01MB2443.jpnprd01.prod.outlook.com |
5 years ago |
|
|
5173e42892 |
doc: Fix typo
Reported-by: Erik Rijkers <er@xs4all.nl> |
5 years ago |
|
|
bbcc4eb2e0 |
Change checkpoint_completion_target default to 0.9
Common recommendations are that the checkpoint should be spread out as much as possible, provided we avoid having it take too long. This change updates the default to 0.9 (from 0.5) to match that recommendation. There was some debate about possibly removing the option entirely but it seems there may be some corner-cases where having it set much lower to try to force the checkpoint to be as fast as possible could result in fewer periods of time of reduced performance due to kernel flushing. General agreement is that the "spread more" is the preferred approach though and those who need to tune away from that value are much less common. Reviewed-By: Michael Paquier, Peter Eisentraut, Tom Lane, David Steele, Nathan Bossart Discussion: https://postgr.es/m/20201207175329.GM16415%40tamriel.snowman.net |
5 years ago |
|
|
e5595de03e |
Tidy up more loose ends related to configurable TOAST compression.
Change the default_toast_compression GUC to be an enum rather than
a string. Earlier, uncommitted versions of the patch supported using
CREATE ACCESS METHOD to add new compression methods to a running
system, but that idea was dropped before commit. So, we can simplify
the GUC handling as well, which has the nice side effect of improving
the error messages.
While updating the documentation to reflect the new GUC type, also
move it back to the right place in the list. I moved this while
revising what became commit
|
5 years ago |
|
|
49ab61f0bd |
Add date_bin function
Similar to date_trunc, but allows binning by an arbitrary interval rather than just full units. Author: John Naylor <john.naylor@enterprisedb.com> Reviewed-by: David Fetter <david@fetter.org> Reviewed-by: Isaac Morland <isaac.morland@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Artur Zakirov <zaartur@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/CACPNZCt4buQFRgy6DyjuZS-2aPDpccRkrJBmgUfwYc1KiaXYxg@mail.gmail.com |
5 years ago |
|
|
26acb54a13 |
Revert "Enable parallel SELECT for "INSERT INTO ... SELECT ..."."
To allow inserts in parallel-mode this feature has to ensure that all the constraints, triggers, etc. are parallel-safe for the partition hierarchy which is costly and we need to find a better way to do that. Additionally, we could have used existing cached information in some cases like indexes, domains, etc. to determine the parallel-safety. List of commits reverted, in reverse chronological order: |
5 years ago |
|
|
84007043fc |
Rename wait event WalrcvExit to WalReceiverExit.
Commit
|
5 years ago |
|
|
a6715af1e7 |
Add bit_count SQL function
This function for bit and bytea counts the set bits in the bit or byte string. Internally, we use the existing popcount functionality. For the name, after some discussion, we settled on bit_count, which also exists with this meaning in MySQL, Java, and Python. Author: David Fetter <david@fetter.org> Discussion: https://www.postgresql.org/message-id/flat/20201230105535.GJ13234@fetter.org |
5 years ago |
|
|
a5f002ad9a |
Use correct spelling of statistics kind
A couple error messages and comments used 'statistic kind', not the correct 'statistics kind'. Fix and backpatch all the way back to 10, where extended statistics were introduced. Backpatch-through: 10 |
5 years ago |
|
|
1e3e8b51bd |
Change the type of WalReceiverWaitStart wait event from Client to IPC.
Previously the type of this wait event was Client. But while this wait event is being reported, walreceiver process is waiting for the startup process to set initial data for streaming replication. It's not waiting for any activity on a socket connected to a user application or walsender. So this commit changes the type for WalReceiverWaitStart wait event to IPC. Author: Fujii Masao Reviewed-by: Kyotaro Horiguchi Discussion: https://postgr.es/m/cdacc27c-37ff-f1a4-20e2-ce19933abfcc@oss.nttdata.com |
5 years ago |
|
|
a1c649d889 |
Pass all scan keys to BRIN consistent function at once
This commit changes how we pass scan keys to BRIN consistent function. Instead of passing them one by one, we now pass all scan keys for a given attribute at once. That makes the consistent function a bit more complex, as it has to loop through the keys, but it does allow more elaborate opclasses that can use multiple keys to eliminate ranges much more effectively. The existing BRIN opclasses (minmax, inclusion) don't really benefit from this change. The primary purpose is to allow future opclases to benefit from seeing all keys at once. This does change the BRIN API, because the signature of the consistent function changes (a new parameter with number of scan keys). So this breaks existing opclasses, and will require supporting two variants of the code for different PostgreSQL versions. We've considered supporting two variants of the consistent, but we've decided not to do that. Firstly, there's another patch that moves handling of NULL values from the opclass, which means the opclasses need to be updated anyway. Secondly, we're not aware of any out-of-core BRIN opclasses, so it does not seem worth the extra complexity. Bump catversion, because of pg_proc changes. Author: Tomas Vondra <tomas.vondra@postgresql.org> Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Mark Dilger <hornschnorter@gmail.com> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com> Reviewed-by: John Naylor <john.naylor@enterprisedb.com> Reviewed-by: Nikita Glukhov <n.gluhov@postgrespro.ru> Discussion: https://postgr.es/m/c1138ead-7668-f0e1-0638-c3be3237e812@2ndquadrant.com |
5 years ago |
|
|
24f0e395ac |
docs: Fix omissions related to configurable TOAST compression.
Previously, the default_toast_compression GUC was not documented, and neither was pg_dump's new --no-toast-compression option. Justin Pryzby and Robert Haas Discussion: http://postgr.es/m/20210321235544.GD4203@telsasoft.com |
5 years ago |
|
|
61752afb26 |
Provide recovery_init_sync_method=syncfs.
Since commit
|
5 years ago |
|
|
bbe0a81db6 |
Allow configurable LZ4 TOAST compression.
There is now a per-column COMPRESSION option which can be set to pglz (the default, and the only option in up until now) or lz4. Or, if you like, you can set the new default_toast_compression GUC to lz4, and then that will be the default for new table columns for which no value is specified. We don't have lz4 support in the PostgreSQL code, so to use lz4 compression, PostgreSQL must be built --with-lz4. In general, TOAST compression means compression of individual column values, not the whole tuple, and those values can either be compressed inline within the tuple or compressed and then stored externally in the TOAST table, so those properties also apply to this feature. Prior to this commit, a TOAST pointer has two unused bits as part of the va_extsize field, and a compessed datum has two unused bits as part of the va_rawsize field. These bits are unused because the length of a varlena is limited to 1GB; we now use them to indicate the compression type that was used. This means we only have bit space for 2 more built-in compresison types, but we could work around that problem, if necessary, by introducing a new vartag_external value for any further types we end up wanting to add. Hopefully, it won't be too important to offer a wide selection of algorithms here, since each one we add not only takes more coding but also adds a build dependency for every packager. Nevertheless, it seems worth doing at least this much, because LZ4 gets better compression than PGLZ with less CPU usage. It's possible for LZ4-compressed datums to leak into composite type values stored on disk, just as it is for PGLZ. It's also possible for LZ4-compressed attributes to be copied into a different table via SQL commands such as CREATE TABLE AS or INSERT .. SELECT. It would be expensive to force such values to be decompressed, so PostgreSQL has never done so. For the same reasons, we also don't force recompression of already-compressed values even if the target table prefers a different compression method than was used for the source data. These architectural decisions are perhaps arguable but revisiting them is well beyond the scope of what seemed possible to do as part of this project. However, it's relatively cheap to recompress as part of VACUUM FULL or CLUSTER, so this commit adjusts those commands to do so, if the configured compression method of the table happens not to match what was used for some column value stored therein. Dilip Kumar. The original patches on which this work was based were written by Ildus Kurbangaliev, and those were patches were based on even earlier work by Nikita Glukhov, but the design has since changed very substantially, since allow a potentially large number of compression methods that could be added and dropped on a running system proved too problematic given some of the architectural issues mentioned above; the choice of which specific compression method to add first is now different; and a lot of the code has been heavily refactored. More recently, Justin Przyby helped quite a bit with testing and reviewing and this version also includes some code contributions from him. Other design input and review from Tomas Vondra, Álvaro Herrera, Andres Freund, Oleg Bartunov, Alexander Korotkov, and me. Discussion: http://postgr.es/m/20170907194236.4cefce96%40wp.localdomain Discussion: http://postgr.es/m/CAFiTN-uUpX3ck%3DK0mLEk-G_kUQY%3DSNOTeqdaNRR9FMdQrHKebw%40mail.gmail.com |
5 years ago |
|
|
be45be9c33 |
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are
duplicate. This is especially common with CUBE and ROLLUP clauses. For
example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to
GROUP BY GROUPING SETS (
(a, b, c),
(a, b, c),
(a, b, c),
(a, b),
(a, b),
(a, b),
(a),
(a),
(a),
(c, a),
(c, a),
(c, a),
(c),
(b, c),
(b),
()
)
Some of the grouping sets are calculated multiple times, which is mostly
unnecessary. This commit implements a new GROUP BY DISTINCT feature, as
defined in the SQL standard, which eliminates the duplicate sets.
Author: Vik Fearing
Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra
Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org
|
5 years ago |
|
|
cd91de0d17 |
Remove temporary files after backend crash
After a crash of a backend using temporary files, the files used to be left behind, on the basis that it might be useful for debugging. But we don't have any reports of anyone actually doing that, and it means the disk usage may grow over time due to repeated backend failures (possibly even hitting ENOSPC). So this behavior is a bit unfortunate, and fixing it required either manual cleanup (deleting files, which is error-prone) or restart of the instance (i.e. service disruption). This implements automatic cleanup of temporary files, controled by a new GUC remove_temp_files_after_crash. By default the files are removed, but it can be disabled to restore the old behavior if needed. Author: Euler Taveira Reviewed-by: Tomas Vondra, Michael Paquier, Anastasia Lubennikova, Thomas Munro Discussion: https://postgr.es/m/CAH503wDKdYzyq7U-QJqGn%3DGm6XmoK%2B6_6xTJ-Yn5WSvoHLY1Ww%40mail.gmail.com |
5 years ago |
|
|
ed62d3737c |
Doc: Update description for parallel insert reloption.
Commit
|
5 years ago |
|
|
c8f78b6161 |
Add a new GUC and a reloption to enable inserts in parallel-mode.
Commit
|
5 years ago |