Mirror of https://github.com/postgres/postgres

Branches:
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
REL_18_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master

Tags:
PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_22
REL_13_23
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_19
REL_14_2
REL_14_20
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_14
REL_15_15
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_10
REL_16_11
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_6
REL_17_7
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_0
REL_18_1
REL_18_BETA1
REL_18_BETA2
REL_18_BETA3
REL_18_RC1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
4289 Commits (344ac149cfdfc1efe1608f46f40f05f4af91c8eb)

commit 4c70887295 (1 year ago)
Revert "For inplace update, send nontransactional invalidations."

This reverts commit …

commit 9a1c73636d (1 year ago)
Revert "WAL-log inplace update before revealing it to other sessions."

This reverts commit …

commit 11e3f288f3 (1 year ago)
Unpin buffer before inplace update waits for an XID to end.

Commit …

commit acf49584e2 (1 year ago)
WAL-log inplace update before revealing it to other sessions.

A buffer lock won't stop a reader that has already checked tuple
visibility. If a vac_update_datfrozenxid() and then a crash happened
during inplace update of a relfrozenxid value, datfrozenxid could
overtake relfrozenxid. That could lead to "could not access status of
transaction" errors.

Back-patch to v12 (all supported versions). In v14 and earlier, this
also back-patches the assertion removal from commit …

commit e3914bd136 (1 year ago)
For inplace update, send nontransactional invalidations.

The inplace update survives ROLLBACK. The inval didn't, so another
backend's DDL could then update the row without incorporating the
inplace update. In the test this fixes, a mix of CREATE INDEX and ALTER
TABLE resulted in a table with an index, yet relhasindex=f. That is a
source of index corruption.

Back-patch to v12 (all supported versions). The back branch versions
don't change WAL, because those branches just added end-of-recovery
SIResetAll(). All branches change the ABI of extern function
PrepareToInvalidateCacheTuple(). No PGXN extension calls that, and
there's no apparent use case in extensions.

Reviewed by Nitin Motiani and (in earlier versions) Andres Freund.
Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com

commit dca68242a8 (1 year ago)
At end of recovery, reset all sinval-managed caches.

An inplace update's invalidation messages are part of its transaction's
commit record. However, the update survives even if its transaction
aborts or we stop recovery before replaying its transaction commit.
After recovery, a backend that started in recovery could update the row
without incorporating the inplace update. That could result in a table
with an index, yet relhasindex=f. That is a source of index corruption.

This bulk invalidation avoids the functional consequences. A future
change can fix the !RecoveryInProgress() scenario without changing the
WAL format. Back-patch to v17 - v12 (all supported versions). v18 will
instead add invalidations to WAL.

Discussion: https://postgr.es/m/20240618152349.7f.nmisch@google.com

commit ad24b75659 (1 year ago)
Stop reading uninitialized memory in heap_inplace_lock().

Stop computing a never-used value. This removes the read; the read had
no functional implications. Back-patch to v12, like commit …

commit efe706e273 (1 year ago)
Fix fetching default toast value during decoding of in-progress transactions.

During logical decoding of in-progress transactions, we perform the
toast table scan while fetching the default toast value for an
attribute. We forgot to initialize the flag during this scan to
indicate that the system table scan is in progress. We need this flag
to ensure that during logical decoding we never directly access the
tableam or heap APIs, because we check for concurrent aborts only in
systable_* APIs.

Reported-by: Alexander Lakhin
Author: Takeshi Ideriha, Hou Zhijie
Reviewed-by: Amit Kapila, Hou Zhijie
Backpatch-through: 14
Discussion: https://postgr.es/m/18641-6687273b7f15269d@postgresql.org

commit 5f15107875 (1 year ago)
Fix race condition in COMMIT PREPARED causing orphaned 2PC files

COMMIT PREPARED removes on-disk 2PC files near its end, but the state
that tells whether a file is on disk was read from shared memory while
not holding the two-phase state lock.

Because of that, there was a small window where a second backend doing
a PREPARE TRANSACTION could reuse the GlobalTransaction put back into
the 2PC free list by the COMMIT PREPARED, overwriting the "ondisk" flag
read afterwards by the COMMIT PREPARED to decide if its on-disk
two-phase state file should be removed, preventing the file deletion.

This commit fixes the issue so that the "ondisk" flag in the
GlobalTransaction is read while holding the two-phase state lock, not
from shared memory after its entry has been added to the free list.

Orphaned two-phase state files flushed to disk after a checkpoint are
discarded at the beginning of recovery. However, a truncation of
pg_xact/ would make the startup process issue a FATAL when it cannot
read the SLRU page holding the state of the transaction whose 2PC file
was orphaned, which is a necessary step to decide if the 2PC file
should be removed or not. Manually removing the file would be necessary
in this case.

Issue introduced by …
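
The general pattern behind this fix can be sketched outside PostgreSQL: copy a shared flag while still holding the lock that protects its slot, instead of re-reading it after the slot may have been recycled. A minimal standalone C sketch; the GlobalXactSlot type, field names, and lock are illustrative, not the actual twophase.c code:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct GlobalXactSlot
{
    bool ondisk;        /* protected by state_lock */
    bool in_freelist;
} GlobalXactSlot;

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static GlobalXactSlot slot = { .ondisk = true, .in_freelist = false };

static void
finish_commit_prepared(void)
{
    bool ondisk;

    pthread_mutex_lock(&state_lock);
    ondisk = slot.ondisk;    /* copy while locked: the buggy version read
                              * the flag again after unlocking, when a
                              * concurrent PREPARE could have reused the slot */
    slot.in_freelist = true; /* slot may be recycled from here on */
    pthread_mutex_unlock(&state_lock);

    if (ondisk)
        printf("remove on-disk two-phase file\n");
}

int
main(void)
{
    finish_commit_prepared();
    return 0;
}
```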

commit f51b34b3ed (1 year ago)
For inplace update durability, make heap_update() callers wait.

The previous commit fixed some ways of losing an inplace update. It
remained possible to lose one when a backend working toward a
heap_update() copied a tuple into memory just before inplace update of
that tuple. In catalogs eligible for inplace update, use LOCKTAG_TUPLE
to govern admission to the steps of copying an old tuple, modifying it,
and issuing heap_update(). This includes MERGE commands. To avoid
changing most of the pg_class DDL, don't require LOCKTAG_TUPLE when
holding a relation lock sufficient to exclude inplace updaters.

Back-patch to v12 (all supported versions). In v13 and v12, "UPDATE
pg_class" or "UPDATE pg_database" can still lose an inplace update. The
v14+ UPDATE fix needs commit …

commit 82c2d9e022 (1 year ago)
Fix data loss at inplace update after heap_update().

As previously-added tests demonstrated, heap_inplace_update() could
instead update an unrelated tuple of the same catalog. It could lose
the update. Losing relhasindex=t was a source of index corruption.

Inplace-updating commands like VACUUM will now wait for heap_update()
commands like GRANT TABLE and GRANT DATABASE. That isn't ideal, but a
long-running GRANT already hurts VACUUM progress more just by keeping
an XID running. The VACUUM will behave like a DELETE or UPDATE waiting
for the uncommitted change.

For implementation details, start at the systable_inplace_update_begin()
header comment and README.tuplock. Back-patch to v12 (all supported
versions). In back branches, retain a deprecated heap_inplace_update(),
for extensions.

Reported by Smolkin Grigory. Reviewed by Nitin Motiani, (in earlier
versions) Heikki Linnakangas, and (in earlier versions) Alexander Lakhin.
Discussion: https://postgr.es/m/CAMp+ueZQz3yDk7qg42hk6-9gxniYbp-=bG2mgqecErqR5gGGOA@mail.gmail.com

commit b49013f2e8 (1 year ago)
Allow _h_indexbuild() to be interrupted.

When we are building a hash index that is large enough to need
pre-sorting (larger than either maintenance_work_mem or NBuffers), the
initial sorting phase is interruptible, but the insertion phase wasn't.
Add the missing CHECK_FOR_INTERRUPTS().

Per bug #18616 from Alexander Lakhin. Back-patch to all supported
branches.

Pavel Borisov
Discussion: https://postgr.es/m/18616-acbb9e5caf41e964@postgresql.org
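
CHECK_FOR_INTERRUPTS() is PostgreSQL's real cancellation hook; the loop below is only a standalone stand-in showing where the missing check goes. The stub macro here just polls a flag, unlike the real one, which tests InterruptPending and calls ProcessInterrupts():

```c
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static volatile sig_atomic_t interrupt_pending = 0;

/* Stand-in for PostgreSQL's CHECK_FOR_INTERRUPTS() macro. */
#define CHECK_FOR_INTERRUPTS() \
    do { \
        if (interrupt_pending) \
        { \
            fprintf(stderr, "canceling index build\n"); \
            exit(1); \
        } \
    } while (0)

static void
insertion_phase(long ntuples)
{
    for (long i = 0; i < ntuples; i++)
    {
        /* The sort phase was already interruptible; the insertion loop
         * was not, so a huge hash index build could not be canceled.
         * The fix is one check per tuple inserted. */
        CHECK_FOR_INTERRUPTS();
        /* ... insert the tuple into the hash index ... */
    }
}

int
main(void)
{
    insertion_phase(1000000);
    printf("index build complete\n");
    return 0;
}
```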

commit 5f851b8bd0 (1 year ago)
Revert "Allow parallel workers to cope with a newly-created session user ID."

This reverts commit …

commit 97380d4803 (1 year ago)
Allow parallel workers to cope with a newly-created session user ID.

Parallel workers failed after a sequence like
    BEGIN; CREATE USER foo; SET SESSION AUTHORIZATION foo;
because check_session_authorization could not see the uncommitted
pg_authid row for "foo". This is because we ran RestoreGUCState() in a
separate transaction using an ordinary just-created snapshot. The same
disease afflicts any other GUC that requires catalog lookups and isn't
forgiving about the lookups failing.

To fix, postpone RestoreGUCState() into the worker's main transaction,
after we've set up a snapshot duplicating the leader's. This affects
check_transaction_isolation and check_transaction_deferrable, which
think they should only run during transaction start. Make them act like
check_transaction_read_only, which already knows it should silently
accept the value when InitializingParallelWorker.

Per bug #18545 from Andrey Rachitskiy. Back-patch to all supported
branches, because this has been wrong for a while.
Discussion: https://postgr.es/m/18545-feba138862f19aaa@postgresql.org

commit 45ce054c02 (1 year ago)
Ensure vacuum removes all visibly dead tuples older than OldestXmin

If vacuum fails to remove a tuple with xmax older than
VacuumCutoffs->OldestXmin and younger than GlobalVisState->maybe_needed,
it will loop infinitely in lazy_scan_prune(), which compares tuples'
visibility information to OldestXmin.

Starting in version 14, which uses GlobalVisState for visibility
testing during pruning, it is possible for GlobalVisState->maybe_needed
to precede OldestXmin if maybe_needed is forced to go backward while
vacuum is running. This can happen if a disconnected standby with a
running transaction older than VacuumCutoffs->OldestXmin reconnects to
the primary after vacuum initially calculates GlobalVisState and
OldestXmin.

Fix this by having vacuum always remove tuples older than OldestXmin
during pruning. This is okay because the standby won't replay the tuple
removal until the tuple is removable. Thus, the worst that can happen
is a recovery conflict.

Fixes BUG #17257
Back-patched in versions 14-17

Author: Melanie Plageman
Reviewed-by: Noah Misch, Peter Geoghegan, Robert Haas, Andres Freund,
and Heikki Linnakangas
Discussion: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com
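
A hedged sketch of the pruning rule this fix describes, with XIDs simplified to plain monotonically increasing integers (real XID comparisons are considerably more involved):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* XIDs simplified to monotonically increasing 64-bit values. */
static bool
tuple_removable(uint64_t xmax, uint64_t oldest_xmin, uint64_t maybe_needed)
{
    if (xmax < maybe_needed)
        return true;            /* dead to every snapshot: the common case */

    /*
     * The fix: a horizon that retreated below OldestXmin must not make
     * the tuple "recently dead" again, or the pruning loop would see a
     * tuple it can neither remove nor account for, and spin forever.
     */
    return xmax < oldest_xmin;
}

int
main(void)
{
    /* maybe_needed (90) retreated below oldest_xmin (100); xmax = 95. */
    printf("removable = %d\n", (int) tuple_removable(95, 100, 90));
    return 0;
}
```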

commit 9dbf8ab48c (2 years ago)
Fix MVCC bug with prepared xact with subxacts on standby

We did not recover the subtransaction IDs of prepared transactions when
starting a hot standby from a shutdown checkpoint. As a result, such
subtransactions were considered as aborted, rather than in-progress.
That would lead to hint bits being set incorrectly, and the
subtransactions suddenly becoming visible to old snapshots when the
prepared transaction was committed.

To fix, update pg_subtrans with prepared transactions' subxids when
starting hot standby from a shutdown checkpoint. The snapshots taken
from that state need to be marked as "suboverflowed", so that we also
check pg_subtrans.

Backport to all supported versions.

Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi

commit 4c8e00ae9a (2 years ago)
Fix bugs in MultiXact truncation

1. TruncateMultiXact() performs the SLRU truncations in a critical
section. Deleting the SLRU segments calls ForwardSyncRequest(), which
will try to compact the request queue if it's full
(CompactCheckpointerRequestQueue()). That in turn allocates memory,
which is not allowed in a critical section. Backtrace:

    TRAP: failed Assert("CritSectionCount == 0 || (context)->allowInCritSection"), File: "../src/backend/utils/mmgr/mcxt.c", Line: 1353, PID: 920981
    postgres: autovacuum worker template0(ExceptionalCondition+0x6e)[0x560a501e866e]
    postgres: autovacuum worker template0(+0x5dce3d)[0x560a50217e3d]
    postgres: autovacuum worker template0(ForwardSyncRequest+0x8e)[0x560a4ffec95e]
    postgres: autovacuum worker template0(RegisterSyncRequest+0x2b)[0x560a50091eeb]
    postgres: autovacuum worker template0(+0x187b0a)[0x560a4fdc2b0a]
    postgres: autovacuum worker template0(SlruDeleteSegment+0x101)[0x560a4fdc2ab1]
    postgres: autovacuum worker template0(TruncateMultiXact+0x2fb)[0x560a4fdbde1b]
    postgres: autovacuum worker template0(vac_update_datfrozenxid+0x4b3)[0x560a4febd2f3]
    postgres: autovacuum worker template0(+0x3adf66)[0x560a4ffe8f66]
    postgres: autovacuum worker template0(AutoVacWorkerMain+0x3ed)[0x560a4ffe7c2d]
    postgres: autovacuum worker template0(+0x3b1ead)[0x560a4ffecead]
    postgres: autovacuum worker template0(+0x3b620e)[0x560a4fff120e]
    postgres: autovacuum worker template0(+0x3b3fbb)[0x560a4ffeefbb]
    postgres: autovacuum worker template0(+0x2f724e)[0x560a4ff3224e]
    /lib/x86_64-linux-gnu/libc.so.6(+0x27c8a)[0x7f62cc642c8a]
    /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85)[0x7f62cc642d45]
    postgres: autovacuum worker template0(_start+0x21)[0x560a4fd16f31]

To fix, bail out in CompactCheckpointerRequestQueue() without doing
anything if it's called in a critical section. That covers the above
call path, as well as any other similar cases where RegisterSyncRequest
might be called in a critical section.

2. After fixing that, another problem became apparent: the autovacuum
process doing that truncation can deadlock with the checkpointer
process. TruncateMultiXact() sets "MyProc->delayChkptFlags |=
DELAY_CHKPT_START". If the sync request queue is full and cannot be
compacted, the process will repeatedly sleep and retry until there is
room in the queue. However, if the checkpointer is trying to start a
checkpoint at the same time, and is waiting for the DELAY_CHKPT_START
processes to finish, the queue will never shrink.

More concretely, the autovacuum process is stuck here:

    #0 0x00007fc934926dc3 in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
    #1 0x000056220b24348b in WaitEventSetWaitBlock (set=0x56220c2e4b50, occurred_events=0x7ffe7856d040, nevents=1, cur_timeout=<optimized out>) at ../src/backend/storage/ipc/latch.c:1570
    #2 WaitEventSetWait (set=0x56220c2e4b50, timeout=timeout@entry=10, occurred_events=<optimized out>, occurred_events@entry=0x7ffe7856d040, nevents=nevents@entry=1,
       wait_event_info=wait_event_info@entry=150994949) at ../src/backend/storage/ipc/latch.c:1516
    #3 0x000056220b243224 in WaitLatch (latch=<optimized out>, latch@entry=0x0, wakeEvents=wakeEvents@entry=40, timeout=timeout@entry=10, wait_event_info=wait_event_info@entry=150994949)
       at ../src/backend/storage/ipc/latch.c:538
    #4 0x000056220b26cf46 in RegisterSyncRequest (ftag=ftag@entry=0x7ffe7856d0a0, type=type@entry=SYNC_FORGET_REQUEST, retryOnError=true) at ../src/backend/storage/sync/sync.c:614
    #5 0x000056220af9db0a in SlruInternalDeleteSegment (ctl=ctl@entry=0x56220b7beb60 <MultiXactMemberCtlData>, segno=segno@entry=11350) at ../src/backend/access/transam/slru.c:1495
    #6 0x000056220af9dab1 in SlruDeleteSegment (ctl=ctl@entry=0x56220b7beb60 <MultiXactMemberCtlData>, segno=segno@entry=11350) at ../src/backend/access/transam/slru.c:1566
    #7 0x000056220af98e1b in PerformMembersTruncation (oldestOffset=<optimized out>, newOldestOffset=<optimized out>) at ../src/backend/access/transam/multixact.c:3006
    #8 TruncateMultiXact (newOldestMulti=newOldestMulti@entry=3221225472, newOldestMultiDB=newOldestMultiDB@entry=4) at ../src/backend/access/transam/multixact.c:3201
    #9 0x000056220b098303 in vac_truncate_clog (frozenXID=749, minMulti=<optimized out>, lastSaneFrozenXid=749, lastSaneMinMulti=3221225472) at ../src/backend/commands/vacuum.c:1917
    #10 vac_update_datfrozenxid () at ../src/backend/commands/vacuum.c:1760
    #11 0x000056220b1c3f76 in do_autovacuum () at ../src/backend/postmaster/autovacuum.c:2550
    #12 0x000056220b1c2c3d in AutoVacWorkerMain (startup_data=<optimized out>, startup_data_len=<optimized out>) at ../src/backend/postmaster/autovacuum.c:1569

and the checkpointer is stuck here:

    #0 0x00007fc9348ebf93 in clock_nanosleep () from /lib/x86_64-linux-gnu/libc.so.6
    #1 0x00007fc9348fe353 in nanosleep () from /lib/x86_64-linux-gnu/libc.so.6
    #2 0x000056220b40ecb4 in pg_usleep (microsec=microsec@entry=10000) at ../src/port/pgsleep.c:50
    #3 0x000056220afb43c3 in CreateCheckPoint (flags=flags@entry=108) at ../src/backend/access/transam/xlog.c:7098
    #4 0x000056220b1c6e86 in CheckpointerMain (startup_data=<optimized out>, startup_data_len=<optimized out>) at ../src/backend/postmaster/checkpointer.c:464

To fix, add AbsorbSyncRequests() to the loops where the checkpointer
waits for DELAY_CHKPT_START or DELAY_CHKPT_COMPLETE operations to
finish.

Backpatch to v14. Before that, SLRU deletion didn't call
RegisterSyncRequest, which avoided this failure. I'm not sure if there
are other similar scenarios on older versions, but we haven't had any
such reports.

Discussion: https://www.postgresql.org/message-id/ccc66933-31c1-4f6a-bf4b-45fef0d4f22e@iki.fi
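
A standalone sketch of fix #1's guard; CritSectionCount mirrors the PostgreSQL global of that name, but the queue logic is illustrative:

```c
#include <stdbool.h>
#include <stdio.h>

static volatile int CritSectionCount = 0;

static bool
compact_request_queue(void)
{
    /*
     * Compaction allocates memory, which is forbidden in a critical
     * section; just report "no room freed" and let the caller cope.
     */
    if (CritSectionCount > 0)
        return false;

    /* ... scan the queue and drop duplicate requests ... */
    return true;
}

int
main(void)
{
    CritSectionCount++;
    printf("compacted: %s\n", compact_request_queue() ? "yes" : "no");
    CritSectionCount--;
    printf("compacted: %s\n", compact_request_queue() ? "yes" : "no");
    return 0;
}
```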

commit e4a55378f7 (2 years ago)
Fix insertion of SP-GiST REDIRECT tuples during REINDEX CONCURRENTLY.

Reconstruction of an SP-GiST index by REINDEX CONCURRENTLY may insert
some REDIRECT tuples. This will typically happen in a transaction that
lacks an XID, which leads either to assertion failure in
spgFormDeadTuple or to insertion of a REDIRECT tuple with zero xid. The
latter's not good either, since eventually VACUUM will apply
GlobalVisTestIsRemovableXid() to the zero xid, resulting in either an
assertion failure or a garbage answer.

In practice, since REINDEX CONCURRENTLY locks out index scans till it's
done, it doesn't matter whether it inserts REDIRECTs or PLACEHOLDERs;
and likewise it doesn't matter how soon VACUUM reduces such a REDIRECT
to a PLACEHOLDER. So in non-assert builds there's no observable problem
here, other than perhaps a little index bloat. But it's not behaving as
intended.

To fix, remove the failing Assert in spgFormDeadTuple, acknowledging
that we might sometimes insert a zero XID; and guard VACUUM's
GlobalVisTestIsRemovableXid() call with a test for valid XID, ensuring
that we'll reduce such a REDIRECT the first time VACUUM sees it.
(Versions before v14 use TransactionIdPrecedes here, which won't fail
on zero xid, so they really have no bug at all in non-assert builds.)

Another solution could be to not create REDIRECTs at all during REINDEX
CONCURRENTLY, making the relevant code paths treat that case like index
build (which likewise knows that no concurrent index scans can be
happening). That would allow restoring the Assert in spgFormDeadTuple,
but we'd still need the VACUUM change because redirection tuples with
zero xid may be out there already. But there doesn't seem to be a nice
way for spginsert() to tell that it's being called in REINDEX
CONCURRENTLY without some API changes, so we'll leave that as a
possible future improvement.

In HEAD, also rename the SpGistState.myXid field to redirectXid, which
seems less misleading (since it might not in fact be our transaction's
XID) and is certainly less uninformatively generic.

Per bug #18499 from Alexander Lakhin. Back-patch to all supported
branches.

Discussion: https://postgr.es/m/18499-8a519c280f956480@postgresql.org

commit d881f6fdf5 (2 years ago)
Clamp result of MultiXactMemberFreezeThreshold

The purpose of the function is to reduce the effective
autovacuum_multixact_freeze_max_age if the multixact members SLRU is
approaching wraparound, to make multixid freezing more aggressive. The
returned value should therefore never be greater than plain
autovacuum_multixact_freeze_max_age.

Reviewed-by: Robert Haas
Discussion: https://www.postgresql.org/message-id/85fb354c-f89f-4d47-b3a2-3cbd461c90a3@iki.fi
Backpatch-through: 12, all supported versions
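
A standalone sketch of the clamp; the scaling arithmetic is illustrative, and only the final clamp reflects the actual change:

```c
#include <stdint.h>
#include <stdio.h>

static int autovacuum_multixact_freeze_max_age = 400000000;

static int
member_freeze_threshold(uint32_t multixacts, double fraction)
{
    /* Freeze enough of the oldest multixacts to reclaim "fraction"
     * of the member space (illustrative arithmetic). */
    uint32_t victim = (uint32_t) (multixacts * fraction);
    int      result = (int) (multixacts - victim);

    /* The fix: never report an effective age above the configured GUC. */
    if (result > autovacuum_multixact_freeze_max_age)
        result = autovacuum_multixact_freeze_max_age;
    return result;
}

int
main(void)
{
    /* 600M live multixacts, tiny fraction: the unclamped result would
     * exceed the 400M GUC value. */
    printf("%d\n", member_freeze_threshold(600000000u, 0.01));
    return 0;
}
```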

commit ad23af83da (2 years ago)
Update nbits_set in brin_bloom_union

Properly update the number of bits set in the bitmap after merging the
filters in brin_bloom_union. This is mostly harmless, as the counter is
used only in the output function, which means pageinspect may show
incorrect information about the BRIN summary. The counter does not
affect correctness.

Discovered while adding a regression test comparing indexes built with
and without parallelism. The parallel index builds exercise the union
procedure when merging results from workers, which is otherwise very
hard to do in a test. Which is why this went unnoticed until now.

Backpatch through 14, where the BRIN bloom opclasses were introduced.

Backpatch-through: 14
Discussion: https://postgr.es/m/1df00a66-db5a-4e66-809a-99b386a06d86%40enterprisedb.com
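
The fix is easy to picture in isolation: after OR-ing the filters' words together, recount the set bits rather than keeping the pre-merge counter. A standalone sketch (the BloomSketch type is illustrative; __builtin_popcountll assumes GCC or Clang):

```c
#include <stdint.h>
#include <stdio.h>

#define FILTER_WORDS 4

typedef struct BloomSketch
{
    uint64_t words[FILTER_WORDS];
    int      nbits_set;    /* the counter the fix keeps up to date */
} BloomSketch;

static void
bloom_union(BloomSketch *a, const BloomSketch *b)
{
    int nbits = 0;

    for (int i = 0; i < FILTER_WORDS; i++)
    {
        a->words[i] |= b->words[i];
        nbits += __builtin_popcountll(a->words[i]);
    }
    a->nbits_set = nbits;    /* previously left at its pre-merge value */
}

int
main(void)
{
    BloomSketch a = {{1, 0, 0, 0}, 1};
    BloomSketch b = {{2, 4, 0, 0}, 2};

    bloom_union(&a, &b);
    printf("nbits_set = %d\n", a.nbits_set);    /* 3 */
    return 0;
}
```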

commit 09f09884c1 (2 years ago)
Promote assertion about !ReindexIsProcessingIndex to runtime error.

When this assertion was installed (in commit …

commit f120c08724 (2 years ago)
Fix locking when fixing an incomplete split of a GIN internal page

ginFinishSplit() expects the caller to hold an exclusive lock on the
buffer, but when finishing an earlier "leftover" incomplete split of an
internal page, the caller held a shared lock. That caused an assertion
failure in MarkBufferDirty(). Without assertions, it could lead to
corruption if two backends tried to complete the split at the same
time.

On master, add a test case using the new injection point facility.

Report and analysis by Fei Changhong. Backpatch the fix to all
supported versions.

Reviewed-by: Fei Changhong, Michael Paquier
Discussion: https://www.postgresql.org/message-id/tencent_A3CE810F59132D8E230475A5F0F7A08C8307@qq.com

commit b685b41cf0 (2 years ago)
Add try_index_open(), conditional variant of index_open()

try_index_open() is able to open an index if its relkind fits, except
that it returns NULL instead of generating an error if the relation
does not exist. This new routine will be used by an upcoming patch to
make REINDEX on partitioned relations more robust when an index in a
partition tree is dropped.

Extracted from a larger patch by the same author.

Author: Fei Changhong
Discussion: https://postgr.es/m/tencent_6A52106095ACDE55333E3AD33F304C0C3909@qq.com
Backpatch-through: 14

commit 5f6ec27a64 (2 years ago)
lwlock: Fix quadratic behavior with very long wait lists

Until now LWLockDequeueSelf() sequentially searched the list of waiters
to see if the current proc is still on the list of waiters, or has
already been removed. In extreme workloads, where the wait lists are
very long, this leads to quadratic behavior: #backends iterating over a
list #backends long. Additionally, the likelihood of needing to call
LWLockDequeueSelf() in the first place also increases with the
increased length of the wait queue, as it becomes more likely that a
lock is released while waiting for the wait list lock, which is held
for longer during lock release.

Due to the exponential back-off in perform_spin_delay() this is
surprisingly hard to detect. We should make that easier, e.g. by adding
a wait event around the pg_usleep() - but that's a separate patch.

The fix is simple: track whether a proc is currently waiting in the
wait list or already removed but waiting to be woken up in
PGPROC->lwWaiting.

In some workloads with a lot of clients contending for a small number
of lwlocks (e.g. WALWriteLock), the fix can substantially increase
throughput.

This has been originally fixed for 16~ with …
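
The LW_WS_* states below match the tri-state that PGPROC->lwWaiting became; the surrounding structure is a standalone sketch, not the real lwlock.c:

```c
#include <stdio.h>

typedef enum LWLockWaitState
{
    LW_WS_NOT_WAITING,      /* not on the wait list */
    LW_WS_WAITING,          /* on the wait list */
    LW_WS_PENDING_WAKEUP    /* removed from list, wakeup not yet delivered */
} LWLockWaitState;

typedef struct Proc
{
    LWLockWaitState lwWaiting;
} Proc;

static void
dequeue_self(Proc *me)
{
    /* O(1): inspect our own state instead of walking the wait list,
     * which is what made the old code quadratic under contention. */
    if (me->lwWaiting == LW_WS_WAITING)
    {
        /* ... unlink ourselves from the wait list ... */
        me->lwWaiting = LW_WS_NOT_WAITING;
    }
}

int
main(void)
{
    Proc me = { LW_WS_WAITING };

    dequeue_self(&me);
    printf("state = %d\n", (int) me.lwWaiting);
    return 0;
}
```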

commit 375f441bd1 (2 years ago)
Avoid trying to fetch metapage of an SPGist partitioned index.

This is necessary when spgcanreturn() is invoked on a partitioned
index, and the failure might be reachable in other scenarios as well.
The rest of what spgGetCache() does is perfectly sensible for a
partitioned index, so we should allow it to go through.

I think the main takeaway from this is that we lack sufficient test
coverage for non-btree partitioned indexes. Therefore, I added simple
test cases for brin and gin as well as spgist (hash and gist AMs were
covered already in indexing.sql).

Per bug #18256 from Alexander Lakhin. Although the known test case only
fails since v16 (…

commit 0cfd3ddfe0 (2 years ago)
Prevent tuples to be marked as dead in subtransactions on standbys

Dead tuples are ignored and are not marked as dead during recovery, as
it can lead to MVCC issues on a standby because its xmin may not match
with the primary. This information is tracked by a field called
"xactStartedInRecovery" in the transaction state data, switched on when
starting a transaction in recovery.

Unfortunately, this information was not correctly tracked when starting
a subtransaction, because the transaction state used for the
subtransaction did not update "xactStartedInRecovery" based on the
state of its parent. This would cause index scans done in
subtransactions to return inconsistent data, depending on how the xmin
of the primary and/or the standby evolved.

This is broken since the introduction of hot standby in …
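
A standalone sketch of the omission: the child transaction state must inherit the parent's started-in-recovery flag. The field names follow the commit message; the structs are illustrative, not the real xact.c state:

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct TransactionStateData
{
    bool startedInRecovery;
    struct TransactionStateData *parent;
} TransactionStateData;

static void
push_subtransaction(TransactionStateData *child, TransactionStateData *parent)
{
    child->parent = parent;
    /* The fix: copy the flag down. The buggy version left it false, so
     * scans in the subtransaction behaved as if running on the primary. */
    child->startedInRecovery = parent->startedInRecovery;
}

int
main(void)
{
    TransactionStateData top = { .startedInRecovery = true, .parent = NULL };
    TransactionStateData sub = { 0 };

    push_subtransaction(&sub, &top);
    printf("sub startedInRecovery = %d\n", (int) sub.startedInRecovery);
    return 0;
}
```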

commit c49b6cab1e (2 years ago)
Fix compilation on Windows with WAL_DEBUG

This has been broken since …

commit 59c62a21f2 (2 years ago)
Fix assertions with RI triggers in heap_update and heap_delete.

If the tuple being updated is not visible to the crosscheck snapshot,
we return TM_Updated, but the assertions would not hold in that case.
Move them to before the cross-check.

Fixes bug #17893. Backpatch to all supported versions.

Author: Alexander Lakhin
Backpatch-through: 12
Discussion: https://www.postgresql.org/message-id/17893-35847009eec517b5%40postgresql.org

commit d900e74e05 (2 years ago)
Don't release index root page pin in ginFindParents().

It's clearly stated in the comments that ginFindParents() must keep the
pin on the index's root page that's associated with the topmost
GinBtreeStack item. However, the code path for the case that the
desired downlink has been pushed down to the next index level ignored
this proviso, and would release the pin anyway if we were still
examining the root level. That led to an assertion failure or "buffer
NNNN is not owned by resource owner" error later, when we try to
release the pin again at the end of the insertion.

This is quite hard to reproduce, since it can only happen if an index
root page split occurs concurrently with our own insertion. Thanks to
Jeff Janes for finding a test case that triggers it often enough to
allow investigation.

This has been there since the beginning of GIN, so back-patch to all
supported branches.

Discussion: https://postgr.es/m/CAMkU=1yCAKtv86dMrD__Ja-7KzjE=uMeKX8y__cx5W-OEWy2ow@mail.gmail.com

commit 33185d3042 (2 years ago)
Ensure we use the correct spelling of "ensure"

We seem to have accidentally used "insure" in a few places. Correct
that.

Author: Peter Smith
Discussion: https://postgr.es/m/CAHut+Pv0biqrhA3pMhu40aDsj343mTsD75khKnHsLqR8P04f=Q@mail.gmail.com
Backpatch-through: 12, oldest supported version

commit 59fc39c0d5 (2 years ago)
Enlarge assertion in bloom_init() for false_positive_rate

false_positive_rate is a parameter that can be set with the bloom
opclass in BRIN, and setting it to a value of exactly 0.25 would
trigger an assertion in the first INSERT done on the index with the
value set.

The assertion changed here relied on
BLOOM_{MIN|MAX}_FALSE_POSITIVE_RATE, which are somewhat arbitrary
values, and specifying an out-of-range value would also trigger a
failure when defining such an index. So, as-is, the assertion was just
doubling on the min-max check of the reloption. This is now enlarged to
check that it is a correct percentage value instead, based on a
suggestion by Tom Lane.

Author: Alexander Lakhin
Reviewed-by: Tom Lane, Shihao Zhong
Discussion: https://postgr.es/m/17969-a6c54de48026d694@postgresql.org
Backpatch-through: 14
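
A standalone sketch of the assertion change (constants and function shape are illustrative):

```c
#include <assert.h>
#include <stdio.h>

static void
bloom_init(int ndistinct, double false_positive_rate)
{
    assert(ndistinct > 0);
    /* Before: asserted MIN_RATE < rate < MAX_RATE, so a boundary value
     * such as exactly 0.25 could trip the strict inequality even though
     * the reloption machinery had accepted it. Now: just require a
     * valid fraction. */
    assert(false_positive_rate > 0.0 && false_positive_rate < 1.0);
    /* ... size the filter from ndistinct and the rate ... */
}

int
main(void)
{
    bloom_init(1000, 0.25);    /* previously an assertion failure */
    puts("ok");
    return 0;
}
```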

commit 57354242c0 (2 years ago)
doc: 1-byte varlena headers can be used for user PLAIN storage

This also updates some C comments.

Reported-by: suchithjn22@gmail.com
Discussion: https://postgr.es/m/167336599095.2667301.15497893107226841625@wrigleys.postgresql.org
Author: Laurenz Albe (doc patch)
Backpatch-through: 11

commit 0a7b183fdc (2 years ago)
Diagnose !indisvalid in more SQL functions.

pgstatindex failed with ERRCODE_DATA_CORRUPTED, of the "can't-happen"
class XX. The other functions succeeded on an empty index; they might
have malfunctioned if the failed index build left torn I/O or other
complex state.

Report an ERROR in statistics functions pgstatindex, pgstatginindex,
pgstathashindex, and pgstattuple. Report DEBUG1 and skip all index I/O
in maintenance functions brin_desummarize_range,
brin_summarize_new_values, brin_summarize_range, and
gin_clean_pending_list.

Back-patch to v11 (all supported versions).

Discussion: https://postgr.es/m/20231001195309.a3@google.com

commit 0fa73c5cd0 (2 years ago)
Fix minmax-multi distance for extreme interval values

When calculating the distance for interval values, the code mostly
mimicked interval_mi, i.e. it built a new interval value for the
difference. That however does not work for sufficiently distant
interval values, when the difference overflows the interval range.

Instead, we can calculate the distance directly, without constructing
the intermediate (and unnecessary) interval value.

Backpatch to 14, where minmax-multi indexes were introduced.

Reported-by: Dean Rasheed
Reviewed-by: Ashutosh Bapat, Dean Rasheed
Backpatch-through: 14
Discussion: https://postgr.es/m/eef0ea8c-4aaa-8d0d-027f-58b1f35dd170@enterprisedb.com

commit 52c934cc1f (2 years ago)
Fix minmax-multi on infinite date/timestamp values

Make sure that infinite values in date/timestamp columns are treated as
if in infinite distance. Infinite values should not be merged with
other values, leaving them as outliers. The code however returned
distance 0 in this case, so that infinite values were merged first.
While this does not break the index (i.e. it still produces correct
query results), it may make it much less efficient.

We don't need explicit handling of infinite date/timestamp values when
calculating distances, because those values are represented as extreme
but regular values (e.g. INT64_MIN/MAX for the timestamp type). We
don't need an exact distance, just a value that is much larger than the
distances between regular values. With the added cast to double values,
we can simply subtract the values.

The regression test queries a value in the "gap" and checks the range
was properly eliminated by the BRIN index.

This only affects minmax-multi indexes on timestamp/date columns with
infinite values, which is not very common in practice. The affected
indexes may need to be rebuilt.

Backpatch to 14, where minmax-multi indexes were introduced.

Reported-by: Ashutosh Bapat
Reviewed-by: Ashutosh Bapat, Dean Rasheed
Backpatch-through: 14
Discussion: https://postgr.es/m/eef0ea8c-4aaa-8d0d-027f-58b1f35dd170@enterprisedb.com

commit d1740e169d (2 years ago)
Fix calculation in brin_minmax_multi_distance_date

When calculating the distance between date values, make sure to
subtract them in the right order, i.e. (larger - smaller).

The distance is used to determine which values to merge, and is
expected to be a positive value. The code unfortunately did the
subtraction in the opposite order, i.e. (smaller - larger), thus
producing negative values and merging the most distant values first.
The resulting index is correct (i.e. produces correct results), but may
be significantly less efficient. This affects all minmax-multi indexes
on date columns.

Backpatch to 14, where minmax-multi indexes were introduced.

Reported-by: Ashutosh Bapat
Reviewed-by: Ashutosh Bapat, Dean Rasheed
Backpatch-through: 14
Discussion: https://postgr.es/m/eef0ea8c-4aaa-8d0d-027f-58b1f35dd170@enterprisedb.com
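
The arithmetic of the fix in isolation, with dates reduced to day counts (illustrative, not the actual brin_minmax_multi code; the inputs arrive sorted, so val2 >= val1):

```c
#include <stdint.h>
#include <stdio.h>

/* dates as days since an epoch; the smaller value sorts first */
static double
date_distance(int32_t dateVal1, int32_t dateVal2)
{
    /* The buggy version returned dateVal1 - dateVal2, i.e. a negative
     * number, so the most distant pairs ranked as the closest ones. */
    return (double) dateVal2 - (double) dateVal1;
}

int
main(void)
{
    printf("%.0f\n", date_distance(100, 500));    /* 400, not -400 */
    return 0;
}
```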

commit 90c4da6d43 (2 years ago)
Fix overflow when calculating timestamp distance in BRIN

When calculating distances for timestamp values for BRIN minmax-multi
indexes, we need to be careful about overflows for extreme values. If
the value overflows into a negative value, the index may be
inefficient.

The new regression test checks this for the timestamp type by adding a
table with enough values to force range compaction/merging. The values
are close to min/max, which means a risk of overflow.

Fixed by converting the int64 values to double first, before
calculating the distance. This prevents the overflow. We may lose some
precision, of course, but that's good enough. In the worst case we
build a slightly less efficient index, but for large distances this
won't matter.

This only affects minmax-multi indexes on timestamp columns, with
ranges containing values sufficiently distant to cause an overflow.
That seems like a fairly rare case in practice.

Backpatch to 14, where minmax-multi indexes were introduced.

Reported-by: Ashutosh Bapat
Reviewed-by: Ashutosh Bapat, Dean Rasheed
Backpatch-through: 14
Discussion: https://postgr.es/m/eef0ea8c-4aaa-8d0d-027f-58b1f35dd170@enterprisedb.com
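
A standalone sketch of the overflow and its fix: widening to double before subtracting cannot overflow, at the cost of some precision that doesn't matter when the distances are only used for ranking:

```c
#include <stdint.h>
#include <stdio.h>

static double
timestamp_distance(int64_t t1, int64_t t2)
{
    /* Buggy: (t2 - t1) computed in int64 wraps to a negative value for
     * extreme inputs. Fixed: widen first, then subtract. */
    return (double) t2 - (double) t1;
}

int
main(void)
{
    int64_t lo = INT64_MIN + 10;
    int64_t hi = INT64_MAX - 10;

    printf("distance = %g\n", timestamp_distance(lo, hi)); /* ~1.8e19, positive */
    return 0;
}
```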

commit 6615bb95af (2 years ago)
Fix bug in GenericXLogFinish().

Mark the buffers dirty before writing WAL.

Discussion: https://postgr.es/m/25104133-7df8-cae3-b9a2-1c0aaa1c094a@iki.fi
Reviewed-by: Heikki Linnakangas
Backpatch-through: 11

commit baeb8542c1 (2 years ago)
Fail hard on out-of-memory failures in xlogreader.c

This commit changes the WAL reader routines so that a FATAL for the
backend, or exit(FAILURE) for the frontend, is triggered if an
allocation for a WAL record decode fails in xlogreader.c, rather than
treating this case as bogus data, which would be equivalent to the end
of WAL. The key is to avoid palloc_extended(MCXT_ALLOC_NO_OOM) in
xlogreader.c, relying on plain palloc() calls.

The previous behavior could make WAL replay finish earlier than it
should. For example, crash recovery finishing earlier may corrupt
clusters because not all the WAL available locally was replayed to
ensure a consistent state. Out-of-memory failures would show up
randomly depending on the memory pressure on the host, but one simple
case would be to generate a large record, then replay this record after
downsizing a host, as Ethan Mertz originally reported.

This relies on …

commit 41486c4aae (2 years ago)
Fix btmarkpos/btrestrpos array key wraparound bug.

nbtree's mark/restore processing failed to correctly handle an edge
case involving array key advancement and related search-type scan key
state. Scans with ScalarArrayOpExpr quals requiring mark/restore
processing (for a merge join) could incorrectly conclude that an
affected array/scan key must not have advanced during the time between
marking and restoring the scan's position.

As a result of all this, array key handling within btrestrpos could
skip a required call to _bt_preprocess_keys(). This confusion allowed
later primitive index scans to overlook tuples matching the true
current array keys. The scan's search-type scan keys would still have
spurious values corresponding to the final array element(s) -- not
values matching the first/now-current array element(s).

To fix, remember that "array key wraparound" has taken place during the
ongoing btrescan in a flag variable stored in the scan's state, and use
that information at the point where btrestrpos decides if another call
to _bt_preprocess_keys is required.

Oversight in commit …

commit c33d42d2e9 (2 years ago)
Fix typo in src/backend/access/transam/README.

commit 747cef5a58 (2 years ago)
Fix another bug in parent page splitting during GiST index build.

Yet another bug in the ilk of commits …

commit 3d413c5a76 (2 years ago)
Fix edge-case for xl_tot_len broken by bae868ca.

commit 3ce3b53d76 (2 years ago)
Don't trust unvalidated xl_tot_len.

xl_tot_len comes first in a WAL record. Usually we don't trust it to be
the true length until we've validated the record header. If the record
header was split across two pages, previously we wouldn't do the
validation until after we'd already tried to allocate enough memory to
hold the record, which was bad because it might actually be garbage
bytes from a recycled WAL file, so we could try to allocate a lot of
memory. Release 15 made it worse.

Since …
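
The hardening pattern, sketched standalone: validate a length prefix for plausibility before allocating for it. The cap and function are illustrative, not the actual xlogreader logic:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_SANE_RECORD_LEN (1024 * 1024 * 1024)    /* illustrative cap */

static uint8_t *
read_record(const uint8_t *page, size_t page_avail)
{
    uint32_t tot_len;

    if (page_avail < sizeof(uint32_t))
        return NULL;
    memcpy(&tot_len, page, sizeof(uint32_t));

    /* Don't trust the unvalidated length: garbage bytes from a recycled
     * file could claim gigabytes. Sanity-check before allocating. */
    if (tot_len < sizeof(uint32_t) || tot_len > MAX_SANE_RECORD_LEN)
        return NULL;

    return malloc(tot_len);
}

int
main(void)
{
    uint8_t page[8192] = {0};
    uint32_t bogus = 0xDEADBEEF;    /* ~3.7 GB: rejected before malloc */

    memcpy(page, &bogus, sizeof(bogus));
    printf("allocated: %s\n", read_record(page, sizeof(page)) ? "yes" : "no");
    return 0;
}
```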

commit 10323f140f (2 years ago)
Fix COMMIT/ROLLBACK AND CHAIN in the presence of subtransactions.

In older branches, COMMIT/ROLLBACK AND CHAIN failed to propagate the
current transaction's properties to the new transaction if there was
any open subtransaction (unreleased savepoint). Instead, some previous
transaction's properties would be restored. This is because the "if
(s->chain)" check in CommitTransactionCommand examined the wrong
instance of the "chain" flag and falsely concluded that it didn't need
to save transaction properties.

Our regression tests would have noticed this, except they used
identical transaction properties for multiple tests in a row, so that
the faulty behavior was not distinguishable from correct behavior.

Commit …

commit 78df27e2ff (2 years ago)
Fix GiST README's explanation of the NSN cross-check.

The text got the condition backwards: it's "NSN > LSN", not
"NSN < LSN". While we're at it, expand it a little for clarity.

Reviewed-by: Daniel Gustafsson
Discussion: https://www.postgresql.org/message-id/4cb46e18-e688-524a-0f73-b1f03ed5d6ee@iki.fi

commit 2f13e8d9ec (2 years ago)
Make recovery report error message when invalid page header is found.

Commit …

commit 7f4515a58e (2 years ago)
Cache by-reference missing values in a long lived context

Attribute missing values might be needed past the lifetime of the tuple
descriptors from which they are extracted. To avoid possibly using
pointers for by-reference values which might thus be left dangling, we
cache a datumCopy'd version of the datum in the TopMemoryContext. Since
we first search for the value, this only needs to be done once per
session for any such value.

Original complaint from Tom Lane, idea for mitigation by Andrew
Dunstan, tweaked by Tom Lane.

Backpatch to version 11 where missing values were introduced.

Discussion: https://postgr.es/m/1306569.1687978174@sss.pgh.pa.us
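
The caching pattern, sketched standalone with strdup into static storage standing in for datumCopy into TopMemoryContext (names are illustrative):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CACHE_SIZE 16

static const char *cache[CACHE_SIZE];
static int ncached = 0;

static const char *
get_missing_value(const char *from_tupdesc)
{
    /* First look for a previously cached copy. */
    for (int i = 0; i < ncached; i++)
        if (strcmp(cache[i], from_tupdesc) == 0)
            return cache[i];

    /* Not found: copy once into storage that outlives the tuple
     * descriptor, so no caller is left holding a dangling pointer. */
    if (ncached < CACHE_SIZE)
        return cache[ncached++] = strdup(from_tupdesc);
    return strdup(from_tupdesc);    /* cache full; still return a safe copy */
}

int
main(void)
{
    char tupdesc_value[] = "default";
    const char *v = get_missing_value(tupdesc_value);

    memset(tupdesc_value, 0, sizeof(tupdesc_value));    /* descriptor gone */
    printf("%s\n", v);                                  /* copy still valid */
    return 0;
}
```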

commit 763d26205a (3 years ago)
Fix indentation in twophase.c

This has been missed in …

commit 442749100d (3 years ago)
Fix recovery of 2PC transaction during crash recovery

A crash in the middle of a checkpoint with some two-phase state data
already flushed to disk by this checkpoint could cause a follow-up
crash recovery to recover the same transaction twice, once from what
has been found in pg_twophase/ at the beginning of recovery and a
second time when replaying its corresponding record.

This would lead to FATAL failures in the startup process during
recovery, where the same transaction would have a state recovered twice
instead of once:

    LOG: recovering prepared transaction 731 from shared memory
    LOG: recovering prepared transaction 731 from shared memory
    FATAL: lock ExclusiveLock on object 731/0/0 is already held

This issue is fixed by skipping the addition of any 2PC state coming
from a record whose equivalent 2PC state file has already been loaded
in TwoPhaseState at the beginning of recovery by restoreTwoPhaseData(),
which is OK as long as the system has not reached a consistent state.

The timing needed to get a messed-up recovery is very racy, and this
would very unlikely happen in practice. The thread that reported the
issue has demonstrated the bug using injection points to force a PANIC
in the middle of a checkpoint.

Issue introduced in …