/*-------------------------------------------------------------------------
 *
 * ipci.c
 *    POSTGRES inter-process communication initialization code.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/storage/ipc/ipci.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/nbtree.h"
#include "access/subtrans.h"
#include "access/syncscan.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xlogprefetcher.h"
#include "access/xlogrecovery.h"
#include "commands/async.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
#include "postmaster/walsummarizer.h"
#include "replication/logicallauncher.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/dsm.h"
#include "storage/dsm_registry.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/sinvaladt.h"
#include "storage/spin.h"
#include "utils/guc.h"
#include "utils/snapmgr.h"
#include "utils/wait_event.h"

/* GUCs */
int         shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;

shmem_startup_hook_type shmem_startup_hook = NULL;

static Size total_addin_request = 0;

static void CreateOrAttachShmemStructs(void);

/*
 * RequestAddinShmemSpace
 *      Request that extra shmem space be allocated for use by
 *      a loadable module.
 *
 * This may only be called via the shmem_request_hook of a library that is
 * loaded into the postmaster via shared_preload_libraries.  Calls from
 * elsewhere will fail.
 */
void
RequestAddinShmemSpace(Size size)
{
    if (!process_shmem_requests_in_progress)
        elog(FATAL, "cannot request additional shared memory outside shmem_request_hook");
    total_addin_request = add_size(total_addin_request, size);
}
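
/*
 * Usage sketch (illustrative only, not part of this file's logic): a loadable
 * module normally reserves its shared memory from a shmem_request_hook
 * installed in _PG_init().  The names my_shmem_request and MySharedState
 * below are hypothetical placeholders.
 *
 *      static shmem_request_hook_type prev_shmem_request_hook = NULL;
 *
 *      static void
 *      my_shmem_request(void)
 *      {
 *          if (prev_shmem_request_hook)
 *              prev_shmem_request_hook();
 *          RequestAddinShmemSpace(MAXALIGN(sizeof(MySharedState)));
 *      }
 *
 *      void
 *      _PG_init(void)
 *      {
 *          prev_shmem_request_hook = shmem_request_hook;
 *          shmem_request_hook = my_shmem_request;
 *      }
 */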

/*
 * CalculateShmemSize
 *      Calculates the amount of shared memory and number of semaphores needed.
 *
 * If num_semaphores is not NULL, it will be set to the number of semaphores
 * required.
 */
Size
CalculateShmemSize(int *num_semaphores)
{
    Size        size;
    int         numSemas;

    /* Compute number of semaphores we'll need */
    numSemas = ProcGlobalSemas();
    numSemas += SpinlockSemas();

    /* Return the number of semaphores if requested by the caller */
    if (num_semaphores)
        *num_semaphores = numSemas;

    /*
     * Size of the Postgres shared-memory block is estimated via moderately-
     * accurate estimates for the big hogs, plus 100K for the stuff that's too
     * small to bother with estimating.
     *
     * We take some care to ensure that the total size request doesn't
     * overflow size_t.  If this gets through, we don't need to be so careful
     * during the actual allocation phase.
     */
    size = 100000;
    size = add_size(size, PGSemaphoreShmemSize(numSemas));
    size = add_size(size, SpinlockSemaSize());
    size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
                                             sizeof(ShmemIndexEnt)));
    size = add_size(size, dsm_estimate_size());
    size = add_size(size, DSMRegistryShmemSize());
    size = add_size(size, BufferShmemSize());
    size = add_size(size, LockShmemSize());
    size = add_size(size, PredicateLockShmemSize());
    size = add_size(size, ProcGlobalShmemSize());
    size = add_size(size, XLogPrefetchShmemSize());
    size = add_size(size, VarsupShmemSize());
    size = add_size(size, XLOGShmemSize());
    size = add_size(size, XLogRecoveryShmemSize());
    size = add_size(size, CLOGShmemSize());
    size = add_size(size, CommitTsShmemSize());
    size = add_size(size, SUBTRANSShmemSize());
    size = add_size(size, TwoPhaseShmemSize());
    size = add_size(size, BackgroundWorkerShmemSize());
    size = add_size(size, MultiXactShmemSize());
    size = add_size(size, LWLockShmemSize());
    size = add_size(size, ProcArrayShmemSize());
    size = add_size(size, BackendStatusShmemSize());
    size = add_size(size, SInvalShmemSize());
    size = add_size(size, PMSignalShmemSize());
    size = add_size(size, ProcSignalShmemSize());
    size = add_size(size, CheckpointerShmemSize());
    size = add_size(size, AutoVacuumShmemSize());
    size = add_size(size, ReplicationSlotsShmemSize());
    size = add_size(size, ReplicationOriginShmemSize());
    size = add_size(size, WalSndShmemSize());
    size = add_size(size, WalRcvShmemSize());
    size = add_size(size, WalSummarizerShmemSize());
    size = add_size(size, PgArchShmemSize());
    size = add_size(size, ApplyLauncherShmemSize());
    size = add_size(size, BTreeShmemSize());
    size = add_size(size, SyncScanShmemSize());
    size = add_size(size, AsyncShmemSize());
    size = add_size(size, StatsShmemSize());
    size = add_size(size, WaitEventExtensionShmemSize());
#ifdef EXEC_BACKEND
    size = add_size(size, ShmemBackendArraySize());
#endif

    /* include additional requested shmem from preload libraries */
    size = add_size(size, total_addin_request);

    /* might as well round it off to a multiple of a typical page size */
    size = add_size(size, 8192 - (size % 8192));

    return size;
}

#ifdef EXEC_BACKEND
/*
 * AttachSharedMemoryStructs
 *      Initialize a postmaster child process's access to shared memory
 *      structures.
 *
 * In !EXEC_BACKEND mode, we inherit everything through the fork, and this
 * isn't needed.
 */
void
AttachSharedMemoryStructs(void)
{
    /* InitProcess must've been called already */
    Assert(MyProc != NULL);
    Assert(IsUnderPostmaster);

    CreateOrAttachShmemStructs();

    /*
     * Now give loadable modules a chance to set up their shmem allocations
     */
    if (shmem_startup_hook)
        shmem_startup_hook();
}
#endif

/*
 * CreateSharedMemoryAndSemaphores
 *      Creates and initializes shared memory and semaphores.
 */
void
CreateSharedMemoryAndSemaphores(void)
{
    PGShmemHeader *shim;
    PGShmemHeader *seghdr;
    Size        size;
    int         numSemas;

    Assert(!IsUnderPostmaster);

    /* Compute the size of the shared-memory block */
    size = CalculateShmemSize(&numSemas);
    elog(DEBUG3, "invoking IpcMemoryCreate(size=%zu)", size);

    /*
     * Create the shmem segment
     */
    seghdr = PGSharedMemoryCreate(size, &shim);

    /*
     * Make sure that huge pages are never reported as "unknown" while the
     * server is running.
     */
    Assert(strcmp("unknown",
                  GetConfigOption("huge_pages_status", false, false)) != 0);

    InitShmemAccess(seghdr);

    /*
     * Create semaphores
     */
    PGReserveSemaphores(numSemas);

    /*
     * If spinlocks are disabled, initialize emulation layer (which depends on
     * semaphores, so the order is important here).
     */
#ifndef HAVE_SPINLOCKS
    SpinlockSemaInit();
#endif

    /*
     * Set up shared memory allocation mechanism
     */
    InitShmemAllocation();

    /* Initialize subsystems */
    CreateOrAttachShmemStructs();

#ifdef EXEC_BACKEND

    /*
     * Alloc the win32 shared backend array
     */
    ShmemBackendArrayAllocation();
#endif

    /* Initialize dynamic shared memory facilities. */
    dsm_postmaster_startup(shim);

    /*
     * Now give loadable modules a chance to set up their shmem allocations
     */
    if (shmem_startup_hook)
        shmem_startup_hook();
}
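
/*
 * Usage sketch (illustrative only, not part of this file's logic): a module
 * that reserved space with RequestAddinShmemSpace() typically creates or
 * attaches to its structure from a shmem_startup_hook, roughly as below.
 * The names my_state, MySharedState, and "my shared state" are hypothetical
 * placeholders, and prev_shmem_startup_hook is assumed to have been saved
 * when the hook was installed in _PG_init().
 *
 *      static void
 *      my_shmem_startup(void)
 *      {
 *          bool        found;
 *
 *          if (prev_shmem_startup_hook)
 *              prev_shmem_startup_hook();
 *
 *          LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
 *          my_state = ShmemInitStruct("my shared state",
 *                                     sizeof(MySharedState), &found);
 *          if (!found)
 *              memset(my_state, 0, sizeof(MySharedState));
 *          LWLockRelease(AddinShmemInitLock);
 *      }
 */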

/*
 * Initialize various subsystems, setting up their data structures in
 * shared memory.
 *
 * This is called by the postmaster or by a standalone backend.
 * It is also called by a backend forked from the postmaster in the
 * EXEC_BACKEND case.  In the latter case, the shared memory segment
 * already exists and has been physically attached to, but we have to
 * initialize pointers in local memory that reference the shared structures,
 * because we didn't inherit the correct pointer values from the postmaster
 * as we do in the fork() scenario.  The easiest way to do that is to run
 * through the same code as before.  (Note that the called routines mostly
 * check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case.
 * This is a bit code-wasteful and could be cleaned up.)
 */
static void
CreateOrAttachShmemStructs(void)
{
    /*
     * Now initialize LWLocks, which do shared memory allocation and are
     * needed for InitShmemIndex.
     */
    CreateLWLocks();

    /*
     * Set up shmem.c index hashtable
     */
    InitShmemIndex();

    dsm_shmem_init();
    DSMRegistryShmemInit();

    /*
     * Set up xlog, clog, and buffers
     */
    VarsupShmemInit();
    XLOGShmemInit();
    XLogPrefetchShmemInit();
    XLogRecoveryShmemInit();
    CLOGShmemInit();
    CommitTsShmemInit();
    SUBTRANSShmemInit();
    MultiXactShmemInit();
    InitBufferPool();

    /*
     * Set up lock manager
     */
    InitLocks();

    /*
     * Set up predicate lock manager
     */
    InitPredicateLocks();

    /*
     * Set up process table
     */
    if (!IsUnderPostmaster)
        InitProcGlobal();
    CreateSharedProcArray();
    CreateSharedBackendStatus();
    TwoPhaseShmemInit();
    BackgroundWorkerShmemInit();

    /*
     * Set up shared-inval messaging
     */
    CreateSharedInvalidationState();

    /*
     * Set up interprocess signaling mechanisms
     */
    PMSignalShmemInit();
    ProcSignalShmemInit();
    CheckpointerShmemInit();
    AutoVacuumShmemInit();
    ReplicationSlotsShmemInit();
    ReplicationOriginShmemInit();
    WalSndShmemInit();
    WalRcvShmemInit();
    WalSummarizerShmemInit();
    PgArchShmemInit();
    ApplyLauncherShmemInit();

    /*
     * Set up other modules that need some shared memory space
     */
    BTreeShmemInit();
    SyncScanShmemInit();
    AsyncShmemInit();
    StatsShmemInit();
    WaitEventExtensionShmemInit();
}

/*
 * InitializeShmemGUCs
 *
 * This function initializes runtime-computed GUCs related to the amount of
 * shared memory required for the current configuration.
 */
void
InitializeShmemGUCs(void)
{
    char        buf[64];
    Size        size_b;
    Size        size_mb;
    Size        hp_size;

    /*
     * Calculate the shared memory size and round up to the nearest megabyte.
     */
    size_b = CalculateShmemSize(NULL);
    size_mb = add_size(size_b, (1024 * 1024) - 1) / (1024 * 1024);
    sprintf(buf, "%zu", size_mb);
    SetConfigOption("shared_memory_size", buf,
                    PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);

    /*
     * Calculate the number of huge pages required.
     */
    GetHugePageSize(&hp_size, NULL);
    if (hp_size != 0)
    {
        Size        hp_required;

        hp_required = add_size(size_b / hp_size, 1);
        sprintf(buf, "%zu", hp_required);
        SetConfigOption("shared_memory_size_in_huge_pages", buf,
                        PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
    }
}
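
/*
 * Worked example (hypothetical numbers): if CalculateShmemSize() returns
 * 152043520 bytes (145 MB) and the system huge page size is 2 MB, the code
 * above reports shared_memory_size = 145; 152043520 / 2097152 truncates to
 * 72, and with the extra page added, shared_memory_size_in_huge_pages = 73.
 */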