/*-------------------------------------------------------------------------
 *
 * postinit.c
 *	  postgres initialization utilities
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/init/postinit.c
 *
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
#include "catalog/pg_db_role_setting.h"
#include "catalog/pg_tablespace.h"
#include "libpq/auth.h"
#include "libpq/libpq-be.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/proc.h"
#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/pg_locale.h"
#include "utils/portal.h"
#include "utils/ps_status.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/timeout.h"
#include "utils/tqual.h"

static HeapTuple GetDatabaseTuple(const char *dbname);
static HeapTuple GetDatabaseTupleByOid(Oid dboid);
static void PerformAuthentication(Port *port);
static void CheckMyDatabase(const char *name, bool am_superuser);
static void InitCommunication(void);
static void ShutdownPostgres(int code, Datum arg);
static void StatementTimeoutHandler(void);
static void LockTimeoutHandler(void);
static bool ThereIsAtLeastOneRole(void);
static void process_startup_options(Port *port, bool am_superuser);
static void process_settings(Oid databaseid, Oid roleid);

/*** InitPostgres support ***/
/*
 * GetDatabaseTuple -- fetch the pg_database row for a database
 *
 * This is used during backend startup when we don't yet have any access to
 * system catalogs in general.  In the worst case, we can seqscan pg_database
 * using nothing but the hard-wired descriptor that relcache.c creates for
 * pg_database.  In more typical cases, relcache.c was able to load
 * descriptors for both pg_database and its indexes from the shared relcache
 * cache file, and so we can do an indexscan.  criticalSharedRelcachesBuilt
 * tells whether we got the cached descriptors.
 */
static HeapTuple
GetDatabaseTuple(const char *dbname)
{
    HeapTuple   tuple;
    Relation    relation;
    SysScanDesc scan;
    ScanKeyData key[1];

    /*
     * form a scan key
     */
    ScanKeyInit(&key[0],
                Anum_pg_database_datname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(dbname));

    /*
     * Open pg_database and fetch a tuple.  Force heap scan if we haven't yet
     * built the critical shared relcache entries (i.e., we're starting up
     * without a shared relcache cache file).
     */
    relation = heap_open(DatabaseRelationId, AccessShareLock);
    scan = systable_beginscan(relation, DatabaseNameIndexId,
                              criticalSharedRelcachesBuilt,
                              NULL,
                              1, key);

    tuple = systable_getnext(scan);

    /* Must copy tuple before releasing buffer */
    if (HeapTupleIsValid(tuple))
        tuple = heap_copytuple(tuple);

    /* all done */
    systable_endscan(scan);
    heap_close(relation, AccessShareLock);

    return tuple;
}

/*
 * GetDatabaseTupleByOid -- as above, but search by database OID
 */
static HeapTuple
GetDatabaseTupleByOid(Oid dboid)
{
    HeapTuple   tuple;
    Relation    relation;
    SysScanDesc scan;
    ScanKeyData key[1];

    /*
     * form a scan key
     */
    ScanKeyInit(&key[0],
                ObjectIdAttributeNumber,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(dboid));

    /*
     * Open pg_database and fetch a tuple.  Force heap scan if we haven't yet
     * built the critical shared relcache entries (i.e., we're starting up
     * without a shared relcache cache file).
     */
    relation = heap_open(DatabaseRelationId, AccessShareLock);
    scan = systable_beginscan(relation, DatabaseOidIndexId,
                              criticalSharedRelcachesBuilt,
                              NULL,
                              1, key);

    tuple = systable_getnext(scan);

    /* Must copy tuple before releasing buffer */
    if (HeapTupleIsValid(tuple))
        tuple = heap_copytuple(tuple);

    /* all done */
    systable_endscan(scan);
    heap_close(relation, AccessShareLock);

    return tuple;
}

/*
 * PerformAuthentication -- authenticate a remote client
 *
 * returns: nothing.  Will not return at all if there's any failure.
 */
static void
PerformAuthentication(Port *port)
{
    /* This should be set already, but let's make sure */
    ClientAuthInProgress = true;    /* limit visibility of log messages */

    /*
     * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
     * etcetera from the postmaster, and have to load them ourselves.
     *
     * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
     */
#ifdef EXEC_BACKEND
    if (!load_hba())
    {
        /*
         * It makes no sense to continue if we fail to load the HBA file,
         * since there is no way to connect to the database in this case.
         */
        ereport(FATAL,
                (errmsg("could not load pg_hba.conf")));
    }

    if (!load_ident())
    {
        /*
         * It is ok to continue if we fail to load the IDENT file, although
         * it means that you cannot log in using any of the authentication
         * methods that need a user name mapping.  load_ident() already
         * logged the details of the error to the log.
         */
    }
#endif

    /*
     * Set up a timeout in case a buggy or malicious client fails to respond
     * during authentication.  Since we're inside a transaction and might do
     * database access, we have to use the statement_timeout infrastructure.
     */
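    /*
     * In outline, the timeout.c usage pattern around the authentication
     * exchange is: the handler was registered once at process start (see
     * InitPostgres below), and the timeout is armed and disarmed around the
     * protected code.  Sketch only; timeout_in_ms is a placeholder:
     *
     *      RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler);
     *      ...
     *      enable_timeout_after(STATEMENT_TIMEOUT, timeout_in_ms);
     *      ... exchange with the client ...
     *      disable_timeout(STATEMENT_TIMEOUT, false);
     */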
    enable_timeout_after(STATEMENT_TIMEOUT, AuthenticationTimeout * 1000);

    /*
     * Now perform authentication exchange.
     */
    ClientAuthentication(port); /* might not return, if failure */

    /*
     * Done with authentication.  Disable the timeout, and log if needed.
     */
    disable_timeout(STATEMENT_TIMEOUT, false);

    if (Log_connections)
    {
        if (am_walsender)
        {
#ifdef USE_OPENSSL
            if (port->ssl_in_use)
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name, SSL_get_version(port->ssl), SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            else
#endif
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s",
                                port->user_name)));
        }
        else
        {
#ifdef USE_OPENSSL
            if (port->ssl_in_use)
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name, port->database_name, SSL_get_version(port->ssl), SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            else
#endif
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s",
                                port->user_name, port->database_name)));
        }
    }

    set_ps_display("startup", false);

    ClientAuthInProgress = false;   /* client_min_messages is active now */
}

/*
 * CheckMyDatabase -- fetch information from the pg_database entry for our DB
 */
static void
CheckMyDatabase(const char *name, bool am_superuser)
{
    HeapTuple   tup;
    Form_pg_database dbform;
    char       *collate;
    char       *ctype;

    /* Fetch our pg_database row normally, via syscache */
    tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
    if (!HeapTupleIsValid(tup))
        elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
    dbform = (Form_pg_database) GETSTRUCT(tup);

    /* This recheck is strictly paranoia */
    if (strcmp(name, NameStr(dbform->datname)) != 0)
        ereport(FATAL,
                (errcode(ERRCODE_UNDEFINED_DATABASE),
                 errmsg("database \"%s\" has disappeared from pg_database",
                        name),
                 errdetail("Database OID %u now seems to belong to \"%s\".",
                           MyDatabaseId, NameStr(dbform->datname))));

    /*
     * Check permissions to connect to the database.
     *
     * These checks are not enforced when in standalone mode, so that there
     * is a way to recover from disabling all access to all databases, for
     * example "UPDATE pg_database SET datallowconn = false;".
     *
     * We do not enforce them for autovacuum worker processes either.
     */
    if (IsUnderPostmaster && !IsAutoVacuumWorkerProcess())
    {
        /*
         * Check that the database is currently allowing connections.
         */
        if (!dbform->datallowconn)
            ereport(FATAL,
                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                     errmsg("database \"%s\" is not currently accepting connections",
                            name)));

        /*
         * Check privilege to connect to the database.  (The am_superuser
         * test is redundant, but since we have the flag, might as well
         * check it and save a few cycles.)
         */
        if (!am_superuser &&
            pg_database_aclcheck(MyDatabaseId, GetUserId(),
                                 ACL_CONNECT) != ACLCHECK_OK)
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("permission denied for database \"%s\"", name),
                     errdetail("User does not have CONNECT privilege.")));

        /*
         * Check connection limit for this database.
         *
         * There is a race condition here --- we create our PGPROC before
         * checking for other PGPROCs.  If two backends did this at about the
         * same time, they might both think they were over the limit, while
         * ideally one should succeed and one fail.  Getting that to work
         * exactly seems more trouble than it is worth, however; instead we
         * just document that the connection limit is approximate.
         */
        if (dbform->datconnlimit >= 0 &&
            !am_superuser &&
            CountDBBackends(MyDatabaseId) > dbform->datconnlimit)
            ereport(FATAL,
                    (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                     errmsg("too many connections for database \"%s\"",
                            name)));
    }

    /*
     * OK, we're golden.  Next to-do item is to save the encoding info out of
     * the pg_database tuple.
     */
    SetDatabaseEncoding(dbform->encoding);
    /* Record it as a GUC internal option, too */
    SetConfigOption("server_encoding", GetDatabaseEncodingName(),
                    PGC_INTERNAL, PGC_S_OVERRIDE);
    /* If we have no other source of client_encoding, use server encoding */
    SetConfigOption("client_encoding", GetDatabaseEncodingName(),
                    PGC_BACKEND, PGC_S_DYNAMIC_DEFAULT);

    /* assign locale variables */
    collate = NameStr(dbform->datcollate);
    ctype = NameStr(dbform->datctype);

    if (pg_perm_setlocale(LC_COLLATE, collate) == NULL)
        ereport(FATAL,
                (errmsg("database locale is incompatible with operating system"),
                 errdetail("The database was initialized with LC_COLLATE \"%s\", "
                           "which is not recognized by setlocale().", collate),
                 errhint("Recreate the database with another locale or install the missing locale.")));

    if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL)
        ereport(FATAL,
                (errmsg("database locale is incompatible with operating system"),
                 errdetail("The database was initialized with LC_CTYPE \"%s\", "
                           "which is not recognized by setlocale().", ctype),
                 errhint("Recreate the database with another locale or install the missing locale.")));

    /* Make the locale settings visible as GUC variables, too */
    SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE);
    SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE);

    ReleaseSysCache(tup);
}


/* --------------------------------
 *      InitCommunication
 *
 *      This routine initializes stuff needed for ipc, locking, etc.
 *      It should be called something more informative.
 * --------------------------------
 */
static void
InitCommunication(void)
{
    /*
     * initialize shared memory and semaphores appropriately.
     */
    if (!IsUnderPostmaster)     /* postmaster already did this */
    {
        /*
         * We're running a postgres bootstrap process or a standalone
         * backend.  Create private "shmem" and semaphores.
         */
        CreateSharedMemoryAndSemaphores(true, 0);
    }
}

/*
 * pg_split_opts -- split a string of options and append it to an argv array
 *
 * The caller is responsible for ensuring the argv array is large enough.
 * The maximum possible number of arguments added by this routine is
 * (strlen(optstr) + 1) / 2.
 *
 * Because some option values can contain spaces we allow escaping using
 * backslashes, with \\ representing a literal backslash.
 */
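/*
 * Illustrative example (hypothetical input, for orientation): given the
 * option string
 *
 *      -c geqo=off -c search_path=my\ schema
 *
 * (a single backslash escaping the space), this routine appends the four
 * arguments "-c", "geqo=off", "-c" and "search_path=my schema" to argv;
 * the escaped space is kept as part of the value rather than splitting it.
 */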
void
pg_split_opts(char **argv, int *argcp, char *optstr)
{
    StringInfoData s;

    initStringInfo(&s);

    while (*optstr)
    {
        bool        last_was_escape = false;

        resetStringInfo(&s);

        /* skip over leading space */
        while (isspace((unsigned char) *optstr))
            optstr++;

        if (*optstr == '\0')
            break;

        /*
         * Parse a single option + value, stopping at the first space, unless
         * it's escaped.
         */
        while (*optstr)
        {
            if (isspace((unsigned char) *optstr) && !last_was_escape)
                break;

            if (!last_was_escape && *optstr == '\\')
                last_was_escape = true;
            else
            {
                last_was_escape = false;
                appendStringInfoChar(&s, *optstr);
            }

            optstr++;
        }

        /* now store the option */
        argv[(*argcp)++] = pstrdup(s.data);
    }

    /* done with the scratch buffer */
    pfree(s.data);
}

/*
 * Initialize MaxBackends value from config options.
 *
 * This must be called after modules have had the chance to register
 * background workers in shared_preload_libraries, and before shared memory
 * size is determined.
 *
 * Note that in EXEC_BACKEND environment, the value is passed down from
 * postmaster to subprocesses via BackendParameters in SubPostmasterMain;
 * only postmaster itself and processes not under postmaster control should
 * call this.
 */
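/*
 * Worked example with illustrative (non-default) settings: given
 * max_connections = 100, autovacuum_max_workers = 3 and
 * max_worker_processes = 8, the computation below yields
 * MaxBackends = 100 + 3 + 1 + 8 = 112, the extra 1 being the autovacuum
 * launcher.
 */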
void
InitializeMaxBackends(void)
{
    Assert(MaxBackends == 0);

    /* the extra unit accounts for the autovacuum launcher */
    MaxBackends = MaxConnections + autovacuum_max_workers + 1 +
        max_worker_processes;

    /* internal error because the values were all checked previously */
    if (MaxBackends > MAX_BACKENDS)
        elog(ERROR, "too many backends configured");
}

/*
 * Early initialization of a backend (either standalone or under postmaster).
 * This happens even before InitPostgres.
 *
 * This is separate from InitPostgres because it is also called by auxiliary
 * processes, such as the background writer process, which may not call
 * InitPostgres at all.
 */
void
BaseInit(void)
{
    /*
     * Attach to shared memory and semaphores, and initialize our
     * input/output/debugging file descriptors.
     */
    InitCommunication();
    DebugFileOpen();

    /* Do local initialization of file, storage and buffer managers */
    InitFileAccess();
    smgrinit();
    InitBufferPoolAccess();
}


/* --------------------------------
 * InitPostgres
 *      Initialize POSTGRES.
 *
 * The database can be specified by name, using the in_dbname parameter, or
 * by OID, using the dboid parameter.  In the latter case, the actual
 * database name can be returned to the caller in out_dbname.  If out_dbname
 * isn't NULL, it must point to a buffer of size NAMEDATALEN.
 *
 * In bootstrap mode no parameters are used.  The autovacuum launcher process
 * doesn't use any parameters either, because it only goes far enough to be
 * able to read pg_database; it doesn't connect to any particular database.
 * In walsender mode only username is used.
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not completely filled in yet.
 *
 * Note:
 *      Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
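/*
 * Illustrative calls (hypothetical callers, for orientation): a regular
 * backend might connect by name without needing the name echoed back,
 *
 *      InitPostgres("postgres", InvalidOid, username, NULL);
 *
 * while a caller that knows only the database OID could connect by OID and
 * have the name filled in,
 *
 *      char        dbname[NAMEDATALEN];
 *
 *      InitPostgres(NULL, dboid, NULL, dbname);
 */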
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
             char *out_dbname)
{
    bool        bootstrap = IsBootstrapProcessingMode();
    bool        am_superuser;
    char       *fullpath;
    char        dbname[NAMEDATALEN];

    elog(DEBUG3, "InitPostgres");

    /*
     * Add my PGPROC struct to the ProcArray.
     *
     * Once I have done this, I am visible to other backends!
     */
    InitProcessPhase2();

    /*
     * Initialize my entry in the shared-invalidation manager's array of
     * per-backend data.
     *
     * Sets up MyBackendId, a unique backend identifier.
     */
    MyBackendId = InvalidBackendId;

    SharedInvalBackendInit(false);

    if (MyBackendId > MaxBackends || MyBackendId <= 0)
        elog(FATAL, "bad backend ID: %d", MyBackendId);

    /* Now that we have a BackendId, we can participate in ProcSignal */
    ProcSignalInit(MyBackendId);

    /*
     * Also set up timeout handlers needed for backend operation.  We need
     * these in every case except bootstrap.
     */
    if (!bootstrap)
    {
        RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock);
        RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler);
        RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler);
    }

    /*
     * bufmgr needs another initialization call too
     */
    InitBufferPoolBackend();

    /*
     * Initialize local process's access to XLOG.
     */
    if (IsUnderPostmaster)
    {
        /*
         * The postmaster already started the XLOG machinery, but we need to
         * call InitXLOGAccess(), if the system isn't in hot-standby mode.
         * This is handled by calling RecoveryInProgress and ignoring the
         * result.
         */
        (void) RecoveryInProgress();
    }
    else
    {
        /*
         * We are either a bootstrap process or a standalone backend. Either
         * way, start up the XLOG machinery, and register to have it closed
         * down at exit.
         */
        StartupXLOG();
        on_shmem_exit(ShutdownXLOG, 0);
    }

    /*
     * Initialize the relation cache and the system catalog caches.  Note
     * that no catalog access happens here; we only set up the hashtable
     * structure.  We must do this before starting a transaction because
     * transaction abort would try to touch these hashtables.
     */
    RelationCacheInitialize();
    InitCatalogCache();
    InitPlanCache();

    /* Initialize portal manager */
    EnablePortalManager();

    /* Initialize stats collection --- must happen before first xact */
    if (!bootstrap)
        pgstat_initialize();

    /*
     * Load relcache entries for the shared system catalogs.  This must
     * create at least entries for pg_database and catalogs used for
     * authentication.
     */
    RelationCacheInitializePhase2();

    /*
     * Set up process-exit callback to do pre-shutdown cleanup.  This is the
     * first before_shmem_exit callback we register; thus, this will be the
     * last thing we do before low-level modules like the buffer manager
     * begin to close down.  We need to have this in place before we begin
     * our first transaction --- if we fail during the initialization
     * transaction, as is entirely possible, we need the AbortTransaction
     * call to clean up.
     */
    before_shmem_exit(ShutdownPostgres, 0);

    /* The autovacuum launcher is done here */
    if (IsAutoVacuumLauncherProcess())
        return;

    /*
     * Start a new transaction here before first access to db, and get a
     * snapshot.  We don't have a use for the snapshot itself, but we're
     * interested in the secondary effect that it sets RecentGlobalXmin.
     * (This is critical for anything that reads heap pages, because HOT may
     * decide to prune them even if the process doesn't attempt to modify
     * any tuples.)
     */
    if (!bootstrap)
    {
        /* statement_timestamp must be set for timeouts to work correctly */
        SetCurrentStatementStartTimestamp();
        StartTransactionCommand();

        /*
         * transaction_isolation will have been set to the default by the
         * above.  If the default is "serializable", and we are in hot
         * standby, we will fail if we don't change it to something lower.
         * Fortunately, "read committed" is plenty good enough.
         */
        XactIsoLevel = XACT_READ_COMMITTED;

        (void) GetTransactionSnapshot();
    }

    /*
     * Perform client authentication if necessary, then figure out our
     * postgres user ID, and see if we are a superuser.
     *
     * In standalone mode and in autovacuum worker processes, we use a fixed
     * ID, otherwise we figure it out from the authenticated user name.
     */
    if (bootstrap || IsAutoVacuumWorkerProcess())
    {
        InitializeSessionUserIdStandalone();
        am_superuser = true;
    }
    else if (!IsUnderPostmaster)
    {
        InitializeSessionUserIdStandalone();
        am_superuser = true;
        if (!ThereIsAtLeastOneRole())
            ereport(WARNING,
                    (errcode(ERRCODE_UNDEFINED_OBJECT),
                     errmsg("no roles are defined in this database system"),
                     errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.",
                             username)));
    }
    else if (IsBackgroundWorker)
    {
        if (username == NULL)
        {
            InitializeSessionUserIdStandalone();
            am_superuser = true;
        }
        else
        {
            InitializeSessionUserId(username);
            am_superuser = superuser();
        }
    }
    else
    {
        /* normal multiuser case */
        Assert(MyProcPort != NULL);
        PerformAuthentication(MyProcPort);
        InitializeSessionUserId(username);
        am_superuser = superuser();
    }

    /*
     * If we're trying to shut down, only superusers can connect, and new
     * replication connections are not allowed.
     */
    if ((!am_superuser || am_walsender) &&
        MyProcPort != NULL &&
        MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
    {
        if (am_walsender)
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("new replication connections are not allowed during database shutdown")));
        else
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("must be superuser to connect during database shutdown")));
    }

    /*
     * Binary upgrades only allowed super-user connections
     */
    if (IsBinaryUpgrade && !am_superuser)
    {
        ereport(FATAL,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to connect in binary upgrade mode")));
    }

    /*
     * The last few connection slots are reserved for superusers.  Although
     * replication connections currently require superuser privileges, we
     * don't allow them to consume the reserved slots, which are intended
     * for interactive use.
     */
    if ((!am_superuser || am_walsender) &&
        ReservedBackends > 0 &&
        !HaveNFreeProcs(ReservedBackends))
        ereport(FATAL,
                (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                 errmsg("remaining connection slots are reserved for non-replication superuser connections")));

    /* Check replication permissions needed for walsender processes. */
    if (am_walsender)
    {
        Assert(!bootstrap);

        if (!superuser() && !has_rolreplication(GetUserId()))
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("must be superuser or replication role to start walsender")));
    }

    /*
     * If this is a plain walsender only supporting physical replication, we
     * don't want to connect to any particular database.  Just finish the
     * backend startup by processing any options from the startup packet,
     * and we're done.
     */
    if (am_walsender && !am_db_walsender)
    {
        /* process any options passed in the startup packet */
        if (MyProcPort != NULL)
            process_startup_options(MyProcPort, am_superuser);

        /* Apply PostAuthDelay as soon as we've read all options */
        if (PostAuthDelay > 0)
            pg_usleep(PostAuthDelay * 1000000L);

        /* initialize client encoding */
        InitializeClientEncoding();

        /* report this backend in the PgBackendStatus array */
        pgstat_bestart();

        /* close the transaction we started above */
        CommitTransactionCommand();

        return;
    }
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up the global variables holding database id and default tablespace.
|
|
|
|
* But note we won't actually try to touch the database just yet.
|
|
|
|
*
|
|
|
|
* We take a shortcut in the bootstrap case, otherwise we have to look up
|
|
|
|
* the db's entry in pg_database.
|
|
|
|
*/
|
|
|
|
if (bootstrap)
|
|
|
|
{
|
|
|
|
MyDatabaseId = TemplateDbOid;
|
|
|
|
MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
|
|
|
|
}
|
|
|
|
else if (in_dbname != NULL)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
Form_pg_database dbform;
|
|
|
|
|
|
|
|
tuple = GetDatabaseTuple(in_dbname);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
|
|
|
errmsg("database \"%s\" does not exist", in_dbname)));
|
|
|
|
dbform = (Form_pg_database) GETSTRUCT(tuple);
|
|
|
|
MyDatabaseId = HeapTupleGetOid(tuple);
|
|
|
|
MyDatabaseTableSpace = dbform->dattablespace;
|
|
|
|
/* take database name from the caller, just for paranoia */
|
|
|
|
strlcpy(dbname, in_dbname, sizeof(dbname));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* caller specified database by OID */
|
|
|
|
HeapTuple tuple;
|
|
|
|
Form_pg_database dbform;
|
|
|
|
|
|
|
|
tuple = GetDatabaseTupleByOid(dboid);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
|
|
|
errmsg("database %u does not exist", dboid)));
|
|
|
|
dbform = (Form_pg_database) GETSTRUCT(tuple);
|
|
|
|
MyDatabaseId = HeapTupleGetOid(tuple);
|
|
|
|
MyDatabaseTableSpace = dbform->dattablespace;
|
|
|
|
Assert(MyDatabaseId == dboid);
|
|
|
|
strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
|
|
|
|
/* pass the database name back to the caller */
|
|
|
|
if (out_dbname)
|
|
|
|
strcpy(out_dbname, dbname);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now we can mark our PGPROC entry with the database ID */
|
|
|
|
/* (We assume this is an atomic store so no lock is needed) */
|
|
|
|
MyProc->databaseId = MyDatabaseId;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, take a writer's lock on the database we are trying to connect to.
|
|
|
|
* If there is a concurrently running DROP DATABASE on that database, this
|
|
|
|
* will block us until it finishes (and has committed its update of
|
|
|
|
* pg_database).
|
|
|
|
*
|
|
|
|
* Note that the lock is not held long, only until the end of this startup
|
|
|
|
* transaction. This is OK since we are already advertising our use of
|
|
|
|
* the database in the PGPROC array; anyone trying a DROP DATABASE after
|
|
|
|
* this point will see us there.
|
|
|
|
*
|
|
|
|
* Note: use of RowExclusiveLock here is reasonable because we envision
|
|
|
|
* our session as being a concurrent writer of the database. If we had a
|
|
|
|
* way of declaring a session as being guaranteed-read-only, we could use
|
|
|
|
* AccessShareLock for such sessions and thereby not conflict against
|
|
|
|
* CREATE DATABASE.
|
|
|
|
*/
|
|
|
|
if (!bootstrap)
|
|
|
|
LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
|
|
|
|
RowExclusiveLock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Recheck pg_database to make sure the target database hasn't gone away.
|
|
|
|
* If there was a concurrent DROP DATABASE, this ensures we will die
|
|
|
|
* cleanly without creating a mess.
|
|
|
|
*/
|
|
|
|
if (!bootstrap)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
tuple = GetDatabaseTuple(dbname);
|
|
|
|
if (!HeapTupleIsValid(tuple) ||
|
|
|
|
MyDatabaseId != HeapTupleGetOid(tuple) ||
|
|
|
|
MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
|
|
|
errmsg("database \"%s\" does not exist", dbname),
|
|
|
|
errdetail("It seems to have just been dropped or renamed.")));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we should be able to access the database directory safely. Verify
|
|
|
|
* it's there and looks reasonable.
|
|
|
|
*/
|
|
|
|
fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
|
|
|
|
|
|
|
|
if (!bootstrap)
|
|
|
|
{
|
|
|
|
if (access(fullpath, F_OK) == -1)
|
|
|
|
{
|
|
|
|
if (errno == ENOENT)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
|
|
|
errmsg("database \"%s\" does not exist",
|
|
|
|
dbname),
|
|
|
|
errdetail("The database subdirectory \"%s\" is missing.",
|
|
|
|
fullpath)));
|
|
|
|
else
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not access directory \"%s\": %m",
|
|
|
|
fullpath)));
|
|
|
|
}
|
|
|
|
|
|
|
|
ValidatePgVersion(fullpath);
|
|
|
|
}
|
|
|
|
|
|
|
|
SetDatabasePath(fullpath);

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase3();

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/*
	 * Re-read the pg_database row for our database, check permissions and set
	 * up database-specific GUC settings.  We can't do this until all the
	 * database-access infrastructure is up.  (Also, it wants to know if the
	 * user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * Now process any command-line switches and any additional GUC variable
	 * settings passed in the startup packet.  We couldn't do this before
	 * because we didn't know whether the client is a superuser.
	 */
	if (MyProcPort != NULL)
		process_startup_options(MyProcPort, am_superuser);

	/* Process pg_db_role_setting options */
	process_settings(MyDatabaseId, GetSessionUserId());

	/* Apply PostAuthDelay as soon as we've read all options */
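	/* (PostAuthDelay is in seconds; pg_usleep() takes microseconds.) */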
	if (PostAuthDelay > 0)
		pg_usleep(PostAuthDelay * 1000000L);

	/*
	 * Initialize various default states that can't be set up until we've
	 * selected the active user and gotten the right GUC settings.
	 */

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	/* report this backend in the PgBackendStatus array */
	if (!bootstrap)
		pgstat_bestart();

	/* close the transaction we started above */
	if (!bootstrap)
		CommitTransactionCommand();
}

/*
 * Process any command-line switches and any additional GUC variable
 * settings passed in the startup packet.
 */
static void
process_startup_options(Port *port, bool am_superuser)
{
	GucContext	gucctx;
	ListCell   *gucopts;

	gucctx = am_superuser ? PGC_SUSET : PGC_BACKEND;

	/*
	 * First process any command-line switches that were included in the
	 * startup packet, if we are in a regular backend.
	 */
	if (port->cmdline_options != NULL)
	{
		/*
		 * The maximum possible number of commandline arguments that could
		 * come from port->cmdline_options is (strlen + 1) / 2; see
		 * pg_split_opts().
		 */
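		/*
		 * (Worked example: the worst case is single-character switches
		 * separated by single spaces, e.g. the 7-byte string "a b c d",
		 * which splits into (7 + 1) / 2 = 4 arguments.)
		 */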
		char	  **av;
		int			maxac;
		int			ac;

		maxac = 2 + (strlen(port->cmdline_options) + 1) / 2;

		av = (char **) palloc(maxac * sizeof(char *));
		ac = 0;

		av[ac++] = "postgres";

		pg_split_opts(av, &ac, port->cmdline_options);

		av[ac] = NULL;

		Assert(ac < maxac);

		(void) process_postgres_switches(ac, av, gucctx, NULL);
	}

	/*
	 * Process any additional GUC variable settings passed in startup packet.
	 * These are handled exactly like command-line variables.
	 */
	gucopts = list_head(port->guc_options);
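	/*
	 * (The list alternates name and value cells, so each iteration below
	 * consumes two entries; e.g. a client that sets application_name in its
	 * startup packet contributes the pair "application_name", "psql".)
	 */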
	while (gucopts)
	{
		char	   *name;
		char	   *value;

		name = lfirst(gucopts);
		gucopts = lnext(gucopts);

		value = lfirst(gucopts);
		gucopts = lnext(gucopts);

		SetConfigOption(name, value, gucctx, PGC_S_CLIENT);
	}
}

/*
 * Load GUC settings from pg_db_role_setting.
 *
 * We try specific settings for the database/role combination, as well as
 * general settings for this database and for this user.
 */
static void
process_settings(Oid databaseid, Oid roleid)
{
	Relation	relsetting;
	Snapshot	snapshot;

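	/* nothing to do in bootstrap processing or a standalone backend */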
	if (!IsUnderPostmaster)
		return;

	relsetting = heap_open(DbRoleSettingRelationId, AccessShareLock);

	/* read all the settings under the same snapshot for efficiency */
	snapshot = RegisterSnapshot(GetCatalogSnapshot(DbRoleSettingRelationId));

	/* Later settings are ignored if set earlier. */
	ApplySetting(snapshot, databaseid, roleid, relsetting, PGC_S_DATABASE_USER);
	ApplySetting(snapshot, InvalidOid, roleid, relsetting, PGC_S_USER);
	ApplySetting(snapshot, databaseid, InvalidOid, relsetting, PGC_S_DATABASE);
	ApplySetting(snapshot, InvalidOid, InvalidOid, relsetting, PGC_S_GLOBAL);
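	/*
	 * (Illustration, with hypothetical names: a value set via
	 * ALTER ROLE alice IN DATABASE mydb SET work_mem = '64MB' is applied
	 * first and therefore wins over one set via ALTER ROLE alice SET
	 * work_mem, which in turn wins over ALTER DATABASE mydb SET work_mem.)
	 */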

	UnregisterSnapshot(snapshot);
	heap_close(relsetting, AccessShareLock);
}

/*
 * Backend-shutdown callback.  Do cleanup that we want to be sure happens
 * before all the supporting modules begin to nail their doors shut via
 * their own callbacks.
 *
 * User-level cleanup, such as temp-relation removal and UNLISTEN, happens
 * via separate callbacks that execute before this one.  We don't combine the
 * callbacks because we still want this one to happen if the user-level
 * cleanup fails.
 */
static void
ShutdownPostgres(int code, Datum arg)
{
	/* Make sure we've killed any active transaction */
	AbortOutOfAnyTransaction();

	/*
	 * User locks are not released by transaction end, so be sure to release
	 * them explicitly.
	 */
	LockReleaseAll(USER_LOCKMETHOD, true);
}

/*
 * STATEMENT_TIMEOUT handler: trigger a query-cancel interrupt.
 */
static void
StatementTimeoutHandler(void)
{
#ifdef HAVE_SETSID
	/* try to signal whole process group */
	kill(-MyProcPid, SIGINT);
#endif
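	/* in any case, signal this backend directly */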
	kill(MyProcPid, SIGINT);
}

/*
 * LOCK_TIMEOUT handler: trigger a query-cancel interrupt.
 *
 * This is identical to StatementTimeoutHandler, but since it's so short,
 * we might as well keep the two functions separate for clarity.
 */
static void
LockTimeoutHandler(void)
{
#ifdef HAVE_SETSID
	/* try to signal whole process group */
	kill(-MyProcPid, SIGINT);
#endif
	kill(MyProcPid, SIGINT);
}

/*
 * Returns true if at least one role is defined in this database cluster.
 */
static bool
ThereIsAtLeastOneRole(void)
{
	Relation	pg_authid_rel;
	HeapScanDesc scan;
	bool		result;

	pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock);

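	/* one visible tuple is enough to show that a role exists */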
	scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL);
	result = (heap_getnext(scan, ForwardScanDirection) != NULL);

	heap_endscan(scan);
	heap_close(pg_authid_rel, AccessShareLock);

	return result;
}