mirror of https://github.com/postgres/postgres
parent
5d5f1a79e6
commit
80f6c35833
@ -1,22 +0,0 @@ |
|||||||
PROGRAM = pg_autovacuum
|
|
||||||
OBJS = pg_autovacuum.o dllist.o
|
|
||||||
|
|
||||||
PG_CPPFLAGS = -I$(libpq_srcdir) -DFRONTEND
|
|
||||||
PG_LIBS = $(libpq_pgport)
|
|
||||||
|
|
||||||
DOCS = README.pg_autovacuum
|
|
||||||
|
|
||||||
EXTRA_CLEAN = dllist.c
|
|
||||||
|
|
||||||
ifdef USE_PGXS |
|
||||||
PGXS = $(shell pg_config --pgxs)
|
|
||||||
include $(PGXS) |
|
||||||
else |
|
||||||
subdir = contrib/pg_autovacuum
|
|
||||||
top_builddir = ../..
|
|
||||||
include $(top_builddir)/src/Makefile.global |
|
||||||
include $(top_srcdir)/contrib/contrib-global.mk |
|
||||||
endif |
|
||||||
|
|
||||||
dllist.c: $(top_srcdir)/src/backend/lib/dllist.c |
|
||||||
rm -f $@ && $(LN_S) $< .
|
|
@ -1,246 +0,0 @@ |
|||||||
pg_autovacuum README |
|
||||||
-------------------- |
|
||||||
|
|
||||||
pg_autovacuum is a libpq client program that monitors all the |
|
||||||
databases associated with a PostgreSQL server. It uses the statistics |
|
||||||
collector to monitor insert, update and delete activity. |
|
||||||
|
|
||||||
When a table exceeds a insert or delete threshold (for more detail on |
|
||||||
thresholds, see "Vacuum and Analyze" below) then that table will be |
|
||||||
vacuumed and/or analyzed. |
|
||||||
|
|
||||||
This allows PostgreSQL to keep the FSM (Free Space Map) and table |
|
||||||
statistics up to date, and eliminates the need to schedule periodic |
|
||||||
vacuums. |
|
||||||
|
|
||||||
The primary benefit of pg_autovacuum is that the FSM and table |
|
||||||
statistic information are updated more nearly as frequently as needed. |
|
||||||
When a table is actively changing, pg_autovacuum will perform the |
|
||||||
VACUUMs and ANALYZEs that such a table needs, whereas if a table |
|
||||||
remains static, no cycles will be wasted performing this |
|
||||||
unnecessarily. |
|
||||||
|
|
||||||
A secondary benefit of pg_autovacuum is that it ensures that a |
|
||||||
database wide vacuum is performed prior to XID wraparound. This is an |
|
||||||
important, if rare, problem, as failing to do so can result in major |
|
||||||
data loss. (See the section in the _Administrator's Guide_ entitled |
|
||||||
"Preventing transaction ID wraparound failures" for more details.) |
|
||||||
|
|
||||||
KNOWN ISSUES: |
|
||||||
------------- |
|
||||||
|
|
||||||
pg_autovacuum has been tested under Redhat Linux (by me) and Debian |
|
||||||
GNU/Linux, Solaris, and AIX (by Christopher B. Browne) and all known |
|
||||||
bugs have been resolved. Please report any problems to the hackers |
|
||||||
list. |
|
||||||
|
|
||||||
pg_autovacuum requires that the statistics system be enabled and |
|
||||||
reporting row level stats. The overhead of the stats system has been |
|
||||||
shown to be significant under certain workloads. For instance, a |
|
||||||
tight loop of queries performing "select 1" was found to run nearly |
|
||||||
30% slower when row-level stats were enabled. However, in practice, |
|
||||||
with more realistic workloads, the stats system overhead is usually |
|
||||||
nominal. |
|
||||||
|
|
||||||
pg_autovacuum does not get started automatically by either the |
|
||||||
postmaster or by pg_ctl. Similarly, when the postmaster exits, no one |
|
||||||
tells pg_autovacuum. The result of that is that at the start of the |
|
||||||
next loop, pg_autovacuum will fail to connect to the server and |
|
||||||
exit(). Any time it fails to connect pg_autovacuum exit()s. |
|
||||||
|
|
||||||
While pg_autovacuum can manage vacuums for as many databases as you |
|
||||||
may have tied to a particular PostgreSQL postmaster, it can only |
|
||||||
connect to a single PostgreSQL postmaster. Thus, if you have multiple |
|
||||||
postmasters on a particular host, you will need multiple pg_autovacuum |
|
||||||
instances, and they have no way, at present, to coordinate between one |
|
||||||
another to ensure that they do not concurrently vacuum big tables. |
|
||||||
|
|
||||||
When installed as a service under Windows, there is currently no way to |
|
||||||
know the name of the PostgreSQL server service (if there even is one) |
|
||||||
so it is not possible to specify a startup dependency. It is therefore |
|
||||||
possible for pg_autovacuum to start before the server. |
|
||||||
|
|
||||||
When installed as a service under Windows, if the -P option is used to |
|
||||||
specify the connection password, this option (and the password) is |
|
||||||
stored in plain text in the registry. |
|
||||||
|
|
||||||
TODO: |
|
||||||
----- |
|
||||||
|
|
||||||
At present, there are no sample scripts to automatically start up |
|
||||||
pg_autovacuum along with the database. It would be desirable to have |
|
||||||
a SysV script to start up pg_autovacuum after PostgreSQL has been |
|
||||||
started. |
|
||||||
|
|
||||||
Some users have expressed interest in making pg_autovacuum more |
|
||||||
configurable so that certain tables known to be inactive could be |
|
||||||
excluded from being vacuumed. It would probably make sense to |
|
||||||
introduce this sort of functionality by providing arguments to specify |
|
||||||
the database and schema in which to find a configuration table. |
|
||||||
|
|
||||||
It would also be desirable for the daemon to monitor how busy the |
|
||||||
system is, with a view to deferring vacuums until there is less other |
|
||||||
activity. |
|
||||||
|
|
||||||
INSTALL: |
|
||||||
-------- |
|
||||||
|
|
||||||
As of postgresql v7.4 pg_autovacuum is included in the main source |
|
||||||
tree under contrib. Therefore you merely need to "make && make |
|
||||||
install" (similar to most other contrib modules) and it will be |
|
||||||
installed for you. |
|
||||||
|
|
||||||
If you are using an earlier version of PostgreSQL, uncompress the |
|
||||||
tar.gz file into the contrib directory and modify the contrib/Makefile |
|
||||||
to include the pg_autovacuum directory. pg_autovacuum will then be |
|
||||||
built as part of the standard postgresql install. It is known to work |
|
||||||
with v7.3 releases; it is not presently compatible with v7.2. |
|
||||||
|
|
||||||
make sure that the following are set in postgresql.conf: |
|
||||||
|
|
||||||
stats_start_collector = true |
|
||||||
stats_row_level = true |
|
||||||
|
|
||||||
Start up the postmaster, then execute the pg_autovacuum executable. |
|
||||||
|
|
||||||
If you have a script that automatically starts up the PostgreSQL |
|
||||||
instance, you might add in, after that, something similar to the |
|
||||||
following: |
|
||||||
|
|
||||||
sleep 10 # To give the database some time to start up |
|
||||||
$PGBINS/pg_autovacuum -D -s $SBASE -S $SSCALE ... [other arguments] |
|
||||||
|
|
||||||
Command line arguments: |
|
||||||
----------------------- |
|
||||||
|
|
||||||
pg_autovacuum has the following optional arguments: |
|
||||||
|
|
||||||
-d debug: 0 silent, 1 basic info, 2 more debug info, etc... |
|
||||||
-D daemonize: Detach from tty and run in background. |
|
||||||
-s sleep base value: see "Sleeping" below. |
|
||||||
-S sleep scaling factor: see "Sleeping" below. |
|
||||||
-v vacuum base threshold: see "Vacuum and Analyze" below. |
|
||||||
-V vacuum scaling factor: see "Vacuum and Analyze" below. |
|
||||||
-a analyze base threshold: see "Vacuum and Analyze" below. |
|
||||||
-A analyze scaling factor: see "Vacuum and Analyze" below. |
|
||||||
-i update interval: how often (in terms of iterations of the primary loop |
|
||||||
over the database list) to update the database list. The default is 2, |
|
||||||
which means the list will be updated before every other pass through |
|
||||||
the database list. |
|
||||||
-L log file: Name of file to which output is submitted, otherwise STDERR |
|
||||||
-U username: Username pg_autovacuum will use to connect with, if not |
|
||||||
specified the current username is used. |
|
||||||
-P password: Password pg_autovacuum will use to connect with. *WARNING* |
|
||||||
This option is insecure. When installed as a Windows Service, this |
|
||||||
option will be stored in plain text in the registry. When used with |
|
||||||
most Unix variants, other users will be able to see the argument to |
|
||||||
the "-P" option via ps(1). The ~/.pgpass file can be used to |
|
||||||
specify a password more securely. |
|
||||||
-H host: host name or IP to connect to. |
|
||||||
-p port: port used for connection. |
|
||||||
-h help: list of command line options. |
|
||||||
|
|
||||||
The following 5 autovacuum command line options correspond to the various |
|
||||||
cost-based vacuum settings. If not given, then the cluster default values |
|
||||||
will be used. |
|
||||||
|
|
||||||
-c vacuum_cost_delay |
|
||||||
-C vacuum_cost_page_hit |
|
||||||
-m vacuum_cost_page_miss |
|
||||||
-n vacuum_cost_page_dirty |
|
||||||
-l vacuum_cost_limit |
|
||||||
|
|
||||||
|
|
||||||
Numerous arguments have default values defined in pg_autovacuum.h. At |
|
||||||
the time of writing they are: |
|
||||||
|
|
||||||
-d 1 |
|
||||||
-v 1000 |
|
||||||
-V 2 |
|
||||||
-a 500 (half of -v if not specified) |
|
||||||
-A 1 (half of -V if not specified) |
|
||||||
-s 300 (5 minutes) |
|
||||||
-S 2 |
|
||||||
-i 2 |
|
||||||
|
|
||||||
The following arguments are used on Windows only: |
|
||||||
|
|
||||||
-I Install the executable as a Windows service. Other appropriate command |
|
||||||
line options will be stored in the registry and passed to the service |
|
||||||
at startup. *WARNING* This includes the connection password which will |
|
||||||
be stored in plain text. |
|
||||||
|
|
||||||
-N service user: Name of the Windows user account under which the service |
|
||||||
will run. Only used when installing as a Windows service. |
|
||||||
|
|
||||||
-W service password: The password for the service account. Only used when |
|
||||||
installing as a Windows service. |
|
||||||
|
|
||||||
-R Uninstall pg_autovacuum as a service. |
|
||||||
|
|
||||||
-E Dependent service that must start before this service. Normally this will be |
|
||||||
a PostgreSQL instance, e.g. "-E pgsql-8.0.0". Only used when installing as |
|
||||||
a Windows service. |
|
||||||
|
|
||||||
Vacuum and Analyze: |
|
||||||
------------------- |
|
||||||
|
|
||||||
pg_autovacuum performs either a VACUUM ANALYZE or just ANALYZE |
|
||||||
depending on the mixture of table activity (insert, update, or |
|
||||||
delete): |
|
||||||
|
|
||||||
- If the number of (inserts + updates + deletes) > AnalyzeThreshold, then |
|
||||||
only an analyze is performed. |
|
||||||
|
|
||||||
- If the number of (deletes + updates) > VacuumThreshold, then a |
|
||||||
vacuum analyze is performed. |
|
||||||
|
|
||||||
VacuumThreshold is equal to: |
|
||||||
vacuum_base_value + (vacuum_scaling_factor * "number of tuples in the table") |
|
||||||
|
|
||||||
AnalyzeThreshold is equal to: |
|
||||||
analyze_base_value + (analyze_scaling_factor * "number of tuples in the table") |
|
||||||
|
|
||||||
The AnalyzeThreshold defaults to half of the VacuumThreshold since it |
|
||||||
represents a much less expensive operation (approx 5%-10% of vacuum), |
|
||||||
and running ANALYZE more often should not substantially degrade system |
|
||||||
performance. |
|
||||||
|
|
||||||
Sleeping: |
|
||||||
--------- |
|
||||||
|
|
||||||
pg_autovacuum sleeps for a while after it is done checking all the |
|
||||||
databases. It does this in order to limit the amount of system |
|
||||||
resources it consumes. This allows the system administrator to |
|
||||||
configure pg_autovacuum to be more or less aggressive. |
|
||||||
|
|
||||||
Reducing the sleep time will cause pg_autovacuum to respond more |
|
||||||
quickly to changes, whether they be database addition/removal, table |
|
||||||
addition/removal, or just normal table activity. |
|
||||||
|
|
||||||
On the other hand, setting pg_autovacuum to sleep values too |
|
||||||
aggressively (to too short periods of time) can have a negative effect |
|
||||||
on server performance. For instance, if a table gets vacuumed 5 times |
|
||||||
during the course of a large set of updates, this is likely to take a |
|
||||||
lot more work than if the table was vacuumed just once, at the end. |
|
||||||
|
|
||||||
The total time it sleeps is equal to: |
|
||||||
|
|
||||||
base_sleep_value + sleep_scaling_factor * "duration of the previous |
|
||||||
loop" |
|
||||||
|
|
||||||
Note that timing measurements are made in seconds; specifying |
|
||||||
"pg_vacuum -s 1" means pg_autovacuum could poll the database up to 60 |
|
||||||
times minute. In a system with large tables where vacuums may run for |
|
||||||
several minutes, rather longer times between vacuums are likely to be |
|
||||||
appropriate. |
|
||||||
|
|
||||||
What pg_autovacuum monitors: |
|
||||||
---------------------------- |
|
||||||
|
|
||||||
pg_autovacuum dynamically generates a list of all databases and tables |
|
||||||
that exist on the server. It will dynamically add and remove |
|
||||||
databases and tables that are removed from the database server while |
|
||||||
pg_autovacuum is running. Overhead is fairly small per object. For |
|
||||||
example: 10 databases with 10 tables each appears to less than 10k of |
|
||||||
memory on my Linux box. |
|
@ -1,45 +0,0 @@ |
|||||||
Todo Items for pg_autovacuum client |
|
||||||
-------------------------------------------------------------------------- |
|
||||||
|
|
||||||
_Add Startup Message (with datetime stamp) to Logfile when starting and logging |
|
||||||
|
|
||||||
_create a FSM export function and see if I can use it for pg_autovacuum |
|
||||||
|
|
||||||
_look into possible benifits of pgstattuple contrib work |
|
||||||
|
|
||||||
_Continue trying to reduce server load created by polling. |
|
||||||
|
|
||||||
Done: |
|
||||||
-------------------------------------------------------------------------- |
|
||||||
_Check if required pg_stats are enables, if not exit with error |
|
||||||
|
|
||||||
_Reduce the number connections and queries to the server |
|
||||||
_Make database adding and removal part of the normal loop |
|
||||||
_make table adding and removal part of the normal loop |
|
||||||
|
|
||||||
_Separate logic for vacuum and analyze |
|
||||||
|
|
||||||
_all pg_autovacuum specific functions are now static |
|
||||||
|
|
||||||
_correct usage of snprintf |
|
||||||
|
|
||||||
_reworked database and table update functions, now they |
|
||||||
use the existing database connection and only one query |
|
||||||
|
|
||||||
_fixed -h option output |
|
||||||
|
|
||||||
_cleanup of 'constant == variable' used much more consistently now. |
|
||||||
|
|
||||||
_Guarantee database wide vacuum prior to Xid wraparound |
|
||||||
|
|
||||||
_change name to pg_autovacuum |
|
||||||
|
|
||||||
_Add proper table and database removal functions so that we can properly |
|
||||||
clear up before we exit, and make sure we don't leak memory when removing tables and such. |
|
||||||
|
|
||||||
_Decouple insert and delete thresholds |
|
||||||
|
|
||||||
_Fix Vacuum debug routine to include the database name. |
|
||||||
|
|
||||||
_Allow it to detach from the tty |
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,121 +0,0 @@ |
|||||||
/* pg_autovacuum.h
|
|
||||||
* Header file for pg_autovacuum.c |
|
||||||
* (c) 2003 Matthew T. O'Connor |
|
||||||
* |
|
||||||
* $PostgreSQL: pgsql/contrib/pg_autovacuum/pg_autovacuum.h,v 1.15 2005/04/19 03:35:15 momjian Exp $ |
|
||||||
*/ |
|
||||||
|
|
||||||
#ifndef _PG_AUTOVACUUM_H |
|
||||||
#define _PG_AUTOVACUUM_H |
|
||||||
|
|
||||||
#include "libpq-fe.h" |
|
||||||
#include "lib/dllist.h" |
|
||||||
|
|
||||||
#define AUTOVACUUM_DEBUG 0 |
|
||||||
#define VACBASETHRESHOLD 1000 |
|
||||||
#define VACSCALINGFACTOR 2 |
|
||||||
#define SLEEPBASEVALUE 300 |
|
||||||
#define SLEEPSCALINGFACTOR 2 |
|
||||||
#define UPDATE_INTERVAL 2 |
|
||||||
|
|
||||||
/* these two constants are used to tell update_table_stats what operation we just perfomred */ |
|
||||||
#define VACUUM_ANALYZE 0 |
|
||||||
#define ANALYZE_ONLY 1 |
|
||||||
|
|
||||||
|
|
||||||
#define TABLE_STATS_QUERY "select a.oid,a.relname,a.relnamespace,a.relpages,a.relisshared,a.reltuples,b.schemaname,b.n_tup_ins,b.n_tup_upd,b.n_tup_del from pg_class a, pg_stat_all_tables b where a.oid=b.relid and a.relkind = 'r'" |
|
||||||
|
|
||||||
#define PAGES_QUERY "select oid,reltuples,relpages from pg_class where oid=%u" |
|
||||||
#define FROZENOID_QUERY "select oid,age(datfrozenxid) from pg_database where datname = 'template1'" |
|
||||||
#define FROZENOID_QUERY2 "select oid,datname,age(datfrozenxid) from pg_database where datname!='template0'" |
|
||||||
|
|
||||||
/* Log levels */ |
|
||||||
enum |
|
||||||
{ |
|
||||||
LVL_DEBUG = 1, |
|
||||||
LVL_INFO, |
|
||||||
LVL_WARNING, |
|
||||||
LVL_ERROR, |
|
||||||
LVL_EXTRA |
|
||||||
}; |
|
||||||
|
|
||||||
/* define cmd_args stucture */ |
|
||||||
typedef struct cmdargs |
|
||||||
{ |
|
||||||
int vacuum_base_threshold, |
|
||||||
analyze_base_threshold, |
|
||||||
update_interval, |
|
||||||
sleep_base_value, |
|
||||||
debug, |
|
||||||
|
|
||||||
/*
|
|
||||||
* Cost-Based Vacuum Delay Settings for pg_autovacuum |
|
||||||
*/ |
|
||||||
av_vacuum_cost_delay, |
|
||||||
av_vacuum_cost_page_hit, |
|
||||||
av_vacuum_cost_page_miss, |
|
||||||
av_vacuum_cost_page_dirty, |
|
||||||
av_vacuum_cost_limit, |
|
||||||
|
|
||||||
#ifndef WIN32 |
|
||||||
daemonize; |
|
||||||
#else |
|
||||||
install_as_service, |
|
||||||
remove_as_service; |
|
||||||
#endif |
|
||||||
float vacuum_scaling_factor, |
|
||||||
analyze_scaling_factor, |
|
||||||
sleep_scaling_factor; |
|
||||||
char *user, |
|
||||||
*password, |
|
||||||
#ifdef WIN32 |
|
||||||
*service_dependencies, |
|
||||||
*service_user, |
|
||||||
*service_password, |
|
||||||
#endif |
|
||||||
*host, |
|
||||||
*logfile, |
|
||||||
*port; |
|
||||||
} cmd_args; |
|
||||||
|
|
||||||
/*
|
|
||||||
* Might need to add a time value for last time the whole database was |
|
||||||
* vacuumed. We need to guarantee this happens approx every 1Billion TX's |
|
||||||
*/ |
|
||||||
typedef struct dbinfo |
|
||||||
{ |
|
||||||
Oid oid; |
|
||||||
long age; |
|
||||||
long analyze_threshold, |
|
||||||
vacuum_threshold; /* Use these as defaults for table
|
|
||||||
* thresholds */ |
|
||||||
PGconn *conn; |
|
||||||
char *dbname, |
|
||||||
*username, |
|
||||||
*password; |
|
||||||
Dllist *table_list; |
|
||||||
} db_info; |
|
||||||
|
|
||||||
typedef struct tableinfo |
|
||||||
{ |
|
||||||
char *schema_name, |
|
||||||
*table_name; |
|
||||||
float reltuples; |
|
||||||
int relisshared; |
|
||||||
Oid relid, |
|
||||||
relpages; |
|
||||||
long analyze_threshold, |
|
||||||
vacuum_threshold; |
|
||||||
long CountAtLastAnalyze; /* equal to: inserts + updates as
|
|
||||||
* of the last analyze or initial |
|
||||||
* values at startup */ |
|
||||||
long CountAtLastVacuum; /* equal to: deletes + updates as
|
|
||||||
* of the last vacuum or initial |
|
||||||
* values at startup */ |
|
||||||
long curr_analyze_count, |
|
||||||
curr_vacuum_count; /* Latest values from stats system */ |
|
||||||
db_info *dbi; /* pointer to the database that this table
|
|
||||||
* belongs to */ |
|
||||||
} tbl_info; |
|
||||||
|
|
||||||
#endif /* _PG_AUTOVACUUM_H */ |
|
Loading…
Reference in new issue