mirror of https://github.com/postgres/postgres
parent
5d5f1a79e6
commit
80f6c35833
@ -1,22 +0,0 @@ |
||||
# Build description for contrib/pg_autovacuum.
# The program is linked from pg_autovacuum.o and dllist.o against libpq.
# dllist.c is not kept in this directory: the rule at the bottom symlinks it
# from src/backend/lib, and EXTRA_CLEAN removes the link again on clean.
# When USE_PGXS is defined the build uses the installed PGXS makefile located
# via pg_config; otherwise it uses the in-tree global makefiles.
PROGRAM = pg_autovacuum
|
||||
OBJS = pg_autovacuum.o dllist.o
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir) -DFRONTEND
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
DOCS = README.pg_autovacuum
|
||||
|
||||
EXTRA_CLEAN = dllist.c
|
||||
|
||||
ifdef USE_PGXS |
||||
PGXS = $(shell pg_config --pgxs)
|
||||
include $(PGXS) |
||||
else |
||||
subdir = contrib/pg_autovacuum
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
endif |
||||
|
||||
dllist.c: $(top_srcdir)/src/backend/lib/dllist.c |
||||
rm -f $@ && $(LN_S) $< .
|
@ -1,246 +0,0 @@ |
||||
pg_autovacuum README |
||||
-------------------- |
||||
|
||||
pg_autovacuum is a libpq client program that monitors all the |
||||
databases associated with a PostgreSQL server. It uses the statistics |
||||
collector to monitor insert, update and delete activity. |
||||
|
||||
When a table exceeds an insert or delete threshold (for more detail on |
||||
thresholds, see "Vacuum and Analyze" below) then that table will be |
||||
vacuumed and/or analyzed. |
||||
|
||||
This allows PostgreSQL to keep the FSM (Free Space Map) and table |
||||
statistics up to date, and eliminates the need to schedule periodic |
||||
vacuums. |
||||
|
||||
The primary benefit of pg_autovacuum is that the FSM and table |
||||
statistic information are updated more nearly as frequently as needed. |
||||
When a table is actively changing, pg_autovacuum will perform the |
||||
VACUUMs and ANALYZEs that such a table needs, whereas if a table |
||||
remains static, no cycles will be wasted performing this |
||||
unnecessarily. |
||||
|
||||
A secondary benefit of pg_autovacuum is that it ensures that a |
||||
database wide vacuum is performed prior to XID wraparound. This is an |
||||
important, if rare, problem, as failing to do so can result in major |
||||
data loss. (See the section in the _Administrator's Guide_ entitled |
||||
"Preventing transaction ID wraparound failures" for more details.) |
||||
|
||||
KNOWN ISSUES: |
||||
------------- |
||||
|
||||
pg_autovacuum has been tested under Redhat Linux (by me) and Debian |
||||
GNU/Linux, Solaris, and AIX (by Christopher B. Browne) and all known |
||||
bugs have been resolved. Please report any problems to the hackers |
||||
list. |
||||
|
||||
pg_autovacuum requires that the statistics system be enabled and |
||||
reporting row level stats. The overhead of the stats system has been |
||||
shown to be significant under certain workloads. For instance, a |
||||
tight loop of queries performing "select 1" was found to run nearly |
||||
30% slower when row-level stats were enabled. However, in practice, |
||||
with more realistic workloads, the stats system overhead is usually |
||||
nominal. |
||||
|
||||
pg_autovacuum does not get started automatically by either the |
||||
postmaster or by pg_ctl. Similarly, when the postmaster exits, no one |
||||
tells pg_autovacuum. The result of that is that at the start of the |
||||
next loop, pg_autovacuum will fail to connect to the server and |
||||
exit(). Any time it fails to connect pg_autovacuum exit()s. |
||||
|
||||
While pg_autovacuum can manage vacuums for as many databases as you |
||||
may have tied to a particular PostgreSQL postmaster, it can only |
||||
connect to a single PostgreSQL postmaster. Thus, if you have multiple |
||||
postmasters on a particular host, you will need multiple pg_autovacuum |
||||
instances, and they have no way, at present, to coordinate between one |
||||
another to ensure that they do not concurrently vacuum big tables. |
||||
|
||||
When installed as a service under Windows, there is currently no way to |
||||
know the name of the PostgreSQL server service (if there even is one) |
||||
so it is not possible to specify a startup dependency. It is therefore |
||||
possible for pg_autovacuum to start before the server. |
||||
|
||||
When installed as a service under Windows, if the -P option is used to |
||||
specify the connection password, this option (and the password) is |
||||
stored in plain text in the registry. |
||||
|
||||
TODO: |
||||
----- |
||||
|
||||
At present, there are no sample scripts to automatically start up |
||||
pg_autovacuum along with the database. It would be desirable to have |
||||
a SysV script to start up pg_autovacuum after PostgreSQL has been |
||||
started. |
||||
|
||||
Some users have expressed interest in making pg_autovacuum more |
||||
configurable so that certain tables known to be inactive could be |
||||
excluded from being vacuumed. It would probably make sense to |
||||
introduce this sort of functionality by providing arguments to specify |
||||
the database and schema in which to find a configuration table. |
||||
|
||||
It would also be desirable for the daemon to monitor how busy the |
||||
system is, with a view to deferring vacuums until there is less other |
||||
activity. |
||||
|
||||
INSTALL: |
||||
-------- |
||||
|
||||
As of postgresql v7.4 pg_autovacuum is included in the main source |
||||
tree under contrib. Therefore you merely need to "make && make |
||||
install" (similar to most other contrib modules) and it will be |
||||
installed for you. |
||||
|
||||
If you are using an earlier version of PostgreSQL, uncompress the |
||||
tar.gz file into the contrib directory and modify the contrib/Makefile |
||||
to include the pg_autovacuum directory. pg_autovacuum will then be |
||||
built as part of the standard postgresql install. It is known to work |
||||
with v7.3 releases; it is not presently compatible with v7.2. |
||||
|
||||
Make sure that the following are set in postgresql.conf: |
||||
|
||||
stats_start_collector = true |
||||
stats_row_level = true |
||||
|
||||
Start up the postmaster, then execute the pg_autovacuum executable. |
||||
|
||||
If you have a script that automatically starts up the PostgreSQL |
||||
instance, you might add in, after that, something similar to the |
||||
following: |
||||
|
||||
sleep 10 # To give the database some time to start up |
||||
$PGBINS/pg_autovacuum -D -s $SBASE -S $SSCALE ... [other arguments] |
||||
|
||||
Command line arguments: |
||||
----------------------- |
||||
|
||||
pg_autovacuum has the following optional arguments: |
||||
|
||||
-d debug: 0 silent, 1 basic info, 2 more debug info, etc... |
||||
-D daemonize: Detach from tty and run in background. |
||||
-s sleep base value: see "Sleeping" below. |
||||
-S sleep scaling factor: see "Sleeping" below. |
||||
-v vacuum base threshold: see "Vacuum and Analyze" below. |
||||
-V vacuum scaling factor: see "Vacuum and Analyze" below. |
||||
-a analyze base threshold: see "Vacuum and Analyze" below. |
||||
-A analyze scaling factor: see "Vacuum and Analyze" below. |
||||
-i update interval: how often (in terms of iterations of the primary loop |
||||
over the database list) to update the database list. The default is 2, |
||||
which means the list will be updated before every other pass through |
||||
the database list. |
||||
-L log file: Name of file to which output is submitted, otherwise STDERR |
||||
-U username: Username pg_autovacuum will use to connect with, if not |
||||
specified the current username is used. |
||||
-P password: Password pg_autovacuum will use to connect with. *WARNING* |
||||
This option is insecure. When installed as a Windows Service, this |
||||
option will be stored in plain text in the registry. When used with |
||||
most Unix variants, other users will be able to see the argument to |
||||
the "-P" option via ps(1). The ~/.pgpass file can be used to |
||||
specify a password more securely. |
||||
-H host: host name or IP to connect to. |
||||
-p port: port used for connection. |
||||
-h help: list of command line options. |
||||
|
||||
The following 5 autovacuum command line options correspond to the various |
||||
cost-based vacuum settings. If not given, then the cluster default values |
||||
will be used. |
||||
|
||||
-c vacuum_cost_delay |
||||
-C vacuum_cost_page_hit |
||||
-m vacuum_cost_page_miss |
||||
-n vacuum_cost_page_dirty |
||||
-l vacuum_cost_limit |
||||
|
||||
|
||||
Numerous arguments have default values defined in pg_autovacuum.h. At |
||||
the time of writing they are: |
||||
|
||||
-d 1 |
||||
-v 1000 |
||||
-V 2 |
||||
-a 500 (half of -v if not specified) |
||||
-A 1 (half of -V if not specified) |
||||
-s 300 (5 minutes) |
||||
-S 2 |
||||
-i 2 |
||||
|
||||
The following arguments are used on Windows only: |
||||
|
||||
-I Install the executable as a Windows service. Other appropriate command |
||||
line options will be stored in the registry and passed to the service |
||||
at startup. *WARNING* This includes the connection password which will |
||||
be stored in plain text. |
||||
|
||||
-N service user: Name of the Windows user account under which the service |
||||
will run. Only used when installing as a Windows service. |
||||
|
||||
-W service password: The password for the service account. Only used when |
||||
installing as a Windows service. |
||||
|
||||
-R Uninstall pg_autovacuum as a service. |
||||
|
||||
-E Dependent service that must start before this service. Normally this will be |
||||
a PostgreSQL instance, e.g. "-E pgsql-8.0.0". Only used when installing as |
||||
a Windows service. |
||||
|
||||
Vacuum and Analyze: |
||||
------------------- |
||||
|
||||
pg_autovacuum performs either a VACUUM ANALYZE or just ANALYZE |
||||
depending on the mixture of table activity (insert, update, or |
||||
delete): |
||||
|
||||
- If the number of (inserts + updates + deletes) > AnalyzeThreshold, then |
||||
only an analyze is performed. |
||||
|
||||
- If the number of (deletes + updates) > VacuumThreshold, then a |
||||
vacuum analyze is performed. |
||||
|
||||
VacuumThreshold is equal to: |
||||
vacuum_base_value + (vacuum_scaling_factor * "number of tuples in the table") |
||||
|
||||
AnalyzeThreshold is equal to: |
||||
analyze_base_value + (analyze_scaling_factor * "number of tuples in the table") |
||||
|
||||
The AnalyzeThreshold defaults to half of the VacuumThreshold since it |
||||
represents a much less expensive operation (approx 5%-10% of vacuum), |
||||
and running ANALYZE more often should not substantially degrade system |
||||
performance. |
||||
|
||||
Sleeping: |
||||
--------- |
||||
|
||||
pg_autovacuum sleeps for a while after it is done checking all the |
||||
databases. It does this in order to limit the amount of system |
||||
resources it consumes. This allows the system administrator to |
||||
configure pg_autovacuum to be more or less aggressive. |
||||
|
||||
Reducing the sleep time will cause pg_autovacuum to respond more |
||||
quickly to changes, whether they be database addition/removal, table |
||||
addition/removal, or just normal table activity. |
||||
|
||||
On the other hand, setting pg_autovacuum to sleep values too |
||||
aggressively (to too short periods of time) can have a negative effect |
||||
on server performance. For instance, if a table gets vacuumed 5 times |
||||
during the course of a large set of updates, this is likely to take a |
||||
lot more work than if the table was vacuumed just once, at the end. |
||||
|
||||
The total time it sleeps is equal to: |
||||
|
||||
base_sleep_value + sleep_scaling_factor * "duration of the previous |
||||
loop" |
||||
|
||||
Note that timing measurements are made in seconds; specifying |
||||
"pg_autovacuum -s 1" means pg_autovacuum could poll the database up to 60 |
||||
times per minute. In a system with large tables where vacuums may run for |
||||
several minutes, rather longer times between vacuums are likely to be |
||||
appropriate. |
||||
|
||||
What pg_autovacuum monitors: |
||||
---------------------------- |
||||
|
||||
pg_autovacuum dynamically generates a list of all databases and tables |
||||
that exist on the server. It will dynamically add and remove |
||||
databases and tables that are added to or removed from the database server while |
||||
pg_autovacuum is running. Overhead is fairly small per object. For |
||||
example: 10 databases with 10 tables each appears to use less than 10k of |
||||
memory on my Linux box. |
@ -1,45 +0,0 @@ |
||||
Todo Items for pg_autovacuum client |
||||
-------------------------------------------------------------------------- |
||||
|
||||
_Add Startup Message (with datetime stamp) to Logfile when starting and logging |
||||
|
||||
_create a FSM export function and see if I can use it for pg_autovacuum |
||||
|
||||
_look into possible benefits of pgstattuple contrib work |
||||
|
||||
_Continue trying to reduce server load created by polling. |
||||
|
||||
Done: |
||||
-------------------------------------------------------------------------- |
||||
_Check if required pg_stats are enabled, if not exit with error |
||||
|
||||
_Reduce the number of connections and queries to the server |
||||
_Make database adding and removal part of the normal loop |
||||
_make table adding and removal part of the normal loop |
||||
|
||||
_Separate logic for vacuum and analyze |
||||
|
||||
_all pg_autovacuum specific functions are now static |
||||
|
||||
_correct usage of snprintf |
||||
|
||||
_reworked database and table update functions, now they |
||||
use the existing database connection and only one query |
||||
|
||||
_fixed -h option output |
||||
|
||||
_cleanup of 'constant == variable' used much more consistently now. |
||||
|
||||
_Guarantee database wide vacuum prior to Xid wraparound |
||||
|
||||
_change name to pg_autovacuum |
||||
|
||||
_Add proper table and database removal functions so that we can properly |
||||
clear up before we exit, and make sure we don't leak memory when removing tables and such. |
||||
|
||||
_Decouple insert and delete thresholds |
||||
|
||||
_Fix Vacuum debug routine to include the database name. |
||||
|
||||
_Allow it to detach from the tty |
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,121 +0,0 @@ |
||||
/* pg_autovacuum.h
|
||||
* Header file for pg_autovacuum.c |
||||
* (c) 2003 Matthew T. O'Connor |
||||
* |
||||
* $PostgreSQL: pgsql/contrib/pg_autovacuum/pg_autovacuum.h,v 1.15 2005/04/19 03:35:15 momjian Exp $ |
||||
*/ |
||||
|
||||
#ifndef _PG_AUTOVACUUM_H |
||||
#define _PG_AUTOVACUUM_H |
||||
|
||||
#include "libpq-fe.h" |
||||
#include "lib/dllist.h" |
||||
|
||||
/*
 * Built-in default values for the tunable settings.  The README lists the
 * matching command-line defaults as -v 1000, -V 2, -s 300 (seconds), -S 2
 * and -i 2.
 * NOTE(review): the README gives "-d 1" as the debug default, whereas
 * AUTOVACUUM_DEBUG is 0 here -- confirm which one is intended.
 */
#define AUTOVACUUM_DEBUG 0 |
||||
#define VACBASETHRESHOLD 1000 |
||||
#define VACSCALINGFACTOR 2 |
||||
#define SLEEPBASEVALUE 300 |
||||
#define SLEEPSCALINGFACTOR 2 |
||||
#define UPDATE_INTERVAL 2 |
||||
|
||||
/* these two constants are used to tell update_table_stats what operation we just performed */ |
||||
#define VACUUM_ANALYZE 0 |
||||
#define ANALYZE_ONLY 1 |
||||
|
||||
|
||||
/*
 * SQL query text.
 * TABLE_STATS_QUERY joins pg_class with pg_stat_all_tables (on oid = relid)
 * for relations of relkind 'r' and returns size figures plus the
 * insert/update/delete tuple counters.
 * PAGES_QUERY is a format string taking one %u argument, the pg_class oid.
 * FROZENOID_QUERY reports age(datfrozenxid) for template1 only;
 * FROZENOID_QUERY2 reports it for every database except template0.
 */
#define TABLE_STATS_QUERY "select a.oid,a.relname,a.relnamespace,a.relpages,a.relisshared,a.reltuples,b.schemaname,b.n_tup_ins,b.n_tup_upd,b.n_tup_del from pg_class a, pg_stat_all_tables b where a.oid=b.relid and a.relkind = 'r'" |
||||
|
||||
#define PAGES_QUERY "select oid,reltuples,relpages from pg_class where oid=%u" |
||||
#define FROZENOID_QUERY "select oid,age(datfrozenxid) from pg_database where datname = 'template1'" |
||||
#define FROZENOID_QUERY2 "select oid,datname,age(datfrozenxid) from pg_database where datname!='template0'" |
||||
|
||||
/*
 * Log levels.
 * The values are consecutive, starting at LVL_DEBUG = 1, so that
 * LVL_INFO = 2, LVL_WARNING = 3, LVL_ERROR = 4 and LVL_EXTRA = 5.
 */ |
||||
enum |
||||
{ |
||||
LVL_DEBUG = 1, |
||||
LVL_INFO, |
||||
LVL_WARNING, |
||||
LVL_ERROR, |
||||
LVL_EXTRA |
||||
}; |
||||
|
||||
/* define cmd_args structure */ |
||||
/*
 * cmd_args: one field per configurable setting of the program.
 * The int group holds the base thresholds, update interval, sleep base
 * value, debug level and the five cost-based vacuum delay settings; the
 * float group holds the three scaling factors; the char * group holds
 * connection and logging strings.
 * Presumably filled in from the command-line options described in the
 * README (-v, -a, -i, -s, -d, -c/-C/-m/-n/-l, -V, -A, -S, -U, -P, -H, -L,
 * -p) -- confirm against pg_autovacuum.c.
 */
typedef struct cmdargs |
||||
{ |
||||
int vacuum_base_threshold, |
||||
analyze_base_threshold, |
||||
update_interval, |
||||
sleep_base_value, |
||||
debug, |
||||
|
||||
/*
|
||||
* Cost-Based Vacuum Delay Settings for pg_autovacuum |
||||
*/ |
||||
av_vacuum_cost_delay, |
||||
av_vacuum_cost_page_hit, |
||||
av_vacuum_cost_page_miss, |
||||
av_vacuum_cost_page_dirty, |
||||
av_vacuum_cost_limit, |
||||
|
||||
/*
 * The int declarator list ends differently per platform: non-Windows
 * builds get a daemonize flag, Windows builds get the service
 * install/remove flags instead.
 */
#ifndef WIN32 |
||||
daemonize; |
||||
#else |
||||
install_as_service, |
||||
remove_as_service; |
||||
#endif |
||||
float vacuum_scaling_factor, |
||||
analyze_scaling_factor, |
||||
sleep_scaling_factor; |
||||
char *user, |
||||
*password, |
||||
/* The three service_* strings exist only in Windows builds. */
#ifdef WIN32 |
||||
*service_dependencies, |
||||
*service_user, |
||||
*service_password, |
||||
#endif |
||||
*host, |
||||
*logfile, |
||||
*port; |
||||
} cmd_args; |
||||
|
||||
/*
|
||||
* Might need to add a time value for last time the whole database was |
||||
* vacuumed. We need to guarantee this happens approx every 1Billion TX's |
||||
*/ |
||||
/*
 * db_info: bookkeeping for one database.
 * Holds the database oid and name, the credentials and libpq connection
 * (PGconn) associated with it, database-level analyze/vacuum thresholds,
 * and a Dllist of its tables.
 * NOTE(review): "age" is presumably the age(datfrozenxid) value selected by
 * the FROZENOID queries -- confirm against pg_autovacuum.c.
 */
typedef struct dbinfo |
||||
{ |
||||
Oid oid; |
||||
long age; |
||||
long analyze_threshold, |
||||
vacuum_threshold; /* Use these as defaults for table
|
||||
* thresholds */ |
||||
PGconn *conn; |
||||
char *dbname, |
||||
*username, |
||||
*password; |
||||
Dllist *table_list; |
||||
} db_info; |
||||
|
||||
/*
 * tbl_info: bookkeeping for one table.
 * Records the table's schema and name, its relid, size figures (reltuples,
 * relpages), whether it is shared, its analyze/vacuum thresholds, the
 * activity counts at the last analyze/vacuum together with the current
 * counts, and a back-pointer to the owning db_info.
 * NOTE(review): relpages is declared with type Oid alongside relid although
 * it looks like a page count -- confirm whether that is intentional.
 */
typedef struct tableinfo |
||||
{ |
||||
char *schema_name, |
||||
*table_name; |
||||
float reltuples; |
||||
int relisshared; |
||||
Oid relid, |
||||
relpages; |
||||
long analyze_threshold, |
||||
vacuum_threshold; |
||||
long CountAtLastAnalyze; /* equal to: inserts + updates as
|
||||
* of the last analyze or initial |
||||
* values at startup */ |
||||
long CountAtLastVacuum; /* equal to: deletes + updates as
|
||||
* of the last vacuum or initial |
||||
* values at startup */ |
||||
long curr_analyze_count, |
||||
curr_vacuum_count; /* Latest values from stats system */ |
||||
db_info *dbi; /* pointer to the database that this table
|
||||
* belongs to */ |
||||
} tbl_info; |
||||
|
||||
#endif /* _PG_AUTOVACUUM_H */ |
Loading…
Reference in new issue