mirror of https://github.com/postgres/postgres
Test that stats are restored during normal restarts, discarded after a crash / immediate restart, and that a corrupted stats file leads to stats being reset. Author: Melanie Plageman <melanieplageman@gmail.com> Author: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.depull/81/head
parent
99392cdd78
commit
16acf7f1aa
@ -0,0 +1,307 @@ |
||||
# Copyright (c) 2021-2022, PostgreSQL Global Development Group |
||||
|
||||
# Tests statistics handling around restarts, including handling of crashes and |
||||
# invalid stats files, as well as restorting stats after "normal" restarts. |
||||
|
||||
use strict; |
||||
use warnings; |
||||
use PostgreSQL::Test::Cluster; |
||||
use PostgreSQL::Test::Utils; |
||||
use Test::More; |
||||
use File::Copy; |
||||
|
||||
my $node = PostgreSQL::Test::Cluster->new('primary'); |
||||
$node->init(allows_streaming => 1); |
||||
$node->append_conf('postgresql.conf', "track_functions = 'all'"); |
||||
$node->start; |
||||
|
||||
my $connect_db = 'postgres'; |
||||
my $db_under_test = 'test'; |
||||
|
||||
# create test objects |
||||
$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test"); |
||||
$node->safe_psql($db_under_test, |
||||
"CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a" |
||||
); |
||||
$node->safe_psql($db_under_test, |
||||
"CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE" |
||||
); |
||||
|
||||
# collect object oids |
||||
my $dboid = $node->safe_psql($db_under_test, |
||||
"SELECT oid FROM pg_database WHERE datname = '$db_under_test'"); |
||||
my $funcoid = $node->safe_psql($db_under_test, |
||||
"SELECT 'func_stats_crash_discard1()'::regprocedure::oid"); |
||||
my $tableoid = $node->safe_psql($db_under_test, |
||||
"SELECT 'tab_stats_crash_discard_test1'::regclass::oid"); |
||||
|
||||
# generate stats and flush them |
||||
trigger_funcrel_stat(); |
||||
|
||||
# verify stats objects exist |
||||
my $sect = "initial"; |
||||
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); |
||||
is(have_stats('function', $dboid, $funcoid), |
||||
't', "$sect: function stats do exist"); |
||||
is(have_stats('relation', $dboid, $tableoid), |
||||
't', "$sect: relation stats do exist"); |
||||
|
||||
# regular shutdown |
||||
$node->stop(); |
||||
|
||||
# backup stats files |
||||
my $statsfile = $PostgreSQL::Test::Utils::tmp_check . '/' . "discard_stats1"; |
||||
ok(!-f "$statsfile", "backup statsfile cannot already exist"); |
||||
|
||||
my $datadir = $node->data_dir(); |
||||
my $og_stats = "$datadir/pg_stat/pgstat.stat"; |
||||
ok(-f "$og_stats", "origin stats file must exist"); |
||||
copy($og_stats, $statsfile) or die "Copy failed: $!"; |
||||
|
||||
|
||||
## test discarding of stats file after crash etc |
||||
|
||||
$node->start; |
||||
|
||||
$sect = "copy"; |
||||
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); |
||||
is(have_stats('function', $dboid, $funcoid), |
||||
't', "$sect: function stats do exist"); |
||||
is(have_stats('relation', $dboid, $tableoid), |
||||
't', "$sect: relation stats do exist"); |
||||
|
||||
$node->stop('immediate'); |
||||
|
||||
ok(!-f "$og_stats", "no stats file should exist after immediate shutdown"); |
||||
|
||||
# copy the old stats back to test we discard stats after crash restart |
||||
copy($statsfile, $og_stats) or die "Copy failed: $!"; |
||||
|
||||
$node->start; |
||||
|
||||
# stats should have been discarded |
||||
$sect = "post immediate"; |
||||
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist"); |
||||
is(have_stats('function', $dboid, $funcoid), |
||||
'f', "$sect: function stats do exist"); |
||||
is(have_stats('relation', $dboid, $tableoid), |
||||
'f', "$sect: relation stats do not exist"); |
||||
|
||||
# get rid of backup statsfile |
||||
unlink $statsfile or die "cannot unlink $statsfile $!"; |
||||
|
||||
|
||||
# generate new stats and flush them |
||||
trigger_funcrel_stat(); |
||||
|
||||
$sect = "post immediate, new"; |
||||
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); |
||||
is(have_stats('function', $dboid, $funcoid), |
||||
't', "$sect: function stats do exist"); |
||||
is(have_stats('relation', $dboid, $tableoid), |
||||
't', "$sect: relation stats do exist"); |
||||
|
||||
# regular shutdown |
||||
$node->stop(); |
||||
|
||||
|
||||
## check an invalid stats file is handled |
||||
|
||||
overwrite_file($og_stats, "ZZZZZZZZZZZZZ"); |
||||
|
||||
# normal startup and no issues despite invalid stats file |
||||
$node->start; |
||||
|
||||
# no stats present due to invalid stats file |
||||
$sect = "invalid"; |
||||
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist"); |
||||
is(have_stats('function', $dboid, $funcoid), |
||||
'f', "$sect: function stats do not exist"); |
||||
is(have_stats('relation', $dboid, $tableoid), |
||||
'f', "$sect: relation stats do not exist"); |
||||
|
||||
|
||||
## checks related to stats persistency around restarts and resets |
||||
|
||||
# Ensure enough checkpoints to protect against races for test after reset, |
||||
# even on very slow machines. |
||||
$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;"); |
||||
|
||||
|
||||
## check checkpoint and wal stats are incremented due to restart |
||||
|
||||
my $ckpt_start = checkpoint_stats(); |
||||
my $wal_start = wal_stats(); |
||||
$node->restart; |
||||
|
||||
$sect = "post restart"; |
||||
my $ckpt_restart = checkpoint_stats(); |
||||
my $wal_restart = wal_stats(); |
||||
|
||||
cmp_ok( |
||||
$ckpt_start->{count}, '<', |
||||
$ckpt_restart->{count}, |
||||
"$sect: increased checkpoint count"); |
||||
cmp_ok( |
||||
$wal_start->{records}, '<', |
||||
$wal_restart->{records}, |
||||
"$sect: increased wal record count"); |
||||
cmp_ok($wal_start->{bytes}, '<', $wal_restart->{bytes}, |
||||
"$sect: increased wal bytes"); |
||||
is( $ckpt_start->{reset}, |
||||
$ckpt_restart->{reset}, |
||||
"$sect: checkpoint stats_reset equal"); |
||||
is($wal_start->{reset}, $wal_restart->{reset}, |
||||
"$sect: wal stats_reset equal"); |
||||
|
||||
|
||||
## Check that checkpoint stats are reset, WAL stats aren't affected |
||||
|
||||
$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')"); |
||||
|
||||
$sect = "post ckpt reset"; |
||||
my $ckpt_reset = checkpoint_stats(); |
||||
my $wal_ckpt_reset = wal_stats(); |
||||
|
||||
cmp_ok($ckpt_restart->{count}, |
||||
'>', $ckpt_reset->{count}, "$sect: checkpoint count smaller"); |
||||
cmp_ok($ckpt_start->{reset}, 'lt', $ckpt_reset->{reset}, |
||||
"$sect: stats_reset newer"); |
||||
|
||||
cmp_ok( |
||||
$wal_restart->{records}, |
||||
'<=', |
||||
$wal_ckpt_reset->{records}, |
||||
"$sect: wal record count not affected by reset"); |
||||
is( $wal_start->{reset}, |
||||
$wal_ckpt_reset->{reset}, |
||||
"$sect: wal stats_reset equal"); |
||||
|
||||
|
||||
## check that checkpoint stats stay reset after restart |
||||
|
||||
$node->restart; |
||||
|
||||
$sect = "post ckpt reset & restart"; |
||||
my $ckpt_restart_reset = checkpoint_stats(); |
||||
my $wal_restart2 = wal_stats(); |
||||
|
||||
# made sure above there's enough checkpoints that this will be stable even on slow machines |
||||
cmp_ok( |
||||
$ckpt_restart_reset->{count}, |
||||
'<', |
||||
$ckpt_restart->{count}, |
||||
"$sect: checkpoint still reset"); |
||||
is($ckpt_restart_reset->{reset}, |
||||
$ckpt_reset->{reset}, "$sect: stats_reset same"); |
||||
|
||||
cmp_ok( |
||||
$wal_ckpt_reset->{records}, |
||||
'<', |
||||
$wal_restart2->{records}, |
||||
"$sect: increased wal record count"); |
||||
cmp_ok( |
||||
$wal_ckpt_reset->{bytes}, |
||||
'<', |
||||
$wal_restart2->{bytes}, |
||||
"$sect: increased wal bytes"); |
||||
is( $wal_start->{reset}, |
||||
$wal_restart2->{reset}, |
||||
"$sect: wal stats_reset equal"); |
||||
|
||||
|
||||
## check WAL stats stay reset |
||||
|
||||
$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')"); |
||||
|
||||
$sect = "post wal reset"; |
||||
my $wal_reset = wal_stats(); |
||||
|
||||
cmp_ok( |
||||
$wal_reset->{records}, '<', |
||||
$wal_restart2->{records}, |
||||
"$sect: smaller record count"); |
||||
cmp_ok( |
||||
$wal_reset->{bytes}, '<', |
||||
$wal_restart2->{bytes}, |
||||
"$sect: smaller bytes"); |
||||
cmp_ok( |
||||
$wal_reset->{reset}, 'gt', |
||||
$wal_restart2->{reset}, |
||||
"$sect: newer stats_reset"); |
||||
|
||||
$node->restart; |
||||
|
||||
$sect = "post wal reset & restart"; |
||||
my $wal_reset_restart = wal_stats(); |
||||
|
||||
# enough WAL generated during prior tests and initdb to make this not racy |
||||
cmp_ok( |
||||
$wal_reset_restart->{records}, |
||||
'<', |
||||
$wal_restart2->{records}, |
||||
"$sect: smaller record count"); |
||||
cmp_ok( |
||||
$wal_reset->{bytes}, '<', |
||||
$wal_restart2->{bytes}, |
||||
"$sect: smaller bytes"); |
||||
cmp_ok( |
||||
$wal_reset->{reset}, 'gt', |
||||
$wal_restart2->{reset}, |
||||
"$sect: newer stats_reset"); |
||||
|
||||
|
||||
$node->stop; |
||||
done_testing(); |
||||
|
||||
sub trigger_funcrel_stat |
||||
{ |
||||
$node->safe_psql( |
||||
$db_under_test, q[ |
||||
SELECT * FROM tab_stats_crash_discard_test1; |
||||
SELECT func_stats_crash_discard1(); |
||||
SELECT pg_stat_force_next_flush();]); |
||||
} |
||||
|
||||
sub have_stats |
||||
{ |
||||
my ($kind, $dboid, $objoid) = @_; |
||||
|
||||
return $node->safe_psql($connect_db, |
||||
"SELECT pg_stat_have_stats('$kind', $dboid, $objoid)"); |
||||
} |
||||
|
||||
sub overwrite_file |
||||
{ |
||||
my ($filename, $str) = @_; |
||||
open my $fh, ">", $filename |
||||
or die "could not write \"$filename\": $!"; |
||||
print $fh $str; |
||||
close $fh; |
||||
return; |
||||
} |
||||
|
||||
sub checkpoint_stats |
||||
{ |
||||
my %results; |
||||
|
||||
$results{count} = $node->safe_psql($connect_db, |
||||
"SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter"); |
||||
$results{reset} = $node->safe_psql($connect_db, |
||||
"SELECT stats_reset FROM pg_stat_bgwriter"); |
||||
|
||||
return \%results; |
||||
} |
||||
|
||||
sub wal_stats |
||||
{ |
||||
my %results; |
||||
$results{records} = |
||||
$node->safe_psql($connect_db, "SELECT wal_records FROM pg_stat_wal"); |
||||
$results{bytes} = |
||||
$node->safe_psql($connect_db, "SELECT wal_bytes FROM pg_stat_wal"); |
||||
$results{reset} = |
||||
$node->safe_psql($connect_db, "SELECT stats_reset FROM pg_stat_wal"); |
||||
|
||||
return \%results; |
||||
} |
Loading…
Reference in new issue