mirror of https://github.com/postgres/postgres
Test that stats are restored during normal restarts, discarded after a crash / immediate restart, and that a corrupted stats file leads to stats being reset. Author: Melanie Plageman <melanieplageman@gmail.com> Author: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de pull/81/head
parent
99392cdd78
commit
16acf7f1aa
@ -0,0 +1,307 @@ |
|||||||
|
# Copyright (c) 2021-2022, PostgreSQL Global Development Group |
||||||
|
|
||||||
|
# Tests statistics handling around restarts, including handling of crashes and
# invalid stats files, as well as restoring stats after "normal" restarts.
||||||
|
|
||||||
|
use strict; |
||||||
|
use warnings; |
||||||
|
use PostgreSQL::Test::Cluster; |
||||||
|
use PostgreSQL::Test::Utils; |
||||||
|
use Test::More; |
||||||
|
use File::Copy; |
||||||
|
|
||||||
|
# Bring up a single node with function call tracking switched on, so that
# function stats entries get created by the workload below.
my $node = PostgreSQL::Test::Cluster->new('primary');
$node->init(allows_streaming => 1);
$node->append_conf('postgresql.conf', q{track_functions = 'all'});
$node->start;

# Stats lookups are issued from $connect_db; the objects whose stats are
# examined live in $db_under_test.
my $connect_db = 'postgres';
my $db_under_test = 'test';

# Create the test database, then the table and function inside it.
$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test");
foreach my $ddl (
	q{CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a},
	q{CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE}
  )
{
	$node->safe_psql($db_under_test, $ddl);
}

# Look up the OIDs of the database, function and table; they identify the
# stats entries probed throughout the rest of the test.
my %oid_query_for = (
	database => "SELECT oid FROM pg_database WHERE datname = '$db_under_test'",
	function => "SELECT 'func_stats_crash_discard1()'::regprocedure::oid",
	relation => "SELECT 'tab_stats_crash_discard_test1'::regclass::oid",);
my $dboid = $node->safe_psql($db_under_test, $oid_query_for{database});
my $funcoid = $node->safe_psql($db_under_test, $oid_query_for{function});
my $tableoid = $node->safe_psql($db_under_test, $oid_query_for{relation});
||||||
|
|
||||||
|
# Run the workload once so that stats for the test objects get created and
# flushed.
trigger_funcrel_stat();

# All three kinds of stats entries must be present now.
my $sect = "initial";
foreach my $probe (
	[ 'database', 0, 'db' ],
	[ 'function', $funcoid, 'function' ],
	[ 'relation', $tableoid, 'relation' ])
{
	my ($kind, $objoid, $label) = @$probe;
	is(have_stats($kind, $dboid, $objoid),
		't', "$sect: $label stats do exist");
}
||||||
|
|
||||||
|
# Shut down cleanly; the stats file is expected to exist afterwards.
$node->stop();

# Keep a copy of the stats file outside the data directory, so it can be put
# back in place later on.
my $statsfile =
  join('/', $PostgreSQL::Test::Utils::tmp_check, 'discard_stats1');
ok(!-f $statsfile, "backup statsfile cannot already exist");

my $datadir = $node->data_dir();
my $og_stats = $datadir . '/pg_stat/pgstat.stat';
ok(-f $og_stats, "origin stats file must exist");
copy($og_stats, $statsfile) or die "Copy failed: $!";
||||||
|
|
||||||
|
|
||||||
|
## test discarding of stats file after crash etc

$node->start;

# After the regular shutdown above, the stats must have been restored on
# startup.
$sect = "copy";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
	't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
	't', "$sect: relation stats do exist");

# An immediate shutdown must not leave a stats file behind.
$node->stop('immediate');

ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");

# copy the old stats back to test we discard stats after crash restart
copy($statsfile, $og_stats) or die "Copy failed: $!";

$node->start;

# stats should have been discarded, even though a valid-looking stats file
# was present at startup
$sect = "post immediate";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
# The expected value is 'f', so the test name has to say "do not exist";
# it previously claimed the opposite, which made failure reports misleading.
is(have_stats('function', $dboid, $funcoid),
	'f', "$sect: function stats do not exist");
is(have_stats('relation', $dboid, $tableoid),
	'f', "$sect: relation stats do not exist");
||||||
|
|
||||||
|
# The backup copy of the stats file is no longer needed.
unlink($statsfile) or die "cannot unlink $statsfile $!";

# Run the workload again; fresh stats entries must show up for all objects.
trigger_funcrel_stat();

$sect = "post immediate, new";
foreach my $probe (
	[ 'database', 0, 'db' ],
	[ 'function', $funcoid, 'function' ],
	[ 'relation', $tableoid, 'relation' ])
{
	my ($kind, $objoid, $label) = @$probe;
	is(have_stats($kind, $dboid, $objoid),
		't', "$sect: $label stats do exist");
}

# Clean shutdown before the stats file gets tampered with.
$node->stop();
||||||
|
|
||||||
|
|
||||||
|
## check an invalid stats file is handled

# Replace the stats file's contents with garbage while the node is down.
overwrite_file($og_stats, "ZZZZZZZZZZZZZ");

# Startup has to succeed regardless of the unusable stats file.
$node->start;

# None of the previously created stats entries may be visible.
$sect = "invalid";
foreach my $probe (
	[ 'database', 0, 'db' ],
	[ 'function', $funcoid, 'function' ],
	[ 'relation', $tableoid, 'relation' ])
{
	my ($kind, $objoid, $label) = @$probe;
	is(have_stats($kind, $dboid, $objoid),
		'f', "$sect: $label stats do not exist");
}
||||||
|
|
||||||
|
|
||||||
|
## checks related to stats persistency around restarts and resets

# Perform a couple of checkpoints up front, so that the comparisons made
# after the reset further below are not racy, even on very slow machines.
$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;");


## check checkpoint and wal stats are incremented due to restart

# Snapshot both sets of counters before and after a restart.
my $ckpt_start = checkpoint_stats();
my $wal_start = wal_stats();
$node->restart;

$sect = "post restart";
my $ckpt_restart = checkpoint_stats();
my $wal_restart = wal_stats();

# The counters must have grown across the restart ...
foreach my $growth (
	[ $ckpt_start->{count}, $ckpt_restart->{count}, 'increased checkpoint count' ],
	[ $wal_start->{records}, $wal_restart->{records}, 'increased wal record count' ],
	[ $wal_start->{bytes}, $wal_restart->{bytes}, 'increased wal bytes' ])
{
	my ($before, $after, $what) = @$growth;
	cmp_ok($before, '<', $after, "$sect: $what");
}

# ... whereas the reset timestamps must be unchanged.
foreach my $pair (
	[ $ckpt_start, $ckpt_restart, 'checkpoint' ],
	[ $wal_start, $wal_restart, 'wal' ])
{
	my ($before, $after, $label) = @$pair;
	is($before->{reset}, $after->{reset}, "$sect: $label stats_reset equal");
}
||||||
|
|
||||||
|
|
||||||
|
## Check that checkpoint stats are reset, WAL stats aren't affected

$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')");

$sect = "post ckpt reset";
my $ckpt_reset = checkpoint_stats();
my $wal_ckpt_reset = wal_stats();

# The checkpoint counter must have dropped and its reset timestamp moved
# forward, while the WAL record counter must not have gone backwards.
foreach my $check (
	[ $ckpt_restart->{count}, '>', $ckpt_reset->{count}, 'checkpoint count smaller' ],
	[ $ckpt_start->{reset}, 'lt', $ckpt_reset->{reset}, 'stats_reset newer' ],
	[
		$wal_restart->{records}, '<=',
		$wal_ckpt_reset->{records},
		'wal record count not affected by reset'
	])
{
	my ($got, $op, $expected, $what) = @$check;
	cmp_ok($got, $op, $expected, "$sect: $what");
}

# The WAL reset timestamp is still the one seen at the very start.
is($wal_start->{reset}, $wal_ckpt_reset->{reset},
	"$sect: wal stats_reset equal");
||||||
|
|
||||||
|
|
||||||
|
## check that checkpoint stats stay reset after restart

$node->restart;

$sect = "post ckpt reset & restart";
my $ckpt_restart_reset = checkpoint_stats();
my $wal_restart2 = wal_stats();

# The extra checkpoints issued earlier keep this comparison stable even on
# slow machines.
cmp_ok($ckpt_restart_reset->{count},
	'<', $ckpt_restart->{count}, "$sect: checkpoint still reset");
is($ckpt_restart_reset->{reset}, $ckpt_reset->{reset},
	"$sect: stats_reset same");

# WAL counters keep growing across the restart ...
foreach my $growth (
	[ 'records', 'increased wal record count' ],
	[ 'bytes', 'increased wal bytes' ])
{
	my ($field, $what) = @$growth;
	cmp_ok($wal_ckpt_reset->{$field},
		'<', $wal_restart2->{$field}, "$sect: $what");
}

# ... and their reset timestamp is still the original one.
is($wal_start->{reset}, $wal_restart2->{reset},
	"$sect: wal stats_reset equal");
||||||
|
|
||||||
|
|
||||||
|
## check WAL stats stay reset

$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')");

# Right after the reset the counters must be below the last pre-reset
# snapshot, and the reset timestamp must have moved forward.
$sect = "post wal reset";
my $wal_reset = wal_stats();

cmp_ok(
	$wal_reset->{records}, '<',
	$wal_restart2->{records},
	"$sect: smaller record count");
cmp_ok(
	$wal_reset->{bytes}, '<',
	$wal_restart2->{bytes},
	"$sect: smaller bytes");
cmp_ok(
	$wal_reset->{reset}, 'gt',
	$wal_restart2->{reset},
	"$sect: newer stats_reset");

$node->restart;

# The same must still hold for the values read after the restart. All three
# checks look at $wal_reset_restart; comparing the pre-restart $wal_reset
# snapshot again would only repeat the assertions made above and say nothing
# about whether the reset survived the restart.
$sect = "post wal reset & restart";
my $wal_reset_restart = wal_stats();

# enough WAL generated during prior tests and initdb to make this not racy
cmp_ok(
	$wal_reset_restart->{records},
	'<',
	$wal_restart2->{records},
	"$sect: smaller record count");
cmp_ok(
	$wal_reset_restart->{bytes},
	'<',
	$wal_restart2->{bytes},
	"$sect: smaller bytes");
cmp_ok(
	$wal_reset_restart->{reset},
	'gt',
	$wal_restart2->{reset},
	"$sect: newer stats_reset");


$node->stop;
done_testing();
||||||
|
|
||||||
|
# Generate stats for the test table and the test function in $db_under_test,
# and request a flush of the pending stats, all within one psql invocation.
sub trigger_funcrel_stat
{
	my $workload = q[
	SELECT * FROM tab_stats_crash_discard_test1;
	SELECT func_stats_crash_discard1();
	SELECT pg_stat_force_next_flush();];

	return $node->safe_psql($db_under_test, $workload);
}
||||||
|
|
||||||
|
# Ask the server, through $connect_db, whether a stats entry exists.
#
# Arguments: the stats kind (e.g. 'database', 'function', 'relation'), the
# database oid, and the object oid.
#
# Returns the psql output of pg_stat_have_stats(); callers compare it against
# 't' and 'f'.
sub have_stats
{
	my ($stats_kind, $db_oid, $obj_oid) = @_;

	my $query =
	  "SELECT pg_stat_have_stats('$stats_kind', $db_oid, $obj_oid)";

	return $node->safe_psql($connect_db, $query);
}
||||||
|
|
||||||
|
# Replace the contents of a file with the given string.
#
# Arguments: the path of the file to (re)create, and the string to store in
# it. Any previous contents are truncated away.
#
# Dies if the file cannot be opened, written, or closed. Buffered write
# errors may only surface at close, so the results of both print and close
# are checked; otherwise a failed write would go unnoticed and the caller
# would continue with a file that does not hold the intended contents.
sub overwrite_file
{
	my ($filename, $str) = @_;

	open my $fh, ">", $filename
	  or die "could not write \"$filename\": $!";
	print {$fh} $str
	  or die "could not write \"$filename\": $!";
	close $fh
	  or die "could not write \"$filename\": $!";
	return;
}
||||||
|
|
||||||
|
# Fetch checkpoint-related values from pg_stat_bgwriter via $connect_db.
#
# Returns a hash reference with two keys:
#   count - checkpoints_timed + checkpoints_req
#   reset - stats_reset
# Each value is obtained with its own query, in the order listed.
sub checkpoint_stats
{
	my %query_for = (
		count =>
		  "SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter",
		reset => "SELECT stats_reset FROM pg_stat_bgwriter",);

	my %snapshot;
	foreach my $field (qw(count reset))
	{
		$snapshot{$field} = $node->safe_psql($connect_db, $query_for{$field});
	}

	return \%snapshot;
}
||||||
|
|
||||||
|
# Fetch WAL-related values from pg_stat_wal via $connect_db.
#
# Returns a hash reference with three keys:
#   records - wal_records
#   bytes   - wal_bytes
#   reset   - stats_reset
# Each value is obtained with its own query, in the order listed.
sub wal_stats
{
	my @columns = (
		[ records => 'wal_records' ],
		[ bytes => 'wal_bytes' ],
		[ reset => 'stats_reset' ]);

	my %snapshot;
	foreach my $entry (@columns)
	{
		my ($field, $column) = @$entry;
		$snapshot{$field} =
		  $node->safe_psql($connect_db, "SELECT $column FROM pg_stat_wal");
	}

	return \%snapshot;
}
Loading…
Reference in new issue