mirror of https://github.com/postgres/postgres
exist anymore...pull/50/head
parent
046873ac90
commit
76bf7f9d68
@ -1,180 +0,0 @@ |
||||
From ccshag@cclabs.missouri.edu Wed Jul 10 04:27:16 1996 |
||||
Received: from realtime.cc.missouri.edu (realtime.cc.missouri.edu [128.206.212.69]) by ki.net (8.7.5/8.7.5) with ESMTP id EAA12722 for <scrappy@ki.net>; Wed, 10 Jul 1996 04:27:13 -0400 (EDT) |
||||
Received: from localhost (ccshag@localhost) by realtime.cc.missouri.edu (8.7.1/8.7.1) with SMTP id DAA00915; Wed, 10 Jul 1996 03:26:42 -0500 (CDT) |
||||
X-Authentication-Warning: realtime.cc.missouri.edu: ccshag owned process doing -bs |
||||
Date: Wed, 10 Jul 1996 03:26:41 -0500 (CDT) |
||||
From: "Paul 'Shag' Walmsley" <ccshag@cclabs.missouri.edu> |
||||
Sender: ccshag@cclabs.missouri.edu |
||||
To: postgres95-hackers@oozoo.vnet.net |
||||
cc: pixel@tiger.coe.missouri.edu, michael.siebenborn@ae3.Hypo.DE, |
||||
scrappy@ki.net |
||||
Subject: Workaround for database corruption problems [long] |
||||
Message-ID: <Pine.SGI.3.93.960710023021.673B-100000@realtime.cc.missouri.edu> |
||||
X-Disclaimer: The opinions of this writer do not necessarily represent those of the University of Missouri-Columbia. |
||||
MIME-Version: 1.0 |
||||
Content-Type: TEXT/PLAIN; charset=US-ASCII |
||||
Status: RO |
||||
X-Status: |
||||
|
||||
|
||||
I've been poking through the guts of POSTGRES95 in pursuit of the |
||||
previously-mentioned "database corruption" problem, and I think that I |
||||
have some preliminary answers and a workaround for people to play around |
||||
with. |
||||
|
||||
Specifically, the problem that I'm referring to is manifested by |
||||
pg_log, pg_time, or pg_variable either growing to huge sizes (usually |
||||
128MB) or showing evidence of internal corruption. Symptoms that appear |
||||
to the user include receiving messages like the following in psql or |
||||
monitor: |
||||
"WARN:cannot open segment 1 of relation pg_time" |
||||
"WARN:cannot write block 16384 of pg_log" |
||||
.. which are followed up in a 'postmaster -d 3' debug log by a |
||||
"NOTICE:AbortTransaction and not in in-progress state" |
||||
|
||||
It's been my experience that this problem is difficult to reproduce on |
||||
demand; I've tried several different ways and haven't succeeded. (If |
||||
anyone knows how to reliably reproduce this, please E-mail me.) I was, |
||||
however, able to obtain a database in a corrupted state from one of the |
||||
fellows on campus here running POSTGRES95; and following is something of a |
||||
post-mortem. |
||||
|
||||
---------- |
||||
The problem with the database is that after any query, POSTGRES95 attempts |
||||
to generate a huge pg_log file and fails (running out of disk space) with |
||||
a "WARN:cannot write block 16384 of pg_log" message. The contents of the |
||||
database and its internal structure were reasonably intact - I discovered |
||||
that I was able to execute a single SQL query per-backend, and could |
||||
therefore back the contents of the database up with a COPY command. (I |
||||
wasn't able to use pg_dump; it failed on its BEGIN TRANSACTION step.) |
||||
Inspection of data/pg_log, data/pg_variable, and data/pg_time revealed the |
||||
following; |
||||
|
||||
realtime /usr/postgres95 > od -x data/pg_log.old | head |
||||
0000000 0060 2000 2000 2000 f680 0000 ed00 0000 |
||||
0000020 e300 0000 dc00 0000 ce00 0000 bc00 0000 |
||||
0000040 b140 0000 a740 0000 9dc0 0000 9180 0000 |
||||
0000060 8580 0000 7c80 0000 6fc0 0000 6480 0000 |
||||
0000100 58c0 0000 4fc0 0000 4640 0000 3a80 0000 |
||||
0000120 30c0 0000 23c0 0000 1900 0000 0c40 0000 |
||||
0000140 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0000200 8800 0000 0000 0000 0800 0000 0000 0000 |
||||
0000220 0800 0000 0000 0000 0800 0000 0000 0000 |
||||
|
||||
|
||||
Now, already something is obviously wrong: pg_log shouldn't have any |
||||
values other than 0, 1, 2, 4, 8, a, c, or e for each nybble (see the XID_ |
||||
... definitions in src/backend/access/transam.h:35; two of these can be |
||||
stored per nybble.) |
||||
|
||||
|
||||
realtime /usr/postgres95 > od -x data/pg_variable.old | head |
||||
0000000 0058 2000 2000 2740 f240 0000 e400 0000 |
||||
0000020 d780 0000 cac0 0000 b840 0000 a900 0000 |
||||
0000040 9f80 0000 9280 0000 8580 0000 78c0 0000 |
||||
0000060 6c80 0000 6200 0000 5800 0000 4f80 0000 |
||||
0000100 44c0 0000 39c0 0000 2d80 0000 2600 0000 |
||||
0000120 1c00 0000 0e00 0000 0000 0000 0000 0000 |
||||
0000140 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0000700 0000 01bc 0000 009c 0014 ffff ffff 0000 |
||||
0000720 0004 5f0b 0000 0000 0000 0ba3 0000 0c23 |
||||
|
||||
|
||||
This is obviously broken too; the only bytes that should be non-zero in |
||||
pg_variable are the first 16 (see src/backend/access/transam.h:99). |
||||
|
||||
|
||||
realtime /usr/postgres95 > od -x data/pg_time.old | head -10 |
||||
0000000 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0004000 31c6 09ae 0000 0000 31c6 09b3 31a9 79a6 |
||||
0004020 31a9 79a6 31a9 79a8 31a9 79a9 31a9 79a9 |
||||
0004040 31a9 79a9 31a9 79aa 31a9 79aa 31a9 79aa |
||||
0004060 31a9 79aa 31a9 79aa 31a9 79ab 31a9 79ab |
||||
0004100 31a9 79ab 31a9 79ac 31a9 79ac 31a9 79ac |
||||
0004120 31a9 79ac 31a9 79ad 31a9 79ad 31a9 79ae |
||||
0004140 31a9 79ae 31a9 79af 31a9 79af 31a9 79af |
||||
0004160 31a9 79b0 31a9 79b0 31a9 79b4 31a9 79b4 |
||||
realtime /usr/postgres95 > od -x data/pg_time.old +32b | head -10 |
||||
0032000 6665 6520 666f 7220 31c5 cb61 696e 6720 |
||||
0032020 6465 7665 6c6f 7065 7273 2062 7579 2061 |
||||
0032040 206c 6f77 2d69 6e63 6f6d 6520 6170 6172 |
||||
0032060 746d 656e 7420 636f 6d70 6c65 7869 6e20 |
||||
0032100 7468 6520 7472 6561 7375 7265 7227 7320 |
||||
0032120 7761 7264 2066 726f 6d20 7468 6520 6665 |
||||
0032140 6465 7261 6c20 676f 7665 726e 6d65 6e74 |
||||
0032160 2066 6f72 2024 313b 2064 6576 656c 6f70 |
||||
0032200 6572 7361 6c73 6f20 31c5 cb7c 31be 91a2 |
||||
0032220 31be 91a2 31be 91a2 31be 91a2 31be 91a3 |
||||
|
||||
The first part of pg_time looked okay -- filled with 4-byte time values |
||||
with the occasional "00 00 00 00" here and there -- but inspection into |
||||
pg_time's guts revealed some preposterous numbers. |
||||
|
||||
For your comparing pleasure, I include copies of what these files |
||||
_should_ look like -- more or less -- from a newly initdb'd database |
||||
directory: |
||||
|
||||
realtime /usr/postgres95 > od -x data-new-initdb/pg_log |
||||
0000000 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0000200 8aaa aaaa aaaa aaa8 0000 0000 0000 0000 |
||||
0000220 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0020000 |
||||
realtime /usr/postgres95 > od -x data-new-initdb/pg_variable |
||||
0000000 0000 0000 0000 0222 0000 021e 0000 5120 |
||||
0000020 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0020000 |
||||
realtime /usr/postgres95 > od -x data-new-initdb/pg_time |
||||
0000000 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0004000 31e3 4fb3 0000 0000 31e3 4fb4 31e3 4fb4 |
||||
0004020 31e3 4fb4 31e3 4fb5 31e3 4fb5 31e3 4fb5 |
||||
0004040 31e3 4fb6 31e3 4fb6 31e3 4fb6 31e3 4fb6 |
||||
0004060 31e3 4fb7 31e3 4fb7 31e3 4fb7 31e3 4fb7 |
||||
0004100 31e3 4fb8 31e3 4fb8 31e3 4fb8 31e3 4fb9 |
||||
0004120 31e3 4fb9 31e3 4fb9 31e3 4fb9 31e3 4fba |
||||
0004140 31e3 4fba 31e3 4fba 31e3 4fbb 31e3 4fbb |
||||
0004160 31e3 4fbb 31e3 4fbb 31e3 4fbb 0000 0000 |
||||
0004200 0000 0000 0000 0000 0000 0000 0000 0000 |
||||
* |
||||
0020000 |
||||
|
||||
|
||||
What's more, if I view pg_log, pg_variable, and pg_time with a text |
||||
editor, I discover that the corrupted areas contain fragments of tuples |
||||
from the database! |
||||
---------- |
||||
|
||||
|
||||
Since I have been unable to reproduce this problem on demand (and |
||||
therefore have a complete picture of all of the queries executed on the |
||||
database), I've only been able to speculate on the original cause of the |
||||
problem. One explanation would be some kind of buffer manager bug; or |
||||
perhaps the backend is running low on file descriptors and bollixing |
||||
existing ones? I intend to continue tracking this down as I have the |
||||
time, but in the interim, I've created a quick Perl script that is |
||||
designed to restore some sense of rationality to the |
||||
pg_{log,time,variable} files. It's a horribly barbaric way to do this -- |
||||
at the very least, it almost certainly breaks time travel, and will |
||||
almost certainly result in duplicate oid problems -- but it restored my |
||||
pathological database here to a sufficiently working state such that I |
||||
could run pg_dump on it and do various other queries with it. The script |
||||
is included at the bottom of this message. |
||||
|
||||
Anyway, fellow posthackers, assuming that no one raises any red flags |
||||
about this, I'll post it on postgres95@oozoo.vnet.net as a temporary |
||||
stop-gap workaround to allow people to recover their databases when this |
||||
sort of problem occurs. |
||||
|
||||
I wish I had more time to do this kind of thing. |
||||
|
||||
|
||||
- Paul "Shag" Walmsley <ccshag@cclabs.missouri.edu> |
||||
"Knowing is not enough." -- Hal Hartley, "Surviving Desire" |
||||
|
||||
|
@ -1,93 +0,0 @@ |
||||
#!/bin/perl |
||||
# |
||||
# zap_ltv - attempt to restore a POSTGRES95 database afflicted with |
||||
# pg_log, pg_time, or pg_variable corruption to |
||||
# minimal functionality |
||||
# |
||||
# Paul Walmsley <ccshag@cclabs.missouri.edu> |
||||
# |
||||
# Legalese: |
||||
# |
||||
# In no event shall Paul Walmsley be liable to any party for direct, |
||||
# indirect, special, incidental, or consequential damages, including |
||||
# lost profits, arising from the use of this software, even if Paul Walmsley |
||||
# has been advised of the possibility of such damage. Paul Walmsley |
||||
# specifically disclaims any warranties, including, but not limited to, |
||||
# the implied warranties of merchantability and fitness for a particular |
||||
# purpose. The software provided hereunder is on an "as is" basis, |
||||
# and Paul Walmsley has no obligations to provide maintenance, support, |
||||
# updates, enhancements, or modifications. |
||||
# |
||||
# Thanks, Berkeley ;-) |
||||
|
||||
print "This program should only be run if POSTGRES95 is not currently\n"; |
||||
print "running on this system. It should also not be run unless you \n"; |
||||
print "are having seemingly unrecoverable problems with your POSTGRES95\n"; |
||||
print "database related to pg_log, pg_time, or pg_variable corruption.\n\n"; |
||||
print "This program replaces the existing pg_log, pg_time, and pg_variable\n"; |
||||
print "files with \"clean\" copies of those files. This will almost \n"; |
||||
print "certainly result in duplicate IDs when any INSERTs are attempted,\n"; |
||||
print "and probably has other side-effects as well. Back up your databases\n" |
||||
print "and re-initdb from scratch after using this!\n\n"; |
||||
print "This program will attempt to make a backup of your pg_time,\n"; |
||||
print "pg_log, and pg_variable files (to pg_time.backup, pg_log.backup,\n"; |
||||
print "and pg_variable.backup, respectively).\n\n"; |
||||
print "This program bears no guarantees nor any warranties whatsoever.\n"; |
||||
print "View the source for details.\n\n"; |
||||
print "Press ENTER to zap your pg_log, pg_time, and pg_variable files:"; |
||||
|
||||
$trash=<STDIN>; |
||||
|
||||
$pg_log_data=pack('xxxx@8192',0); |
||||
$pg_time_data=pack('xxxx@8192',0); |
||||
# next_tid, last_tid, and oid are pulled from a fresh initdb |
||||
$pg_variable_data=pack('xxxxxxCCxxCCxxCC@8192',2,34,2,30,81,32); |
||||
|
||||
if (length($pg_log_data)!=8192) { |
||||
die "pg_log_data must be exactly 8192 bytes long"; |
||||
} |
||||
if (length($pg_time_data)!=8192) { |
||||
die "pg_time_data must be exactly 8192 bytes long"; |
||||
} |
||||
if (length($pg_variable_data)!=8192) { |
||||
die "pg_variable_data must be exactly 8192 bytes long"; |
||||
} |
||||
|
||||
|
||||
if (! -f 'pg_database') { |
||||
die "This program must be run from your POSTGRES95 data directory."; |
||||
} |
||||
|
||||
system('cp pg_log pg_log.backup'); |
||||
open(PG_LOG,'>pg_log'); |
||||
binmode(PG_LOG); |
||||
$written=syswrite(PG_LOG,$pg_log_data,8192); |
||||
close(PG_LOG); |
||||
|
||||
if ($written!=8192) { |
||||
die "pg_log write failed: $!"; |
||||
} |
||||
|
||||
$written=0; |
||||
system('cp pg_time pg_time.backup'); |
||||
open(PG_TIME,'>pg_time'); |
||||
binmode(PG_TIME); |
||||
$written=syswrite(PG_TIME,$pg_time_data,8192); |
||||
close(PG_TIME); |
||||
|
||||
if ($written!=8192) { |
||||
die "pg_time write failed: $!"; |
||||
} |
||||
|
||||
$written=0; |
||||
system('cp pg_variable pg_variable.backup'); |
||||
open(PG_VARIABLE,'>pg_variable'); |
||||
binmode(PG_VARIABLE); |
||||
$written=syswrite(PG_VARIABLE,$pg_variable_data,8192); |
||||
close(PG_VARIABLE); |
||||
|
||||
if ($written!=8192) { |
||||
die "pg_variable write failed: $!"; |
||||
} |
||||
|
||||
print "Done.\n"; |
Loading…
Reference in new issue