PG-1710 Create helpers for decrypting/encrypting archived WAL

To support some common WAL archiving tools, e.g. PgBackRest, we
implement an archive_command and a restore_command which can wrap any
command and use pipe() to create fake file to either read from or write
to. The restore command makes sure to write encrypted files if WAL
encryption is enabled. It uses the fresh WAL key generated by the server
on the current start which works fine because we then just let the first
invocation of the restore command set the start LSN of the key.

For e.g. PgBackRest you would have the following commands:

  archive_command = 'pg_tde_archive_decrypt %p pgbackrest --stanza=demo archive-push %p'
  restore_command = 'pg_tde_restore_encrypt %f %p pgbackrest --stanza=demo archive-get %f "%p"'
pull/238/head
Andreas Karlsson 2 months ago committed by Andreas Karlsson
parent 80d515b322
commit f6f1ae33b1
  1. 2
      contrib/pg_tde/.gitignore
  2. 22
      contrib/pg_tde/Makefile
  3. 29
      contrib/pg_tde/meson.build
  4. 15
      contrib/pg_tde/src/access/pg_tde_xlog_smgr.c
  5. 228
      contrib/pg_tde/src/bin/pg_tde_archive_decrypt.c
  6. 220
      contrib/pg_tde/src/bin/pg_tde_restore_encrypt.c
  7. 1
      contrib/pg_tde/src/include/access/pg_tde_xlog_smgr.h
  8. 101
      contrib/pg_tde/t/wal_archiving.pl

@ -11,7 +11,9 @@ __pycache__
/configure~
/log
/results
/src/bin/pg_tde_archive_decrypt
/src/bin/pg_tde_change_key_provider
/src/bin/pg_tde_restore_encrypt
/t/results
/tmp_check

@ -51,10 +51,16 @@ src/libkmip/libkmip/src/kmip_bio.o \
src/libkmip/libkmip/src/kmip_locate.o \
src/libkmip/libkmip/src/kmip_memset.o
SCRIPTS_built = src/bin/pg_tde_change_key_provider
SCRIPTS_built = src/bin/pg_tde_archive_decrypt \
src/bin/pg_tde_change_key_provider \
src/bin/pg_tde_restore_encrypt
EXTRA_INSTALL += contrib/pg_buffercache contrib/test_decoding
EXTRA_CLEAN += src/bin/pg_tde_change_key_provider.o
EXTRA_CLEAN += src/bin/pg_tde_archive_decrypt.o \
src/bin/pg_tde_change_key_provider.o \
src/bin/pg_tde_restore_encrypt.o \
xlogreader.c \
xlogreader.o
ifdef USE_PGXS
PG_CONFIG = pg_config
@ -74,6 +80,18 @@ override SHLIB_LINK += -lcurl -lcrypto -lssl
src/bin/pg_tde_change_key_provider: src/bin/pg_tde_change_key_provider.o $(top_srcdir)/src/fe_utils/simple_list.o $(top_builddir)/src/libtde/libtde.a
$(CC) -DFRONTEND $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
src/bin/pg_tde_archive_decrypt: src/bin/pg_tde_archive_decrypt.o xlogreader.o $(top_srcdir)/src/fe_utils/simple_list.o $(top_builddir)/src/libtde/libtdexlog.a $(top_builddir)/src/libtde/libtde.a
$(CC) -DFRONTEND $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
src/bin/pg_tde_restore_encrypt: src/bin/pg_tde_restore_encrypt.o xlogreader.o $(top_srcdir)/src/fe_utils/simple_list.o $(top_builddir)/src/libtde/libtdexlog.a $(top_builddir)/src/libtde/libtde.a
$(CC) -DFRONTEND $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
rm -f $@ && $(LN_S) $< .
xlogreader.o: xlogreader.c
$(CC) $(CPPFLAGS) -DFRONTEND -c $< -o $@
# Fetches typedefs list for PostgreSQL core and merges it with typedefs defined in this project.
# https://wiki.postgresql.org/wiki/Running_pgindent_on_non-core_code_or_development_code
update-typedefs:

@ -122,6 +122,7 @@ tap_tests = [
't/rotate_key.pl',
't/tde_heap.pl',
't/unlogged_tables.pl',
't/wal_archiving.pl',
't/wal_encrypt.pl',
]
@ -163,3 +164,31 @@ pg_tde_change_key_provider = executable('pg_tde_change_key_provider',
link_with: [pg_tde_frontend]
)
contrib_targets += pg_tde_change_key_provider
pg_tde_archive_decrypt_sources = files(
'src/bin/pg_tde_archive_decrypt.c',
) + xlogreader_sources
pg_tde_archive_decrypt = executable('pg_tde_archive_decrypt',
pg_tde_archive_decrypt_sources,
dependencies: [frontend_code],
c_args: ['-DFRONTEND'], # needed for xlogreader et al
kwargs: default_bin_args,
include_directories: [postgres_inc, pg_tde_inc],
link_with: [pg_tde_frontend]
)
contrib_targets += pg_tde_archive_decrypt
pg_tde_restore_encrypt_sources = files(
'src/bin/pg_tde_restore_encrypt.c',
) + xlogreader_sources
pg_tde_restore_encrypt = executable('pg_tde_restore_encrypt',
pg_tde_restore_encrypt_sources,
dependencies: [frontend_code],
c_args: ['-DFRONTEND'], # needed for xlogreader et al
kwargs: default_bin_args,
include_directories: [postgres_inc, pg_tde_inc],
link_with: [pg_tde_frontend]
)
contrib_targets += pg_tde_restore_encrypt

@ -229,6 +229,21 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
pg_tde_set_db_file_path(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID).dbOid, EncryptionState->db_map_path);
}
void
TDEXLogSmgrInitWriteReuseKey()
{
InternalKey *key = pg_tde_read_last_wal_key();
if (key)
{
EncryptionKey = *key;
TDEXLogSetEncKeyLsn(EncryptionKey.start_lsn);
pfree(key);
}
pg_tde_set_db_file_path(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID).dbOid, EncryptionState->db_map_path);
}
/*
* Encrypt XLog page(s) from the buf and write to the segment file.
*/

@ -0,0 +1,228 @@
#include "postgres_fe.h"
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
#include "access/xlog_internal.h"
#include "access/xlog_smgr.h"
#include "common/logging.h"
#include "access/pg_tde_fe_init.h"
#include "access/pg_tde_xlog_smgr.h"
static bool
is_segment(const char *filename)
{
return strspn(filename, "0123456789ABCDEF") == 24 && filename[24] == '\0';
}
static void
write_decrypted_segment(const char *segpath, const char *segname, int pipewr)
{
int fd;
off_t fsize;
int r;
int w;
TimeLineID tli;
XLogSegNo segno;
PGAlignedXLogBlock buf;
off_t pos = 0;
fd = open(segpath, O_RDONLY | PG_BINARY, 0);
if (fd < 0)
pg_fatal("could not open file \"%s\": %m", segname);
/*
* WalSegSz extracted from the first page header but it might be
* encrypted. But we need to know the segment seize to decrypt it (it's
* required for encryption offset calculations). So we get the segment
* size from the file's actual size. XLogLongPageHeaderData->xlp_seg_size
* there is "just as a cross-check" anyway.
*/
fsize = lseek(fd, 0, SEEK_END);
XLogFromFileName(segname, &tli, &segno, fsize);
r = xlog_smgr->seg_read(fd, buf.data, XLOG_BLCKSZ, pos, tli, segno, fsize);
if (r == XLOG_BLCKSZ)
{
XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
int walsegsz = longhdr->xlp_seg_size;
if (walsegsz != fsize)
pg_fatal("mismatch of segment size in WAL file \"%s\" (header: %d bytes, file size: %ld bytes)",
segname, walsegsz, fsize);
if (!IsValidWalSegSize(walsegsz))
{
pg_log_error(ngettext("invalid WAL segment size in WAL file \"%s\" (%d byte)",
"invalid WAL segment size in WAL file \"%s\" (%d bytes)",
walsegsz),
segname, walsegsz);
pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
exit(1);
}
}
else if (r < 0)
pg_fatal("could not read file \"%s\": %m",
segpath);
else
pg_fatal("could not read file \"%s\": read %d of %d",
segpath, r, XLOG_BLCKSZ);
pos += r;
w = write(pipewr, buf.data, XLOG_BLCKSZ);
if (w < 0)
pg_fatal("could not write to pipe: %m");
else if (w != r)
pg_fatal("could not write to pipe: wrote %d of %d", w, r);
while (1)
{
r = xlog_smgr->seg_read(fd, buf.data, XLOG_BLCKSZ, pos, tli, segno, fsize);
if (r == 0)
break;
else if (r < 0)
pg_fatal("could not read file \"%s\": %m", segpath);
pos += r;
w = write(pipewr, buf.data, r);
if (w < 0)
pg_fatal("could not write to pipe: %m");
else if (w != r)
pg_fatal("could not write to pipe: wrote %d of %d", w, r);
}
close(fd);
}
static void
usage(const char *progname)
{
printf(_("%s wraps an archive command to make it archive unencrypted WAL.\n\n"), progname);
printf(_("Usage:\n %s %%p <archive command>\n\n"), progname);
printf(_("Options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
}
int
main(int argc, char *argv[])
{
const char *progname;
char *sourcepath;
char *sep;
char *sourcename;
char stdindir[MAXPGPATH] = "/tmp/pg_tde_archiveXXXXXX";
char stdinpath[MAXPGPATH];
bool issegment;
int pipefd[2];
pid_t child;
int status;
int r;
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
usage(progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
puts("pg_tde_archive_deceypt (PostgreSQL) " PG_VERSION);
exit(0);
}
}
if (argc < 3)
{
pg_log_error("too few arguments");
pg_log_error_detail("Try \"%s --help\" for more information.", progname);
exit(1);
}
sourcepath = argv[1];
pg_tde_fe_init("pg_tde");
TDEXLogSmgrInit();
sep = strrchr(sourcepath, '/');
if (sep != NULL)
sourcename = sep + 1;
else
sourcename = sourcepath;
issegment = is_segment(sourcename);
if (issegment)
{
char *s;
if (mkdtemp(stdindir) == NULL)
pg_fatal("could not create temporary directory \"%s\": %m", stdindir);
s = stpcpy(stdinpath, stdindir);
s = stpcpy(s, "/");
stpcpy(s, sourcename);
if (pipe(pipefd) < 0)
pg_fatal("could not create pipe: %m");
if (symlink("/dev/stdin", stdinpath) < 0)
pg_fatal("could not create symlink \"%s\": %m", stdinpath);
for (int i = 2; i < argc; i++)
if (strcmp(sourcepath, argv[i]) == 0)
argv[i] = stdinpath;
}
child = fork();
if (child == 0)
{
if (issegment)
{
close(0);
dup2(pipefd[0], 0);
close(pipefd[0]);
close(pipefd[1]);
}
if (execvp(argv[2], argv + 2) < 0)
pg_fatal("exec failed: %m");
}
else if (child < 0)
pg_fatal("could not create background process: %m");
if (issegment)
{
close(pipefd[0]);
write_decrypted_segment(sourcepath, sourcename, pipefd[1]);
close(pipefd[1]);
}
r = waitpid(child, &status, 0);
if (r == (pid_t) -1)
pg_fatal("could not wait for child process: %m");
if (r != child)
pg_fatal("child %d died, expected %d", (int) r, (int) child);
if (status != 0)
pg_fatal("%s", wait_result_to_str(status));
if (issegment && unlink(stdinpath) < 0)
pg_log_warning("could not remove symlink \"%s\": %m", stdinpath);
if (issegment && rmdir(stdindir) < 0)
pg_log_warning("could not remove directory \"%s\": %m", stdindir);
return 0;
}

@ -0,0 +1,220 @@
#include "postgres_fe.h"
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
#include "access/xlog_internal.h"
#include "access/xlog_smgr.h"
#include "common/logging.h"
#include "access/pg_tde_fe_init.h"
#include "access/pg_tde_xlog_smgr.h"
static bool
is_segment(const char *filename)
{
return strspn(filename, "0123456789ABCDEF") == 24 && filename[24] == '\0';
}
static void
write_encrypted_segment(const char *segpath, const char *segname, int piperd)
{
int fd;
PGAlignedXLogBlock buf;
int r;
int w;
int pos = 0;
XLogLongPageHeader longhdr;
int walsegsz;
TimeLineID tli;
XLogSegNo segno;
fd = open(segpath, O_CREAT | O_WRONLY | PG_BINARY, 0666);
if (fd < 0)
pg_fatal("could not open file \"%s\": %m", segpath);
r = read(piperd, buf.data, XLOG_BLCKSZ);
if (r < 0)
pg_fatal("could not read from pipe: %m");
else if (r != XLOG_BLCKSZ)
pg_fatal("could not read from pipe: read %d of %d",
r, XLOG_BLCKSZ);
longhdr = (XLogLongPageHeader) buf.data;
walsegsz = longhdr->xlp_seg_size;
if (!IsValidWalSegSize(walsegsz))
{
pg_log_error(ngettext("invalid WAL segment size in WAL file \"%s\" (%d byte)",
"invalid WAL segment size in WAL file \"%s\" (%d bytes)",
walsegsz),
segname, walsegsz);
pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
exit(1);
}
XLogFromFileName(segname, &tli, &segno, walsegsz);
TDEXLogSmgrInitWriteReuseKey();
w = xlog_smgr->seg_write(fd, buf.data, r, pos, tli, segno, walsegsz);
if (w < 0)
pg_fatal("could not write file \"%s\": %m", segpath);
else if (w != r)
pg_fatal("could not write file \"%s\": wrote %d of %d",
segpath, w, r);
pos += w;
while (1)
{
r = read(piperd, buf.data, XLOG_BLCKSZ);
if (r == 0)
break;
else if (r < 0)
pg_fatal("could not read from pipe: %m");
w = xlog_smgr->seg_write(fd, buf.data, r, pos, tli, segno, walsegsz);
if (w < 0)
pg_fatal("could not write file \"%s\": %m", segpath);
else if (w != r)
pg_fatal("could not write file \"%s\": wrote %d of %d",
segpath, w, r);
pos += w;
}
close(fd);
}
static void
usage(const char *progname)
{
printf(_("%s wraps a restore command to make it write encrypted WAL to pg_wal.\n\n"), progname);
printf(_("Usage:\n %s %%f %%p <restore command>\n\n"), progname);
printf(_("Options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
}
int
main(int argc, char *argv[])
{
const char *progname;
char *sourcename;
char *targetpath;
char *sep;
char *targetname;
char stdoutdir[MAXPGPATH] = "/tmp/pg_tde_restoreXXXXXX";
char stdoutpath[MAXPGPATH];
bool issegment;
int pipefd[2];
pid_t child;
int status;
int r;
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
usage(progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
puts("pg_tde_restore_encrypt (PostgreSQL) " PG_VERSION);
exit(0);
}
}
if (argc < 4)
{
pg_log_error("too few arguments");
pg_log_error_detail("Try \"%s --help\" for more information.", progname);
exit(1);
}
sourcename = argv[1];
targetpath = argv[2];
pg_tde_fe_init("pg_tde");
TDEXLogSmgrInit();
sep = strrchr(targetpath, '/');
if (sep != NULL)
targetname = sep + 1;
else
targetname = targetpath;
issegment = is_segment(sourcename);
if (issegment)
{
char *s;
if (mkdtemp(stdoutdir) == NULL)
pg_fatal("could not create temporary directory \"%s\": %m", stdoutdir);
s = stpcpy(stdoutpath, stdoutdir);
s = stpcpy(s, "/");
stpcpy(s, targetname);
if (pipe(pipefd) < 0)
pg_fatal("could not create pipe: %m");
if (symlink("/dev/stdout", stdoutpath) < 0)
pg_fatal("could not create symlink \"%s\": %m", stdoutpath);
for (int i = 2; i < argc; i++)
if (strcmp(targetpath, argv[i]) == 0)
argv[i] = stdoutpath;
}
child = fork();
if (child == 0)
{
if (issegment)
{
close(1);
dup2(pipefd[1], 1);
close(pipefd[0]);
close(pipefd[1]);
}
if (execvp(argv[3], argv + 3) < 0)
pg_fatal("exec failed: %m");
}
else if (child < 0)
pg_fatal("could not create background process: %m");
if (issegment)
{
close(pipefd[1]);
write_encrypted_segment(targetpath, sourcename, pipefd[0]);
close(pipefd[0]);
}
r = waitpid(child, &status, 0);
if (r == (pid_t) -1)
pg_fatal("could not wait for child process: %m");
if (r != child)
pg_fatal("child %d died, expected %d", (int) r, (int) child);
if (status != 0)
pg_fatal("%s", wait_result_to_str(status));
if (issegment && unlink(stdoutpath) < 0)
pg_log_warning("could not remove symlink \"%s\": %m", stdoutpath);
if (issegment && rmdir(stdoutdir) < 0)
pg_log_warning("could not remove directory \"%s\": %m", stdoutdir);
return 0;
}

@ -11,5 +11,6 @@ extern Size TDEXLogEncryptStateSize(void);
extern void TDEXLogShmemInit(void);
extern void TDEXLogSmgrInit(void);
extern void TDEXLogSmgrInitWrite(bool encrypt_xlog);
extern void TDEXLogSmgrInitWriteReuseKey(void);
#endif /* PG_TDE_XLOGSMGR_H */

@ -0,0 +1,101 @@
#!/usr/bin/perl
use strict;
use warnings;
use File::Basename;
use Test::More;
use lib 't';
use pgtde;
unlink('/tmp/wal_archiving.per');
# Test archive_command
my $primary = PostgreSQL::Test::Cluster->new('primary');
my $archive_dir = $primary->archive_dir;
$primary->init(allows_streaming => 1);
$primary->append_conf('postgresql.conf',
"shared_preload_libraries = 'pg_tde'");
$primary->append_conf('postgresql.conf', "wal_level = 'replica'");
$primary->append_conf('postgresql.conf', "autovacuum = off");
$primary->append_conf('postgresql.conf', "checkpoint_timeout = 1h");
$primary->append_conf('postgresql.conf', "archive_mode = on");
$primary->append_conf('postgresql.conf',
"archive_command = 'pg_tde_archive_decrypt %p cp %p $archive_dir/%f'");
$primary->start;
$primary->safe_psql('postgres', "CREATE EXTENSION pg_tde;");
$primary->safe_psql('postgres',
"SELECT pg_tde_add_global_key_provider_file('keyring', '/tmp/wal_archiving.per');"
);
$primary->safe_psql('postgres',
"SELECT pg_tde_create_key_using_global_key_provider('server-key', 'keyring');"
);
$primary->safe_psql('postgres',
"SELECT pg_tde_set_server_key_using_global_key_provider('server-key', 'keyring');"
);
$primary->append_conf('postgresql.conf', "pg_tde.wal_encrypt = on");
$primary->backup('backup', backup_options => [ '-X', 'none' ]);
$primary->safe_psql('postgres',
"CREATE TABLE t1 AS SELECT 'foobar_plain' AS x");
$primary->restart;
$primary->safe_psql('postgres',
"CREATE TABLE t2 AS SELECT 'foobar_enc' AS x");
my $data_dir = $primary->data_dir;
like(
`strings $data_dir/pg_wal/0000000100000000000000??`,
qr/foobar_plain/,
'should find foobar_plain in WAL');
unlike(
`strings $data_dir/pg_wal/0000000100000000000000??`,
qr/foobar_enc/,
'should not find foobar_enc in WAL');
$primary->stop;
like(
`strings $archive_dir/0000000100000000000000??`,
qr/foobar_plain/,
'should find foobar_plain in archive');
like(
`strings $archive_dir/0000000100000000000000??`,
qr/foobar_enc/,
'should find foobar_enc in archive');
# Test restore_command
my $replica = PostgreSQL::Test::Cluster->new('replica');
$replica->init_from_backup($primary, 'backup');
$replica->append_conf('postgresql.conf',
"restore_command = 'pg_tde_restore_encrypt %f %p cp $archive_dir/%f %p'");
$replica->append_conf('postgresql.conf', "recovery_target_action = promote");
$replica->set_recovery_mode;
$replica->start;
$data_dir = $replica->data_dir;
unlike(
`strings $data_dir/pg_wal/0000000100000000000000??`,
qr/foobar_plain/,
'should not find foobar_plain in WAL since it is encrypted');
unlike(
`strings $data_dir/pg_wal/0000000100000000000000??`,
qr/foobar_enc/,
'should not find foobar_enc in WAL since it is encrypted');
my $result = $replica->safe_psql('postgres',
'SELECT * FROM t1 UNION ALL SELECT * FROM t2');
is($result, "foobar_plain\nfoobar_enc", 'b');
$replica->stop;
done_testing();
Loading…
Cancel
Save