mirror of https://github.com/postgres/postgres
parent
7689301f3f
commit
ce431e03d1
@ -0,0 +1,911 @@ |
|||||||
|
From 5ffbc7c35bb3248501b2517d26f99afe02fb53d6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
|
||||||
|
Date: Tue, 27 Jun 2023 15:59:23 +0200
|
||||||
|
Subject: [PATCH v1 1/5] Expose f_smgr to extensions for manual implementation
|
||||||
|
|
||||||
|
There are various reasons why one would want to create their own
|
||||||
|
implementation of a storage manager, among which are block-level compression,
|
||||||
|
encryption and offloading to cold storage. This patch is a first patch that
|
||||||
|
allows extensions to register their own SMgr.
|
||||||
|
|
||||||
|
Note, however, that this SMgr is not yet used - only the first SMgr to register
|
||||||
|
is used, and this is currently the md.c smgr. Future commits will include
|
||||||
|
facilities to select an SMgr for each tablespace.
|
||||||
|
---
|
||||||
|
src/backend/postmaster/postmaster.c | 5 +
|
||||||
|
src/backend/storage/smgr/md.c | 172 +++++++++++++++++++---------
|
||||||
|
src/backend/storage/smgr/smgr.c | 129 ++++++++++-----------
|
||||||
|
src/backend/utils/init/miscinit.c | 13 +++
|
||||||
|
src/include/miscadmin.h | 1 +
|
||||||
|
src/include/storage/md.h | 4 +
|
||||||
|
src/include/storage/smgr.h | 59 ++++++++--
|
||||||
|
7 files changed, 252 insertions(+), 131 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
|
||||||
|
index feb471dd1d..a0e46fe1f2 100644
|
||||||
|
--- a/src/backend/postmaster/postmaster.c
|
||||||
|
+++ b/src/backend/postmaster/postmaster.c
|
||||||
|
@@ -1010,6 +1010,11 @@ PostmasterMain(int argc, char *argv[])
|
||||||
|
*/
|
||||||
|
ApplyLauncherRegister();
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Register built-in managers that are not part of static arrays
|
||||||
|
+ */
|
||||||
|
+ register_builtin_dynamic_managers();
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* process any libraries that should be preloaded at postmaster start
|
||||||
|
*/
|
||||||
|
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
|
||||||
|
index b1e9932a29..66a93101ab 100644
|
||||||
|
--- a/src/backend/storage/smgr/md.c
|
||||||
|
+++ b/src/backend/storage/smgr/md.c
|
||||||
|
@@ -87,6 +87,21 @@ typedef struct _MdfdVec
|
||||||
|
} MdfdVec;
|
||||||
|
|
||||||
|
static MemoryContext MdCxt; /* context for all MdfdVec objects */
|
||||||
|
+SMgrId MdSMgrId;
|
||||||
|
+
|
||||||
|
+typedef struct MdSMgrRelationData
|
||||||
|
+{
|
||||||
|
+ /* parent data */
|
||||||
|
+ SMgrRelationData reln;
|
||||||
|
+ /*
|
||||||
|
+ * for md.c; per-fork arrays of the number of open segments
|
||||||
|
+ * (md_num_open_segs) and the segments themselves (md_seg_fds).
|
||||||
|
+ */
|
||||||
|
+ int md_num_open_segs[MAX_FORKNUM + 1];
|
||||||
|
+ struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
|
||||||
|
+} MdSMgrRelationData;
|
||||||
|
+
|
||||||
|
+typedef MdSMgrRelationData *MdSMgrRelation;
|
||||||
|
|
||||||
|
|
||||||
|
/* Populate a file tag describing an md.c segment file. */
|
||||||
|
@@ -121,26 +136,52 @@ static MemoryContext MdCxt; /* context for all MdfdVec objects */
|
||||||
|
#define EXTENSION_DONT_OPEN (1 << 5)
|
||||||
|
|
||||||
|
|
||||||
|
+void mdsmgr_register(void)
|
||||||
|
+{
|
||||||
|
+ /* magnetic disk */
|
||||||
|
+ f_smgr md_smgr = (f_smgr) {
|
||||||
|
+ .name = "md",
|
||||||
|
+ .smgr_init = mdinit,
|
||||||
|
+ .smgr_shutdown = NULL,
|
||||||
|
+ .smgr_open = mdopen,
|
||||||
|
+ .smgr_close = mdclose,
|
||||||
|
+ .smgr_create = mdcreate,
|
||||||
|
+ .smgr_exists = mdexists,
|
||||||
|
+ .smgr_unlink = mdunlink,
|
||||||
|
+ .smgr_extend = mdextend,
|
||||||
|
+ .smgr_zeroextend = mdzeroextend,
|
||||||
|
+ .smgr_prefetch = mdprefetch,
|
||||||
|
+ .smgr_readv = mdreadv,
|
||||||
|
+ .smgr_writev = mdwritev,
|
||||||
|
+ .smgr_writeback = mdwriteback,
|
||||||
|
+ .smgr_nblocks = mdnblocks,
|
||||||
|
+ .smgr_truncate = mdtruncate,
|
||||||
|
+ .smgr_immedsync = mdimmedsync,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ MdSMgrId = smgr_register(&md_smgr, sizeof(MdSMgrRelationData));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* local routines */
|
||||||
|
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum,
|
||||||
|
bool isRedo);
|
||||||
|
-static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior);
|
||||||
|
-static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+static MdfdVec *mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior);
|
||||||
|
+static void register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum,
|
||||||
|
MdfdVec *seg);
|
||||||
|
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
|
||||||
|
BlockNumber segno);
|
||||||
|
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum,
|
||||||
|
BlockNumber segno);
|
||||||
|
-static void _fdvec_resize(SMgrRelation reln,
|
||||||
|
+static void _fdvec_resize(MdSMgrRelation reln,
|
||||||
|
ForkNumber forknum,
|
||||||
|
int nseg);
|
||||||
|
-static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+static char *_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum,
|
||||||
|
BlockNumber segno);
|
||||||
|
-static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+static MdfdVec *_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum,
|
||||||
|
BlockNumber segno, int oflags);
|
||||||
|
-static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+static MdfdVec *_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum,
|
||||||
|
BlockNumber blkno, bool skipFsync, int behavior);
|
||||||
|
-static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+static BlockNumber _mdnblocks(MdSMgrRelation reln, ForkNumber forknum,
|
||||||
|
MdfdVec *seg);
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
@@ -173,6 +214,8 @@ mdinit(void)
|
||||||
|
bool
|
||||||
|
mdexists(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
{
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Close it first, to ensure that we notice if the fork has been unlinked
|
||||||
|
* since we opened it. As an optimization, we can skip that in recovery,
|
||||||
|
@@ -181,7 +224,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
if (!InRecovery)
|
||||||
|
mdclose(reln, forknum);
|
||||||
|
|
||||||
|
- return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
|
||||||
|
+ return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -195,11 +238,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
|
||||||
|
MdfdVec *mdfd;
|
||||||
|
char *path;
|
||||||
|
File fd;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
+ // Assert(reln->smgr_which == MdSMgrId);
|
||||||
|
|
||||||
|
- if (isRedo && reln->md_num_open_segs[forknum] > 0)
|
||||||
|
+ if (isRedo && mdreln->md_num_open_segs[forknum] > 0)
|
||||||
|
return; /* created and opened already... */
|
||||||
|
|
||||||
|
- Assert(reln->md_num_open_segs[forknum] == 0);
|
||||||
|
+ Assert(mdreln->md_num_open_segs[forknum] == 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We may be using the target table space for the first time in this
|
||||||
|
@@ -236,13 +281,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
|
||||||
|
|
||||||
|
pfree(path);
|
||||||
|
|
||||||
|
- _fdvec_resize(reln, forknum, 1);
|
||||||
|
- mdfd = &reln->md_seg_fds[forknum][0];
|
||||||
|
+ _fdvec_resize(mdreln, forknum, 1);
|
||||||
|
+ mdfd = &mdreln->md_seg_fds[forknum][0];
|
||||||
|
mdfd->mdfd_vfd = fd;
|
||||||
|
mdfd->mdfd_segno = 0;
|
||||||
|
|
||||||
|
if (!SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, mdfd);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, mdfd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -466,6 +511,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
off_t seekpos;
|
||||||
|
int nbytes;
|
||||||
|
MdfdVec *v;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
/* If this build supports direct I/O, the buffer must be I/O aligned. */
|
||||||
|
if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
|
||||||
|
@@ -489,7 +535,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
relpath(reln->smgr_rlocator, forknum),
|
||||||
|
InvalidBlockNumber)));
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
|
||||||
|
|
||||||
|
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||||
|
|
||||||
|
@@ -513,9 +559,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skipFsync && !SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, v);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, v);
|
||||||
|
|
||||||
|
- Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||||
|
+ Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -531,6 +577,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
MdfdVec *v;
|
||||||
|
BlockNumber curblocknum = blocknum;
|
||||||
|
int remblocks = nblocks;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
Assert(nblocks > 0);
|
||||||
|
|
||||||
|
@@ -562,7 +609,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
else
|
||||||
|
numblocks = remblocks;
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
|
||||||
|
|
||||||
|
Assert(segstartblock < RELSEG_SIZE);
|
||||||
|
Assert(segstartblock + numblocks <= RELSEG_SIZE);
|
||||||
|
@@ -617,9 +664,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skipFsync && !SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, v);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, v);
|
||||||
|
|
||||||
|
- Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||||
|
+ Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||||
|
|
||||||
|
remblocks -= numblocks;
|
||||||
|
curblocknum += numblocks;
|
||||||
|
@@ -637,7 +684,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
* invent one out of whole cloth.
|
||||||
|
*/
|
||||||
|
static MdfdVec *
|
||||||
|
-mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
|
||||||
|
+mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior)
|
||||||
|
{
|
||||||
|
MdfdVec *mdfd;
|
||||||
|
char *path;
|
||||||
|
@@ -647,7 +694,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
|
||||||
|
if (reln->md_num_open_segs[forknum] > 0)
|
||||||
|
return &reln->md_seg_fds[forknum][0];
|
||||||
|
|
||||||
|
- path = relpath(reln->smgr_rlocator, forknum);
|
||||||
|
+ path = relpath(reln->reln.smgr_rlocator, forknum);
|
||||||
|
|
||||||
|
fd = PathNameOpenFile(path, _mdfd_open_flags());
|
||||||
|
|
||||||
|
@@ -682,9 +729,10 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
|
||||||
|
void
|
||||||
|
mdopen(SMgrRelation reln)
|
||||||
|
{
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
/* mark it not open */
|
||||||
|
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||||
|
- reln->md_num_open_segs[forknum] = 0;
|
||||||
|
+ mdreln->md_num_open_segs[forknum] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -693,7 +741,8 @@ mdopen(SMgrRelation reln)
|
||||||
|
void
|
||||||
|
mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
{
|
||||||
|
- int nopensegs = reln->md_num_open_segs[forknum];
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
+ int nopensegs = mdreln->md_num_open_segs[forknum];
|
||||||
|
|
||||||
|
/* No work if already closed */
|
||||||
|
if (nopensegs == 0)
|
||||||
|
@@ -702,10 +751,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
/* close segments starting from the end */
|
||||||
|
while (nopensegs > 0)
|
||||||
|
{
|
||||||
|
- MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
|
||||||
|
+ MdfdVec *v = &mdreln->md_seg_fds[forknum][nopensegs - 1];
|
||||||
|
|
||||||
|
FileClose(v->mdfd_vfd);
|
||||||
|
- _fdvec_resize(reln, forknum, nopensegs - 1);
|
||||||
|
+ _fdvec_resize(mdreln, forknum, nopensegs - 1);
|
||||||
|
nopensegs--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -718,6 +767,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
int nblocks)
|
||||||
|
{
|
||||||
|
#ifdef USE_PREFETCH
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
|
||||||
|
|
||||||
|
@@ -730,7 +780,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
MdfdVec *v;
|
||||||
|
int nblocks_this_segment;
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, blocknum, false,
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, blocknum, false,
|
||||||
|
InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
|
||||||
|
if (v == NULL)
|
||||||
|
return false;
|
||||||
|
@@ -813,6 +863,8 @@ void
|
||||||
|
mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
void **buffers, BlockNumber nblocks)
|
||||||
|
{
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
+
|
||||||
|
while (nblocks > 0)
|
||||||
|
{
|
||||||
|
struct iovec iov[PG_IOV_MAX];
|
||||||
|
@@ -824,7 +876,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
size_t transferred_this_segment;
|
||||||
|
size_t size_this_segment;
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, blocknum, false,
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, blocknum, false,
|
||||||
|
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
|
||||||
|
|
||||||
|
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||||
|
@@ -931,6 +983,8 @@ void
|
||||||
|
mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
const void **buffers, BlockNumber nblocks, bool skipFsync)
|
||||||
|
{
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
+
|
||||||
|
/* This assert is too expensive to have on normally ... */
|
||||||
|
#ifdef CHECK_WRITE_VS_EXTEND
|
||||||
|
Assert(blocknum < mdnblocks(reln, forknum));
|
||||||
|
@@ -947,7 +1001,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
size_t transferred_this_segment;
|
||||||
|
size_t size_this_segment;
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync,
|
||||||
|
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
|
||||||
|
|
||||||
|
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||||
|
@@ -1014,7 +1068,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skipFsync && !SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, v);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, v);
|
||||||
|
|
||||||
|
nblocks -= nblocks_this_segment;
|
||||||
|
buffers += nblocks_this_segment;
|
||||||
|
@@ -1033,6 +1087,7 @@ void
|
||||||
|
mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
BlockNumber blocknum, BlockNumber nblocks)
|
||||||
|
{
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -1047,7 +1102,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
int segnum_start,
|
||||||
|
segnum_end;
|
||||||
|
|
||||||
|
- v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
|
||||||
|
+ v = _mdfd_getseg(mdreln, forknum, blocknum, true /* not used */ ,
|
||||||
|
EXTENSION_DONT_OPEN);
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -1094,11 +1149,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
MdfdVec *v;
|
||||||
|
BlockNumber nblocks;
|
||||||
|
BlockNumber segno;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
- mdopenfork(reln, forknum, EXTENSION_FAIL);
|
||||||
|
+ mdopenfork(mdreln, forknum, EXTENSION_FAIL);
|
||||||
|
|
||||||
|
/* mdopen has opened the first segment */
|
||||||
|
- Assert(reln->md_num_open_segs[forknum] > 0);
|
||||||
|
+ Assert(mdreln->md_num_open_segs[forknum] > 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start from the last open segments, to avoid redundant seeks. We have
|
||||||
|
@@ -1113,12 +1169,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
* that's OK because the checkpointer never needs to compute relation
|
||||||
|
* size.)
|
||||||
|
*/
|
||||||
|
- segno = reln->md_num_open_segs[forknum] - 1;
|
||||||
|
- v = &reln->md_seg_fds[forknum][segno];
|
||||||
|
+ segno = mdreln->md_num_open_segs[forknum] - 1;
|
||||||
|
+ v = &mdreln->md_seg_fds[forknum][segno];
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
- nblocks = _mdnblocks(reln, forknum, v);
|
||||||
|
+ nblocks = _mdnblocks(mdreln, forknum, v);
|
||||||
|
if (nblocks > ((BlockNumber) RELSEG_SIZE))
|
||||||
|
elog(FATAL, "segment too big");
|
||||||
|
if (nblocks < ((BlockNumber) RELSEG_SIZE))
|
||||||
|
@@ -1136,7 +1192,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
* undermines _mdfd_getseg's attempts to notice and report an error
|
||||||
|
* upon access to a missing segment.
|
||||||
|
*/
|
||||||
|
- v = _mdfd_openseg(reln, forknum, segno, 0);
|
||||||
|
+ v = _mdfd_openseg(mdreln, forknum, segno, 0);
|
||||||
|
if (v == NULL)
|
||||||
|
return segno * ((BlockNumber) RELSEG_SIZE);
|
||||||
|
}
|
||||||
|
@@ -1151,6 +1207,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||||
|
BlockNumber curnblk;
|
||||||
|
BlockNumber priorblocks;
|
||||||
|
int curopensegs;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||||
|
@@ -1174,14 +1231,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||||
|
* Truncate segments, starting at the last one. Starting at the end makes
|
||||||
|
* managing the memory for the fd array easier, should there be errors.
|
||||||
|
*/
|
||||||
|
- curopensegs = reln->md_num_open_segs[forknum];
|
||||||
|
+ curopensegs = mdreln->md_num_open_segs[forknum];
|
||||||
|
while (curopensegs > 0)
|
||||||
|
{
|
||||||
|
MdfdVec *v;
|
||||||
|
|
||||||
|
priorblocks = (curopensegs - 1) * RELSEG_SIZE;
|
||||||
|
|
||||||
|
- v = &reln->md_seg_fds[forknum][curopensegs - 1];
|
||||||
|
+ v = &mdreln->md_seg_fds[forknum][curopensegs - 1];
|
||||||
|
|
||||||
|
if (priorblocks > nblocks)
|
||||||
|
{
|
||||||
|
@@ -1196,13 +1253,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||||
|
FilePathName(v->mdfd_vfd))));
|
||||||
|
|
||||||
|
if (!SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, v);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, v);
|
||||||
|
|
||||||
|
/* we never drop the 1st segment */
|
||||||
|
- Assert(v != &reln->md_seg_fds[forknum][0]);
|
||||||
|
+ Assert(v != &mdreln->md_seg_fds[forknum][0]);
|
||||||
|
|
||||||
|
FileClose(v->mdfd_vfd);
|
||||||
|
- _fdvec_resize(reln, forknum, curopensegs - 1);
|
||||||
|
+ _fdvec_resize(mdreln, forknum, curopensegs - 1);
|
||||||
|
}
|
||||||
|
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
|
||||||
|
{
|
||||||
|
@@ -1222,7 +1279,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||||
|
FilePathName(v->mdfd_vfd),
|
||||||
|
nblocks)));
|
||||||
|
if (!SmgrIsTemp(reln))
|
||||||
|
- register_dirty_segment(reln, forknum, v);
|
||||||
|
+ register_dirty_segment(mdreln, forknum, v);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -1252,6 +1309,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
{
|
||||||
|
int segno;
|
||||||
|
int min_inactive_seg;
|
||||||
|
+ MdSMgrRelation mdreln = (MdSMgrRelation) reln;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||||
|
@@ -1259,7 +1317,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
*/
|
||||||
|
mdnblocks(reln, forknum);
|
||||||
|
|
||||||
|
- min_inactive_seg = segno = reln->md_num_open_segs[forknum];
|
||||||
|
+ min_inactive_seg = segno = mdreln->md_num_open_segs[forknum];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Temporarily open inactive segments, then close them after sync. There
|
||||||
|
@@ -1267,12 +1325,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
* is harmless. We don't bother to clean them up and take a risk of
|
||||||
|
* further trouble. The next mdclose() will soon close them.
|
||||||
|
*/
|
||||||
|
- while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
|
||||||
|
+ while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL)
|
||||||
|
segno++;
|
||||||
|
|
||||||
|
while (segno > 0)
|
||||||
|
{
|
||||||
|
- MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
|
||||||
|
+ MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fsyncs done through mdimmedsync() should be tracked in a separate
|
||||||
|
@@ -1293,7 +1351,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
if (segno > min_inactive_seg)
|
||||||
|
{
|
||||||
|
FileClose(v->mdfd_vfd);
|
||||||
|
- _fdvec_resize(reln, forknum, segno - 1);
|
||||||
|
+ _fdvec_resize(mdreln, forknum, segno - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
segno--;
|
||||||
|
@@ -1310,14 +1368,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
* enough to be a performance problem).
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
-register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||||
|
+register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||||
|
{
|
||||||
|
FileTag tag;
|
||||||
|
|
||||||
|
- INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
|
||||||
|
+ INIT_MD_FILETAG(tag, reln->reln.smgr_rlocator.locator, forknum, seg->mdfd_segno);
|
||||||
|
|
||||||
|
/* Temp relations should never be fsync'd */
|
||||||
|
- Assert(!SmgrIsTemp(reln));
|
||||||
|
+ Assert(!SmgrIsTemp(&reln->reln));
|
||||||
|
|
||||||
|
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
|
||||||
|
{
|
||||||
|
@@ -1435,7 +1493,7 @@ DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
|
||||||
|
* _fdvec_resize() -- Resize the fork's open segments array
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
-_fdvec_resize(SMgrRelation reln,
|
||||||
|
+_fdvec_resize(MdSMgrRelation reln,
|
||||||
|
ForkNumber forknum,
|
||||||
|
int nseg)
|
||||||
|
{
|
||||||
|
@@ -1473,12 +1531,12 @@ _fdvec_resize(SMgrRelation reln,
|
||||||
|
* returned string is palloc'd.
|
||||||
|
*/
|
||||||
|
static char *
|
||||||
|
-_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
|
||||||
|
+_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno)
|
||||||
|
{
|
||||||
|
char *path,
|
||||||
|
*fullpath;
|
||||||
|
|
||||||
|
- path = relpath(reln->smgr_rlocator, forknum);
|
||||||
|
+ path = relpath(reln->reln.smgr_rlocator, forknum);
|
||||||
|
|
||||||
|
if (segno > 0)
|
||||||
|
{
|
||||||
|
@@ -1496,7 +1554,7 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
|
||||||
|
* and make a MdfdVec object for it. Returns NULL on failure.
|
||||||
|
*/
|
||||||
|
static MdfdVec *
|
||||||
|
-_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||||
|
+_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||||
|
int oflags)
|
||||||
|
{
|
||||||
|
MdfdVec *v;
|
||||||
|
@@ -1541,7 +1599,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||||
|
* EXTENSION_CREATE case.
|
||||||
|
*/
|
||||||
|
static MdfdVec *
|
||||||
|
-_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||||
|
+_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||||
|
bool skipFsync, int behavior)
|
||||||
|
{
|
||||||
|
MdfdVec *v;
|
||||||
|
@@ -1615,7 +1673,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||||
|
char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
|
||||||
|
MCXT_ALLOC_ZERO);
|
||||||
|
|
||||||
|
- mdextend(reln, forknum,
|
||||||
|
+ mdextend((SMgrRelation) reln, forknum,
|
||||||
|
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
|
||||||
|
zerobuf, skipFsync);
|
||||||
|
pfree(zerobuf);
|
||||||
|
@@ -1672,7 +1730,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||||
|
* Get number of blocks present in a single disk file
|
||||||
|
*/
|
||||||
|
static BlockNumber
|
||||||
|
-_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||||
|
+_mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||||
|
{
|
||||||
|
off_t len;
|
||||||
|
|
||||||
|
@@ -1695,7 +1753,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||||
|
int
|
||||||
|
mdsyncfiletag(const FileTag *ftag, char *path)
|
||||||
|
{
|
||||||
|
- SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
|
||||||
|
+ MdSMgrRelation reln = (MdSMgrRelation) smgropen(ftag->rlocator, InvalidBackendId);
|
||||||
|
File file;
|
||||||
|
instr_time io_start;
|
||||||
|
bool need_to_close;
|
||||||
|
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
|
||||||
|
index 563a0be5c7..b586e6e25a 100644
|
||||||
|
--- a/src/backend/storage/smgr/smgr.c
|
||||||
|
+++ b/src/backend/storage/smgr/smgr.c
|
||||||
|
@@ -19,80 +19,23 @@
|
||||||
|
|
||||||
|
#include "access/xlogutils.h"
|
||||||
|
#include "lib/ilist.h"
|
||||||
|
+#include "miscadmin.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/fd.h"
|
||||||
|
#include "storage/ipc.h"
|
||||||
|
#include "storage/md.h"
|
||||||
|
#include "storage/smgr.h"
|
||||||
|
+#include "port/atomics.h"
|
||||||
|
#include "utils/hsearch.h"
|
||||||
|
#include "utils/inval.h"
|
||||||
|
+#include "utils/memutils.h"
|
||||||
|
|
||||||
|
|
||||||
|
-/*
|
||||||
|
- * This struct of function pointers defines the API between smgr.c and
|
||||||
|
- * any individual storage manager module. Note that smgr subfunctions are
|
||||||
|
- * generally expected to report problems via elog(ERROR). An exception is
|
||||||
|
- * that smgr_unlink should use elog(WARNING), rather than erroring out,
|
||||||
|
- * because we normally unlink relations during post-commit/abort cleanup,
|
||||||
|
- * and so it's too late to raise an error. Also, various conditions that
|
||||||
|
- * would normally be errors should be allowed during bootstrap and/or WAL
|
||||||
|
- * recovery --- see comments in md.c for details.
|
||||||
|
- */
|
||||||
|
-typedef struct f_smgr
|
||||||
|
-{
|
||||||
|
- void (*smgr_init) (void); /* may be NULL */
|
||||||
|
- void (*smgr_shutdown) (void); /* may be NULL */
|
||||||
|
- void (*smgr_open) (SMgrRelation reln);
|
||||||
|
- void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
- void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- bool isRedo);
|
||||||
|
- bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
- void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
|
||||||
|
- bool isRedo);
|
||||||
|
- void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||||
|
- void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum, int nblocks, bool skipFsync);
|
||||||
|
- bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum, int nblocks);
|
||||||
|
- void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum,
|
||||||
|
- void **buffers, BlockNumber nblocks);
|
||||||
|
- void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum,
|
||||||
|
- const void **buffers, BlockNumber nblocks,
|
||||||
|
- bool skipFsync);
|
||||||
|
- void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber blocknum, BlockNumber nblocks);
|
||||||
|
- BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
- void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
- BlockNumber nblocks);
|
||||||
|
- void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
-} f_smgr;
|
||||||
|
-
|
||||||
|
-static const f_smgr smgrsw[] = {
|
||||||
|
- /* magnetic disk */
|
||||||
|
- {
|
||||||
|
- .smgr_init = mdinit,
|
||||||
|
- .smgr_shutdown = NULL,
|
||||||
|
- .smgr_open = mdopen,
|
||||||
|
- .smgr_close = mdclose,
|
||||||
|
- .smgr_create = mdcreate,
|
||||||
|
- .smgr_exists = mdexists,
|
||||||
|
- .smgr_unlink = mdunlink,
|
||||||
|
- .smgr_extend = mdextend,
|
||||||
|
- .smgr_zeroextend = mdzeroextend,
|
||||||
|
- .smgr_prefetch = mdprefetch,
|
||||||
|
- .smgr_readv = mdreadv,
|
||||||
|
- .smgr_writev = mdwritev,
|
||||||
|
- .smgr_writeback = mdwriteback,
|
||||||
|
- .smgr_nblocks = mdnblocks,
|
||||||
|
- .smgr_truncate = mdtruncate,
|
||||||
|
- .smgr_immedsync = mdimmedsync,
|
||||||
|
- }
|
||||||
|
-};
|
||||||
|
+static f_smgr *smgrsw;
|
||||||
|
|
||||||
|
-static const int NSmgr = lengthof(smgrsw);
|
||||||
|
+static int NSmgr = 0;
|
||||||
|
+
|
||||||
|
+static Size LargestSMgrRelationSize = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Each backend has a hashtable that stores all extant SMgrRelation objects.
|
||||||
|
@@ -105,6 +48,57 @@ static dlist_head unowned_relns;
|
||||||
|
/* local function prototypes */
|
||||||
|
static void smgrshutdown(int code, Datum arg);
|
||||||
|
|
||||||
|
+SMgrId
|
||||||
|
+smgr_register(const f_smgr *smgr, Size smgrrelation_size)
|
||||||
|
+{
|
||||||
|
+ SMgrId my_id;
|
||||||
|
+ MemoryContext old;
|
||||||
|
+
|
||||||
|
+ if (process_shared_preload_libraries_done)
|
||||||
|
+ elog(FATAL, "SMgrs must be registered in the shared_preload_libraries phase");
|
||||||
|
+ if (NSmgr == MaxSMgrId)
|
||||||
|
+ elog(FATAL, "Too many smgrs registered");
|
||||||
|
+ if (smgr->name == NULL || *smgr->name == 0)
|
||||||
|
+ elog(FATAL, "smgr registered with invalid name");
|
||||||
|
+
|
||||||
|
+ Assert(smgr->smgr_open != NULL);
|
||||||
|
+ Assert(smgr->smgr_close != NULL);
|
||||||
|
+ Assert(smgr->smgr_create != NULL);
|
||||||
|
+ Assert(smgr->smgr_exists != NULL);
|
||||||
|
+ Assert(smgr->smgr_unlink != NULL);
|
||||||
|
+ Assert(smgr->smgr_extend != NULL);
|
||||||
|
+ Assert(smgr->smgr_zeroextend != NULL);
|
||||||
|
+ Assert(smgr->smgr_prefetch != NULL);
|
||||||
|
+ Assert(smgr->smgr_readv != NULL);
|
||||||
|
+ Assert(smgr->smgr_writev != NULL);
|
||||||
|
+ Assert(smgr->smgr_writeback != NULL);
|
||||||
|
+ Assert(smgr->smgr_nblocks != NULL);
|
||||||
|
+ Assert(smgr->smgr_truncate != NULL);
|
||||||
|
+ Assert(smgr->smgr_immedsync != NULL);
|
||||||
|
+ old = MemoryContextSwitchTo(TopMemoryContext);
|
||||||
|
+
|
||||||
|
+ my_id = NSmgr++;
|
||||||
|
+ if (my_id == 0)
|
||||||
|
+ smgrsw = palloc(sizeof(f_smgr));
|
||||||
|
+ else
|
||||||
|
+ smgrsw = repalloc(smgrsw, sizeof(f_smgr) * NSmgr);
|
||||||
|
+
|
||||||
|
+ MemoryContextSwitchTo(old);
|
||||||
|
+
|
||||||
|
+ pg_compiler_barrier();
|
||||||
|
+
|
||||||
|
+ if (!smgrsw)
|
||||||
|
+ {
|
||||||
|
+ NSmgr--;
|
||||||
|
+ elog(FATAL, "Failed to extend smgr array");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ memcpy(&smgrsw[my_id], smgr, sizeof(f_smgr));
|
||||||
|
+
|
||||||
|
+ LargestSMgrRelationSize = Max(LargestSMgrRelationSize, smgrrelation_size);
|
||||||
|
+
|
||||||
|
+ return my_id;
|
||||||
|
+}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* smgrinit(), smgrshutdown() -- Initialize or shut down storage
|
||||||
|
@@ -162,9 +156,11 @@ smgropen(RelFileLocator rlocator, BackendId backend)
|
||||||
|
{
|
||||||
|
/* First time through: initialize the hash table */
|
||||||
|
HASHCTL ctl;
|
||||||
|
+ LargestSMgrRelationSize = MAXALIGN(LargestSMgrRelationSize);
|
||||||
|
+ Assert(NSmgr > 0);
|
||||||
|
|
||||||
|
ctl.keysize = sizeof(RelFileLocatorBackend);
|
||||||
|
- ctl.entrysize = sizeof(SMgrRelationData);
|
||||||
|
+ ctl.entrysize = LargestSMgrRelationSize;
|
||||||
|
SMgrRelationHash = hash_create("smgr relation table", 400,
|
||||||
|
&ctl, HASH_ELEM | HASH_BLOBS);
|
||||||
|
dlist_init(&unowned_relns);
|
||||||
|
@@ -185,7 +181,8 @@ smgropen(RelFileLocator rlocator, BackendId backend)
|
||||||
|
reln->smgr_targblock = InvalidBlockNumber;
|
||||||
|
for (int i = 0; i <= MAX_FORKNUM; ++i)
|
||||||
|
reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
|
||||||
|
- reln->smgr_which = 0; /* we only have md.c at present */
|
||||||
|
+
|
||||||
|
+ reln->smgr_which = MdSMgrId; /* we only have md.c at present */
|
||||||
|
|
||||||
|
/* implementation-specific initialization */
|
||||||
|
smgrsw[reln->smgr_which].smgr_open(reln);
|
||||||
|
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
|
||||||
|
index 23f77a59e5..4ec7619302 100644
|
||||||
|
--- a/src/backend/utils/init/miscinit.c
|
||||||
|
+++ b/src/backend/utils/init/miscinit.c
|
||||||
|
@@ -42,6 +42,7 @@
|
||||||
|
#include "postmaster/postmaster.h"
|
||||||
|
#include "storage/fd.h"
|
||||||
|
#include "storage/ipc.h"
|
||||||
|
+#include "storage/md.h"
|
||||||
|
#include "storage/latch.h"
|
||||||
|
#include "storage/pg_shmem.h"
|
||||||
|
#include "storage/pmsignal.h"
|
||||||
|
@@ -198,6 +199,9 @@ InitStandaloneProcess(const char *argv0)
|
||||||
|
InitProcessLocalLatch();
|
||||||
|
InitializeLatchWaitSet();
|
||||||
|
|
||||||
|
+ /* Initialize smgrs */
|
||||||
|
+ register_builtin_dynamic_managers();
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* For consistency with InitPostmasterChild, initialize signal mask here.
|
||||||
|
* But we don't unblock SIGQUIT or provide a default handler for it.
|
||||||
|
@@ -1860,6 +1864,15 @@ process_session_preload_libraries(void)
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * Register any internal managers.
|
||||||
|
+ */
|
||||||
|
+void
|
||||||
|
+register_builtin_dynamic_managers(void)
|
||||||
|
+{
|
||||||
|
+ mdsmgr_register();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* process any shared memory requests from preloaded libraries
|
||||||
|
*/
|
||||||
|
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
|
||||||
|
index 0b01c1f093..d0d4ba38ef 100644
|
||||||
|
--- a/src/include/miscadmin.h
|
||||||
|
+++ b/src/include/miscadmin.h
|
||||||
|
@@ -493,6 +493,7 @@ extern void TouchSocketLockFiles(void);
|
||||||
|
extern void AddToDataDirLockFile(int target_line, const char *str);
|
||||||
|
extern bool RecheckDataDirLockFile(void);
|
||||||
|
extern void ValidatePgVersion(const char *path);
|
||||||
|
+extern void register_builtin_dynamic_managers(void);
|
||||||
|
extern void process_shared_preload_libraries(void);
|
||||||
|
extern void process_session_preload_libraries(void);
|
||||||
|
extern void process_shmem_requests(void);
|
||||||
|
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
|
||||||
|
index 7c181e5a17..734bae07e1 100644
|
||||||
|
--- a/src/include/storage/md.h
|
||||||
|
+++ b/src/include/storage/md.h
|
||||||
|
@@ -19,6 +19,10 @@
|
||||||
|
#include "storage/smgr.h"
|
||||||
|
#include "storage/sync.h"
|
||||||
|
|
||||||
|
+/* registration function for md storage manager */
|
||||||
|
+extern void mdsmgr_register(void);
|
||||||
|
+extern SMgrId MdSMgrId;
|
||||||
|
+
|
||||||
|
/* md storage manager functionality */
|
||||||
|
extern void mdinit(void);
|
||||||
|
extern void mdopen(SMgrRelation reln);
|
||||||
|
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
|
||||||
|
index 527cd2a056..95927b8bdd 100644
|
||||||
|
--- a/src/include/storage/smgr.h
|
||||||
|
+++ b/src/include/storage/smgr.h
|
||||||
|
@@ -18,6 +18,10 @@
|
||||||
|
#include "storage/block.h"
|
||||||
|
#include "storage/relfilelocator.h"
|
||||||
|
|
||||||
|
+typedef uint8 SMgrId;
|
||||||
|
+
|
||||||
|
+#define MaxSMgrId UINT8_MAX
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* smgr.c maintains a table of SMgrRelation objects, which are essentially
|
||||||
|
* cached file handles. An SMgrRelation is created (if not already present)
|
||||||
|
@@ -59,14 +63,8 @@ typedef struct SMgrRelationData
|
||||||
|
* Fields below here are intended to be private to smgr.c and its
|
||||||
|
* submodules. Do not touch them from elsewhere.
|
||||||
|
*/
|
||||||
|
- int smgr_which; /* storage manager selector */
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- * for md.c; per-fork arrays of the number of open segments
|
||||||
|
- * (md_num_open_segs) and the segments themselves (md_seg_fds).
|
||||||
|
- */
|
||||||
|
- int md_num_open_segs[MAX_FORKNUM + 1];
|
||||||
|
- struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
|
||||||
|
+ SMgrId smgr_which; /* storage manager selector */
|
||||||
|
+ int smgrrelation_size; /* size of this struct, incl. smgr-specific data */
|
||||||
|
|
||||||
|
/* if unowned, list link in list of all unowned SMgrRelations */
|
||||||
|
dlist_node node;
|
||||||
|
@@ -77,6 +75,51 @@ typedef SMgrRelationData *SMgrRelation;
|
||||||
|
#define SmgrIsTemp(smgr) \
|
||||||
|
RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator)
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * This struct of function pointers defines the API between smgr.c and
|
||||||
|
+ * any individual storage manager module. Note that smgr subfunctions are
|
||||||
|
+ * generally expected to report problems via elog(ERROR). An exception is
|
||||||
|
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
|
||||||
|
+ * because we normally unlink relations during post-commit/abort cleanup,
|
||||||
|
+ * and so it's too late to raise an error. Also, various conditions that
|
||||||
|
+ * would normally be errors should be allowed during bootstrap and/or WAL
|
||||||
|
+ * recovery --- see comments in md.c for details.
|
||||||
|
+ */
|
||||||
|
+typedef struct f_smgr
|
||||||
|
+{
|
||||||
|
+ const char *name;
|
||||||
|
+ void (*smgr_init) (void); /* may be NULL */
|
||||||
|
+ void (*smgr_shutdown) (void); /* may be NULL */
|
||||||
|
+ void (*smgr_open) (SMgrRelation reln);
|
||||||
|
+ void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+ void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ bool isRedo);
|
||||||
|
+ bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+ void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
|
||||||
|
+ bool isRedo);
|
||||||
|
+ void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||||
|
+ void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, int nblocks, bool skipFsync);
|
||||||
|
+ bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, int nblocks);
|
||||||
|
+ void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum,
|
||||||
|
+ void **buffers, BlockNumber nblocks);
|
||||||
|
+ void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum,
|
||||||
|
+ const void **buffers, BlockNumber nblocks,
|
||||||
|
+ bool skipFsync);
|
||||||
|
+ void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, BlockNumber nblocks);
|
||||||
|
+ BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+ void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber nblocks);
|
||||||
|
+ void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+} f_smgr;
|
||||||
|
+
|
||||||
|
+extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size);
|
||||||
|
+
|
||||||
|
extern void smgrinit(void);
|
||||||
|
extern SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend);
|
||||||
|
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
|
||||||
|
--
|
||||||
|
Tristan Partin
|
||||||
|
Neon (https://neon.tech)
|
||||||
|
|
@ -0,0 +1,93 @@ |
|||||||
|
From 59a667f079c9b040c23921e4c43fae94b88776f2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tristan Partin <tristan@neon.tech>
|
||||||
|
Date: Fri, 13 Oct 2023 14:00:44 -0500
|
||||||
|
Subject: [PATCH v1 2/5] Allow extensions to override the global storage
|
||||||
|
manager
|
||||||
|
|
||||||
|
---
|
||||||
|
src/backend/storage/smgr/md.c | 2 +-
|
||||||
|
src/backend/storage/smgr/smgr.c | 5 ++++-
|
||||||
|
src/backend/utils/init/miscinit.c | 2 ++
|
||||||
|
src/include/storage/md.h | 2 ++
|
||||||
|
src/include/storage/smgr.h | 2 ++
|
||||||
|
5 files changed, 11 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
|
||||||
|
index 66a93101ab..13ec9da236 100644
|
||||||
|
--- a/src/backend/storage/smgr/md.c
|
||||||
|
+++ b/src/backend/storage/smgr/md.c
|
||||||
|
@@ -140,7 +140,7 @@ void mdsmgr_register(void)
|
||||||
|
{
|
||||||
|
/* magnetic disk */
|
||||||
|
f_smgr md_smgr = (f_smgr) {
|
||||||
|
- .name = "md",
|
||||||
|
+ .name = MdSMgrName,
|
||||||
|
.smgr_init = mdinit,
|
||||||
|
.smgr_shutdown = NULL,
|
||||||
|
.smgr_open = mdopen,
|
||||||
|
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
|
||||||
|
index b586e6e25a..0814330b8a 100644
|
||||||
|
--- a/src/backend/storage/smgr/smgr.c
|
||||||
|
+++ b/src/backend/storage/smgr/smgr.c
|
||||||
|
@@ -37,6 +37,9 @@ static int NSmgr = 0;
|
||||||
|
|
||||||
|
static Size LargestSMgrRelationSize = 0;
|
||||||
|
|
||||||
|
+char *storage_manager_string;
|
||||||
|
+SMgrId storage_manager_id;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Each backend has a hashtable that stores all extant SMgrRelation objects.
|
||||||
|
* In addition, "unowned" SMgrRelation objects are chained together in a list.
|
||||||
|
@@ -182,7 +185,7 @@ smgropen(RelFileLocator rlocator, BackendId backend)
|
||||||
|
for (int i = 0; i <= MAX_FORKNUM; ++i)
|
||||||
|
reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
|
||||||
|
|
||||||
|
- reln->smgr_which = MdSMgrId; /* we only have md.c at present */
|
||||||
|
+ reln->smgr_which = storage_manager_id;
|
||||||
|
|
||||||
|
/* implementation-specific initialization */
|
||||||
|
smgrsw[reln->smgr_which].smgr_open(reln);
|
||||||
|
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
|
||||||
|
index 4ec7619302..f44f511f69 100644
|
||||||
|
--- a/src/backend/utils/init/miscinit.c
|
||||||
|
+++ b/src/backend/utils/init/miscinit.c
|
||||||
|
@@ -1871,6 +1871,8 @@ void
|
||||||
|
register_builtin_dynamic_managers(void)
|
||||||
|
{
|
||||||
|
mdsmgr_register();
|
||||||
|
+
|
||||||
|
+ storage_manager_id = MdSMgrId;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
|
||||||
|
index 734bae07e1..fdafb2c8e3 100644
|
||||||
|
--- a/src/include/storage/md.h
|
||||||
|
+++ b/src/include/storage/md.h
|
||||||
|
@@ -19,6 +19,8 @@
|
||||||
|
#include "storage/smgr.h"
|
||||||
|
#include "storage/sync.h"
|
||||||
|
|
||||||
|
+#define MdSMgrName "md"
|
||||||
|
+
|
||||||
|
/* registration function for md storage manager */
|
||||||
|
extern void mdsmgr_register(void);
|
||||||
|
extern SMgrId MdSMgrId;
|
||||||
|
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
|
||||||
|
index 95927b8bdd..ee4fc27265 100644
|
||||||
|
--- a/src/include/storage/smgr.h
|
||||||
|
+++ b/src/include/storage/smgr.h
|
||||||
|
@@ -22,6 +22,8 @@ typedef uint8 SMgrId;
|
||||||
|
|
||||||
|
#define MaxSMgrId UINT8_MAX
|
||||||
|
|
||||||
|
+extern PGDLLIMPORT SMgrId storage_manager_id;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* smgr.c maintains a table of SMgrRelation objects, which are essentially
|
||||||
|
* cached file handles. An SMgrRelation is created (if not already present)
|
||||||
|
--
|
||||||
|
Tristan Partin
|
||||||
|
Neon (https://neon.tech)
|
||||||
|
|
@ -0,0 +1,60 @@ |
|||||||
|
From 9ed9b8ca36cdb75b44deccdfea619c7494fcc6ef Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tristan Partin <tristan@neon.tech>
|
||||||
|
Date: Fri, 13 Oct 2023 13:57:18 -0500
|
||||||
|
Subject: [PATCH v1 3/5] Add checkpoint_create_hook
|
||||||
|
|
||||||
|
Allows an extension to hook into CheckPointCreate().
|
||||||
|
---
|
||||||
|
src/backend/access/transam/xlog.c | 5 +++++
|
||||||
|
src/include/access/xlog.h | 4 ++++
|
||||||
|
2 files changed, 9 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
|
||||||
|
index 478377c4a2..61ae5b63b8 100644
|
||||||
|
--- a/src/backend/access/transam/xlog.c
|
||||||
|
+++ b/src/backend/access/transam/xlog.c
|
||||||
|
@@ -212,6 +212,8 @@ const struct config_enum_entry archive_mode_options[] = {
|
||||||
|
*/
|
||||||
|
CheckpointStatsData CheckpointStats;
|
||||||
|
|
||||||
|
+checkpoint_create_hook_type checkpoint_create_hook = NULL;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* During recovery, lastFullPageWrites keeps track of full_page_writes that
|
||||||
|
* the replayed WAL records indicate. It's initialized with full_page_writes
|
||||||
|
@@ -6875,6 +6877,9 @@ CreateCheckPoint(int flags)
|
||||||
|
*/
|
||||||
|
END_CRIT_SECTION();
|
||||||
|
|
||||||
|
+ if (checkpoint_create_hook != NULL)
|
||||||
|
+ checkpoint_create_hook(&checkPoint);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* In some cases there are groups of actions that must all occur on one
|
||||||
|
* side or the other of a checkpoint record. Before flushing the
|
||||||
|
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
|
||||||
|
index 301c5fa11f..437f2a994b 100644
|
||||||
|
--- a/src/include/access/xlog.h
|
||||||
|
+++ b/src/include/access/xlog.h
|
||||||
|
@@ -13,6 +13,7 @@
|
||||||
|
|
||||||
|
#include "access/xlogbackup.h"
|
||||||
|
#include "access/xlogdefs.h"
|
||||||
|
+#include "catalog/pg_control.h"
|
||||||
|
#include "datatype/timestamp.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
#include "nodes/pg_list.h"
|
||||||
|
@@ -57,6 +58,9 @@ extern PGDLLIMPORT int wal_decode_buffer_size;
|
||||||
|
|
||||||
|
extern PGDLLIMPORT int CheckPointSegments;
|
||||||
|
|
||||||
|
+typedef void (*checkpoint_create_hook_type)(const CheckPoint *);
|
||||||
|
+extern PGDLLIMPORT checkpoint_create_hook_type checkpoint_create_hook;
|
||||||
|
+
|
||||||
|
/* Archive modes */
|
||||||
|
typedef enum ArchiveMode
|
||||||
|
{
|
||||||
|
--
|
||||||
|
Tristan Partin
|
||||||
|
Neon (https://neon.tech)
|
||||||
|
|
@ -0,0 +1,341 @@ |
|||||||
|
From d46b41d7c89deb23a6a1afec9d7fe3544b9a3327 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tristan Partin <tristan@neon.tech>
|
||||||
|
Date: Wed, 20 Sep 2023 14:23:38 -0500
|
||||||
|
Subject: [PATCH v1 4/5] Add contrib/fsync_checker
|
||||||
|
|
||||||
|
fsync_checker is an extension which overrides the global storage manager
|
||||||
|
to check for volatile relations, those which have been written but not
|
||||||
|
synced to disk.
|
||||||
|
---
|
||||||
|
contrib/Makefile | 1 +
|
||||||
|
contrib/fsync_checker/fsync_checker.control | 5 +
|
||||||
|
contrib/fsync_checker/fsync_checker_smgr.c | 249 ++++++++++++++++++++
|
||||||
|
contrib/fsync_checker/meson.build | 22 ++
|
||||||
|
contrib/meson.build | 1 +
|
||||||
|
5 files changed, 278 insertions(+)
|
||||||
|
create mode 100644 contrib/fsync_checker/fsync_checker.control
|
||||||
|
create mode 100644 contrib/fsync_checker/fsync_checker_smgr.c
|
||||||
|
create mode 100644 contrib/fsync_checker/meson.build
|
||||||
|
|
||||||
|
diff --git a/contrib/Makefile b/contrib/Makefile
|
||||||
|
index da4e2316a3..c55ced6ec0 100644
|
||||||
|
--- a/contrib/Makefile
|
||||||
|
+++ b/contrib/Makefile
|
||||||
|
@@ -20,6 +20,7 @@ SUBDIRS = \
|
||||||
|
dict_int \
|
||||||
|
dict_xsyn \
|
||||||
|
earthdistance \
|
||||||
|
+ fsync_checker \
|
||||||
|
file_fdw \
|
||||||
|
fuzzystrmatch \
|
||||||
|
hstore \
|
||||||
|
diff --git a/contrib/fsync_checker/fsync_checker.control b/contrib/fsync_checker/fsync_checker.control
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..7d0e36434b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/fsync_checker/fsync_checker.control
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+# fsync_checker extension
|
||||||
|
+comment = 'SMGR extension for checking volatile writes'
|
||||||
|
+default_version = '1.0'
|
||||||
|
+module_pathname = '$libdir/fsync_checker'
|
||||||
|
+relocatable = true
|
||||||
|
diff --git a/contrib/fsync_checker/fsync_checker_smgr.c b/contrib/fsync_checker/fsync_checker_smgr.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..feef2f7d3e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/fsync_checker/fsync_checker_smgr.c
|
||||||
|
@@ -0,0 +1,249 @@
|
||||||
|
+#include "postgres.h"
|
||||||
|
+
|
||||||
|
+#include "access/xlog.h"
|
||||||
|
+#include "fmgr.h"
|
||||||
|
+#include "miscadmin.h"
|
||||||
|
+#include "storage/ipc.h"
|
||||||
|
+#include "storage/lwlock.h"
|
||||||
|
+#include "storage/shmem.h"
|
||||||
|
+#include "storage/smgr.h"
|
||||||
|
+#include "storage/md.h"
|
||||||
|
+#include "utils/hsearch.h"
|
||||||
|
+
|
||||||
|
+PG_MODULE_MAGIC;
|
||||||
|
+
|
||||||
|
+typedef struct volatileRelnKey
|
||||||
|
+{
|
||||||
|
+ RelFileLocator locator;
|
||||||
|
+ ForkNumber forknum;
|
||||||
|
+} volatileRelnKey;
|
||||||
|
+
|
||||||
|
+typedef struct volatileRelnEntry
|
||||||
|
+{
|
||||||
|
+ volatileRelnKey key;
|
||||||
|
+ XLogRecPtr lsn;
|
||||||
|
+} volatileRelnEntry;
|
||||||
|
+
|
||||||
|
+void _PG_init(void);
|
||||||
|
+
|
||||||
|
+static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
+ const void *buffer, bool skipFsync);
|
||||||
|
+static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+static void fsync_checker_writev(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, const void **buffers,
|
||||||
|
+ BlockNumber nblocks, bool skipFsync);
|
||||||
|
+static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, BlockNumber nblocks);
|
||||||
|
+static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, int nblocks, bool skipFsync);
|
||||||
|
+
|
||||||
|
+static void fsync_checker_checkpoint_create(const CheckPoint *checkPoint);
|
||||||
|
+static void fsync_checker_shmem_request(void);
|
||||||
|
+static void fsync_checker_shmem_startup(void);
|
||||||
|
+
|
||||||
|
+static void add_reln(SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+static void remove_reln(SMgrRelation reln, ForkNumber forknum);
|
||||||
|
+
|
||||||
|
+static SMgrId fsync_checker_smgr_id;
|
||||||
|
+static const struct f_smgr fsync_checker_smgr = {
|
||||||
|
+ .name = "fsync_checker",
|
||||||
|
+ .smgr_init = mdinit,
|
||||||
|
+ .smgr_shutdown = NULL,
|
||||||
|
+ .smgr_open = mdopen,
|
||||||
|
+ .smgr_close = mdclose,
|
||||||
|
+ .smgr_create = mdcreate,
|
||||||
|
+ .smgr_exists = mdexists,
|
||||||
|
+ .smgr_unlink = mdunlink,
|
||||||
|
+ .smgr_extend = fsync_checker_extend,
|
||||||
|
+ .smgr_zeroextend = fsync_checker_zeroextend,
|
||||||
|
+ .smgr_prefetch = mdprefetch,
|
||||||
|
+ .smgr_readv = mdreadv,
|
||||||
|
+ .smgr_writev = fsync_checker_writev,
|
||||||
|
+ .smgr_writeback = fsync_checker_writeback,
|
||||||
|
+ .smgr_nblocks = mdnblocks,
|
||||||
|
+ .smgr_truncate = mdtruncate,
|
||||||
|
+ .smgr_immedsync = fsync_checker_immedsync,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static HTAB *volatile_relns;
|
||||||
|
+static LWLock *volatile_relns_lock;
|
||||||
|
+static shmem_request_hook_type prev_shmem_request_hook;
|
||||||
|
+static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||||
|
+static checkpoint_create_hook_type prev_checkpoint_create_hook;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+_PG_init(void)
|
||||||
|
+{
|
||||||
|
+ prev_checkpoint_create_hook = checkpoint_create_hook;
|
||||||
|
+ checkpoint_create_hook = fsync_checker_checkpoint_create;
|
||||||
|
+
|
||||||
|
+ prev_shmem_request_hook = shmem_request_hook;
|
||||||
|
+ shmem_request_hook = fsync_checker_shmem_request;
|
||||||
|
+
|
||||||
|
+ prev_shmem_startup_hook = shmem_startup_hook;
|
||||||
|
+ shmem_startup_hook = fsync_checker_shmem_startup;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Relation size of 0 means we can just defer to md, but it would be nice
|
||||||
|
+ * to just expose this functionality, so if I needed my own relation, I
|
||||||
|
+ * could use MdSmgrRelation as the parent.
|
||||||
|
+ */
|
||||||
|
+ fsync_checker_smgr_id = smgr_register(&fsync_checker_smgr, 0);
|
||||||
|
+
|
||||||
|
+ storage_manager_id = fsync_checker_smgr_id;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_checkpoint_create(const CheckPoint *checkPoint)
|
||||||
|
+{
|
||||||
|
+ long num_entries;
|
||||||
|
+ HASH_SEQ_STATUS status;
|
||||||
|
+ volatileRelnEntry *entry;
|
||||||
|
+
|
||||||
|
+ if (prev_checkpoint_create_hook)
|
||||||
|
+ prev_checkpoint_create_hook(checkPoint);
|
||||||
|
+
|
||||||
|
+ LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE);
|
||||||
|
+
|
||||||
|
+ hash_seq_init(&status, volatile_relns);
|
||||||
|
+
|
||||||
|
+ num_entries = hash_get_num_entries(volatile_relns);
|
||||||
|
+ elog(INFO, "Analyzing %ld volatile relations", num_entries);
|
||||||
|
+ while ((entry = hash_seq_search(&status)))
|
||||||
|
+ {
|
||||||
|
+ if (entry->lsn < checkPoint->redo)
|
||||||
|
+ {
|
||||||
|
+ char *path;
|
||||||
|
+
|
||||||
|
+ path = relpathperm(entry->key.locator, entry->key.forknum);
|
||||||
|
+
|
||||||
|
+ elog(WARNING, "Relation not previously synced: %s", path);
|
||||||
|
+
|
||||||
|
+ pfree(path);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ LWLockRelease(volatile_relns_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_shmem_request(void)
|
||||||
|
+{
|
||||||
|
+ if (prev_shmem_request_hook)
|
||||||
|
+ prev_shmem_request_hook();
|
||||||
|
+
|
||||||
|
+ RequestAddinShmemSpace(hash_estimate_size(1024, sizeof(volatileRelnEntry)));
|
||||||
|
+ RequestNamedLWLockTranche("fsync_checker volatile relns lock", 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_shmem_startup(void)
|
||||||
|
+{
|
||||||
|
+ HASHCTL ctl;
|
||||||
|
+
|
||||||
|
+ if (prev_shmem_startup_hook)
|
||||||
|
+ prev_shmem_startup_hook();
|
||||||
|
+
|
||||||
|
+ ctl.keysize = sizeof(volatileRelnKey);
|
||||||
|
+ ctl.entrysize = sizeof(volatileRelnEntry);
|
||||||
|
+ volatile_relns = NULL;
|
||||||
|
+ volatile_relns_lock = NULL;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Create or attach to the shared memory state, including hash table
|
||||||
|
+ */
|
||||||
|
+ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||||
|
+
|
||||||
|
+ volatile_relns = ShmemInitHash("fsync_checker volatile relns",
|
||||||
|
+ 1024, 1024, &ctl, HASH_BLOBS | HASH_ELEM);
|
||||||
|
+ volatile_relns_lock = &GetNamedLWLockTranche("fsync_checker volatile relns lock")->lock;
|
||||||
|
+
|
||||||
|
+ LWLockRelease(AddinShmemInitLock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+add_reln(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
+{
|
||||||
|
+ bool found;
|
||||||
|
+ XLogRecPtr lsn;
|
||||||
|
+ volatileRelnKey key;
|
||||||
|
+ volatileRelnEntry *entry;
|
||||||
|
+
|
||||||
|
+ key.locator = reln->smgr_rlocator.locator;
|
||||||
|
+ key.forknum = forknum;
|
||||||
|
+
|
||||||
|
+ lsn = GetXLogWriteRecPtr();
|
||||||
|
+
|
||||||
|
+ LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE);
|
||||||
|
+
|
||||||
|
+ entry = hash_search(volatile_relns, &key, HASH_ENTER, &found);
|
||||||
|
+ if (!found)
|
||||||
|
+ entry->lsn = lsn;
|
||||||
|
+
|
||||||
|
+ LWLockRelease(volatile_relns_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+remove_reln(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
+{
|
||||||
|
+ volatileRelnKey key;
|
||||||
|
+
|
||||||
|
+ key.locator = reln->smgr_rlocator.locator;
|
||||||
|
+ key.forknum = forknum;
|
||||||
|
+
|
||||||
|
+ LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE);
|
||||||
|
+
|
||||||
|
+ hash_search(volatile_relns, &key, HASH_REMOVE, NULL);
|
||||||
|
+
|
||||||
|
+ LWLockRelease(volatile_relns_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||||
|
+ const void *buffer, bool skipFsync)
|
||||||
|
+{
|
||||||
|
+ if (!SmgrIsTemp(reln) && !skipFsync)
|
||||||
|
+ add_reln(reln, forknum);
|
||||||
|
+
|
||||||
|
+ mdextend(reln, forknum, blocknum, buffer, skipFsync);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
|
+{
|
||||||
|
+ if (!SmgrIsTemp(reln))
|
||||||
|
+ remove_reln(reln, forknum);
|
||||||
|
+
|
||||||
|
+ mdimmedsync(reln, forknum);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_writev(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, const void **buffers,
|
||||||
|
+ BlockNumber nblocks, bool skipFsync)
|
||||||
|
+{
|
||||||
|
+ if (!SmgrIsTemp(reln) && !skipFsync)
|
||||||
|
+ add_reln(reln, forknum);
|
||||||
|
+
|
||||||
|
+ mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, BlockNumber nblocks)
|
||||||
|
+{
|
||||||
|
+ if (!SmgrIsTemp(reln))
|
||||||
|
+ remove_reln(reln, forknum);
|
||||||
|
+
|
||||||
|
+ mdwriteback(reln, forknum, blocknum, nblocks);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
|
+ BlockNumber blocknum, int nblocks, bool skipFsync)
|
||||||
|
+{
|
||||||
|
+ if (!SmgrIsTemp(reln) && !skipFsync)
|
||||||
|
+ add_reln(reln, forknum);
|
||||||
|
+
|
||||||
|
+ mdzeroextend(reln, forknum, blocknum, nblocks, skipFsync);
|
||||||
|
+}
|
||||||
|
diff --git a/contrib/fsync_checker/meson.build b/contrib/fsync_checker/meson.build
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..ce6ed7fe90
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/fsync_checker/meson.build
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+# Copyright (c) 2023, PostgreSQL Global Development Group
|
||||||
|
+
|
||||||
|
+fsync_checker_sources = files(
|
||||||
|
+ 'fsync_checker_smgr.c',
|
||||||
|
+)
|
||||||
|
+
|
||||||
|
+if host_system == 'windows'
|
||||||
|
+ fsync_checker_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
|
||||||
|
+ '--NAME', 'fsync_checker',
|
||||||
|
+ '--FILEDESC', 'fsync_checker - SMGR extension for checking volatile relations',])
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
+fsync_checker = shared_module('fsync_checker',
|
||||||
|
+ fsync_checker_sources,
|
||||||
|
+ kwargs: contrib_mod_args,
|
||||||
|
+)
|
||||||
|
+contrib_targets += fsync_checker
|
||||||
|
+
|
||||||
|
+install_data(
|
||||||
|
+ 'fsync_checker.control',
|
||||||
|
+ kwargs: contrib_data_args,
|
||||||
|
+)
|
||||||
|
diff --git a/contrib/meson.build b/contrib/meson.build
|
||||||
|
index c12dc906ca..e5d872494a 100644
|
||||||
|
--- a/contrib/meson.build
|
||||||
|
+++ b/contrib/meson.build
|
||||||
|
@@ -29,6 +29,7 @@ subdir('dict_int')
|
||||||
|
subdir('dict_xsyn')
|
||||||
|
subdir('earthdistance')
|
||||||
|
subdir('file_fdw')
|
||||||
|
+subdir('fsync_checker')
|
||||||
|
subdir('fuzzystrmatch')
|
||||||
|
subdir('hstore')
|
||||||
|
subdir('hstore_plperl')
|
||||||
|
--
|
||||||
|
Tristan Partin
|
||||||
|
Neon (https://neon.tech)
|
||||||
|
|
Loading…
Reference in new issue