mirror of https://github.com/postgres/postgres
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
408 lines
14 KiB
408 lines
14 KiB
/*-------------------------------------------------------------------------
|
|
*
|
|
* xlogreader.h
|
|
* Definitions for the generic XLog reading facility
|
|
*
|
|
* Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/include/access/xlogreader.h
|
|
*
|
|
* NOTES
|
|
* See the definition of the XLogReaderState struct for instructions on
|
|
* how to use the XLogReader infrastructure.
|
|
*
|
|
* The basic idea is to allocate an XLogReaderState via
|
|
* XLogReaderAllocate(), position the reader to the first record with
|
|
* XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
|
|
* until it returns NULL.
|
|
*
|
|
* Callers supply a page_read callback if they want to call
|
|
* XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
|
|
* otherwise. The WALRead function can be used as a helper to write
|
|
* page_read callbacks, but it is not mandatory; callers that use it,
|
|
* must supply segment_open callbacks. The segment_close callback
|
|
* must always be supplied.
|
|
*
|
|
* After reading a record with XLogReadRecord(), it's decomposed into
|
|
* the per-block and main data parts, and the parts can be accessed
|
|
* with the XLogRec* macros and functions. You can also decode a
|
|
* record that's already constructed in memory, without reading from
|
|
* disk, by calling the DecodeXLogRecord() function.
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#ifndef XLOGREADER_H
|
|
#define XLOGREADER_H
|
|
|
|
#ifndef FRONTEND
|
|
#include "access/transam.h"
|
|
#endif
|
|
|
|
#include "access/xlogrecord.h"
|
|
#include "storage/buf.h"
|
|
|
|
/* WALOpenSegment represents a WAL segment being read. */
|
|
typedef struct WALOpenSegment
|
|
{
|
|
int ws_file; /* segment file descriptor */
|
|
XLogSegNo ws_segno; /* segment number */
|
|
TimeLineID ws_tli; /* timeline ID of the currently open file */
|
|
} WALOpenSegment;
|
|
|
|
/* WALSegmentContext carries context information about WAL segments to read */
|
|
typedef struct WALSegmentContext
|
|
{
|
|
char ws_dir[MAXPGPATH];
|
|
int ws_segsize;
|
|
} WALSegmentContext;
|
|
|
|
typedef struct XLogReaderState XLogReaderState;
|
|
typedef struct XLogFindNextRecordState XLogFindNextRecordState;
|
|
|
|
/* Function type definition for the segment cleanup callback */
|
|
typedef void (*WALSegmentCleanupCB) (XLogReaderState *xlogreader);
|
|
|
|
/* Function type definition for the open/close callbacks for WALRead() */
|
|
typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
|
|
XLogSegNo nextSegNo,
|
|
TimeLineID *tli_p);
|
|
typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
|
|
|
|
typedef struct
|
|
{
|
|
/* Is this block ref in use? */
|
|
bool in_use;
|
|
|
|
/* Identify the block this refers to */
|
|
RelFileNode rnode;
|
|
ForkNumber forknum;
|
|
BlockNumber blkno;
|
|
|
|
/* Workspace for remembering last known buffer holding this block. */
|
|
Buffer recent_buffer;
|
|
|
|
/* copy of the fork_flags field from the XLogRecordBlockHeader */
|
|
uint8 flags;
|
|
|
|
/* Information on full-page image, if any */
|
|
bool has_image; /* has image, even for consistency checking */
|
|
bool apply_image; /* has image that should be restored */
|
|
char *bkp_image;
|
|
uint16 hole_offset;
|
|
uint16 hole_length;
|
|
uint16 bimg_len;
|
|
uint8 bimg_info;
|
|
|
|
/* Buffer holding the rmgr-specific data associated with this block */
|
|
bool has_data;
|
|
char *data;
|
|
uint16 data_len;
|
|
uint16 data_bufsz;
|
|
} DecodedBkpBlock;
|
|
|
|
/* Return code from XLogReadRecord */
|
|
typedef enum XLogReadRecordResult
|
|
{
|
|
XLREAD_SUCCESS, /* record is successfully read */
|
|
XLREAD_NEED_DATA, /* need more data. see XLogReadRecord. */
|
|
XLREAD_FULL, /* cannot hold more data while reading ahead */
|
|
XLREAD_FAIL /* failed during reading a record */
|
|
} XLogReadRecordResult;
|
|
|
|
/*
|
|
* internal state of XLogReadRecord
|
|
*
|
|
* XLogReadState runs a state machine while reading a record. Theses states
|
|
* are not seen outside the function. Each state may repeat several times
|
|
* exiting requesting caller for new data. See the comment of XLogReadRecrod
|
|
* for details.
|
|
*/
|
|
typedef enum XLogReadRecordState
|
|
{
|
|
XLREAD_NEXT_RECORD,
|
|
XLREAD_TOT_LEN,
|
|
XLREAD_FIRST_FRAGMENT,
|
|
XLREAD_CONTINUATION
|
|
} XLogReadRecordState;
|
|
|
|
/*
|
|
* The decoded contents of a record. This occupies a contiguous region of
|
|
* memory, with main_data and blocks[n].data pointing to memory after the
|
|
* members declared here.
|
|
*/
|
|
typedef struct DecodedXLogRecord
|
|
{
|
|
/* Private member used for resource management. */
|
|
size_t size; /* total size of decoded record */
|
|
bool oversized; /* outside the regular decode buffer? */
|
|
struct DecodedXLogRecord *next; /* decoded record queue link */
|
|
|
|
/* Public members. */
|
|
XLogRecPtr lsn; /* location */
|
|
XLogRecPtr next_lsn; /* location of next record */
|
|
XLogRecord header; /* header */
|
|
RepOriginId record_origin;
|
|
TransactionId toplevel_xid; /* XID of top-level transaction */
|
|
char *main_data; /* record's main data portion */
|
|
uint32 main_data_len; /* main data portion's length */
|
|
int max_block_id; /* highest block_id in use (-1 if none) */
|
|
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
|
|
} DecodedXLogRecord;
|
|
|
|
struct XLogReaderState
|
|
{
|
|
/*
|
|
* Operational callbacks
|
|
*/
|
|
WALSegmentCleanupCB cleanup_cb;
|
|
|
|
/* ----------------------------------------
|
|
* Public parameters
|
|
* ----------------------------------------
|
|
*/
|
|
|
|
/*
|
|
* System identifier of the xlog files we're about to read. Set to zero
|
|
* (the default value) if unknown or unimportant.
|
|
*/
|
|
uint64 system_identifier;
|
|
|
|
/*
|
|
* Start and end point of last record read. EndRecPtr is also used as the
|
|
* position to read next. Calling XLogBeginRead() sets EndRecPtr to the
|
|
* starting position and ReadRecPtr to invalid.
|
|
*
|
|
* Start and end point of last record returned by XLogReadRecord(). These
|
|
* are also available as record->lsn and record->next_lsn.
|
|
*/
|
|
XLogRecPtr ReadRecPtr; /* start of last record read or being read */
|
|
XLogRecPtr EndRecPtr; /* end+1 of last record read */
|
|
|
|
/* ----------------------------------------
|
|
* Communication with page reader
|
|
* readBuf is XLOG_BLCKSZ bytes, valid up to at least reqLen bytes.
|
|
* ----------------------------------------
|
|
*/
|
|
/* variables the clients of xlogreader can examine */
|
|
XLogRecPtr readPagePtr; /* page pointer to read */
|
|
int32 reqLen; /* bytes requested to the caller */
|
|
char *readBuf; /* buffer to store data */
|
|
bool page_verified; /* is the page header on the buffer verified? */
|
|
bool record_verified;/* is the current record header verified? */
|
|
|
|
/* variables set by the client of xlogreader */
|
|
int32 readLen; /* actual bytes copied into readBuf by client,
|
|
* which should be >= reqLen. Client should
|
|
* use XLogReaderSetInputData() to set. */
|
|
|
|
/* ----------------------------------------
|
|
* Decoded representation of current record
|
|
*
|
|
* Use XLogRecGet* functions to investigate the record; these fields
|
|
* should not be accessed directly.
|
|
* ----------------------------------------
|
|
* Start and end point of the last record read and decoded by
|
|
* XLogReadRecordInternal(). NextRecPtr is also used as the position to
|
|
* decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
|
|
* the requested starting position.
|
|
*/
|
|
XLogRecPtr DecodeRecPtr; /* start of last record decoded */
|
|
XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
|
|
XLogRecPtr PrevRecPtr; /* start of previous record decoded */
|
|
|
|
/* Last record returned by XLogReadRecord(). */
|
|
DecodedXLogRecord *record;
|
|
|
|
/* ----------------------------------------
|
|
* private/internal state
|
|
* ----------------------------------------
|
|
*/
|
|
|
|
/*
|
|
* Buffer for decoded records. This is a circular buffer, though
|
|
* individual records can't be split in the middle, so some space is often
|
|
* wasted at the end. Oversized records that don't fit in this space are
|
|
* allocated separately.
|
|
*/
|
|
char *decode_buffer;
|
|
size_t decode_buffer_size;
|
|
bool free_decode_buffer; /* need to free? */
|
|
char *decode_buffer_head; /* write head */
|
|
char *decode_buffer_tail; /* read head */
|
|
|
|
/*
|
|
* Queue of records that have been decoded. This is a linked list that
|
|
* usually consists of consecutive records in decode_buffer, but may also
|
|
* contain oversized records allocated with palloc().
|
|
*/
|
|
DecodedXLogRecord *decode_queue_head; /* newest decoded record */
|
|
DecodedXLogRecord *decode_queue_tail; /* oldest decoded record */
|
|
|
|
/* last read XLOG position for data currently in readBuf */
|
|
WALSegmentContext segcxt;
|
|
WALOpenSegment seg;
|
|
uint32 segoff;
|
|
|
|
/*
|
|
* beginning of prior page read, and its TLI. Doesn't necessarily
|
|
* correspond to what's in readBuf; used for timeline sanity checks.
|
|
*/
|
|
XLogRecPtr latestPagePtr;
|
|
TimeLineID latestPageTLI;
|
|
|
|
/* timeline to read it from, 0 if a lookup is required */
|
|
TimeLineID currTLI;
|
|
|
|
/*
|
|
* Safe point to read to in currTLI if current TLI is historical
|
|
* (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
|
|
*
|
|
* Actually set to the start of the segment containing the timeline switch
|
|
* that ends currTLI's validity, not the LSN of the switch its self, since
|
|
* we can't assume the old segment will be present.
|
|
*/
|
|
XLogRecPtr currTLIValidUntil;
|
|
|
|
/*
|
|
* If currTLI is not the most recent known timeline, the next timeline to
|
|
* read from when currTLIValidUntil is reached.
|
|
*/
|
|
TimeLineID nextTLI;
|
|
|
|
/*
|
|
* Buffer for current ReadRecord result (expandable), used when a record
|
|
* crosses a page boundary.
|
|
*/
|
|
char *readRecordBuf;
|
|
uint32 readRecordBufSize;
|
|
|
|
/*
|
|
* XLogReadRecordInternal() state
|
|
*/
|
|
XLogReadRecordState readRecordState; /* state machine state */
|
|
int recordGotLen; /* amount of current record that has already
|
|
* been read */
|
|
int recordRemainLen; /* length of current record that remains */
|
|
XLogRecPtr recordContRecPtr; /* where the current record continues */
|
|
|
|
DecodedXLogRecord *decoding; /* record currently being decoded */
|
|
|
|
/* Buffer to hold error message */
|
|
char *errormsg_buf;
|
|
bool errormsg_deferred;
|
|
};
|
|
|
|
struct XLogFindNextRecordState
|
|
{
|
|
XLogReaderState *reader_state;
|
|
XLogRecPtr targetRecPtr;
|
|
XLogRecPtr currRecPtr;
|
|
};
|
|
|
|
/* Report that data is available for decoding. */
|
|
static inline void
|
|
XLogReaderSetInputData(XLogReaderState *state, int32 len)
|
|
{
|
|
state->readLen = len;
|
|
}
|
|
|
|
/* Get a new XLogReader */
|
|
extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
|
|
const char *waldir,
|
|
WALSegmentCleanupCB cleanup_cb);
|
|
|
|
/* Free an XLogReader */
|
|
extern void XLogReaderFree(XLogReaderState *state);
|
|
|
|
/* Optionally provide a circular decoding buffer to allow readahead. */
|
|
extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
|
|
void *buffer,
|
|
size_t size);
|
|
|
|
/* Position the XLogReader to given record */
|
|
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
|
|
#ifdef FRONTEND
|
|
extern XLogFindNextRecordState *InitXLogFindNextRecord(XLogReaderState *reader_state, XLogRecPtr start_ptr);
|
|
extern bool XLogFindNextRecord(XLogFindNextRecordState *state);
|
|
#endif /* FRONTEND */
|
|
|
|
/* Read the next record's header. Returns NULL on end-of-WAL or failure. */
|
|
extern XLogReadRecordResult XLogReadRecord(XLogReaderState *state,
|
|
XLogRecord **record,
|
|
char **errormsg);
|
|
|
|
/* Read the next decoded record. Returns NULL on end-of-WAL or failure. */
|
|
extern XLogReadRecordResult XLogNextRecord(XLogReaderState *state,
|
|
DecodedXLogRecord **record,
|
|
char **errormsg);
|
|
|
|
/* Try to read ahead, if there is space in the decoding buffer. */
|
|
extern XLogReadRecordResult XLogReadAhead(XLogReaderState *state,
|
|
DecodedXLogRecord **record,
|
|
char **errormsg);
|
|
|
|
/* Validate a page */
|
|
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
|
|
XLogRecPtr recptr, char *phdr);
|
|
|
|
/*
|
|
* Error information from WALRead that both backend and frontend caller can
|
|
* process. Currently only errors from pg_pread can be reported.
|
|
*/
|
|
typedef struct WALReadError
|
|
{
|
|
int wre_errno; /* errno set by the last pg_pread() */
|
|
int wre_off; /* Offset we tried to read from. */
|
|
int wre_req; /* Bytes requested to be read. */
|
|
int wre_read; /* Bytes read by the last read(). */
|
|
WALOpenSegment wre_seg; /* Segment we tried to read from. */
|
|
} WALReadError;
|
|
|
|
extern bool WALRead(XLogReaderState *state,
|
|
WALSegmentOpenCB segopenfn, WALSegmentCloseCB sgclosefn,
|
|
char *buf, XLogRecPtr startptr, Size count,
|
|
TimeLineID tli, WALReadError *errinfo);
|
|
|
|
/* Functions for decoding an XLogRecord */
|
|
|
|
extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
|
|
extern bool DecodeXLogRecord(XLogReaderState *state,
|
|
DecodedXLogRecord *decoded,
|
|
XLogRecord *record,
|
|
XLogRecPtr lsn,
|
|
char **errmsg);
|
|
|
|
#define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
|
|
#define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
|
|
#define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
|
|
#define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
|
|
#define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
|
|
#define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
|
|
#define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
|
|
#define XLogRecGetData(decoder) ((decoder)->record->main_data)
|
|
#define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
|
|
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
|
|
#define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
|
|
#define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
|
|
#define XLogRecHasBlockRef(decoder, block_id) \
|
|
((decoder)->record->max_block_id >= (block_id)) && \
|
|
((decoder)->record->blocks[block_id].in_use)
|
|
#define XLogRecHasBlockImage(decoder, block_id) \
|
|
((decoder)->record->blocks[block_id].has_image)
|
|
#define XLogRecBlockImageApply(decoder, block_id) \
|
|
((decoder)->record->blocks[block_id].apply_image)
|
|
|
|
#ifndef FRONTEND
|
|
extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
|
|
#endif
|
|
|
|
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
|
|
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
|
|
extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
|
|
RelFileNode *rnode, ForkNumber *forknum,
|
|
BlockNumber *blknum);
|
|
extern bool XLogRecGetRecentBuffer(XLogReaderState *record, uint8 block_id,
|
|
RelFileNode *rnode, ForkNumber *forknum,
|
|
BlockNumber *blknum, Buffer *recent_buffer);
|
|
|
|
#endif /* XLOGREADER_H */
|
|
|