You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
postgres/src/include/access/xlogreader.h

408 lines
14 KiB

/*-------------------------------------------------------------------------
*
* xlogreader.h
* Definitions for the generic XLog reading facility
*
* Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/access/xlogreader.h
*
* NOTES
* See the definition of the XLogReaderState struct for instructions on
* how to use the XLogReader infrastructure.
*
* The basic idea is to allocate an XLogReaderState via
* XLogReaderAllocate(), position the reader to the first record with
* XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
* until it returns NULL.
*
* Callers supply a page_read callback if they want to call
* XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
* otherwise. The WALRead function can be used as a helper to write
* page_read callbacks, but it is not mandatory; callers that use it,
* must supply segment_open callbacks. The segment_close callback
* must always be supplied.
*
* After reading a record with XLogReadRecord(), it's decomposed into
* the per-block and main data parts, and the parts can be accessed
* with the XLogRec* macros and functions. You can also decode a
* record that's already constructed in memory, without reading from
* disk, by calling the DecodeXLogRecord() function.
*-------------------------------------------------------------------------
*/
#ifndef XLOGREADER_H
#define XLOGREADER_H
#ifndef FRONTEND
#include "access/transam.h"
#endif
#include "access/xlogrecord.h"
#include "storage/buf.h"
/* WALOpenSegment represents a WAL segment being read. */
typedef struct WALOpenSegment
{
int ws_file; /* segment file descriptor */
XLogSegNo ws_segno; /* segment number */
TimeLineID ws_tli; /* timeline ID of the currently open file */
} WALOpenSegment;
/* WALSegmentContext carries context information about WAL segments to read */
typedef struct WALSegmentContext
{
char ws_dir[MAXPGPATH];
int ws_segsize;
} WALSegmentContext;
typedef struct XLogReaderState XLogReaderState;
typedef struct XLogFindNextRecordState XLogFindNextRecordState;
/* Function type definition for the segment cleanup callback */
typedef void (*WALSegmentCleanupCB) (XLogReaderState *xlogreader);
/* Function type definition for the open/close callbacks for WALRead() */
typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
XLogSegNo nextSegNo,
TimeLineID *tli_p);
typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
typedef struct
{
/* Is this block ref in use? */
bool in_use;
/* Identify the block this refers to */
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blkno;
/* Workspace for remembering last known buffer holding this block. */
Buffer recent_buffer;
/* copy of the fork_flags field from the XLogRecordBlockHeader */
uint8 flags;
/* Information on full-page image, if any */
bool has_image; /* has image, even for consistency checking */
bool apply_image; /* has image that should be restored */
char *bkp_image;
uint16 hole_offset;
uint16 hole_length;
uint16 bimg_len;
uint8 bimg_info;
/* Buffer holding the rmgr-specific data associated with this block */
bool has_data;
char *data;
uint16 data_len;
uint16 data_bufsz;
} DecodedBkpBlock;
/* Return code from XLogReadRecord */
typedef enum XLogReadRecordResult
{
XLREAD_SUCCESS, /* record is successfully read */
XLREAD_NEED_DATA, /* need more data. see XLogReadRecord. */
XLREAD_FULL, /* cannot hold more data while reading ahead */
XLREAD_FAIL /* failed during reading a record */
} XLogReadRecordResult;
/*
* internal state of XLogReadRecord
*
* XLogReadState runs a state machine while reading a record. Theses states
* are not seen outside the function. Each state may repeat several times
* exiting requesting caller for new data. See the comment of XLogReadRecrod
* for details.
*/
typedef enum XLogReadRecordState
{
XLREAD_NEXT_RECORD,
XLREAD_TOT_LEN,
XLREAD_FIRST_FRAGMENT,
XLREAD_CONTINUATION
} XLogReadRecordState;
/*
* The decoded contents of a record. This occupies a contiguous region of
* memory, with main_data and blocks[n].data pointing to memory after the
* members declared here.
*/
typedef struct DecodedXLogRecord
{
/* Private member used for resource management. */
size_t size; /* total size of decoded record */
bool oversized; /* outside the regular decode buffer? */
struct DecodedXLogRecord *next; /* decoded record queue link */
/* Public members. */
XLogRecPtr lsn; /* location */
XLogRecPtr next_lsn; /* location of next record */
XLogRecord header; /* header */
RepOriginId record_origin;
TransactionId toplevel_xid; /* XID of top-level transaction */
char *main_data; /* record's main data portion */
uint32 main_data_len; /* main data portion's length */
int max_block_id; /* highest block_id in use (-1 if none) */
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
} DecodedXLogRecord;
struct XLogReaderState
{
/*
* Operational callbacks
*/
WALSegmentCleanupCB cleanup_cb;
/* ----------------------------------------
* Public parameters
* ----------------------------------------
*/
/*
* System identifier of the xlog files we're about to read. Set to zero
* (the default value) if unknown or unimportant.
*/
uint64 system_identifier;
/*
* Start and end point of last record read. EndRecPtr is also used as the
* position to read next. Calling XLogBeginRead() sets EndRecPtr to the
* starting position and ReadRecPtr to invalid.
*
* Start and end point of last record returned by XLogReadRecord(). These
* are also available as record->lsn and record->next_lsn.
*/
XLogRecPtr ReadRecPtr; /* start of last record read or being read */
XLogRecPtr EndRecPtr; /* end+1 of last record read */
/* ----------------------------------------
* Communication with page reader
* readBuf is XLOG_BLCKSZ bytes, valid up to at least reqLen bytes.
* ----------------------------------------
*/
/* variables the clients of xlogreader can examine */
XLogRecPtr readPagePtr; /* page pointer to read */
int32 reqLen; /* bytes requested to the caller */
char *readBuf; /* buffer to store data */
bool page_verified; /* is the page header on the buffer verified? */
bool record_verified;/* is the current record header verified? */
/* variables set by the client of xlogreader */
int32 readLen; /* actual bytes copied into readBuf by client,
* which should be >= reqLen. Client should
* use XLogReaderSetInputData() to set. */
/* ----------------------------------------
* Decoded representation of current record
*
* Use XLogRecGet* functions to investigate the record; these fields
* should not be accessed directly.
* ----------------------------------------
* Start and end point of the last record read and decoded by
* XLogReadRecordInternal(). NextRecPtr is also used as the position to
* decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
* the requested starting position.
*/
XLogRecPtr DecodeRecPtr; /* start of last record decoded */
XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
XLogRecPtr PrevRecPtr; /* start of previous record decoded */
/* Last record returned by XLogReadRecord(). */
DecodedXLogRecord *record;
/* ----------------------------------------
* private/internal state
* ----------------------------------------
*/
/*
* Buffer for decoded records. This is a circular buffer, though
* individual records can't be split in the middle, so some space is often
* wasted at the end. Oversized records that don't fit in this space are
* allocated separately.
*/
char *decode_buffer;
size_t decode_buffer_size;
bool free_decode_buffer; /* need to free? */
char *decode_buffer_head; /* write head */
char *decode_buffer_tail; /* read head */
/*
* Queue of records that have been decoded. This is a linked list that
* usually consists of consecutive records in decode_buffer, but may also
* contain oversized records allocated with palloc().
*/
DecodedXLogRecord *decode_queue_head; /* newest decoded record */
DecodedXLogRecord *decode_queue_tail; /* oldest decoded record */
/* last read XLOG position for data currently in readBuf */
WALSegmentContext segcxt;
WALOpenSegment seg;
uint32 segoff;
/*
* beginning of prior page read, and its TLI. Doesn't necessarily
* correspond to what's in readBuf; used for timeline sanity checks.
*/
XLogRecPtr latestPagePtr;
TimeLineID latestPageTLI;
/* timeline to read it from, 0 if a lookup is required */
TimeLineID currTLI;
/*
* Safe point to read to in currTLI if current TLI is historical
* (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
*
* Actually set to the start of the segment containing the timeline switch
* that ends currTLI's validity, not the LSN of the switch its self, since
* we can't assume the old segment will be present.
*/
XLogRecPtr currTLIValidUntil;
/*
* If currTLI is not the most recent known timeline, the next timeline to
* read from when currTLIValidUntil is reached.
*/
TimeLineID nextTLI;
/*
* Buffer for current ReadRecord result (expandable), used when a record
* crosses a page boundary.
*/
char *readRecordBuf;
uint32 readRecordBufSize;
/*
* XLogReadRecordInternal() state
*/
XLogReadRecordState readRecordState; /* state machine state */
int recordGotLen; /* amount of current record that has already
* been read */
int recordRemainLen; /* length of current record that remains */
XLogRecPtr recordContRecPtr; /* where the current record continues */
DecodedXLogRecord *decoding; /* record currently being decoded */
/* Buffer to hold error message */
char *errormsg_buf;
bool errormsg_deferred;
};
struct XLogFindNextRecordState
{
XLogReaderState *reader_state;
XLogRecPtr targetRecPtr;
XLogRecPtr currRecPtr;
};
/* Report that data is available for decoding. */
static inline void
XLogReaderSetInputData(XLogReaderState *state, int32 len)
{
state->readLen = len;
}
/* Get a new XLogReader */
extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
const char *waldir,
WALSegmentCleanupCB cleanup_cb);
/* Free an XLogReader */
extern void XLogReaderFree(XLogReaderState *state);
/* Optionally provide a circular decoding buffer to allow readahead. */
extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
void *buffer,
size_t size);
/* Position the XLogReader to given record */
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
#ifdef FRONTEND
extern XLogFindNextRecordState *InitXLogFindNextRecord(XLogReaderState *reader_state, XLogRecPtr start_ptr);
extern bool XLogFindNextRecord(XLogFindNextRecordState *state);
#endif /* FRONTEND */
/* Read the next record's header. Returns NULL on end-of-WAL or failure. */
extern XLogReadRecordResult XLogReadRecord(XLogReaderState *state,
XLogRecord **record,
char **errormsg);
/* Read the next decoded record. Returns NULL on end-of-WAL or failure. */
extern XLogReadRecordResult XLogNextRecord(XLogReaderState *state,
DecodedXLogRecord **record,
char **errormsg);
/* Try to read ahead, if there is space in the decoding buffer. */
extern XLogReadRecordResult XLogReadAhead(XLogReaderState *state,
DecodedXLogRecord **record,
char **errormsg);
/* Validate a page */
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
XLogRecPtr recptr, char *phdr);
/*
* Error information from WALRead that both backend and frontend caller can
* process. Currently only errors from pg_pread can be reported.
*/
typedef struct WALReadError
{
int wre_errno; /* errno set by the last pg_pread() */
int wre_off; /* Offset we tried to read from. */
int wre_req; /* Bytes requested to be read. */
int wre_read; /* Bytes read by the last read(). */
WALOpenSegment wre_seg; /* Segment we tried to read from. */
} WALReadError;
extern bool WALRead(XLogReaderState *state,
WALSegmentOpenCB segopenfn, WALSegmentCloseCB sgclosefn,
char *buf, XLogRecPtr startptr, Size count,
TimeLineID tli, WALReadError *errinfo);
/* Functions for decoding an XLogRecord */
extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
extern bool DecodeXLogRecord(XLogReaderState *state,
DecodedXLogRecord *decoded,
XLogRecord *record,
XLogRecPtr lsn,
char **errmsg);
#define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
#define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
#define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
#define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
#define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
#define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
#define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
#define XLogRecGetData(decoder) ((decoder)->record->main_data)
#define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
#define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
#define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
#define XLogRecHasBlockRef(decoder, block_id) \
((decoder)->record->max_block_id >= (block_id)) && \
((decoder)->record->blocks[block_id].in_use)
#define XLogRecHasBlockImage(decoder, block_id) \
((decoder)->record->blocks[block_id].has_image)
#define XLogRecBlockImageApply(decoder, block_id) \
((decoder)->record->blocks[block_id].apply_image)
#ifndef FRONTEND
extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
#endif
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
RelFileNode *rnode, ForkNumber *forknum,
BlockNumber *blknum);
extern bool XLogRecGetRecentBuffer(XLogReaderState *record, uint8 block_id,
RelFileNode *rnode, ForkNumber *forknum,
BlockNumber *blknum, Buffer *recent_buffer);
#endif /* XLOGREADER_H */