Avoid WAL-logging individual tuple insertions during CREATE TABLE AS

(a/k/a SELECT INTO).  Instead, flush and fsync the whole relation before
committing.  We do still need the WAL log when PITR is active, however.
Simon Riggs and Tom Lane.
REL8_1_STABLE
Tom Lane 21 years ago
parent 1bfdd1a893
commit b95ae32b41
  1. 36
      src/backend/access/heap/heapam.c
  2. 27
      src/backend/access/heap/hio.c
  3. 41
      src/backend/executor/execMain.c
  4. 4
      src/backend/executor/execUtils.c
  5. 5
      src/backend/storage/smgr/md.c
  6. 9
      src/backend/storage/smgr/smgr.c
  7. 5
      src/include/access/heapam.h
  8. 4
      src/include/access/hio.h
  9. 4
      src/include/nodes/execnodes.h

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.195 2005/06/20 18:37:01 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -1034,9 +1034,20 @@ heap_get_latest_tid(Relation relation,
*
* The new tuple is stamped with current transaction ID and the specified
* command ID.
*
* If use_wal is false, the new tuple is not logged in WAL, even for a
* non-temp relation. Safe usage of this behavior requires that we arrange
* that all new tuples go into new pages not containing any tuples from other
* transactions, that the relation gets fsync'd before commit, and that the
* transaction emits at least one WAL record to ensure RecordTransactionCommit
* will decide to WAL-log the commit.
*
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
* more info.
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid)
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm)
{
TransactionId xid = GetCurrentTransactionId();
Buffer buffer;
@ -1086,7 +1097,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
heap_tuple_toast_attrs(relation, tup, NULL);
/* Find buffer to insert this tuple into */
buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
buffer = RelationGetBufferForTuple(relation, tup->t_len,
InvalidBuffer, use_fsm);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@ -1096,7 +1108,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
pgstat_count_heap_insert(&relation->pgstat_info);
/* XLOG stuff */
if (!relation->rd_istemp)
if (relation->rd_istemp)
{
/* No XLOG record, but still need to flag that XID exists on disk */
MyXactMadeTempRelUpdate = true;
}
else if (use_wal)
{
xl_heap_insert xlrec;
xl_heap_header xlhdr;
@ -1151,11 +1168,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
else
{
/* No XLOG record, but still need to flag that XID exists on disk */
MyXactMadeTempRelUpdate = true;
}
END_CRIT_SECTION();
@ -1183,7 +1195,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
return heap_insert(relation, tup, GetCurrentCommandId());
return heap_insert(relation, tup, GetCurrentCommandId(), true, true);
}
/*
@ -1743,7 +1755,7 @@ l2:
{
/* Assume there's no chance to put newtup on same page. */
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
buffer);
buffer, true);
}
else
{
@ -1760,7 +1772,7 @@ l2:
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
buffer);
buffer, true);
}
else
{

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.57 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -79,12 +79,26 @@ RelationPutHeapTuple(Relation relation,
* happen if space is freed in that page after heap_update finds there's not
* enough there). In that case, the page will be pinned and locked only once.
*
* If use_fsm is true (the normal case), we use FSM to help us find free
* space. If use_fsm is false, we always append a new empty page to the
* end of the relation if the tuple won't fit on the current target page.
* This can save some cycles when we know the relation is new and doesn't
* contain useful amounts of free space.
*
* The use_fsm = false case is also useful for non-WAL-logged additions to a
* relation, if the caller holds exclusive lock and is careful to invalidate
* relation->rd_targblock before the first insertion --- that ensures that
* all insertions will occur into newly added pages and not be intermixed
* with tuples from other transactions. That way, a crash can't risk losing
* any committed data of other transactions. (See heap_insert's comments
* for additional constraints needed for safe usage of this behavior.)
*
* ereport(ERROR) is allowed here, so this routine *must* be called
* before any (unlogged) changes are made in buffer pool.
*/
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer)
Buffer otherBuffer, bool use_fsm)
{
Buffer buffer = InvalidBuffer;
Page pageHeader;
@ -121,11 +135,14 @@ RelationGetBufferForTuple(Relation relation, Size len,
* on each page that proves not to be suitable.) If the FSM has no
* record of a page with enough free space, we give up and extend the
* relation.
*
* When use_fsm is false, we either put the tuple onto the existing
* target page or extend the relation.
*/
targetBlock = relation->rd_targblock;
if (targetBlock == InvalidBlockNumber)
if (targetBlock == InvalidBlockNumber && use_fsm)
{
/*
* We have no cached target page, so ask the FSM for an initial
@ -209,6 +226,10 @@ RelationGetBufferForTuple(Relation relation, Size len,
ReleaseBuffer(buffer);
}
/* Without FSM, always fall out of the loop and extend */
if (!use_fsm)
break;
/*
* Update FSM as to condition of this page, and ask for another
* page to try.

@ -26,13 +26,14 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.249 2005/05/22 22:30:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.250 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "commands/tablecmds.h"
@ -44,6 +45,7 @@
#include "optimizer/clauses.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
@ -784,6 +786,20 @@ InitPlan(QueryDesc *queryDesc, bool explainOnly)
* And open the constructed table for writing.
*/
intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
/* use_wal off requires rd_targblock be initially invalid */
Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
/*
* We can skip WAL-logging the insertions, unless PITR is in use.
*
* Note that for a non-temp INTO table, this is safe only because
* we know that the catalog changes above will have been WAL-logged,
* and so RecordTransactionCommit will think it needs to WAL-log the
* eventual transaction commit. Else the commit might be lost, even
* though all the data is safely fsync'd ...
*/
estate->es_into_relation_use_wal = XLogArchivingActive();
}
estate->es_into_relation_descriptor = intoRelationDesc;
@ -979,7 +995,22 @@ ExecEndPlan(PlanState *planstate, EState *estate)
* close the "into" relation if necessary, again keeping lock
*/
if (estate->es_into_relation_descriptor != NULL)
{
/*
* If we skipped using WAL, and it's not a temp relation,
* we must force the relation down to disk before it's
* safe to commit the transaction. This requires forcing
* out any dirty buffers and then doing a forced fsync.
*/
if (!estate->es_into_relation_use_wal &&
!estate->es_into_relation_descriptor->rd_istemp)
{
FlushRelationBuffers(estate->es_into_relation_descriptor);
smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
}
heap_close(estate->es_into_relation_descriptor, NoLock);
}
/*
* close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
@ -1307,7 +1338,9 @@ ExecSelect(TupleTableSlot *slot,
tuple = ExecCopySlotTuple(slot);
heap_insert(estate->es_into_relation_descriptor, tuple,
estate->es_snapshot->curcid);
estate->es_snapshot->curcid,
estate->es_into_relation_use_wal,
false); /* never any point in using FSM */
/* we know there are no indexes to update */
heap_freetuple(tuple);
IncrAppended();
@ -1386,7 +1419,8 @@ ExecInsert(TupleTableSlot *slot,
* insert the tuple
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_snapshot->curcid);
estate->es_snapshot->curcid,
true, true);
IncrAppended();
(estate->es_processed)++;
@ -2089,6 +2123,7 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
epqstate->es_result_relation_info = estate->es_result_relation_info;
epqstate->es_junkFilter = estate->es_junkFilter;
epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
epqstate->es_param_list_info = estate->es_param_list_info;
if (estate->es_topPlan->nParamExec > 0)
epqstate->es_param_exec_vals = (ParamExecData *)

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.123 2005/04/28 21:47:12 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.124 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -186,7 +186,9 @@ CreateExecutorState(void)
estate->es_result_relation_info = NULL;
estate->es_junkFilter = NULL;
estate->es_into_relation_descriptor = NULL;
estate->es_into_relation_use_wal = false;
estate->es_param_list_info = NULL;
estate->es_param_exec_vals = NULL;

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.115 2005/05/29 04:23:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.116 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -660,6 +660,9 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
/*
* mdimmedsync() -- Immediately sync a relation to stable storage.
*
* Note that only writes already issued are synced; this routine knows
* nothing of dirty buffers that may exist inside the buffer manager.
*/
bool
mdimmedsync(SMgrRelation reln)

@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.91 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -650,7 +650,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
/*
* smgrimmedsync() -- Force the specified relation to stable storage.
*
* Synchronously force all of the specified relation down to disk.
* Synchronously force all previous writes to the specified relation
* down to disk.
*
* This is useful for building completely new relations (eg, new
* indexes). Instead of incrementally WAL-logging the index build
@ -664,6 +665,10 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
*
* The preceding writes should specify isTemp = true to avoid
* duplicative fsyncs.
*
* Note that you need to do FlushRelationBuffers() first if there is
* any possibility that there are dirty buffers for the relation;
* otherwise the sync is not very meaningful.
*/
void
smgrimmedsync(SMgrRelation reln)

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.101 2005/06/06 17:01:24 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.102 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -156,7 +156,8 @@ extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid);
extern void setLastTid(const ItemPointer tid);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid,
CommandId cid, Snapshot crosscheck, bool wait);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup,

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.27 2004/12/31 22:03:21 pgsql Exp $
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.28 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,6 +19,6 @@
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
HeapTuple tuple);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer);
Buffer otherBuffer, bool use_fsm);
#endif /* HIO_H */

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.134 2005/06/15 07:27:44 neilc Exp $
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.135 2005/06/20 18:37:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -304,7 +304,9 @@ typedef struct EState
ResultRelInfo *es_result_relation_info; /* currently active array
* elt */
JunkFilter *es_junkFilter; /* currently active junk filter */
Relation es_into_relation_descriptor; /* for SELECT INTO */
bool es_into_relation_use_wal;
/* Parameter info: */
ParamListInfo es_param_list_info; /* values of external params */

Loading…
Cancel
Save