mirror of https://github.com/postgres/postgres
Add a TABLESAMPLE clause to SELECT statements that allows the user to specify random BERNOULLI sampling or block-level SYSTEM sampling. The implementation allows extensible sampling functions to be written using a standard API. The basic version follows the SQL Standard exactly. Usable concrete use cases for the sampling API follow in later commits. Petr Jelinek. Reviewed by Michael Paquier and Simon Riggs. pull/14/head
parent
11a83bbedd
commit
f6d208d6e5
@ -0,0 +1,17 @@ |
||||
#-------------------------------------------------------------------------
#
# Makefile--
#    Makefile for access/tablesample
#
# Builds the objects listed in OBJS using the shared rules pulled in from
# src/backend/common.mk.
#
# IDENTIFICATION
#    src/backend/access/tablesample/Makefile
#
#-------------------------------------------------------------------------

# Location of this directory relative to the top of the tree; must agree
# with top_builddir below.
subdir = src/backend/access/tablesample
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global

# Generic TABLESAMPLE API plus the two built-in sampling methods.
OBJS = tablesample.o system.o bernoulli.o

include $(top_srcdir)/src/backend/common.mk
@ -0,0 +1,235 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* bernoulli.c |
||||
* interface routines for BERNOULLI tablesample method |
||||
* |
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
 *	  src/backend/access/tablesample/bernoulli.c
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "fmgr.h" |
||||
|
||||
#include "access/tablesample.h" |
||||
#include "access/relscan.h" |
||||
#include "nodes/execnodes.h" |
||||
#include "nodes/relation.h" |
||||
#include "optimizer/clauses.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "utils/sampling.h" |
||||
|
||||
|
||||
/*
 * Private state of the BERNOULLI sampling method.
 *
 * Allocated by tsm_bernoulli_init() and stored in TableSampleDesc.tsmdata.
 */
typedef struct
{
	uint32		seed;			/* random seed */
	BlockNumber startblock;		/* starting block, we use this for syncscan support */
	BlockNumber nblocks;		/* number of blocks */
	BlockNumber blockno;		/* current block */
	float4		probability;	/* probability that tuple will be returned (0.0-1.0) */
	OffsetNumber lt;			/* last tuple returned from current block */
	SamplerRandomState randstate;	/* random generator state */
} BernoulliSamplerData;
||||
|
||||
/*
|
||||
* Initialize the state. |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_init(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
uint32 seed = PG_GETARG_UINT32(1); |
||||
float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); |
||||
HeapScanDesc scan = tsdesc->heapScan; |
||||
BernoulliSamplerData *sampler; |
||||
|
||||
if (percent < 0 || percent > 100) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
||||
errmsg("invalid sample size"), |
||||
errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); |
||||
|
||||
sampler = palloc0(sizeof(BernoulliSamplerData)); |
||||
|
||||
/* Remember initial values for reinit */ |
||||
sampler->seed = seed; |
||||
sampler->startblock = scan->rs_startblock; |
||||
sampler->nblocks = scan->rs_nblocks; |
||||
sampler->blockno = InvalidBlockNumber; |
||||
sampler->probability = percent / 100; |
||||
sampler->lt = InvalidOffsetNumber; |
||||
sampler_random_init_state(sampler->seed, sampler->randstate); |
||||
|
||||
tsdesc->tsmdata = (void *) sampler; |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Get next block number to read or InvalidBlockNumber if we are at the |
||||
* end of the relation. |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_nextblock(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
BernoulliSamplerData *sampler = |
||||
(BernoulliSamplerData *) tsdesc->tsmdata; |
||||
|
||||
/*
|
||||
* Bernoulli sampling scans all blocks on the table and supports |
||||
* syncscan so loop from startblock to startblock instead of |
||||
* from 0 to nblocks. |
||||
*/ |
||||
if (sampler->blockno == InvalidBlockNumber) |
||||
sampler->blockno = sampler->startblock; |
||||
else |
||||
{ |
||||
sampler->blockno++; |
||||
|
||||
if (sampler->blockno >= sampler->nblocks) |
||||
sampler->blockno = 0; |
||||
|
||||
if (sampler->blockno == sampler->startblock) |
||||
PG_RETURN_UINT32(InvalidBlockNumber); |
||||
} |
||||
|
||||
PG_RETURN_UINT32(sampler->blockno); |
||||
} |
||||
|
||||
/*
|
||||
* Get next tuple from current block. |
||||
* |
||||
* This method implements the main logic in bernoulli sampling. |
||||
* The algorithm simply generates new random number (in 0.0-1.0 range) and if |
||||
* it falls within user specified probability (in the same range) return the |
||||
* tuple offset. |
||||
* |
||||
* It is ok here to return tuple offset without knowing if tuple is visible |
||||
* and not check it via examinetuple. The reason for that is that we do the |
||||
* coinflip (random number generation) for every tuple in the table. Since all |
||||
* tuples have same probability of being returned the visible and invisible |
||||
* tuples will be returned in same ratio as they have in the actual table. |
||||
* This means that there is no skew towards either visible or invisible tuples |
||||
* and the number returned visible tuples to from the executor node is the |
||||
* fraction of visible tuples which was specified in input. |
||||
* |
||||
* This is faster than doing the coinflip in the examinetuple because we don't |
||||
* have to do visibility checks on uninteresting tuples. |
||||
* |
||||
* If we reach end of the block return InvalidOffsetNumber which tells |
||||
* SampleScan to go to next block. |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
OffsetNumber maxoffset = PG_GETARG_UINT16(2); |
||||
BernoulliSamplerData *sampler = |
||||
(BernoulliSamplerData *) tsdesc->tsmdata; |
||||
OffsetNumber tupoffset = sampler->lt; |
||||
float4 probability = sampler->probability; |
||||
|
||||
if (tupoffset == InvalidOffsetNumber) |
||||
tupoffset = FirstOffsetNumber; |
||||
else |
||||
tupoffset++; |
||||
|
||||
/*
|
||||
* Loop over tuple offsets until the random generator returns value that |
||||
* is within the probability of returning the tuple or until we reach |
||||
* end of the block. |
||||
* |
||||
* (This is our implementation of bernoulli trial) |
||||
*/ |
||||
while (sampler_random_fract(sampler->randstate) > probability) |
||||
{ |
||||
tupoffset++; |
||||
|
||||
if (tupoffset > maxoffset) |
||||
break; |
||||
} |
||||
|
||||
if (tupoffset > maxoffset) |
||||
/* Tell SampleScan that we want next block. */ |
||||
tupoffset = InvalidOffsetNumber; |
||||
|
||||
sampler->lt = tupoffset; |
||||
|
||||
PG_RETURN_UINT16(tupoffset); |
||||
} |
||||
|
||||
/*
|
||||
* Cleanup method. |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_end(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
|
||||
pfree(tsdesc->tsmdata); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Reset tsdesc (called by ReScan). |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_reset(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
BernoulliSamplerData *sampler = |
||||
(BernoulliSamplerData *) tsdesc->tsmdata; |
||||
|
||||
sampler->blockno = InvalidBlockNumber; |
||||
sampler->lt = InvalidOffsetNumber; |
||||
sampler_random_init_state(sampler->seed, sampler->randstate); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Costing function. |
||||
*/ |
||||
Datum |
||||
tsm_bernoulli_cost(PG_FUNCTION_ARGS) |
||||
{ |
||||
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); |
||||
Path *path = (Path *) PG_GETARG_POINTER(1); |
||||
RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); |
||||
List *args = (List *) PG_GETARG_POINTER(3); |
||||
BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); |
||||
double *tuples = (double *) PG_GETARG_POINTER(5); |
||||
Node *pctnode; |
||||
float4 samplesize; |
||||
|
||||
*pages = baserel->pages; |
||||
|
||||
pctnode = linitial(args); |
||||
pctnode = estimate_expression_value(root, pctnode); |
||||
|
||||
if (IsA(pctnode, RelabelType)) |
||||
pctnode = (Node *) ((RelabelType *) pctnode)->arg; |
||||
|
||||
if (IsA(pctnode, Const)) |
||||
{ |
||||
samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); |
||||
samplesize /= 100.0; |
||||
} |
||||
else |
||||
{ |
||||
/* Default samplesize if the estimation didn't return Const. */ |
||||
samplesize = 0.1f; |
||||
} |
||||
|
||||
*tuples = path->rows * samplesize; |
||||
path->rows = *tuples; |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
@ -0,0 +1,186 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* system.c |
||||
* interface routines for system tablesample method |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
 *	  src/backend/access/tablesample/system.c
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "fmgr.h" |
||||
|
||||
#include "access/tablesample.h" |
||||
#include "access/relscan.h" |
||||
#include "nodes/execnodes.h" |
||||
#include "nodes/relation.h" |
||||
#include "optimizer/clauses.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "utils/sampling.h" |
||||
|
||||
|
||||
/*
 * Private state of the SYSTEM sampling method.
 *
 * Allocated by tsm_system_init() and stored in TableSampleDesc.tsmdata.
 */
typedef struct
{
	BlockSamplerData bs;		/* block sampler used to pick the blocks */
	uint32		seed;			/* random seed */
	BlockNumber nblocks;		/* number of blocks in relation */
	int			samplesize;		/* number of blocks to return */
	OffsetNumber lt;			/* last tuple returned from current block */
} SystemSamplerData;
||||
|
||||
|
||||
/*
|
||||
* Initializes the state. |
||||
*/ |
||||
Datum |
||||
tsm_system_init(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
uint32 seed = PG_GETARG_UINT32(1); |
||||
float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); |
||||
HeapScanDesc scan = tsdesc->heapScan; |
||||
SystemSamplerData *sampler; |
||||
|
||||
if (percent < 0 || percent > 100) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
||||
errmsg("invalid sample size"), |
||||
errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); |
||||
|
||||
sampler = palloc0(sizeof(SystemSamplerData)); |
||||
|
||||
/* Remember initial values for reinit */ |
||||
sampler->seed = seed; |
||||
sampler->nblocks = scan->rs_nblocks; |
||||
sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0)); |
||||
sampler->lt = InvalidOffsetNumber; |
||||
|
||||
BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, |
||||
sampler->seed); |
||||
|
||||
tsdesc->tsmdata = (void *) sampler; |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Get next block number or InvalidBlockNumber when we're done. |
||||
* |
||||
* Uses the same logic as ANALYZE for picking the random blocks. |
||||
*/ |
||||
Datum |
||||
tsm_system_nextblock(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||
BlockNumber blockno; |
||||
|
||||
if (!BlockSampler_HasMore(&sampler->bs)) |
||||
PG_RETURN_UINT32(InvalidBlockNumber); |
||||
|
||||
blockno = BlockSampler_Next(&sampler->bs); |
||||
|
||||
PG_RETURN_UINT32(blockno); |
||||
} |
||||
|
||||
/*
|
||||
* Get next tuple offset in current block or InvalidOffsetNumber if we are done |
||||
* with this block. |
||||
*/ |
||||
Datum |
||||
tsm_system_nexttuple(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
OffsetNumber maxoffset = PG_GETARG_UINT16(2); |
||||
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||
OffsetNumber tupoffset = sampler->lt; |
||||
|
||||
if (tupoffset == InvalidOffsetNumber) |
||||
tupoffset = FirstOffsetNumber; |
||||
else |
||||
tupoffset++; |
||||
|
||||
if (tupoffset > maxoffset) |
||||
tupoffset = InvalidOffsetNumber; |
||||
|
||||
sampler->lt = tupoffset; |
||||
|
||||
PG_RETURN_UINT16(tupoffset); |
||||
} |
||||
|
||||
/*
|
||||
* Cleanup method. |
||||
*/ |
||||
Datum |
||||
tsm_system_end(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
|
||||
pfree(tsdesc->tsmdata); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Reset state (called by ReScan). |
||||
*/ |
||||
Datum |
||||
tsm_system_reset(PG_FUNCTION_ARGS) |
||||
{ |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||
|
||||
sampler->lt = InvalidOffsetNumber; |
||||
BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, |
||||
sampler->seed); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Costing function. |
||||
*/ |
||||
Datum |
||||
tsm_system_cost(PG_FUNCTION_ARGS) |
||||
{ |
||||
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); |
||||
Path *path = (Path *) PG_GETARG_POINTER(1); |
||||
RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); |
||||
List *args = (List *) PG_GETARG_POINTER(3); |
||||
BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); |
||||
double *tuples = (double *) PG_GETARG_POINTER(5); |
||||
Node *pctnode; |
||||
float4 samplesize; |
||||
|
||||
pctnode = linitial(args); |
||||
pctnode = estimate_expression_value(root, pctnode); |
||||
|
||||
if (IsA(pctnode, RelabelType)) |
||||
pctnode = (Node *) ((RelabelType *) pctnode)->arg; |
||||
|
||||
if (IsA(pctnode, Const)) |
||||
{ |
||||
samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); |
||||
samplesize /= 100.0; |
||||
} |
||||
else |
||||
{ |
||||
/* Default samplesize if the estimation didn't return Const. */ |
||||
samplesize = 0.1f; |
||||
} |
||||
|
||||
*pages = baserel->pages * samplesize; |
||||
*tuples = path->rows * samplesize; |
||||
path->rows = *tuples; |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
@ -0,0 +1,368 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* tablesample.c |
||||
* TABLESAMPLE internal API |
||||
* |
||||
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/access/tablesample/tablesample.c |
||||
* |
||||
* TABLESAMPLE is the SQL standard clause for sampling the relations. |
||||
* |
||||
* The API is interface between the Executor and the TABLESAMPLE Methods. |
||||
* |
||||
* TABLESAMPLE Methods are implementations of actual sampling algorithms which |
||||
* can be used for returning a sample of the source relation. |
||||
* Methods don't read the table directly but are asked for block number and |
||||
* tuple offset which they want to examine (or return) and the tablesample |
||||
* interface implemented here does the reading for them. |
||||
* |
||||
* We currently only support sampling of the physical relations, but in the |
||||
* future we might extend the API to support subqueries as well. |
||||
* |
||||
* ------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "access/tablesample.h" |
||||
|
||||
#include "catalog/pg_tablesample_method.h" |
||||
#include "miscadmin.h" |
||||
#include "pgstat.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "storage/predicate.h" |
||||
#include "utils/rel.h" |
||||
#include "utils/tqual.h" |
||||
|
||||
|
||||
static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan); |
||||
|
||||
|
||||
/*
|
||||
* Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method. |
||||
*/ |
||||
TableSampleDesc * |
||||
tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample) |
||||
{ |
||||
FunctionCallInfoData fcinfo; |
||||
int i; |
||||
List *args = tablesample->args; |
||||
ListCell *arg; |
||||
ExprContext *econtext = scanstate->ss.ps.ps_ExprContext; |
||||
TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc)); |
||||
|
||||
/* Load functions */ |
||||
fmgr_info(tablesample->tsminit, &(tsdesc->tsminit)); |
||||
fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock)); |
||||
fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple)); |
||||
if (OidIsValid(tablesample->tsmexaminetuple)) |
||||
fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple)); |
||||
else |
||||
tsdesc->tsmexaminetuple.fn_oid = InvalidOid; |
||||
fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset)); |
||||
fmgr_info(tablesample->tsmend, &(tsdesc->tsmend)); |
||||
|
||||
InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit, |
||||
list_length(args) + 2, |
||||
InvalidOid, NULL, NULL); |
||||
|
||||
tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor; |
||||
tsdesc->heapScan = scanstate->ss.ss_currentScanDesc; |
||||
|
||||
/* First argument for init function is always TableSampleDesc */ |
||||
fcinfo.arg[0] = PointerGetDatum(tsdesc); |
||||
fcinfo.argnull[0] = false; |
||||
|
||||
/*
|
||||
* Second arg for init function is always REPEATABLE |
||||
* When tablesample->repeatable is NULL then REPEATABLE clause was not |
||||
* specified. |
||||
* When specified, the expression cannot evaluate to NULL. |
||||
*/ |
||||
if (tablesample->repeatable) |
||||
{ |
||||
ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable, |
||||
(PlanState *) scanstate); |
||||
fcinfo.arg[1] = ExecEvalExpr(argstate, econtext, |
||||
&fcinfo.argnull[1], NULL); |
||||
if (fcinfo.argnull[1]) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), |
||||
errmsg("REPEATABLE clause must be NOT NULL numeric value"))); |
||||
} |
||||
else |
||||
{ |
||||
fcinfo.arg[1] = UInt32GetDatum(random()); |
||||
fcinfo.argnull[1] = false; |
||||
} |
||||
|
||||
/* Rest of the arguments come from user. */ |
||||
i = 2; |
||||
foreach(arg, args) |
||||
{ |
||||
Expr *argexpr = (Expr *) lfirst(arg); |
||||
ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate); |
||||
|
||||
if (argstate == NULL) |
||||
{ |
||||
fcinfo.argnull[i] = true; |
||||
fcinfo.arg[i] = (Datum) 0;; |
||||
} |
||||
|
||||
fcinfo.arg[i] = ExecEvalExpr(argstate, econtext, |
||||
&fcinfo.argnull[i], NULL); |
||||
i++; |
||||
} |
||||
Assert(i == fcinfo.nargs); |
||||
|
||||
(void) FunctionCallInvoke(&fcinfo); |
||||
|
||||
return tsdesc; |
||||
} |
||||
|
||||
/*
 * Get next tuple from TABLESAMPLE Method.
 *
 * Drives the sampling method: the method's nextblock function chooses the
 * block to read and its nexttuple function chooses the offsets to look at
 * within that block; this routine reads the page, checks visibility and, if
 * the method supplies an examinetuple function, lets it make the final
 * decision.
 *
 * Returns a pointer to scan->rs_ctup describing the selected tuple, or NULL
 * when the relation is empty or the method reports no further blocks.  On
 * the end-of-scan path inside the loop the current buffer is released and
 * rs_inited is cleared.
 *
 * When the scan is not in page-at-a-time mode the buffer is share-locked
 * while tuples on it are examined, and unlocked before a tuple is returned
 * or the next block is requested.
 */
HeapTuple
tablesample_getnext(TableSampleDesc *desc)
{
	HeapScanDesc scan = desc->heapScan;
	HeapTuple	tuple = &(scan->rs_ctup);
	bool		pagemode = scan->rs_pageatatime;
	BlockNumber blockno;
	Page		page;
	bool		page_all_visible;
	ItemId		itemid;
	OffsetNumber tupoffset,
				maxoffset;

	if (!scan->rs_inited)
	{
		/*
		 * return null immediately if relation is empty
		 */
		if (scan->rs_nblocks == 0)
		{
			Assert(!BufferIsValid(scan->rs_cbuf));
			tuple->t_data = NULL;
			return NULL;
		}

		/*
		 * Ask the method for the first block.
		 *
		 * NOTE(review): the built-in nextblock functions return their result
		 * with PG_RETURN_UINT32 while it is fetched with DatumGetInt32 here;
		 * the value is converted back when assigned to the unsigned
		 * BlockNumber -- confirm this is intended.
		 */
		blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
											  PointerGetDatum(desc)));
		if (!BlockNumberIsValid(blockno))
		{
			tuple->t_data = NULL;
			return NULL;
		}

		heapgetpage(scan, blockno);
		scan->rs_inited = true;
	}
	else
	{
		/* continue from previously returned page/tuple */
		blockno = scan->rs_cblock;		/* current page */
	}

	/*
	 * When pagemode is disabled, the scan will do visibility checks for each
	 * tuple it finds so the buffer needs to be locked.
	 */
	if (!pagemode)
		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

	page = (Page) BufferGetPage(scan->rs_cbuf);
	page_all_visible = PageIsAllVisible(page);
	maxoffset = PageGetMaxOffsetNumber(page);

	/*
	 * Each iteration asks the method for one tuple offset on the current
	 * page; an invalid offset means the page is exhausted and the next block
	 * is fetched at the bottom of the loop.
	 */
	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
												 PointerGetDatum(desc),
												 UInt32GetDatum(blockno),
												 UInt16GetDatum(maxoffset)));

		if (OffsetNumberIsValid(tupoffset))
		{
			bool		visible;
			bool		found;

			/* Skip invalid tuple pointers. */
			itemid = PageGetItemId(page, tupoffset);
			if (!ItemIdIsNormal(itemid))
				continue;

			tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
			tuple->t_len = ItemIdGetLength(itemid);
			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);

			/*
			 * A page flagged all-visible lets us skip the per-tuple check.
			 *
			 * NOTE(review): this shortcut does not consult the scan's
			 * snapshot -- confirm that is safe for every snapshot type.
			 */
			if (page_all_visible)
				visible = true;
			else
				visible = SampleTupleVisible(tuple, tupoffset, scan);

			/*
			 * Let the sampling method examine the actual tuple and decide if we
			 * should return it.
			 *
			 * Note that we let it examine even invisible tuples for
			 * statistical purposes, but not return them since user should
			 * never see invisible tuples.
			 */
			if (OidIsValid(desc->tsmexaminetuple.fn_oid))
			{
				found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
												   PointerGetDatum(desc),
												   UInt32GetDatum(blockno),
												   PointerGetDatum(tuple),
												   BoolGetDatum(visible)));
				/* Should not happen if sampling method is well written. */
				if (found && !visible)
					elog(ERROR, "Sampling method wanted to return invisible tuple");
			}
			else
				found = visible;

			/* Found visible tuple, return it. */
			if (found)
			{
				/* Drop the content lock taken for the visibility checks. */
				if (!pagemode)
					LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
				break;
			}
			else
			{
				/* Try next tuple from same page. */
				continue;
			}
		}

		/* The method is done with this page: unlock it and move on. */
		if (!pagemode)
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

		blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
											  PointerGetDatum(desc)));

		/*
		 * Report our new scan position for synchronization purposes.
		 *
		 * Note: we do this before checking for end of scan so that the
		 * final state of the position hint is back at the start of the
		 * rel.  That's not strictly necessary, but otherwise when you run
		 * the same query multiple times the starting position would shift
		 * a little bit backwards on every invocation, which is confusing.
		 * We don't guarantee any specific ordering in general, though.
		 */
		if (scan->rs_syncscan)
			ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
							   blockno : scan->rs_startblock);

		/*
		 * Reached end of scan.
		 */
		if (!BlockNumberIsValid(blockno))
		{
			if (BufferIsValid(scan->rs_cbuf))
				ReleaseBuffer(scan->rs_cbuf);
			scan->rs_cbuf = InvalidBuffer;
			scan->rs_cblock = InvalidBlockNumber;
			tuple->t_data = NULL;
			scan->rs_inited = false;
			return NULL;
		}

		/* Read the next page and refresh the per-page values. */
		heapgetpage(scan, blockno);

		if (!pagemode)
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

		page = (Page) BufferGetPage(scan->rs_cbuf);
		page_all_visible = PageIsAllVisible(page);
		maxoffset = PageGetMaxOffsetNumber(page);
	}

	/* Count the tuple we are about to return in the relation's statistics. */
	pgstat_count_heap_getnext(scan->rs_rd);

	return &(scan->rs_ctup);
}
||||
|
||||
/*
|
||||
* Reset the sampling to starting state |
||||
*/ |
||||
void |
||||
tablesample_reset(TableSampleDesc *desc) |
||||
{ |
||||
(void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc)); |
||||
} |
||||
|
||||
/*
|
||||
* Signal the sampling method that the scan has finished. |
||||
*/ |
||||
void |
||||
tablesample_end(TableSampleDesc *desc) |
||||
{ |
||||
(void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc)); |
||||
} |
||||
|
||||
/*
|
||||
* Check visibility of the tuple. |
||||
*/ |
||||
static bool |
||||
SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) |
||||
{ |
||||
/*
|
||||
* If this scan is reading whole pages at a time, there is already |
||||
* visibility info present in rs_vistuples so we can just search it |
||||
* for the tupoffset. |
||||
*/ |
||||
if (scan->rs_pageatatime) |
||||
{ |
||||
int start = 0, |
||||
end = scan->rs_ntuples - 1; |
||||
|
||||
/*
|
||||
* Do the binary search over rs_vistuples, it's already sorted by |
||||
* OffsetNumber so we don't need to do any sorting ourselves here. |
||||
* |
||||
* We could use bsearch() here but it's slower for integers because |
||||
* of the function call overhead and because it needs boiler plate code |
||||
* it would not save us anything code-wise anyway. |
||||
*/ |
||||
while (start <= end) |
||||
{ |
||||
int mid = start + (end - start) / 2; |
||||
OffsetNumber curoffset = scan->rs_vistuples[mid]; |
||||
|
||||
if (curoffset == tupoffset) |
||||
return true; |
||||
else if (curoffset > tupoffset) |
||||
end = mid - 1; |
||||
else |
||||
start = mid + 1; |
||||
} |
||||
|
||||
return false; |
||||
} |
||||
else |
||||
{ |
||||
/* No pagemode, we have to check the tuple itself. */ |
||||
Snapshot snapshot = scan->rs_snapshot; |
||||
Buffer buffer = scan->rs_cbuf; |
||||
|
||||
bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer); |
||||
|
||||
CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer, |
||||
snapshot); |
||||
|
||||
return visible; |
||||
} |
||||
} |
@ -0,0 +1,256 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* nodeSamplescan.c |
||||
* Support routines for sample scans of relations (table sampling). |
||||
* |
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/executor/nodeSamplescan.c |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "access/tablesample.h" |
||||
#include "executor/executor.h" |
||||
#include "executor/nodeSamplescan.h" |
||||
#include "miscadmin.h" |
||||
#include "parser/parsetree.h" |
||||
#include "pgstat.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "storage/predicate.h" |
||||
#include "utils/rel.h" |
||||
#include "utils/syscache.h" |
||||
#include "utils/tqual.h" |
||||
|
||||
static void InitScanRelation(SampleScanState *node, EState *estate, |
||||
int eflags, TableSampleClause *tablesample); |
||||
static TupleTableSlot *SampleNext(SampleScanState *node); |
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Scan Support |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* SampleNext |
||||
* |
||||
* This is a workhorse for ExecSampleScan |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
static TupleTableSlot * |
||||
SampleNext(SampleScanState *node) |
||||
{ |
||||
TupleTableSlot *slot; |
||||
TableSampleDesc *tsdesc; |
||||
HeapTuple tuple; |
||||
|
||||
/*
|
||||
* get information from the scan state |
||||
*/ |
||||
slot = node->ss.ss_ScanTupleSlot; |
||||
tsdesc = node->tsdesc; |
||||
|
||||
tuple = tablesample_getnext(tsdesc); |
||||
|
||||
if (tuple) |
||||
ExecStoreTuple(tuple, /* tuple to store */ |
||||
slot, /* slot to store in */ |
||||
tsdesc->heapScan->rs_cbuf, /* buffer associated with this tuple */ |
||||
false); /* don't pfree this pointer */ |
||||
else |
||||
ExecClearTuple(slot); |
||||
|
||||
return slot; |
||||
} |
||||
|
||||
/*
|
||||
* SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual |
||||
*/ |
||||
static bool |
||||
SampleRecheck(SampleScanState *node, TupleTableSlot *slot) |
||||
{ |
||||
/* No need to recheck for SampleScan */ |
||||
return true; |
||||
} |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecSampleScan(node) |
||||
* |
||||
* Scans the relation using the sampling method and returns |
||||
* the next qualifying tuple. |
||||
* We call the ExecScan() routine and pass it the appropriate |
||||
* access method functions. |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
TupleTableSlot * |
||||
ExecSampleScan(SampleScanState *node) |
||||
{ |
||||
return ExecScan((ScanState *) node, |
||||
(ExecScanAccessMtd) SampleNext, |
||||
(ExecScanRecheckMtd) SampleRecheck); |
||||
} |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* InitScanRelation |
||||
* |
||||
* Set up to access the scan relation. |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
static void |
||||
InitScanRelation(SampleScanState *node, EState *estate, int eflags, |
||||
TableSampleClause *tablesample) |
||||
{ |
||||
Relation currentRelation; |
||||
|
||||
/*
|
||||
* get the relation object id from the relid'th entry in the range table, |
||||
* open that relation and acquire appropriate lock on it. |
||||
*/ |
||||
currentRelation = ExecOpenScanRelation(estate, |
||||
((SampleScan *) node->ss.ps.plan)->scanrelid, |
||||
eflags); |
||||
|
||||
node->ss.ss_currentRelation = currentRelation; |
||||
|
||||
/*
|
||||
* Even though we aren't going to do a conventional seqscan, it is useful |
||||
* to create a HeapScanDesc --- many of the fields in it are usable. |
||||
*/ |
||||
node->ss.ss_currentScanDesc = |
||||
heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL, |
||||
tablesample->tsmseqscan, |
||||
tablesample->tsmpagemode); |
||||
|
||||
/* and report the scan tuple slot's rowtype */ |
||||
ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation)); |
||||
} |
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecInitSampleScan |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
SampleScanState * |
||||
ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) |
||||
{ |
||||
SampleScanState *scanstate; |
||||
RangeTblEntry *rte = rt_fetch(node->scanrelid, |
||||
estate->es_range_table); |
||||
|
||||
Assert(outerPlan(node) == NULL); |
||||
Assert(innerPlan(node) == NULL); |
||||
Assert(rte->tablesample != NULL); |
||||
|
||||
/*
|
||||
* create state structure |
||||
*/ |
||||
scanstate = makeNode(SampleScanState); |
||||
scanstate->ss.ps.plan = (Plan *) node; |
||||
scanstate->ss.ps.state = estate; |
||||
|
||||
/*
|
||||
* Miscellaneous initialization |
||||
* |
||||
* create expression context for node |
||||
*/ |
||||
ExecAssignExprContext(estate, &scanstate->ss.ps); |
||||
|
||||
/*
|
||||
* initialize child expressions |
||||
*/ |
||||
scanstate->ss.ps.targetlist = (List *) |
||||
ExecInitExpr((Expr *) node->plan.targetlist, |
||||
(PlanState *) scanstate); |
||||
scanstate->ss.ps.qual = (List *) |
||||
ExecInitExpr((Expr *) node->plan.qual, |
||||
(PlanState *) scanstate); |
||||
|
||||
/*
|
||||
* tuple table initialization |
||||
*/ |
||||
ExecInitResultTupleSlot(estate, &scanstate->ss.ps); |
||||
ExecInitScanTupleSlot(estate, &scanstate->ss); |
||||
|
||||
/*
|
||||
* initialize scan relation |
||||
*/ |
||||
InitScanRelation(scanstate, estate, eflags, rte->tablesample); |
||||
|
||||
scanstate->ss.ps.ps_TupFromTlist = false; |
||||
|
||||
/*
|
||||
* Initialize result tuple type and projection info. |
||||
*/ |
||||
ExecAssignResultTypeFromTL(&scanstate->ss.ps); |
||||
ExecAssignScanProjectionInfo(&scanstate->ss); |
||||
|
||||
scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample); |
||||
|
||||
return scanstate; |
||||
} |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecEndSampleScan |
||||
* |
||||
* frees any storage allocated through C routines. |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
void |
||||
ExecEndSampleScan(SampleScanState *node) |
||||
{ |
||||
/*
|
||||
* Tell sampling function that we finished the scan. |
||||
*/ |
||||
tablesample_end(node->tsdesc); |
||||
|
||||
/*
|
||||
* Free the exprcontext |
||||
*/ |
||||
ExecFreeExprContext(&node->ss.ps); |
||||
|
||||
/*
|
||||
* clean out the tuple table |
||||
*/ |
||||
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
||||
ExecClearTuple(node->ss.ss_ScanTupleSlot); |
||||
|
||||
/*
|
||||
* close heap scan |
||||
*/ |
||||
heap_endscan(node->ss.ss_currentScanDesc); |
||||
|
||||
/*
|
||||
* close the heap relation. |
||||
*/ |
||||
ExecCloseScanRelation(node->ss.ss_currentRelation); |
||||
} |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Join Support |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecReScanSampleScan |
||||
* |
||||
* Rescans the relation. |
||||
* |
||||
* ---------------------------------------------------------------- |
||||
*/ |
||||
void |
||||
ExecReScanSampleScan(SampleScanState *node) |
||||
{ |
||||
heap_rescan(node->ss.ss_currentScanDesc, NULL); |
||||
|
||||
/*
|
||||
* Tell sampling function to reset its state for rescan. |
||||
*/ |
||||
tablesample_reset(node->tsdesc); |
||||
|
||||
ExecScanReScan(&node->ss); |
||||
} |
@ -0,0 +1,60 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* tablesample.h |
||||
* Public header file for TABLESAMPLE clause interface |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* src/include/access/tablesample.h |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#ifndef TABLESAMPLE_H |
||||
#define TABLESAMPLE_H |
||||
|
||||
#include "access/relscan.h" |
||||
#include "executor/executor.h" |
||||
|
||||
/*
 * Descriptor of one TABLESAMPLE scan.  It is returned by tablesample_init()
 * and handed to the other tablesample_* routines declared in this header.
 */
typedef struct TableSampleDesc {
	HeapScanDesc	heapScan;	/* heap scan descriptor used by the sample scan */
	TupleDesc		tupDesc;	/* Mostly useful for tsmexaminetuple */

	void		   *tsmdata;	/* private method data */

	/*
	 * These point to the functions of the TABLESAMPLE method; the field
	 * names match the function columns of pg_tablesample_method.
	 */
	FmgrInfo	tsminit;
	FmgrInfo	tsmnextblock;
	FmgrInfo	tsmnexttuple;
	FmgrInfo	tsmexaminetuple;
	FmgrInfo	tsmreset;
	FmgrInfo	tsmend;
} TableSampleDesc;
||||
|
||||
|
||||
/*
 * Scan-level interface working on a TableSampleDesc.  The SampleScan
 * executor node calls tablesample_init() when it is initialized,
 * tablesample_reset() on rescan and tablesample_end() at shutdown.
 */
extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
										 TableSampleClause *tablesample);
extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
extern void tablesample_reset(TableSampleDesc *desc);
extern void tablesample_end(TableSampleDesc *desc);
extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
										   bool *visible);

/*
 * fmgr-callable functions of the built-in SYSTEM sampling method, as
 * referenced by its row in pg_tablesample_method.
 */
extern Datum tsm_system_init(PG_FUNCTION_ARGS);
extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
extern Datum tsm_system_end(PG_FUNCTION_ARGS);
extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
extern Datum tsm_system_cost(PG_FUNCTION_ARGS);

/*
 * fmgr-callable functions of the built-in BERNOULLI sampling method, as
 * referenced by its row in pg_tablesample_method.
 */
extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
||||
|
||||
|
||||
#endif |
@ -0,0 +1,78 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* pg_tablesample_method.h |
||||
* definition of the table scan methods. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* src/include/catalog/pg_tablesample_method.h |
||||
* |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#ifndef PG_TABLESAMPLE_METHOD_H |
||||
#define PG_TABLESAMPLE_METHOD_H |
||||
|
||||
#include "catalog/genbki.h" |
||||
#include "catalog/objectaddress.h" |
||||
|
||||
/* ----------------
|
||||
* pg_tablesample_method definition. cpp turns this into |
||||
* typedef struct FormData_pg_tablesample_method |
||||
* ---------------- |
||||
*/ |
||||
#define TableSampleMethodRelationId 3330 |
||||
|
||||
CATALOG(pg_tablesample_method,3330)
{
	NameData	tsmname;		/* tablesample method name */
	bool		tsmseqscan;		/* does this method scan whole table sequentially? */
	bool		tsmpagemode;	/* does this method scan page at a time? */
	regproc		tsminit;		/* init scan function */
	regproc		tsmnextblock;	/* function returning next block to sample
								   or InvalidBlockNumber if finished */
	regproc		tsmnexttuple;	/* function returning next tuple offset from current block
								   or InvalidOffsetNumber if end of the block was reached */
	regproc		tsmexaminetuple;	/* optional function which can examine tuple contents and
								   decide if tuple should be returned or not */
	regproc		tsmend;			/* end scan function */
	regproc		tsmreset;		/* reset state - used by rescan */
	regproc		tsmcost;		/* costing function */
} FormData_pg_tablesample_method;
||||
|
||||
/* ----------------
|
||||
* Form_pg_tablesample_method corresponds to a pointer to a tuple with |
||||
* the format of pg_tablesample_method relation. |
||||
* ---------------- |
||||
*/ |
||||
typedef FormData_pg_tablesample_method *Form_pg_tablesample_method; |
||||
|
||||
/* ----------------
|
||||
* compiler constants for pg_tablesample_method |
||||
* ---------------- |
||||
*/ |
||||
#define Natts_pg_tablesample_method 10 |
||||
#define Anum_pg_tablesample_method_tsmname 1 |
||||
#define Anum_pg_tablesample_method_tsmseqscan 2 |
||||
#define Anum_pg_tablesample_method_tsmpagemode 3 |
||||
#define Anum_pg_tablesample_method_tsminit 4 |
||||
#define Anum_pg_tablesample_method_tsmnextblock 5 |
||||
#define Anum_pg_tablesample_method_tsmnexttuple 6 |
||||
#define Anum_pg_tablesample_method_tsmexaminetuple 7 |
||||
#define Anum_pg_tablesample_method_tsmend 8 |
||||
#define Anum_pg_tablesample_method_tsmreset 9 |
||||
#define Anum_pg_tablesample_method_tsmcost 10 |
||||
|
||||
/* ----------------
|
||||
* initial contents of pg_tablesample_method |
||||
* ---------------- |
||||
*/ |
||||
|
||||
DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost )); |
||||
DESCR("SYSTEM table sampling method"); |
||||
DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost )); |
||||
DESCR("BERNOULLI table sampling method"); |
||||
|
||||
#endif /* PG_TABLESAMPLE_METHOD_H */ |
@ -0,0 +1,24 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* nodeSamplescan.h |
||||
* |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* src/include/executor/nodeSamplescan.h |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#ifndef NODESAMPLESCAN_H |
||||
#define NODESAMPLESCAN_H |
||||
|
||||
#include "nodes/execnodes.h" |
||||
|
||||
extern SampleScanState *ExecInitSampleScan(SampleScan *node, EState *estate, int eflags); |
||||
extern TupleTableSlot *ExecSampleScan(SampleScanState *node); |
||||
extern void ExecEndSampleScan(SampleScanState *node); |
||||
extern void ExecReScanSampleScan(SampleScanState *node); |
||||
|
||||
#endif /* NODESAMPLESCAN_H */ |
@ -0,0 +1,231 @@ |
||||
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages |
||||
INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i; |
||||
SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
id |
||||
---- |
||||
0 |
||||
1 |
||||
2 |
||||
3 |
||||
4 |
||||
5 |
||||
9 |
||||
(7 rows) |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999); |
||||
id |
||||
---- |
||||
6 |
||||
7 |
||||
8 |
||||
(3 rows) |
||||
|
||||
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100); |
||||
count |
||||
------- |
||||
10 |
||||
(1 row) |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); |
||||
id |
||||
---- |
||||
0 |
||||
1 |
||||
2 |
||||
6 |
||||
7 |
||||
8 |
||||
9 |
||||
(7 rows) |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100); |
||||
id |
||||
---- |
||||
0 |
||||
1 |
||||
3 |
||||
4 |
||||
5 |
||||
(5 rows) |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); |
||||
id |
||||
---- |
||||
0 |
||||
5 |
||||
(2 rows) |
||||
|
||||
CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); |
||||
CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); |
||||
SELECT pg_get_viewdef('test_tablesample_v1'::regclass); |
||||
pg_get_viewdef |
||||
-------------------------------------------------------------------------------- |
||||
SELECT test_tablesample.id + |
||||
FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2); |
||||
(1 row) |
||||
|
||||
SELECT pg_get_viewdef('test_tablesample_v2'::regclass); |
||||
pg_get_viewdef |
||||
----------------------------------------------------------- |
||||
SELECT test_tablesample.id + |
||||
FROM test_tablesample TABLESAMPLE system ((99)::real); |
||||
(1 row) |
||||
|
||||
BEGIN; |
||||
DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); |
||||
FETCH FIRST FROM tablesample_cur; |
||||
id |
||||
---- |
||||
0 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
1 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
2 |
||||
(1 row) |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
id |
||||
---- |
||||
0 |
||||
1 |
||||
2 |
||||
3 |
||||
4 |
||||
5 |
||||
9 |
||||
(7 rows) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
6 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
7 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
8 |
||||
(1 row) |
||||
|
||||
FETCH FIRST FROM tablesample_cur; |
||||
id |
||||
---- |
||||
0 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
1 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
2 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
6 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
7 |
||||
(1 row) |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
id |
||||
---- |
||||
8 |
||||
(1 row) |
||||
|
||||
CLOSE tablesample_cur; |
||||
END; |
||||
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
QUERY PLAN |
||||
------------------------------------------------------------------------------- |
||||
Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4) |
||||
(1 row) |
||||
|
||||
EXPLAIN SELECT * FROM test_tablesample_v1; |
||||
QUERY PLAN |
||||
------------------------------------------------------------------------------- |
||||
Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4) |
||||
(1 row) |
||||
|
||||
-- errors |
||||
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); |
||||
ERROR: tablesample method "foobar" does not exist |
||||
LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); |
||||
^ |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); |
||||
ERROR: REPEATABLE clause must be NOT NULL numeric value |
||||
LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); |
||||
^ |
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1); |
||||
ERROR: invalid sample size |
||||
HINT: Sample size must be numeric value between 0 and 100 (inclusive). |
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200); |
||||
ERROR: invalid sample size |
||||
HINT: Sample size must be numeric value between 0 and 100 (inclusive). |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1); |
||||
ERROR: invalid sample size |
||||
HINT: Sample size must be numeric value between 0 and 100 (inclusive). |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200); |
||||
ERROR: invalid sample size |
||||
HINT: Sample size must be numeric value between 0 and 100 (inclusive). |
||||
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1); |
||||
ERROR: TABLESAMPLE clause can only be used on tables and materialized views |
||||
LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)... |
||||
^ |
||||
INSERT INTO test_tablesample_v1 VALUES(1); |
||||
ERROR: cannot insert into view "test_tablesample_v1" |
||||
DETAIL: Views containing TABLESAMPLE are not automatically updatable. |
||||
HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule. |
||||
WITH query_select AS (SELECT * FROM test_tablesample) |
||||
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); |
||||
ERROR: TABLESAMPLE clause can only be used on tables and materialized views |
||||
LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA... |
||||
^ |
||||
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5); |
||||
ERROR: syntax error at or near "TABLESAMPLE" |
||||
LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL... |
||||
^ |
||||
-- catalog sanity |
||||
SELECT * |
||||
FROM pg_tablesample_method |
||||
WHERE tsminit IS NULL |
||||
OR tsmseqscan IS NULL |
||||
OR tsmpagemode IS NULL |
||||
OR tsmnextblock IS NULL |
||||
OR tsmnexttuple IS NULL |
||||
OR tsmend IS NULL |
||||
OR tsmreset IS NULL |
||||
OR tsmcost IS NULL; |
||||
tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost |
||||
---------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+--------- |
||||
(0 rows) |
||||
|
||||
-- done |
||||
DROP TABLE test_tablesample CASCADE; |
||||
NOTICE: drop cascades to 2 other objects |
||||
DETAIL: drop cascades to view test_tablesample_v1 |
||||
drop cascades to view test_tablesample_v2 |
@ -0,0 +1,74 @@ |
||||
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages |
||||
|
||||
INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i; |
||||
|
||||
SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999); |
||||
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100); |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); |
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100); |
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); |
||||
|
||||
CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); |
||||
CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); |
||||
SELECT pg_get_viewdef('test_tablesample_v1'::regclass); |
||||
SELECT pg_get_viewdef('test_tablesample_v2'::regclass); |
||||
|
||||
BEGIN; |
||||
DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); |
||||
FETCH FIRST FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
|
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
|
||||
FETCH FIRST FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
FETCH NEXT FROM tablesample_cur; |
||||
|
||||
CLOSE tablesample_cur; |
||||
END; |
||||
|
||||
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); |
||||
EXPLAIN SELECT * FROM test_tablesample_v1; |
||||
|
||||
-- errors |
||||
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); |
||||
|
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1); |
||||
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200); |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1); |
||||
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200); |
||||
|
||||
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1); |
||||
INSERT INTO test_tablesample_v1 VALUES(1); |
||||
|
||||
WITH query_select AS (SELECT * FROM test_tablesample) |
||||
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); |
||||
|
||||
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5); |
||||
|
||||
-- catalog sanity |
||||
|
||||
SELECT * |
||||
FROM pg_tablesample_method |
||||
WHERE tsminit IS NULL |
||||
OR tsmseqscan IS NULL |
||||
OR tsmpagemode IS NULL |
||||
OR tsmnextblock IS NULL |
||||
OR tsmnexttuple IS NULL |
||||
OR tsmend IS NULL |
||||
OR tsmreset IS NULL |
||||
OR tsmcost IS NULL; |
||||
|
||||
-- done |
||||
DROP TABLE test_tablesample CASCADE; |
Loading…
Reference in new issue