mirror of https://github.com/postgres/postgres
parent
4d40494b11
commit
56e121a508
@ -0,0 +1,4 @@ |
|||||||
|
# Generated subdirectories |
||||||
|
/log/ |
||||||
|
/results/ |
||||||
|
/tmp_check/ |
@ -0,0 +1,21 @@ |
|||||||
|
# src/test/modules/tsm_system_time/Makefile
|
||||||
|
|
||||||
|
MODULE_big = tsm_system_time
|
||||||
|
OBJS = tsm_system_time.o $(WIN32RES)
|
||||||
|
PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts number rows of as a limit"
|
||||||
|
|
||||||
|
EXTENSION = tsm_system_time
|
||||||
|
DATA = tsm_system_time--1.0.sql
|
||||||
|
|
||||||
|
REGRESS = tsm_system_time
|
||||||
|
|
||||||
|
ifdef USE_PGXS |
||||||
|
PG_CONFIG = pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
|
include $(PGXS) |
||||||
|
else |
||||||
|
subdir = contrib/tsm_system_time
|
||||||
|
top_builddir = ../..
|
||||||
|
include $(top_builddir)/src/Makefile.global |
||||||
|
include $(top_srcdir)/contrib/contrib-global.mk |
||||||
|
endif |
@ -0,0 +1,54 @@ |
|||||||
|
CREATE EXTENSION tsm_system_time; |
||||||
|
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages |
||||||
|
INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i; |
||||||
|
ANALYZE test_tablesample; |
||||||
|
SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000); |
||||||
|
count |
||||||
|
------- |
||||||
|
31 |
||||||
|
(1 row) |
||||||
|
|
||||||
|
SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432); |
||||||
|
id |
||||||
|
---- |
||||||
|
7 |
||||||
|
14 |
||||||
|
21 |
||||||
|
28 |
||||||
|
4 |
||||||
|
11 |
||||||
|
18 |
||||||
|
25 |
||||||
|
1 |
||||||
|
8 |
||||||
|
15 |
||||||
|
22 |
||||||
|
29 |
||||||
|
5 |
||||||
|
12 |
||||||
|
19 |
||||||
|
26 |
||||||
|
2 |
||||||
|
9 |
||||||
|
16 |
||||||
|
23 |
||||||
|
30 |
||||||
|
6 |
||||||
|
13 |
||||||
|
20 |
||||||
|
27 |
||||||
|
3 |
||||||
|
10 |
||||||
|
17 |
||||||
|
24 |
||||||
|
0 |
||||||
|
(31 rows) |
||||||
|
|
||||||
|
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10); |
||||||
|
QUERY PLAN |
||||||
|
------------------------------------------------------------------------------------ |
||||||
|
Sample Scan (system_time) on test_tablesample (cost=0.00..100.25 rows=25 width=4) |
||||||
|
(1 row) |
||||||
|
|
||||||
|
-- done |
||||||
|
DROP TABLE test_tablesample CASCADE; |
@ -0,0 +1,14 @@ |
|||||||
|
CREATE EXTENSION tsm_system_time; |
||||||
|
|
||||||
|
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages |
||||||
|
|
||||||
|
INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i; |
||||||
|
ANALYZE test_tablesample; |
||||||
|
|
||||||
|
SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000); |
||||||
|
SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432); |
||||||
|
|
||||||
|
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10); |
||||||
|
|
||||||
|
-- done |
||||||
|
DROP TABLE test_tablesample CASCADE; |
@ -0,0 +1,40 @@ |
|||||||
|
/* src/test/modules/tablesample/tsm_system_time--1.0.sql */ |
||||||
|
|
||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION |
||||||
|
\echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_init(internal, int4, int4) |
||||||
|
RETURNS void |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_nextblock(internal) |
||||||
|
RETURNS int4 |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2) |
||||||
|
RETURNS int2 |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_end(internal) |
||||||
|
RETURNS void |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_reset(internal) |
||||||
|
RETURNS void |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal) |
||||||
|
RETURNS void |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT; |
||||||
|
|
||||||
|
INSERT INTO pg_tablesample_method VALUES('system_time', false, true, |
||||||
|
'tsm_system_time_init', 'tsm_system_time_nextblock', |
||||||
|
'tsm_system_time_nexttuple', '-', 'tsm_system_time_end', |
||||||
|
'tsm_system_time_reset', 'tsm_system_time_cost'); |
||||||
|
|
@ -0,0 +1,315 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* tsm_system_time.c |
||||||
|
* interface routines for system_time tablesample method |
||||||
|
* |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* contrib/tsm_system_time_rowlimit/tsm_system_time.c |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include "fmgr.h" |
||||||
|
|
||||||
|
#include "access/tablesample.h" |
||||||
|
#include "access/relscan.h" |
||||||
|
#include "miscadmin.h" |
||||||
|
#include "nodes/execnodes.h" |
||||||
|
#include "nodes/relation.h" |
||||||
|
#include "optimizer/clauses.h" |
||||||
|
#include "storage/bufmgr.h" |
||||||
|
#include "utils/sampling.h" |
||||||
|
#include "utils/spccache.h" |
||||||
|
#include "utils/timestamp.h" |
||||||
|
|
||||||
|
PG_MODULE_MAGIC; |
||||||
|
|
||||||
|
/*
|
||||||
|
* State |
||||||
|
*/ |
||||||
|
typedef struct |
||||||
|
{ |
||||||
|
SamplerRandomState randstate; |
||||||
|
uint32 seed; /* random seed */ |
||||||
|
BlockNumber nblocks; /* number of block in relation */ |
||||||
|
int32 time; /* time limit for sampling */ |
||||||
|
TimestampTz start_time; /* start time of sampling */ |
||||||
|
TimestampTz end_time; /* end time of sampling */ |
||||||
|
OffsetNumber lt; /* last tuple returned from current block */ |
||||||
|
BlockNumber step; /* step size */ |
||||||
|
BlockNumber lb; /* last block visited */ |
||||||
|
BlockNumber estblocks; /* estimated number of returned blocks (moving) */ |
||||||
|
BlockNumber doneblocks; /* number of already returned blocks */ |
||||||
|
} SystemSamplerData; |
||||||
|
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_init); |
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_nextblock); |
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_nexttuple); |
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_end); |
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_reset); |
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_cost); |
||||||
|
|
||||||
|
static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Initializes the state. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_init(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||||
|
uint32 seed = PG_GETARG_UINT32(1); |
||||||
|
int32 time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2); |
||||||
|
HeapScanDesc scan = tsdesc->heapScan; |
||||||
|
SystemSamplerData *sampler; |
||||||
|
|
||||||
|
if (time < 1) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
||||||
|
errmsg("invalid time limit"), |
||||||
|
errhint("Time limit must be positive integer value."))); |
||||||
|
|
||||||
|
sampler = palloc0(sizeof(SystemSamplerData)); |
||||||
|
|
||||||
|
/* Remember initial values for reinit */ |
||||||
|
sampler->seed = seed; |
||||||
|
sampler->nblocks = scan->rs_nblocks; |
||||||
|
sampler->lt = InvalidOffsetNumber; |
||||||
|
sampler->estblocks = 2; |
||||||
|
sampler->doneblocks = 0; |
||||||
|
sampler->time = time; |
||||||
|
sampler->start_time = GetCurrentTimestamp(); |
||||||
|
sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time, |
||||||
|
sampler->time); |
||||||
|
|
||||||
|
sampler_random_init_state(sampler->seed, sampler->randstate); |
||||||
|
|
||||||
|
/* Find relative prime as step size for linear probing. */ |
||||||
|
sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate); |
||||||
|
/*
|
||||||
|
* Randomize start position so that blocks close to step size don't have |
||||||
|
* higher probability of being chosen on very short scan. |
||||||
|
*/ |
||||||
|
sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step); |
||||||
|
|
||||||
|
tsdesc->tsmdata = (void *) sampler; |
||||||
|
|
||||||
|
PG_RETURN_VOID(); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Get next block number or InvalidBlockNumber when we're done. |
||||||
|
* |
||||||
|
* Uses linear probing algorithm for picking next block. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_nextblock(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||||
|
|
||||||
|
sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks; |
||||||
|
sampler->doneblocks++; |
||||||
|
|
||||||
|
/* All blocks have been read, we're done */ |
||||||
|
if (sampler->doneblocks > sampler->nblocks) |
||||||
|
PG_RETURN_UINT32(InvalidBlockNumber); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Update the estimations for time limit at least 10 times per estimated |
||||||
|
* number of returned blocks to handle variations in block read speed. |
||||||
|
*/ |
||||||
|
if (sampler->doneblocks % Max(sampler->estblocks/10, 1) == 0) |
||||||
|
{ |
||||||
|
TimestampTz now = GetCurrentTimestamp(); |
||||||
|
long secs; |
||||||
|
int usecs; |
||||||
|
int usecs_remaining; |
||||||
|
int time_per_block; |
||||||
|
|
||||||
|
TimestampDifference(sampler->start_time, now, &secs, &usecs); |
||||||
|
usecs += (int) secs * 1000000; |
||||||
|
|
||||||
|
time_per_block = usecs / sampler->doneblocks; |
||||||
|
|
||||||
|
/* No time left, end. */ |
||||||
|
TimestampDifference(now, sampler->end_time, &secs, &usecs); |
||||||
|
if (secs <= 0 && usecs <= 0) |
||||||
|
PG_RETURN_UINT32(InvalidBlockNumber); |
||||||
|
|
||||||
|
/* Remaining microseconds */ |
||||||
|
usecs_remaining = usecs + (int) secs * 1000000; |
||||||
|
|
||||||
|
/* Recalculate estimated returned number of blocks */ |
||||||
|
if (time_per_block < usecs_remaining && time_per_block > 0) |
||||||
|
sampler->estblocks = sampler->time * time_per_block; |
||||||
|
} |
||||||
|
|
||||||
|
PG_RETURN_UINT32(sampler->lb); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Get next tuple offset in current block or InvalidOffsetNumber if we are done |
||||||
|
* with this block. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_nexttuple(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||||
|
OffsetNumber maxoffset = PG_GETARG_UINT16(2); |
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||||
|
OffsetNumber tupoffset = sampler->lt; |
||||||
|
|
||||||
|
if (tupoffset == InvalidOffsetNumber) |
||||||
|
tupoffset = FirstOffsetNumber; |
||||||
|
else |
||||||
|
tupoffset++; |
||||||
|
|
||||||
|
if (tupoffset > maxoffset) |
||||||
|
tupoffset = InvalidOffsetNumber; |
||||||
|
|
||||||
|
sampler->lt = tupoffset; |
||||||
|
|
||||||
|
PG_RETURN_UINT16(tupoffset); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleanup method. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_end(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||||
|
|
||||||
|
pfree(tsdesc->tsmdata); |
||||||
|
|
||||||
|
PG_RETURN_VOID(); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset state (called by ReScan). |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_reset(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); |
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; |
||||||
|
|
||||||
|
sampler->lt = InvalidOffsetNumber; |
||||||
|
sampler->start_time = GetCurrentTimestamp(); |
||||||
|
sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time, |
||||||
|
sampler->time); |
||||||
|
sampler->estblocks = 2; |
||||||
|
sampler->doneblocks = 0; |
||||||
|
|
||||||
|
sampler_random_init_state(sampler->seed, sampler->randstate); |
||||||
|
sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate); |
||||||
|
sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step); |
||||||
|
|
||||||
|
PG_RETURN_VOID(); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Costing function. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
tsm_system_time_cost(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); |
||||||
|
Path *path = (Path *) PG_GETARG_POINTER(1); |
||||||
|
RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); |
||||||
|
List *args = (List *) PG_GETARG_POINTER(3); |
||||||
|
BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); |
||||||
|
double *tuples = (double *) PG_GETARG_POINTER(5); |
||||||
|
Node *limitnode; |
||||||
|
int32 time; |
||||||
|
BlockNumber relpages; |
||||||
|
double reltuples; |
||||||
|
double density; |
||||||
|
double spc_random_page_cost; |
||||||
|
|
||||||
|
limitnode = linitial(args); |
||||||
|
limitnode = estimate_expression_value(root, limitnode); |
||||||
|
|
||||||
|
if (IsA(limitnode, RelabelType)) |
||||||
|
limitnode = (Node *) ((RelabelType *) limitnode)->arg; |
||||||
|
|
||||||
|
if (IsA(limitnode, Const)) |
||||||
|
time = DatumGetInt32(((Const *) limitnode)->constvalue); |
||||||
|
else |
||||||
|
{ |
||||||
|
/* Default time (1s) if the estimation didn't return Const. */ |
||||||
|
time = 1000; |
||||||
|
} |
||||||
|
|
||||||
|
relpages = baserel->pages; |
||||||
|
reltuples = baserel->tuples; |
||||||
|
|
||||||
|
/* estimate the tuple density */ |
||||||
|
if (relpages > 0) |
||||||
|
density = reltuples / (double) relpages; |
||||||
|
else |
||||||
|
density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width; |
||||||
|
|
||||||
|
/*
|
||||||
|
* We equal random page cost value to number of ms it takes to read the |
||||||
|
* random page here which is far from accurate but we don't have anything |
||||||
|
* better to base our predicted page reads. |
||||||
|
*/ |
||||||
|
get_tablespace_page_costs(baserel->reltablespace, |
||||||
|
&spc_random_page_cost, |
||||||
|
NULL); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Assumption here is that we'll never read less then 1% of table pages, |
||||||
|
* this is here mainly because it is much less bad to overestimate than |
||||||
|
* underestimate and using just spc_random_page_cost will probably lead |
||||||
|
* to underestimations in general. |
||||||
|
*/ |
||||||
|
*pages = Min(baserel->pages, Max(time/spc_random_page_cost, baserel->pages/100)); |
||||||
|
*tuples = rint(density * (double) *pages * path->rows / baserel->tuples); |
||||||
|
path->rows = *tuples; |
||||||
|
|
||||||
|
PG_RETURN_VOID(); |
||||||
|
} |
||||||
|
|
||||||
|
static uint32 |
||||||
|
gcd (uint32 a, uint32 b) |
||||||
|
{ |
||||||
|
uint32 c; |
||||||
|
|
||||||
|
while (a != 0) |
||||||
|
{ |
||||||
|
c = a; |
||||||
|
a = b % a; |
||||||
|
b = c; |
||||||
|
} |
||||||
|
|
||||||
|
return b; |
||||||
|
} |
||||||
|
|
||||||
|
static uint32 |
||||||
|
random_relative_prime(uint32 n, SamplerRandomState randstate) |
||||||
|
{ |
||||||
|
/* Pick random starting number, with some limits on what it can be. */ |
||||||
|
uint32 r = (uint32) sampler_random_fract(randstate) * n/2 + n/4, |
||||||
|
t; |
||||||
|
|
||||||
|
/*
|
||||||
|
* This should only take 2 or 3 iterations as the probability of 2 numbers |
||||||
|
* being relatively prime is ~61%. |
||||||
|
*/ |
||||||
|
while ((t = gcd(r, n)) > 1) |
||||||
|
{ |
||||||
|
CHECK_FOR_INTERRUPTS(); |
||||||
|
r /= t; |
||||||
|
} |
||||||
|
|
||||||
|
return r; |
||||||
|
} |
@ -0,0 +1,5 @@ |
|||||||
|
# tsm_system_time extension |
||||||
|
comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit' |
||||||
|
default_version = '1.0' |
||||||
|
module_pathname = '$libdir/tsm_system_time' |
||||||
|
relocatable = true |
Loading…
Reference in new issue