|
|
|
|
@ -7,112 +7,22 @@ |
|
|
|
|
* Copyright (c) 2001-2006, PostgreSQL Global Development Group |
|
|
|
|
* |
|
|
|
|
* IDENTIFICATION |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.17 2006/06/07 18:49:03 tgl Exp $ |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.18 2006/06/09 19:30:56 tgl Exp $ |
|
|
|
|
* |
|
|
|
|
*------------------------------------------------------------------------- |
|
|
|
|
*/ |
|
|
|
|
#include "postgres.h" |
|
|
|
|
|
|
|
|
|
#include <math.h> |
|
|
|
|
#include <unistd.h> |
|
|
|
|
|
|
|
|
|
#include "executor/instrument.h" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE |
|
|
|
|
* by not calling INSTR_TIME_SET_CURRENT() for every single node execution. |
|
|
|
|
* (Because that requires a kernel call on most systems, it's expensive.) |
|
|
|
|
* |
|
|
|
|
* This macro determines the sampling interval: that is, after how many more |
|
|
|
|
* iterations will we take the next time sample, given that niters iterations |
|
|
|
|
* have occurred already. In general, if the function is f(x), then for N |
|
|
|
|
* iterations we will take on the order of integral(1/f(x), x=0..N) |
|
|
|
|
* samples. Some examples follow, with the number of samples that would be |
|
|
|
|
* collected over 1,000,000 iterations: |
|
|
|
|
* |
|
|
|
|
* f(x) = x => log2(N) 20 |
|
|
|
|
* f(x) = x^(1/2) => 2 * N^(1/2) 2000 |
|
|
|
|
* f(x) = x^(1/3) => 1.5 * N^(2/3) 15000 |
|
|
|
|
* |
|
|
|
|
* I've chosen the last one as it seems to provide a good compromise between |
|
|
|
|
* low overhead but still getting a meaningful number of samples. However, |
|
|
|
|
* not all machines have the cbrt() function so on those we substitute |
|
|
|
|
* sqrt(). The difference is not very significant in the tests I made. |
|
|
|
|
* |
|
|
|
|
* The actual sampling interval is randomized with the SampleFunc() value |
|
|
|
|
* as the mean; this hopefully will reduce any measurement bias due to |
|
|
|
|
* variation in the node execution time. |
|
|
|
|
*/ |
|
|
|
|
#ifdef HAVE_CBRT |
|
|
|
|
#define SampleFunc(niters) cbrt(niters) |
|
|
|
|
#else |
|
|
|
|
#define SampleFunc(niters) sqrt(niters) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#define SampleInterval(niters) \ |
|
|
|
|
(SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2)) |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We sample at every node iteration until we've reached this threshold, |
|
|
|
|
* so that nodes not called a large number of times are completely accurate. |
|
|
|
|
* (Perhaps this should be a GUC variable?) |
|
|
|
|
*/ |
|
|
|
|
#define SAMPLE_THRESHOLD 50 |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Determine the sampling overhead. This only needs to be done once per |
|
|
|
|
* backend (actually, probably once per postmaster would do ...) |
|
|
|
|
*/ |
|
|
|
|
static double SampleOverhead; |
|
|
|
|
static bool SampleOverheadCalculated = false; |
|
|
|
|
|
|
|
|
|
static void |
|
|
|
|
CalculateSampleOverhead(void) |
|
|
|
|
{ |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We could get a wrong result due to being interrupted by task switch. |
|
|
|
|
* To minimize the risk we do it a few times and take the lowest. |
|
|
|
|
*/ |
|
|
|
|
SampleOverhead = 1.0e6; |
|
|
|
|
|
|
|
|
|
for (i = 0; i < 5; i++) |
|
|
|
|
{ |
|
|
|
|
Instrumentation timer; |
|
|
|
|
instr_time tmptime; |
|
|
|
|
int j; |
|
|
|
|
double overhead; |
|
|
|
|
|
|
|
|
|
memset(&timer, 0, sizeof(timer)); |
|
|
|
|
InstrStartNode(&timer); |
|
|
|
|
#define TEST_COUNT 100 |
|
|
|
|
for (j = 0; j < TEST_COUNT; j++) |
|
|
|
|
{ |
|
|
|
|
INSTR_TIME_SET_CURRENT(tmptime); |
|
|
|
|
} |
|
|
|
|
InstrStopNode(&timer, 1); |
|
|
|
|
overhead = INSTR_TIME_GET_DOUBLE(timer.counter) / TEST_COUNT; |
|
|
|
|
if (overhead < SampleOverhead) |
|
|
|
|
SampleOverhead = overhead; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
SampleOverheadCalculated = true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Allocate new instrumentation structure(s) */ |
|
|
|
|
Instrumentation * |
|
|
|
|
InstrAlloc(int n) |
|
|
|
|
{ |
|
|
|
|
Instrumentation *instr; |
|
|
|
|
|
|
|
|
|
/* Calculate sampling overhead, if not done yet in this backend */ |
|
|
|
|
if (!SampleOverheadCalculated) |
|
|
|
|
CalculateSampleOverhead(); |
|
|
|
|
|
|
|
|
|
instr = palloc0(n * sizeof(Instrumentation)); |
|
|
|
|
Instrumentation *instr = palloc0(n * sizeof(Instrumentation)); |
|
|
|
|
|
|
|
|
|
/* we don't need to do any initialization except zero 'em */ |
|
|
|
|
|
|
|
|
|
@ -124,22 +34,7 @@ void |
|
|
|
|
InstrStartNode(Instrumentation *instr) |
|
|
|
|
{ |
|
|
|
|
if (INSTR_TIME_IS_ZERO(instr->starttime)) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* Always sample if not yet up to threshold, else check whether |
|
|
|
|
* next threshold has been reached |
|
|
|
|
*/ |
|
|
|
|
if (instr->itercount < SAMPLE_THRESHOLD) |
|
|
|
|
instr->sampling = true; |
|
|
|
|
else if (instr->itercount >= instr->nextsample) |
|
|
|
|
{ |
|
|
|
|
instr->sampling = true; |
|
|
|
|
instr->nextsample = |
|
|
|
|
instr->itercount + SampleInterval(instr->itercount); |
|
|
|
|
} |
|
|
|
|
if (instr->sampling) |
|
|
|
|
INSTR_TIME_SET_CURRENT(instr->starttime); |
|
|
|
|
} |
|
|
|
|
INSTR_TIME_SET_CURRENT(instr->starttime); |
|
|
|
|
else |
|
|
|
|
elog(DEBUG2, "InstrStartNode called twice in a row"); |
|
|
|
|
} |
|
|
|
|
@ -148,53 +43,39 @@ InstrStartNode(Instrumentation *instr) |
|
|
|
|
void |
|
|
|
|
InstrStopNode(Instrumentation *instr, double nTuples) |
|
|
|
|
{ |
|
|
|
|
/* count the returned tuples and iterations */ |
|
|
|
|
instr_time endtime; |
|
|
|
|
|
|
|
|
|
/* count the returned tuples */ |
|
|
|
|
instr->tuplecount += nTuples; |
|
|
|
|
instr->itercount += 1; |
|
|
|
|
|
|
|
|
|
/* measure runtime if appropriate */ |
|
|
|
|
if (instr->sampling) |
|
|
|
|
if (INSTR_TIME_IS_ZERO(instr->starttime)) |
|
|
|
|
{ |
|
|
|
|
instr_time endtime; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* To be sure that SampleOverhead accurately reflects the extra |
|
|
|
|
* overhead, we must do INSTR_TIME_SET_CURRENT() as the *first* |
|
|
|
|
* action that is different between the sampling and non-sampling |
|
|
|
|
* code paths. |
|
|
|
|
*/ |
|
|
|
|
INSTR_TIME_SET_CURRENT(endtime); |
|
|
|
|
elog(DEBUG2, "InstrStopNode called without start"); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (INSTR_TIME_IS_ZERO(instr->starttime)) |
|
|
|
|
{ |
|
|
|
|
elog(DEBUG2, "InstrStopNode called without start"); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
INSTR_TIME_SET_CURRENT(endtime); |
|
|
|
|
|
|
|
|
|
#ifndef WIN32 |
|
|
|
|
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec; |
|
|
|
|
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec; |
|
|
|
|
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec; |
|
|
|
|
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec; |
|
|
|
|
|
|
|
|
|
/* Normalize after each add to avoid overflow/underflow of tv_usec */ |
|
|
|
|
while (instr->counter.tv_usec < 0) |
|
|
|
|
{ |
|
|
|
|
instr->counter.tv_usec += 1000000; |
|
|
|
|
instr->counter.tv_sec--; |
|
|
|
|
} |
|
|
|
|
while (instr->counter.tv_usec >= 1000000) |
|
|
|
|
{ |
|
|
|
|
instr->counter.tv_usec -= 1000000; |
|
|
|
|
instr->counter.tv_sec++; |
|
|
|
|
} |
|
|
|
|
/* Normalize after each add to avoid overflow/underflow of tv_usec */ |
|
|
|
|
while (instr->counter.tv_usec < 0) |
|
|
|
|
{ |
|
|
|
|
instr->counter.tv_usec += 1000000; |
|
|
|
|
instr->counter.tv_sec--; |
|
|
|
|
} |
|
|
|
|
while (instr->counter.tv_usec >= 1000000) |
|
|
|
|
{ |
|
|
|
|
instr->counter.tv_usec -= 1000000; |
|
|
|
|
instr->counter.tv_sec++; |
|
|
|
|
} |
|
|
|
|
#else /* WIN32 */ |
|
|
|
|
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart); |
|
|
|
|
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
INSTR_TIME_SET_ZERO(instr->starttime); |
|
|
|
|
|
|
|
|
|
instr->samplecount += 1; |
|
|
|
|
instr->sampling = false; |
|
|
|
|
} |
|
|
|
|
INSTR_TIME_SET_ZERO(instr->starttime); |
|
|
|
|
|
|
|
|
|
/* Is this the first tuple of this cycle? */ |
|
|
|
|
if (!instr->running) |
|
|
|
|
@ -217,31 +98,9 @@ InstrEndLoop(Instrumentation *instr) |
|
|
|
|
if (!INSTR_TIME_IS_ZERO(instr->starttime)) |
|
|
|
|
elog(DEBUG2, "InstrEndLoop called on running node"); |
|
|
|
|
|
|
|
|
|
/* Compute time spent in node */ |
|
|
|
|
/* Accumulate per-cycle statistics into totals */ |
|
|
|
|
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If we didn't measure runtime on every iteration, then we must increase |
|
|
|
|
* the measured total to account for the other iterations. Naively |
|
|
|
|
* multiplying totaltime by itercount/samplecount would be wrong because |
|
|
|
|
* it effectively assumes the sampling overhead applies to all iterations, |
|
|
|
|
* even the ones we didn't measure. (Note that what we are trying to |
|
|
|
|
* estimate here is the actual time spent in the node, including the |
|
|
|
|
* actual measurement overhead; not the time exclusive of measurement |
|
|
|
|
* overhead.) We exclude the first iteration from the correction basis, |
|
|
|
|
* because it often takes longer than others. |
|
|
|
|
*/ |
|
|
|
|
if (instr->itercount > instr->samplecount) |
|
|
|
|
{ |
|
|
|
|
double per_iter; |
|
|
|
|
|
|
|
|
|
per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1) |
|
|
|
|
- SampleOverhead; |
|
|
|
|
if (per_iter > 0) /* sanity check */ |
|
|
|
|
totaltime += per_iter * (instr->itercount - instr->samplecount); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Accumulate per-cycle statistics into totals */ |
|
|
|
|
instr->startup += instr->firsttuple; |
|
|
|
|
instr->total += totaltime; |
|
|
|
|
instr->ntuples += instr->tuplecount; |
|
|
|
|
@ -249,12 +108,8 @@ InstrEndLoop(Instrumentation *instr) |
|
|
|
|
|
|
|
|
|
/* Reset for next cycle (if any) */ |
|
|
|
|
instr->running = false; |
|
|
|
|
instr->sampling = false; |
|
|
|
|
INSTR_TIME_SET_ZERO(instr->starttime); |
|
|
|
|
INSTR_TIME_SET_ZERO(instr->counter); |
|
|
|
|
instr->firsttuple = 0; |
|
|
|
|
instr->tuplecount = 0; |
|
|
|
|
instr->itercount = 0; |
|
|
|
|
instr->samplecount = 0; |
|
|
|
|
instr->nextsample = 0; |
|
|
|
|
} |
|
|
|
|
|