mirror of https://github.com/postgres/postgres
Add support for explicitly declared statistic objects (CREATE STATISTICS), allowing collection of statistics on more complex combinations that individual table columns. Companion commands DROP STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are added too. All this DDL has been designed so that more statistic types can be added later on, such as multivariate most-common-values and multivariate histograms between columns of a single table, leaving room for permitting columns on multiple tables, too, as well as expressions. This commit only adds support for collection of n-distinct coefficient on user-specified sets of columns in a single table. This is useful to estimate number of distinct groups in GROUP BY and DISTINCT clauses; estimation errors there can cause over-allocation of memory in hashed aggregates, for instance, so it's a worthwhile problem to solve. A new special pseudo-type pg_ndistinct is used. (num-distinct estimation was deemed sufficiently useful by itself that this is worthwhile even if no further statistic types are added immediately; so much so that another version of essentially the same functionality was submitted by Kyotaro Horiguchi: https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp though this commit does not use that code.) Author: Tomas Vondra. Some code rework by Álvaro. Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes, Ideriha Takeshi Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsqlpull/3/merge
parent
f120b614e0
commit
7b504eb282
@ -0,0 +1,115 @@ |
|||||||
|
<!-- |
||||||
|
doc/src/sgml/ref/alter_statistics.sgml |
||||||
|
PostgreSQL documentation |
||||||
|
--> |
||||||
|
|
||||||
|
<refentry id="SQL-ALTERSTATISTICS"> |
||||||
|
<indexterm zone="sql-alterstatistics"> |
||||||
|
<primary>ALTER STATISTICS</primary> |
||||||
|
</indexterm> |
||||||
|
|
||||||
|
<refmeta> |
||||||
|
<refentrytitle>ALTER STATISTICS</refentrytitle> |
||||||
|
<manvolnum>7</manvolnum> |
||||||
|
<refmiscinfo>SQL - Language Statements</refmiscinfo> |
||||||
|
</refmeta> |
||||||
|
|
||||||
|
<refnamediv> |
||||||
|
<refname>ALTER STATISTICS</refname> |
||||||
|
<refpurpose> |
||||||
|
change the definition of a extended statistics |
||||||
|
</refpurpose> |
||||||
|
</refnamediv> |
||||||
|
|
||||||
|
<refsynopsisdiv> |
||||||
|
<synopsis> |
||||||
|
ALTER STATISTICS <replaceable class="parameter">name</replaceable> OWNER TO { <replaceable class="PARAMETER">new_owner</replaceable> | CURRENT_USER | SESSION_USER } |
||||||
|
ALTER STATISTICS <replaceable class="parameter">name</replaceable> RENAME TO <replaceable class="parameter">new_name</replaceable> |
||||||
|
ALTER STATISTICS <replaceable class="parameter">name</replaceable> SET SCHEMA <replaceable class="parameter">new_schema</replaceable> |
||||||
|
</synopsis> |
||||||
|
</refsynopsisdiv> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Description</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
<command>ALTER STATISTICS</command> changes the parameters of an existing |
||||||
|
extended statistics. Any parameters not specifically set in the |
||||||
|
<command>ALTER STATISTICS</command> command retain their prior settings. |
||||||
|
</para> |
||||||
|
|
||||||
|
<para> |
||||||
|
You must own the statistics to use <command>ALTER STATISTICS</>. |
||||||
|
To change a statistics' schema, you must also have <literal>CREATE</> |
||||||
|
privilege on the new schema. |
||||||
|
To alter the owner, you must also be a direct or indirect member of the new |
||||||
|
owning role, and that role must have <literal>CREATE</literal> privilege on |
||||||
|
the statistics' schema. (These restrictions enforce that altering the owner |
||||||
|
doesn't do anything you couldn't do by dropping and recreating the statistics. |
||||||
|
However, a superuser can alter ownership of any statistics anyway.) |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Parameters</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
<variablelist> |
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="parameter">name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The name (optionally schema-qualified) of the statistics to be altered. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="PARAMETER">new_owner</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The user name of the new owner of the statistics. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="parameter">new_name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The new name for the statistics. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="parameter">new_schema</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The new schema for the statistics. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
</variablelist> |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Compatibility</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
There's no <command>ALTER STATISTICS</command> command in the SQL standard. |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>See Also</title> |
||||||
|
|
||||||
|
<simplelist type="inline"> |
||||||
|
<member><xref linkend="sql-createstatistics"></member> |
||||||
|
<member><xref linkend="sql-dropstatistics"></member> |
||||||
|
</simplelist> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
</refentry> |
@ -0,0 +1,155 @@ |
|||||||
|
<!-- |
||||||
|
doc/src/sgml/ref/create_statistics.sgml |
||||||
|
PostgreSQL documentation |
||||||
|
--> |
||||||
|
|
||||||
|
<refentry id="SQL-CREATESTATISTICS"> |
||||||
|
<indexterm zone="sql-createstatistics"> |
||||||
|
<primary>CREATE STATISTICS</primary> |
||||||
|
</indexterm> |
||||||
|
|
||||||
|
<refmeta> |
||||||
|
<refentrytitle>CREATE STATISTICS</refentrytitle> |
||||||
|
<manvolnum>7</manvolnum> |
||||||
|
<refmiscinfo>SQL - Language Statements</refmiscinfo> |
||||||
|
</refmeta> |
||||||
|
|
||||||
|
<refnamediv> |
||||||
|
<refname>CREATE STATISTICS</refname> |
||||||
|
<refpurpose>define extended statistics</refpurpose> |
||||||
|
</refnamediv> |
||||||
|
|
||||||
|
<refsynopsisdiv> |
||||||
|
<synopsis> |
||||||
|
CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_name</replaceable> ON ( |
||||||
|
<replaceable class="PARAMETER">column_name</replaceable>, <replaceable class="PARAMETER">column_name</replaceable> [, ...]) |
||||||
|
FROM <replaceable class="PARAMETER">table_name</replaceable> |
||||||
|
</synopsis> |
||||||
|
|
||||||
|
</refsynopsisdiv> |
||||||
|
|
||||||
|
<refsect1 id="SQL-CREATESTATISTICS-description"> |
||||||
|
<title>Description</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
<command>CREATE STATISTICS</command> will create a new extended statistics |
||||||
|
object on the specified table. |
||||||
|
The statistics will be created in the current database and |
||||||
|
will be owned by the user issuing the command. |
||||||
|
</para> |
||||||
|
|
||||||
|
<para> |
||||||
|
If a schema name is given (for example, <literal>CREATE STATISTICS |
||||||
|
myschema.mystat ...</>) then the statistics is created in the specified |
||||||
|
schema. Otherwise it is created in the current schema. The name of |
||||||
|
the statistics must be distinct from the name of any other statistics in the |
||||||
|
same schema. |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Parameters</title> |
||||||
|
|
||||||
|
<variablelist> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><literal>IF NOT EXISTS</></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
Do not throw an error if a statistics with the same name already exists. |
||||||
|
A notice is issued in this case. Note that only the name of the |
||||||
|
statistics object is considered here. The definition of the statistics is |
||||||
|
not considered. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="PARAMETER">statistics_name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The name (optionally schema-qualified) of the statistics to be created. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="PARAMETER">column_name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The name of a column to be included in the statistics. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="PARAMETER">table_name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The name (optionally schema-qualified) of the table the statistics should |
||||||
|
be created on. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
</variablelist> |
||||||
|
|
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Notes</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
You must be the owner of a table to create or change statistics on it. |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1 id="SQL-CREATESTATISTICS-examples"> |
||||||
|
<title>Examples</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
Create table <structname>t1</> with two functionally dependent columns, i.e. |
||||||
|
knowledge of a value in the first column is sufficient for determining the |
||||||
|
value in the other column. Then functional dependencies are built on those |
||||||
|
columns: |
||||||
|
|
||||||
|
<programlisting> |
||||||
|
CREATE TABLE t1 ( |
||||||
|
a int, |
||||||
|
b int |
||||||
|
); |
||||||
|
|
||||||
|
INSERT INTO t1 SELECT i/100, i/500 |
||||||
|
FROM generate_series(1,1000000) s(i); |
||||||
|
|
||||||
|
CREATE STATISTICS s1 ON (a, b) FROM t1; |
||||||
|
|
||||||
|
ANALYZE t1; |
||||||
|
|
||||||
|
-- valid combination of values |
||||||
|
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); |
||||||
|
|
||||||
|
-- invalid combination of values |
||||||
|
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1); |
||||||
|
</programlisting> |
||||||
|
</para> |
||||||
|
|
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Compatibility</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
There's no <command>CREATE STATISTICS</command> command in the SQL standard. |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>See Also</title> |
||||||
|
|
||||||
|
<simplelist type="inline"> |
||||||
|
<member><xref linkend="sql-alterstatistics"></member> |
||||||
|
<member><xref linkend="sql-dropstatistics"></member> |
||||||
|
</simplelist> |
||||||
|
</refsect1> |
||||||
|
</refentry> |
@ -0,0 +1,98 @@ |
|||||||
|
<!-- |
||||||
|
doc/src/sgml/ref/drop_statistics.sgml |
||||||
|
PostgreSQL documentation |
||||||
|
--> |
||||||
|
|
||||||
|
<refentry id="SQL-DROPSTATISTICS"> |
||||||
|
<indexterm zone="sql-dropstatistics"> |
||||||
|
<primary>DROP STATISTICS</primary> |
||||||
|
</indexterm> |
||||||
|
|
||||||
|
<refmeta> |
||||||
|
<refentrytitle>DROP STATISTICS</refentrytitle> |
||||||
|
<manvolnum>7</manvolnum> |
||||||
|
<refmiscinfo>SQL - Language Statements</refmiscinfo> |
||||||
|
</refmeta> |
||||||
|
|
||||||
|
<refnamediv> |
||||||
|
<refname>DROP STATISTICS</refname> |
||||||
|
<refpurpose>remove extended statistics</refpurpose> |
||||||
|
</refnamediv> |
||||||
|
|
||||||
|
<refsynopsisdiv> |
||||||
|
<synopsis> |
||||||
|
DROP STATISTICS [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...] |
||||||
|
</synopsis> |
||||||
|
</refsynopsisdiv> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Description</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
<command>DROP STATISTICS</command> removes statistics from the database. |
||||||
|
Only the statistics owner, the schema owner, and superuser can drop a |
||||||
|
statistics. |
||||||
|
</para> |
||||||
|
|
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Parameters</title> |
||||||
|
|
||||||
|
<variablelist> |
||||||
|
<varlistentry> |
||||||
|
<term><literal>IF EXISTS</literal></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
Do not throw an error if the statistics do not exist. A notice is |
||||||
|
issued in this case. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
<varlistentry> |
||||||
|
<term><replaceable class="PARAMETER">name</replaceable></term> |
||||||
|
<listitem> |
||||||
|
<para> |
||||||
|
The name (optionally schema-qualified) of the statistics to drop. |
||||||
|
</para> |
||||||
|
</listitem> |
||||||
|
</varlistentry> |
||||||
|
|
||||||
|
</variablelist> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Examples</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
To destroy two statistics objects on different schemas, without failing |
||||||
|
if they don't exist: |
||||||
|
|
||||||
|
<programlisting> |
||||||
|
DROP STATISTICS IF EXISTS |
||||||
|
accounting.users_uid_creation, |
||||||
|
public.grants_user_role; |
||||||
|
</programlisting> |
||||||
|
</para> |
||||||
|
|
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>Compatibility</title> |
||||||
|
|
||||||
|
<para> |
||||||
|
There's no <command>DROP STATISTICS</command> command in the SQL standard. |
||||||
|
</para> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
<refsect1> |
||||||
|
<title>See Also</title> |
||||||
|
|
||||||
|
<simplelist type="inline"> |
||||||
|
<member><xref linkend="sql-alterstatistics"></member> |
||||||
|
<member><xref linkend="sql-createstatistics"></member> |
||||||
|
</simplelist> |
||||||
|
</refsect1> |
||||||
|
|
||||||
|
</refentry> |
@ -0,0 +1,296 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* statscmds.c |
||||||
|
* Commands for creating and altering extended statistics |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/backend/commands/statscmds.c |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include "access/relscan.h" |
||||||
|
#include "catalog/dependency.h" |
||||||
|
#include "catalog/indexing.h" |
||||||
|
#include "catalog/namespace.h" |
||||||
|
#include "catalog/pg_namespace.h" |
||||||
|
#include "catalog/pg_statistic_ext.h" |
||||||
|
#include "commands/defrem.h" |
||||||
|
#include "miscadmin.h" |
||||||
|
#include "statistics/statistics.h" |
||||||
|
#include "utils/builtins.h" |
||||||
|
#include "utils/inval.h" |
||||||
|
#include "utils/memutils.h" |
||||||
|
#include "utils/rel.h" |
||||||
|
#include "utils/syscache.h" |
||||||
|
#include "utils/typcache.h" |
||||||
|
|
||||||
|
|
||||||
|
/* used for sorting the attnums in CreateStatistics */ |
||||||
|
static int |
||||||
|
compare_int16(const void *a, const void *b) |
||||||
|
{ |
||||||
|
return memcmp(a, b, sizeof(int16)); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* CREATE STATISTICS |
||||||
|
*/ |
||||||
|
ObjectAddress |
||||||
|
CreateStatistics(CreateStatsStmt *stmt) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
ListCell *l; |
||||||
|
int16 attnums[STATS_MAX_DIMENSIONS]; |
||||||
|
int numcols = 0; |
||||||
|
ObjectAddress address = InvalidObjectAddress; |
||||||
|
char *namestr; |
||||||
|
NameData staname; |
||||||
|
Oid statoid; |
||||||
|
Oid namespaceId; |
||||||
|
HeapTuple htup; |
||||||
|
Datum values[Natts_pg_statistic_ext]; |
||||||
|
bool nulls[Natts_pg_statistic_ext]; |
||||||
|
int2vector *stakeys; |
||||||
|
Relation statrel; |
||||||
|
Relation rel; |
||||||
|
Oid relid; |
||||||
|
ObjectAddress parentobject, |
||||||
|
childobject; |
||||||
|
Datum types[1]; /* only ndistinct defined now */ |
||||||
|
int ntypes; |
||||||
|
ArrayType *staenabled; |
||||||
|
bool build_ndistinct; |
||||||
|
bool requested_type = false; |
||||||
|
|
||||||
|
Assert(IsA(stmt, CreateStatsStmt)); |
||||||
|
|
||||||
|
/* resolve the pieces of the name (namespace etc.) */ |
||||||
|
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr); |
||||||
|
namestrcpy(&staname, namestr); |
||||||
|
|
||||||
|
/*
|
||||||
|
* If if_not_exists was given and the statistics already exists, bail out. |
||||||
|
*/ |
||||||
|
if (SearchSysCacheExists2(STATEXTNAMENSP, |
||||||
|
PointerGetDatum(&staname), |
||||||
|
ObjectIdGetDatum(namespaceId))) |
||||||
|
{ |
||||||
|
if (stmt->if_not_exists) |
||||||
|
{ |
||||||
|
ereport(NOTICE, |
||||||
|
(errcode(ERRCODE_DUPLICATE_OBJECT), |
||||||
|
errmsg("statistics \"%s\" already exist, skipping", |
||||||
|
namestr))); |
||||||
|
return InvalidObjectAddress; |
||||||
|
} |
||||||
|
|
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_DUPLICATE_OBJECT), |
||||||
|
errmsg("statistics \"%s\" already exist", namestr))); |
||||||
|
} |
||||||
|
|
||||||
|
rel = heap_openrv(stmt->relation, AccessExclusiveLock); |
||||||
|
relid = RelationGetRelid(rel); |
||||||
|
|
||||||
|
if (rel->rd_rel->relkind != RELKIND_RELATION && |
||||||
|
rel->rd_rel->relkind != RELKIND_MATVIEW) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE), |
||||||
|
errmsg("relation \"%s\" is not a table or materialized view", |
||||||
|
RelationGetRelationName(rel)))); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Transform column names to array of attnums. While at it, enforce some |
||||||
|
* constraints. |
||||||
|
*/ |
||||||
|
foreach(l, stmt->keys) |
||||||
|
{ |
||||||
|
char *attname = strVal(lfirst(l)); |
||||||
|
HeapTuple atttuple; |
||||||
|
Form_pg_attribute attForm; |
||||||
|
TypeCacheEntry *type; |
||||||
|
|
||||||
|
atttuple = SearchSysCacheAttName(relid, attname); |
||||||
|
if (!HeapTupleIsValid(atttuple)) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_UNDEFINED_COLUMN), |
||||||
|
errmsg("column \"%s\" referenced in statistics does not exist", |
||||||
|
attname))); |
||||||
|
attForm = (Form_pg_attribute) GETSTRUCT(atttuple); |
||||||
|
|
||||||
|
/* Disallow use of system attributes in extended stats */ |
||||||
|
if (attForm->attnum < 0) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
||||||
|
errmsg("statistic creation on system columns is not supported"))); |
||||||
|
|
||||||
|
/* Disallow data types without a less-than operator */ |
||||||
|
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); |
||||||
|
if (type->lt_opr == InvalidOid) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
||||||
|
errmsg("only scalar types can be used in extended statistics"))); |
||||||
|
|
||||||
|
/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */ |
||||||
|
if (numcols >= STATS_MAX_DIMENSIONS) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_TOO_MANY_COLUMNS), |
||||||
|
errmsg("cannot have more than %d keys in statistics", |
||||||
|
STATS_MAX_DIMENSIONS))); |
||||||
|
|
||||||
|
attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum; |
||||||
|
ReleaseSysCache(atttuple); |
||||||
|
numcols++; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that at least two columns were specified in the statement. The |
||||||
|
* upper bound was already checked in the loop above. |
||||||
|
*/ |
||||||
|
if (numcols < 2) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_TOO_MANY_COLUMNS), |
||||||
|
errmsg("statistics require at least 2 columns"))); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Sort the attnums, which makes detecting duplicies somewhat easier, and |
||||||
|
* it does not hurt (it does not affect the efficiency, unlike for |
||||||
|
* indexes, for example). |
||||||
|
*/ |
||||||
|
qsort(attnums, numcols, sizeof(int16), compare_int16); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Look for duplicities in the list of columns. The attnums are sorted so |
||||||
|
* just check consecutive elements. |
||||||
|
*/ |
||||||
|
for (i = 1; i < numcols; i++) |
||||||
|
if (attnums[i] == attnums[i - 1]) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_UNDEFINED_COLUMN), |
||||||
|
errmsg("duplicate column name in statistics definition"))); |
||||||
|
|
||||||
|
stakeys = buildint2vector(attnums, numcols); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse the statistics options. Currently only statistics types are |
||||||
|
* recognized. |
||||||
|
*/ |
||||||
|
build_ndistinct = false; |
||||||
|
foreach(l, stmt->options) |
||||||
|
{ |
||||||
|
DefElem *opt = (DefElem *) lfirst(l); |
||||||
|
|
||||||
|
if (strcmp(opt->defname, "ndistinct") == 0) |
||||||
|
{ |
||||||
|
build_ndistinct = defGetBoolean(opt); |
||||||
|
requested_type = true; |
||||||
|
} |
||||||
|
else |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_SYNTAX_ERROR), |
||||||
|
errmsg("unrecognized STATISTICS option \"%s\"", |
||||||
|
opt->defname))); |
||||||
|
} |
||||||
|
/* If no statistic type was specified, build them all. */ |
||||||
|
if (!requested_type) |
||||||
|
build_ndistinct = true; |
||||||
|
|
||||||
|
/* construct the char array of enabled statistic types */ |
||||||
|
ntypes = 0; |
||||||
|
if (build_ndistinct) |
||||||
|
types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); |
||||||
|
Assert(ntypes > 0); |
||||||
|
staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c'); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Everything seems fine, so let's build the pg_statistic_ext tuple. |
||||||
|
*/ |
||||||
|
memset(values, 0, sizeof(values)); |
||||||
|
memset(nulls, false, sizeof(nulls)); |
||||||
|
values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid); |
||||||
|
values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname); |
||||||
|
values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId); |
||||||
|
values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId()); |
||||||
|
values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys); |
||||||
|
values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled); |
||||||
|
|
||||||
|
/* no statistics build yet */ |
||||||
|
nulls[Anum_pg_statistic_ext_standistinct - 1] = true; |
||||||
|
|
||||||
|
/* insert it into pg_statistic_ext */ |
||||||
|
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock); |
||||||
|
htup = heap_form_tuple(statrel->rd_att, values, nulls); |
||||||
|
CatalogTupleInsert(statrel, htup); |
||||||
|
statoid = HeapTupleGetOid(htup); |
||||||
|
heap_freetuple(htup); |
||||||
|
heap_close(statrel, RowExclusiveLock); |
||||||
|
relation_close(rel, NoLock); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Add a dependency on a table, so that stats get dropped on DROP TABLE. |
||||||
|
*/ |
||||||
|
ObjectAddressSet(parentobject, RelationRelationId, relid); |
||||||
|
ObjectAddressSet(childobject, StatisticExtRelationId, statoid); |
||||||
|
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Also add dependency on the schema. This is required to ensure that we |
||||||
|
* drop the statistics on DROP SCHEMA. This is not handled automatically |
||||||
|
* by DROP TABLE because the statistics are not an object in the table's |
||||||
|
* schema. |
||||||
|
*/ |
||||||
|
ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); |
||||||
|
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); |
||||||
|
|
||||||
|
ObjectAddressSet(address, StatisticExtRelationId, statoid); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Invalidate relcache so that others see the new statistics. |
||||||
|
*/ |
||||||
|
CacheInvalidateRelcache(rel); |
||||||
|
|
||||||
|
return address; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Guts of statistics deletion. |
||||||
|
*/ |
||||||
|
void |
||||||
|
RemoveStatisticsById(Oid statsOid) |
||||||
|
{ |
||||||
|
Relation relation; |
||||||
|
Oid relid; |
||||||
|
Relation rel; |
||||||
|
HeapTuple tup; |
||||||
|
Form_pg_statistic_ext statext; |
||||||
|
|
||||||
|
/*
|
||||||
|
* Delete the pg_statistic_ext tuple. |
||||||
|
*/ |
||||||
|
relation = heap_open(StatisticExtRelationId, RowExclusiveLock); |
||||||
|
|
||||||
|
tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); |
||||||
|
|
||||||
|
if (!HeapTupleIsValid(tup)) /* should not happen */ |
||||||
|
elog(ERROR, "cache lookup failed for statistics %u", statsOid); |
||||||
|
|
||||||
|
statext = (Form_pg_statistic_ext) GETSTRUCT(tup); |
||||||
|
relid = statext->starelid; |
||||||
|
|
||||||
|
rel = heap_open(relid, AccessExclusiveLock); |
||||||
|
|
||||||
|
simple_heap_delete(relation, &tup->t_self); |
||||||
|
|
||||||
|
CacheInvalidateRelcache(rel); |
||||||
|
|
||||||
|
ReleaseSysCache(tup); |
||||||
|
|
||||||
|
heap_close(relation, RowExclusiveLock); |
||||||
|
heap_close(rel, NoLock); |
||||||
|
} |
@ -0,0 +1,17 @@ |
|||||||
|
#-------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Makefile--
|
||||||
|
# Makefile for statistics
|
||||||
|
#
|
||||||
|
# IDENTIFICATION
|
||||||
|
# src/backend/statistics/Makefile
|
||||||
|
#
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
|
||||||
|
subdir = src/backend/statistics
|
||||||
|
top_builddir = ../../..
|
||||||
|
include $(top_builddir)/src/Makefile.global |
||||||
|
|
||||||
|
OBJS = extended_stats.o mvdistinct.o
|
||||||
|
|
||||||
|
include $(top_srcdir)/src/backend/common.mk |
@ -0,0 +1,34 @@ |
|||||||
|
Extended statistics |
||||||
|
=================== |
||||||
|
|
||||||
|
When estimating various quantities (e.g. condition selectivities) the default |
||||||
|
approach relies on the assumption of independence. In practice that's often |
||||||
|
not true, resulting in estimation errors. |
||||||
|
|
||||||
|
Extended statistics track different types of dependencies between the columns, |
||||||
|
hopefully improving the estimates and producing better plans. |
||||||
|
|
||||||
|
Currently we only have one type of extended statistics - ndistinct |
||||||
|
coefficients, and we use it to improve estimates of grouping queries. See |
||||||
|
README.ndistinct for details. |
||||||
|
|
||||||
|
|
||||||
|
Size of sample in ANALYZE |
||||||
|
------------------------- |
||||||
|
When performing ANALYZE, the number of rows to sample is determined as |
||||||
|
|
||||||
|
(300 * statistics_target) |
||||||
|
|
||||||
|
That works reasonably well for statistics on individual columns, but perhaps |
||||||
|
it's not enough for extended statistics. Papers analyzing estimation errors |
||||||
|
all use samples proportional to the table (usually finding that 1-3% of the |
||||||
|
table is enough to build accurate stats). |
||||||
|
|
||||||
|
The requested accuracy (number of MCV items or histogram bins) should also |
||||||
|
be considered when determining the sample size, and in extended statistics |
||||||
|
those are not necessarily limited by statistics_target. |
||||||
|
|
||||||
|
This however merits further discussion, because collecting the sample is quite |
||||||
|
expensive and increasing it further would make ANALYZE even more painful. |
||||||
|
Judging by the experiments with the current implementation, the fixed size |
||||||
|
seems to work reasonably well for now, so we leave this as a future work. |
@ -0,0 +1,389 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* extended_stats.c |
||||||
|
* POSTGRES extended statistics |
||||||
|
* |
||||||
|
* Generic code supporting statistic objects created via CREATE STATISTICS. |
||||||
|
* |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/backend/statistics/extended_stats.c |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include "access/genam.h" |
||||||
|
#include "access/heapam.h" |
||||||
|
#include "access/htup_details.h" |
||||||
|
#include "catalog/indexing.h" |
||||||
|
#include "catalog/pg_collation.h" |
||||||
|
#include "catalog/pg_statistic_ext.h" |
||||||
|
#include "nodes/relation.h" |
||||||
|
#include "statistics/extended_stats_internal.h" |
||||||
|
#include "statistics/statistics.h" |
||||||
|
#include "utils/builtins.h" |
||||||
|
#include "utils/fmgroids.h" |
||||||
|
#include "utils/lsyscache.h" |
||||||
|
#include "utils/rel.h" |
||||||
|
#include "utils/syscache.h" |
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used internally to refer to an individual pg_statistic_ext entry. |
||||||
|
*/ |
||||||
|
typedef struct StatExtEntry |
||||||
|
{ |
||||||
|
Oid statOid; /* OID of pg_statistic_ext entry */ |
||||||
|
Bitmapset *columns; /* attribute numbers covered by the statistics */ |
||||||
|
List *types; /* 'char' list of enabled statistic kinds */ |
||||||
|
} StatExtEntry; |
||||||
|
|
||||||
|
|
||||||
|
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); |
||||||
|
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, |
||||||
|
int natts, VacAttrStats **vacattrstats); |
||||||
|
static void statext_store(Relation pg_stext, Oid relid, |
||||||
|
MVNDistinct *ndistinct, |
||||||
|
VacAttrStats **stats); |
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute requested extended stats, using the rows sampled for the plain |
||||||
|
* (single-column) stats. |
||||||
|
* |
||||||
|
* This fetches a list of stats from pg_statistic_ext, computes the stats |
||||||
|
* and serializes them back into the catalog (as bytea values). |
||||||
|
*/ |
||||||
|
void |
||||||
|
BuildRelationExtStatistics(Relation onerel, double totalrows, |
||||||
|
int numrows, HeapTuple *rows, |
||||||
|
int natts, VacAttrStats **vacattrstats) |
||||||
|
{ |
||||||
|
Relation pg_stext; |
||||||
|
ListCell *lc; |
||||||
|
List *stats; |
||||||
|
|
||||||
|
pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock); |
||||||
|
stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); |
||||||
|
|
||||||
|
foreach(lc, stats) |
||||||
|
{ |
||||||
|
StatExtEntry *stat = (StatExtEntry *) lfirst(lc); |
||||||
|
MVNDistinct *ndistinct = NULL; |
||||||
|
VacAttrStats **stats; |
||||||
|
ListCell *lc2; |
||||||
|
|
||||||
|
/* filter only the interesting vacattrstats records */ |
||||||
|
stats = lookup_var_attr_stats(onerel, stat->columns, |
||||||
|
natts, vacattrstats); |
||||||
|
|
||||||
|
/* check allowed number of dimensions */ |
||||||
|
Assert(bms_num_members(stat->columns) >= 2 && |
||||||
|
bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS); |
||||||
|
|
||||||
|
/* compute statistic of each type */ |
||||||
|
foreach(lc2, stat->types) |
||||||
|
{ |
||||||
|
char t = (char) lfirst_int(lc2); |
||||||
|
|
||||||
|
if (t == STATS_EXT_NDISTINCT) |
||||||
|
ndistinct = statext_ndistinct_build(totalrows, numrows, rows, |
||||||
|
stat->columns, stats); |
||||||
|
} |
||||||
|
|
||||||
|
/* store the statistics in the catalog */ |
||||||
|
statext_store(pg_stext, stat->statOid, ndistinct, stats); |
||||||
|
} |
||||||
|
|
||||||
|
heap_close(pg_stext, RowExclusiveLock); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_is_kind_built |
||||||
|
* Is this stat kind built in the given pg_statistic_ext tuple? |
||||||
|
*/ |
||||||
|
bool |
||||||
|
statext_is_kind_built(HeapTuple htup, char type) |
||||||
|
{ |
||||||
|
AttrNumber attnum; |
||||||
|
|
||||||
|
switch (type) |
||||||
|
{ |
||||||
|
case STATS_EXT_NDISTINCT: |
||||||
|
attnum = Anum_pg_statistic_ext_standistinct; |
||||||
|
break; |
||||||
|
|
||||||
|
default: |
||||||
|
elog(ERROR, "unexpected statistics type requested: %d", type); |
||||||
|
} |
||||||
|
|
||||||
|
return !heap_attisnull(htup, attnum); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Return a list (of StatExtEntry) of statistics for the given relation. |
||||||
|
*/ |
||||||
|
static List * |
||||||
|
fetch_statentries_for_relation(Relation pg_statext, Oid relid) |
||||||
|
{ |
||||||
|
SysScanDesc scan; |
||||||
|
ScanKeyData skey; |
||||||
|
HeapTuple htup; |
||||||
|
List *result = NIL; |
||||||
|
|
||||||
|
/*
|
||||||
|
* Prepare to scan pg_statistic_ext for entries having indrelid = this |
||||||
|
* rel. |
||||||
|
*/ |
||||||
|
ScanKeyInit(&skey, |
||||||
|
Anum_pg_statistic_ext_starelid, |
||||||
|
BTEqualStrategyNumber, F_OIDEQ, |
||||||
|
ObjectIdGetDatum(relid)); |
||||||
|
|
||||||
|
scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, |
||||||
|
NULL, 1, &skey); |
||||||
|
|
||||||
|
while (HeapTupleIsValid(htup = systable_getnext(scan))) |
||||||
|
{ |
||||||
|
StatExtEntry *entry; |
||||||
|
Datum datum; |
||||||
|
bool isnull; |
||||||
|
int i; |
||||||
|
ArrayType *arr; |
||||||
|
char *enabled; |
||||||
|
Form_pg_statistic_ext staForm; |
||||||
|
|
||||||
|
entry = palloc0(sizeof(StatExtEntry)); |
||||||
|
entry->statOid = HeapTupleGetOid(htup); |
||||||
|
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); |
||||||
|
for (i = 0; i < staForm->stakeys.dim1; i++) |
||||||
|
{ |
||||||
|
entry->columns = bms_add_member(entry->columns, |
||||||
|
staForm->stakeys.values[i]); |
||||||
|
} |
||||||
|
|
||||||
|
/* decode the staenabled char array into a list of chars */ |
||||||
|
datum = SysCacheGetAttr(STATEXTOID, htup, |
||||||
|
Anum_pg_statistic_ext_staenabled, &isnull); |
||||||
|
Assert(!isnull); |
||||||
|
arr = DatumGetArrayTypeP(datum); |
||||||
|
if (ARR_NDIM(arr) != 1 || |
||||||
|
ARR_HASNULL(arr) || |
||||||
|
ARR_ELEMTYPE(arr) != CHAROID) |
||||||
|
elog(ERROR, "staenabled is not a 1-D char array"); |
||||||
|
enabled = (char *) ARR_DATA_PTR(arr); |
||||||
|
for (i = 0; i < ARR_DIMS(arr)[0]; i++) |
||||||
|
{ |
||||||
|
Assert(enabled[i] == STATS_EXT_NDISTINCT); |
||||||
|
entry->types = lappend_int(entry->types, (int) enabled[i]); |
||||||
|
} |
||||||
|
|
||||||
|
result = lappend(result, entry); |
||||||
|
} |
||||||
|
|
||||||
|
systable_endscan(scan); |
||||||
|
|
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Using 'vacattrstats' of size 'natts' as input data, return a newly built |
||||||
|
* VacAttrStats array which includes only the items corresponding to attributes |
||||||
|
* indicated by 'attrs'. |
||||||
|
*/ |
||||||
|
static VacAttrStats ** |
||||||
|
lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts, |
||||||
|
VacAttrStats **vacattrstats) |
||||||
|
{ |
||||||
|
int i = 0; |
||||||
|
int x = -1; |
||||||
|
VacAttrStats **stats; |
||||||
|
Bitmapset *matched = NULL; |
||||||
|
|
||||||
|
stats = (VacAttrStats **) |
||||||
|
palloc(bms_num_members(attrs) * sizeof(VacAttrStats *)); |
||||||
|
|
||||||
|
/* lookup VacAttrStats info for the requested columns (same attnum) */ |
||||||
|
while ((x = bms_next_member(attrs, x)) >= 0) |
||||||
|
{ |
||||||
|
int j; |
||||||
|
|
||||||
|
stats[i] = NULL; |
||||||
|
for (j = 0; j < natts; j++) |
||||||
|
{ |
||||||
|
if (x == vacattrstats[j]->tupattnum) |
||||||
|
{ |
||||||
|
stats[i] = vacattrstats[j]; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (!stats[i]) |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
||||||
|
errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s", |
||||||
|
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname), |
||||||
|
get_namespace_name(rel->rd_rel->relnamespace), |
||||||
|
RelationGetRelationName(rel)), |
||||||
|
errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1", |
||||||
|
get_namespace_name(rel->rd_rel->relnamespace), |
||||||
|
RelationGetRelationName(rel), |
||||||
|
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname)))); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that we found a non-dropped column and that the attnum |
||||||
|
* matches. |
||||||
|
*/ |
||||||
|
Assert(!stats[i]->attr->attisdropped); |
||||||
|
matched = bms_add_member(matched, stats[i]->tupattnum); |
||||||
|
|
||||||
|
i++; |
||||||
|
} |
||||||
|
if (bms_subset_compare(matched, attrs) != BMS_EQUAL) |
||||||
|
elog(ERROR, "could not find all attributes in attribute stats array"); |
||||||
|
bms_free(matched); |
||||||
|
|
||||||
|
return stats; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_store |
||||||
|
* Serializes the statistics and stores them into the pg_statistic_ext tuple. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
statext_store(Relation pg_stext, Oid statOid, |
||||||
|
MVNDistinct *ndistinct, |
||||||
|
VacAttrStats **stats) |
||||||
|
{ |
||||||
|
HeapTuple stup, |
||||||
|
oldtup; |
||||||
|
Datum values[Natts_pg_statistic_ext]; |
||||||
|
bool nulls[Natts_pg_statistic_ext]; |
||||||
|
bool replaces[Natts_pg_statistic_ext]; |
||||||
|
|
||||||
|
memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool)); |
||||||
|
memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool)); |
||||||
|
memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum)); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Construct a new pg_statistic_ext tuple, replacing the calculated stats. |
||||||
|
*/ |
||||||
|
if (ndistinct != NULL) |
||||||
|
{ |
||||||
|
bytea *data = statext_ndistinct_serialize(ndistinct); |
||||||
|
|
||||||
|
nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL); |
||||||
|
values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data); |
||||||
|
} |
||||||
|
|
||||||
|
/* always replace the value (either by bytea or NULL) */ |
||||||
|
replaces[Anum_pg_statistic_ext_standistinct - 1] = true; |
||||||
|
|
||||||
|
/* there should already be a pg_statistic_ext tuple */ |
||||||
|
oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); |
||||||
|
if (!HeapTupleIsValid(oldtup)) |
||||||
|
elog(ERROR, "cache lookup failed for extended statistics %u", statOid); |
||||||
|
|
||||||
|
/* replace it */ |
||||||
|
stup = heap_modify_tuple(oldtup, |
||||||
|
RelationGetDescr(pg_stext), |
||||||
|
values, |
||||||
|
nulls, |
||||||
|
replaces); |
||||||
|
ReleaseSysCache(oldtup); |
||||||
|
CatalogTupleUpdate(pg_stext, &stup->t_self, stup); |
||||||
|
|
||||||
|
heap_freetuple(stup); |
||||||
|
} |
||||||
|
|
||||||
|
/* initialize multi-dimensional sort */ |
||||||
|
MultiSortSupport |
||||||
|
multi_sort_init(int ndims) |
||||||
|
{ |
||||||
|
MultiSortSupport mss; |
||||||
|
|
||||||
|
Assert(ndims >= 2); |
||||||
|
|
||||||
|
mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup) |
||||||
|
+sizeof(SortSupportData) * ndims); |
||||||
|
|
||||||
|
mss->ndims = ndims; |
||||||
|
|
||||||
|
return mss; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Prepare sort support info using the given sort operator |
||||||
|
* at the position 'sortdim' |
||||||
|
*/ |
||||||
|
void |
||||||
|
multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper) |
||||||
|
{ |
||||||
|
SortSupport ssup = &mss->ssup[sortdim]; |
||||||
|
|
||||||
|
ssup->ssup_cxt = CurrentMemoryContext; |
||||||
|
ssup->ssup_collation = DEFAULT_COLLATION_OID; |
||||||
|
ssup->ssup_nulls_first = false; |
||||||
|
ssup->ssup_cxt = CurrentMemoryContext; |
||||||
|
|
||||||
|
PrepareSortSupportFromOrderingOp(oper, ssup); |
||||||
|
} |
||||||
|
|
||||||
|
/* compare all the dimensions in the selected order */ |
||||||
|
int |
||||||
|
multi_sort_compare(const void *a, const void *b, void *arg) |
||||||
|
{ |
||||||
|
MultiSortSupport mss = (MultiSortSupport) arg; |
||||||
|
SortItem *ia = (SortItem *) a; |
||||||
|
SortItem *ib = (SortItem *) b; |
||||||
|
int i; |
||||||
|
|
||||||
|
for (i = 0; i < mss->ndims; i++) |
||||||
|
{ |
||||||
|
int compare; |
||||||
|
|
||||||
|
compare = ApplySortComparator(ia->values[i], ia->isnull[i], |
||||||
|
ib->values[i], ib->isnull[i], |
||||||
|
&mss->ssup[i]); |
||||||
|
|
||||||
|
if (compare != 0) |
||||||
|
return compare; |
||||||
|
} |
||||||
|
|
||||||
|
/* equal by default */ |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
/* compare selected dimension */ |
||||||
|
int |
||||||
|
multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, |
||||||
|
MultiSortSupport mss) |
||||||
|
{ |
||||||
|
return ApplySortComparator(a->values[dim], a->isnull[dim], |
||||||
|
b->values[dim], b->isnull[dim], |
||||||
|
&mss->ssup[dim]); |
||||||
|
} |
||||||
|
|
||||||
|
int |
||||||
|
multi_sort_compare_dims(int start, int end, |
||||||
|
const SortItem *a, const SortItem *b, |
||||||
|
MultiSortSupport mss) |
||||||
|
{ |
||||||
|
int dim; |
||||||
|
|
||||||
|
for (dim = start; dim <= end; dim++) |
||||||
|
{ |
||||||
|
int r = ApplySortComparator(a->values[dim], a->isnull[dim], |
||||||
|
b->values[dim], b->isnull[dim], |
||||||
|
&mss->ssup[dim]); |
||||||
|
|
||||||
|
if (r != 0) |
||||||
|
return r; |
||||||
|
} |
||||||
|
|
||||||
|
return 0; |
||||||
|
} |
@ -0,0 +1,671 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* mvdistinct.c |
||||||
|
* POSTGRES multivariate ndistinct coefficients |
||||||
|
* |
||||||
|
* Estimating number of groups in a combination of columns (e.g. for GROUP BY) |
||||||
|
* is tricky, and the estimation error is often significant. |
||||||
|
|
||||||
|
* The multivariate ndistinct coefficients address this by storing ndistinct |
||||||
|
* estimates for combinations of the user-specified columns. So for example |
||||||
|
* given a statistics object on three columns (a,b,c), this module estimates |
||||||
|
* and store n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column |
||||||
|
* estimates are already available in pg_statistic. |
||||||
|
* |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/backend/statistics/mvdistinct.c |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include <math.h> |
||||||
|
|
||||||
|
#include "access/htup_details.h" |
||||||
|
#include "catalog/pg_statistic_ext.h" |
||||||
|
#include "utils/fmgrprotos.h" |
||||||
|
#include "utils/lsyscache.h" |
||||||
|
#include "lib/stringinfo.h" |
||||||
|
#include "utils/syscache.h" |
||||||
|
#include "utils/typcache.h" |
||||||
|
#include "statistics/extended_stats_internal.h" |
||||||
|
#include "statistics/statistics.h" |
||||||
|
|
||||||
|
|
||||||
|
static double ndistinct_for_combination(double totalrows, int numrows, |
||||||
|
HeapTuple *rows, VacAttrStats **stats, |
||||||
|
int k, int *combination); |
||||||
|
static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); |
||||||
|
static int n_choose_k(int n, int k); |
||||||
|
static int num_combinations(int n); |
||||||
|
|
||||||
|
/* Combination generator API */ |
||||||
|
|
||||||
|
/* internal state for generator of k-combinations of n elements */ |
||||||
|
typedef struct CombinationGenerator |
||||||
|
{ |
||||||
|
int k; /* size of the combination */ |
||||||
|
int n; /* total number of elements */ |
||||||
|
int current; /* index of the next combination to return */ |
||||||
|
int ncombinations; /* number of combinations (size of array) */ |
||||||
|
int *combinations; /* array of pre-built combinations */ |
||||||
|
} CombinationGenerator; |
||||||
|
|
||||||
|
static CombinationGenerator *generator_init(int n, int k); |
||||||
|
static void generator_free(CombinationGenerator *state); |
||||||
|
static int *generator_next(CombinationGenerator *state); |
||||||
|
static void generate_combinations(CombinationGenerator *state); |
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_ndistinct_build |
||||||
|
* Compute ndistinct coefficient for the combination of attributes. |
||||||
|
* |
||||||
|
* This computes the ndistinct estimate using the same estimator used |
||||||
|
* in analyze.c and then computes the coefficient. |
||||||
|
*/ |
||||||
|
MVNDistinct * |
||||||
|
statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, |
||||||
|
Bitmapset *attrs, VacAttrStats **stats) |
||||||
|
{ |
||||||
|
MVNDistinct *result; |
||||||
|
int k; |
||||||
|
int itemcnt; |
||||||
|
int numattrs = bms_num_members(attrs); |
||||||
|
int numcombs = num_combinations(numattrs); |
||||||
|
|
||||||
|
result = palloc(offsetof(MVNDistinct, items) + |
||||||
|
numcombs * sizeof(MVNDistinctItem)); |
||||||
|
result->magic = STATS_NDISTINCT_MAGIC; |
||||||
|
result->type = STATS_NDISTINCT_TYPE_BASIC; |
||||||
|
result->nitems = numcombs; |
||||||
|
|
||||||
|
itemcnt = 0; |
||||||
|
for (k = 2; k <= numattrs; k++) |
||||||
|
{ |
||||||
|
int *combination; |
||||||
|
CombinationGenerator *generator; |
||||||
|
|
||||||
|
/* generate combinations of K out of N elements */ |
||||||
|
generator = generator_init(numattrs, k); |
||||||
|
|
||||||
|
while ((combination = generator_next(generator))) |
||||||
|
{ |
||||||
|
MVNDistinctItem *item = &result->items[itemcnt]; |
||||||
|
int j; |
||||||
|
|
||||||
|
item->attrs = NULL; |
||||||
|
for (j = 0; j < k; j++) |
||||||
|
item->attrs = bms_add_member(item->attrs, |
||||||
|
stats[combination[j]]->attr->attnum); |
||||||
|
item->ndistinct = |
||||||
|
ndistinct_for_combination(totalrows, numrows, rows, |
||||||
|
stats, k, combination); |
||||||
|
|
||||||
|
itemcnt++; |
||||||
|
Assert(itemcnt <= result->nitems); |
||||||
|
} |
||||||
|
|
||||||
|
generator_free(generator); |
||||||
|
} |
||||||
|
|
||||||
|
/* must consume exactly the whole output array */ |
||||||
|
Assert(itemcnt == result->nitems); |
||||||
|
|
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_ndistinct_load |
||||||
|
* Load the ndistinct value for the indicated pg_statistic_ext tuple |
||||||
|
*/ |
||||||
|
MVNDistinct * |
||||||
|
statext_ndistinct_load(Oid mvoid) |
||||||
|
{ |
||||||
|
bool isnull = false; |
||||||
|
Datum ndist; |
||||||
|
HeapTuple htup; |
||||||
|
|
||||||
|
htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid)); |
||||||
|
if (!htup) |
||||||
|
elog(ERROR, "cache lookup failed for statistics %u", mvoid); |
||||||
|
|
||||||
|
ndist = SysCacheGetAttr(STATEXTOID, htup, |
||||||
|
Anum_pg_statistic_ext_standistinct, &isnull); |
||||||
|
if (isnull) |
||||||
|
elog(ERROR, |
||||||
|
"requested statistic kind %c not yet built for statistics %u", |
||||||
|
STATS_EXT_NDISTINCT, mvoid); |
||||||
|
|
||||||
|
ReleaseSysCache(htup); |
||||||
|
|
||||||
|
return statext_ndistinct_deserialize(DatumGetByteaP(ndist)); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_ndistinct_serialize |
||||||
|
* serialize ndistinct to the on-disk bytea format |
||||||
|
*/ |
||||||
|
bytea * |
||||||
|
statext_ndistinct_serialize(MVNDistinct *ndistinct) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
bytea *output; |
||||||
|
char *tmp; |
||||||
|
Size len; |
||||||
|
|
||||||
|
Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC); |
||||||
|
Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Base size is base struct size, plus one base struct for each items, |
||||||
|
* including number of items for each. |
||||||
|
*/ |
||||||
|
len = VARHDRSZ + offsetof(MVNDistinct, items) + |
||||||
|
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int)); |
||||||
|
|
||||||
|
/* and also include space for the actual attribute numbers */ |
||||||
|
for (i = 0; i < ndistinct->nitems; i++) |
||||||
|
{ |
||||||
|
int nmembers; |
||||||
|
|
||||||
|
nmembers = bms_num_members(ndistinct->items[i].attrs); |
||||||
|
Assert(nmembers >= 2); |
||||||
|
len += sizeof(AttrNumber) * nmembers; |
||||||
|
} |
||||||
|
|
||||||
|
output = (bytea *) palloc(len); |
||||||
|
SET_VARSIZE(output, len); |
||||||
|
|
||||||
|
tmp = VARDATA(output); |
||||||
|
|
||||||
|
/* Store the base struct values */ |
||||||
|
memcpy(tmp, ndistinct, offsetof(MVNDistinct, items)); |
||||||
|
tmp += offsetof(MVNDistinct, items); |
||||||
|
|
||||||
|
/*
|
||||||
|
* store number of attributes and attribute numbers for each ndistinct |
||||||
|
* entry |
||||||
|
*/ |
||||||
|
for (i = 0; i < ndistinct->nitems; i++) |
||||||
|
{ |
||||||
|
MVNDistinctItem item = ndistinct->items[i]; |
||||||
|
int nmembers = bms_num_members(item.attrs); |
||||||
|
int x; |
||||||
|
|
||||||
|
memcpy(tmp, &item.ndistinct, sizeof(double)); |
||||||
|
tmp += sizeof(double); |
||||||
|
memcpy(tmp, &nmembers, sizeof(int)); |
||||||
|
tmp += sizeof(int); |
||||||
|
|
||||||
|
x = -1; |
||||||
|
while ((x = bms_next_member(item.attrs, x)) >= 0) |
||||||
|
{ |
||||||
|
AttrNumber value = (AttrNumber) x; |
||||||
|
|
||||||
|
memcpy(tmp, &value, sizeof(AttrNumber)); |
||||||
|
tmp += sizeof(AttrNumber); |
||||||
|
} |
||||||
|
|
||||||
|
Assert(tmp <= ((char *) output + len)); |
||||||
|
} |
||||||
|
|
||||||
|
return output; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* statext_ndistinct_deserialize |
||||||
|
* Read an on-disk bytea format MVNDistinct to in-memory format |
||||||
|
*/ |
||||||
|
MVNDistinct * |
||||||
|
statext_ndistinct_deserialize(bytea *data) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
Size expected_size; |
||||||
|
MVNDistinct *ndistinct; |
||||||
|
char *tmp; |
||||||
|
|
||||||
|
if (data == NULL) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items)) |
||||||
|
elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)", |
||||||
|
VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items)); |
||||||
|
|
||||||
|
/* read the MVNDistinct header */ |
||||||
|
ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct)); |
||||||
|
|
||||||
|
/* initialize pointer to the data part (skip the varlena header) */ |
||||||
|
tmp = VARDATA_ANY(data); |
||||||
|
|
||||||
|
/* get the header and perform basic sanity checks */ |
||||||
|
memcpy(ndistinct, tmp, offsetof(MVNDistinct, items)); |
||||||
|
tmp += offsetof(MVNDistinct, items); |
||||||
|
|
||||||
|
if (ndistinct->magic != STATS_NDISTINCT_MAGIC) |
||||||
|
elog(ERROR, "invalid ndistinct magic %d (expected %d)", |
||||||
|
ndistinct->magic, STATS_NDISTINCT_MAGIC); |
||||||
|
|
||||||
|
if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC) |
||||||
|
elog(ERROR, "invalid ndistinct type %d (expected %d)", |
||||||
|
ndistinct->type, STATS_NDISTINCT_TYPE_BASIC); |
||||||
|
|
||||||
|
Assert(ndistinct->nitems > 0); |
||||||
|
|
||||||
|
/* what minimum bytea size do we expect for those parameters */ |
||||||
|
expected_size = offsetof(MVNDistinct, items) + |
||||||
|
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + |
||||||
|
sizeof(AttrNumber) * 2); |
||||||
|
|
||||||
|
if (VARSIZE_ANY_EXHDR(data) < expected_size) |
||||||
|
elog(ERROR, "invalid dependencies size %ld (expected at least %ld)", |
||||||
|
VARSIZE_ANY_EXHDR(data), expected_size); |
||||||
|
|
||||||
|
/* allocate space for the ndistinct items */ |
||||||
|
ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) + |
||||||
|
(ndistinct->nitems * sizeof(MVNDistinctItem))); |
||||||
|
|
||||||
|
for (i = 0; i < ndistinct->nitems; i++) |
||||||
|
{ |
||||||
|
MVNDistinctItem *item = &ndistinct->items[i]; |
||||||
|
int nelems; |
||||||
|
|
||||||
|
item->attrs = NULL; |
||||||
|
|
||||||
|
/* ndistinct value */ |
||||||
|
memcpy(&item->ndistinct, tmp, sizeof(double)); |
||||||
|
tmp += sizeof(double); |
||||||
|
|
||||||
|
/* number of attributes */ |
||||||
|
memcpy(&nelems, tmp, sizeof(int)); |
||||||
|
tmp += sizeof(int); |
||||||
|
Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS)); |
||||||
|
|
||||||
|
while (nelems-- > 0) |
||||||
|
{ |
||||||
|
AttrNumber attno; |
||||||
|
|
||||||
|
memcpy(&attno, tmp, sizeof(AttrNumber)); |
||||||
|
tmp += sizeof(AttrNumber); |
||||||
|
item->attrs = bms_add_member(item->attrs, attno); |
||||||
|
} |
||||||
|
|
||||||
|
/* still within the bytea */ |
||||||
|
Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); |
||||||
|
} |
||||||
|
|
||||||
|
/* we should have consumed the whole bytea exactly */ |
||||||
|
Assert(tmp == ((char *) data + VARSIZE_ANY(data))); |
||||||
|
|
||||||
|
return ndistinct; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_ndistinct_in |
||||||
|
* input routine for type pg_ndistinct |
||||||
|
* |
||||||
|
* pg_ndistinct is real enough to be a table column, but it has no |
||||||
|
* operations of its own, and disallows input (jus like pg_node_tree). |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
pg_ndistinct_in(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
||||||
|
errmsg("cannot accept a value of type %s", "pg_ndistinct"))); |
||||||
|
|
||||||
|
PG_RETURN_VOID(); /* keep compiler quiet */ |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_ndistinct |
||||||
|
* output routine for type pg_ndistinct |
||||||
|
* |
||||||
|
* Produces a human-readable representation of the value. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
pg_ndistinct_out(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
bytea *data = PG_GETARG_BYTEA_PP(0); |
||||||
|
MVNDistinct *ndist = statext_ndistinct_deserialize(data); |
||||||
|
int i; |
||||||
|
StringInfoData str; |
||||||
|
|
||||||
|
initStringInfo(&str); |
||||||
|
appendStringInfoChar(&str, '['); |
||||||
|
|
||||||
|
for (i = 0; i < ndist->nitems; i++) |
||||||
|
{ |
||||||
|
MVNDistinctItem item = ndist->items[i]; |
||||||
|
|
||||||
|
if (i > 0) |
||||||
|
appendStringInfoString(&str, ", "); |
||||||
|
|
||||||
|
appendStringInfoChar(&str, '{'); |
||||||
|
outBitmapset(&str, item.attrs); |
||||||
|
appendStringInfo(&str, ", %f}", item.ndistinct); |
||||||
|
} |
||||||
|
|
||||||
|
appendStringInfoChar(&str, ']'); |
||||||
|
|
||||||
|
PG_RETURN_CSTRING(str.data); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_ndistinct_recv |
||||||
|
* binary input routine for type pg_ndistinct |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
pg_ndistinct_recv(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
||||||
|
errmsg("cannot accept a value of type %s", "pg_ndistinct"))); |
||||||
|
|
||||||
|
PG_RETURN_VOID(); /* keep compiler quiet */ |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_ndistinct_send |
||||||
|
* binary output routine for type pg_ndistinct |
||||||
|
* |
||||||
|
* n-distinct is serialized into a bytea value, so let's send that. |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
pg_ndistinct_send(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
return byteasend(fcinfo); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* ndistinct_for_combination |
||||||
|
* Estimates number of distinct values in a combination of columns. |
||||||
|
* |
||||||
|
* This uses the same ndistinct estimator as compute_scalar_stats() in |
||||||
|
* ANALYZE, i.e., |
||||||
|
* n*d / (n - f1 + f1*n/N) |
||||||
|
* |
||||||
|
* except that instead of values in a single column we are dealing with |
||||||
|
* combination of multiple columns. |
||||||
|
*/ |
||||||
|
static double |
||||||
|
ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, |
||||||
|
VacAttrStats **stats, int k, int *combination) |
||||||
|
{ |
||||||
|
int i, |
||||||
|
j; |
||||||
|
int f1, |
||||||
|
cnt, |
||||||
|
d; |
||||||
|
bool *isnull; |
||||||
|
Datum *values; |
||||||
|
SortItem *items; |
||||||
|
MultiSortSupport mss; |
||||||
|
|
||||||
|
mss = multi_sort_init(k); |
||||||
|
|
||||||
|
/*
|
||||||
|
* In order to determine the number of distinct elements, create separate |
||||||
|
* values[]/isnull[] arrays with all the data we have, then sort them |
||||||
|
* using the specified column combination as dimensions. We could try to |
||||||
|
* sort in place, but it'd probably be more complex and bug-prone. |
||||||
|
*/ |
||||||
|
items = (SortItem *) palloc(numrows * sizeof(SortItem)); |
||||||
|
values = (Datum *) palloc0(sizeof(Datum) * numrows * k); |
||||||
|
isnull = (bool *) palloc0(sizeof(bool) * numrows * k); |
||||||
|
|
||||||
|
for (i = 0; i < numrows; i++) |
||||||
|
{ |
||||||
|
items[i].values = &values[i * k]; |
||||||
|
items[i].isnull = &isnull[i * k]; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* For each dimension, set up sort-support and fill in the values from |
||||||
|
* the sample data. |
||||||
|
*/ |
||||||
|
for (i = 0; i < k; i++) |
||||||
|
{ |
||||||
|
VacAttrStats *colstat = stats[combination[i]]; |
||||||
|
TypeCacheEntry *type; |
||||||
|
|
||||||
|
type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); |
||||||
|
if (type->lt_opr == InvalidOid) /* shouldn't happen */ |
||||||
|
elog(ERROR, "cache lookup failed for ordering operator for type %u", |
||||||
|
colstat->attrtypid); |
||||||
|
|
||||||
|
/* prepare the sort function for this dimension */ |
||||||
|
multi_sort_add_dimension(mss, i, type->lt_opr); |
||||||
|
|
||||||
|
/* accumulate all the data for this dimension into the arrays */ |
||||||
|
for (j = 0; j < numrows; j++) |
||||||
|
{ |
||||||
|
items[j].values[i] = |
||||||
|
heap_getattr(rows[j], |
||||||
|
colstat->attr->attnum, |
||||||
|
colstat->tupDesc, |
||||||
|
&items[j].isnull[i]); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* We can sort the array now ... */ |
||||||
|
qsort_arg((void *) items, numrows, sizeof(SortItem), |
||||||
|
multi_sort_compare, mss); |
||||||
|
|
||||||
|
/* ... and count the number of distinct combinations */ |
||||||
|
|
||||||
|
f1 = 0; |
||||||
|
cnt = 1; |
||||||
|
d = 1; |
||||||
|
for (i = 1; i < numrows; i++) |
||||||
|
{ |
||||||
|
if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) |
||||||
|
{ |
||||||
|
if (cnt == 1) |
||||||
|
f1 += 1; |
||||||
|
|
||||||
|
d++; |
||||||
|
cnt = 0; |
||||||
|
} |
||||||
|
|
||||||
|
cnt += 1; |
||||||
|
} |
||||||
|
|
||||||
|
if (cnt == 1) |
||||||
|
f1 += 1; |
||||||
|
|
||||||
|
return estimate_ndistinct(totalrows, numrows, d, f1); |
||||||
|
} |
||||||
|
|
||||||
|
/* The Duj1 estimator (already used in analyze.c). */ |
||||||
|
static double |
||||||
|
estimate_ndistinct(double totalrows, int numrows, int d, int f1) |
||||||
|
{ |
||||||
|
double numer, |
||||||
|
denom, |
||||||
|
ndistinct; |
||||||
|
|
||||||
|
numer = (double) numrows * (double) d; |
||||||
|
|
||||||
|
denom = (double) (numrows - f1) + |
||||||
|
(double) f1 *(double) numrows / totalrows; |
||||||
|
|
||||||
|
ndistinct = numer / denom; |
||||||
|
|
||||||
|
/* Clamp to sane range in case of roundoff error */ |
||||||
|
if (ndistinct < (double) d) |
||||||
|
ndistinct = (double) d; |
||||||
|
|
||||||
|
if (ndistinct > totalrows) |
||||||
|
ndistinct = totalrows; |
||||||
|
|
||||||
|
return floor(ndistinct + 0.5); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* n_choose_k |
||||||
|
* computes binomial coefficients using an algorithm that is both |
||||||
|
* efficient and prevents overflows |
||||||
|
*/ |
||||||
|
static int |
||||||
|
n_choose_k(int n, int k) |
||||||
|
{ |
||||||
|
int d, |
||||||
|
r; |
||||||
|
|
||||||
|
Assert((k > 0) && (n >= k)); |
||||||
|
|
||||||
|
/* use symmetry of the binomial coefficients */ |
||||||
|
k = Min(k, n - k); |
||||||
|
|
||||||
|
r = 1; |
||||||
|
for (d = 1; d <= k; ++d) |
||||||
|
{ |
||||||
|
r *= n--; |
||||||
|
r /= d; |
||||||
|
} |
||||||
|
|
||||||
|
return r; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* num_combinations |
||||||
|
* number of combinations, excluding single-value combinations |
||||||
|
*/ |
||||||
|
static int |
||||||
|
num_combinations(int n) |
||||||
|
{ |
||||||
|
int k; |
||||||
|
int ncombs = 1; |
||||||
|
|
||||||
|
for (k = 1; k <= n; k++) |
||||||
|
ncombs *= 2; |
||||||
|
|
||||||
|
ncombs -= (n + 1); |
||||||
|
|
||||||
|
return ncombs; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* generator_init |
||||||
|
* initialize the generator of combinations |
||||||
|
* |
||||||
|
* The generator produces combinations of K elements in the interval (0..N). |
||||||
|
* We prebuild all the combinations in this method, which is simpler than |
||||||
|
* generating them on the fly. |
||||||
|
*/ |
||||||
|
static CombinationGenerator * |
||||||
|
generator_init(int n, int k) |
||||||
|
{ |
||||||
|
CombinationGenerator *state; |
||||||
|
|
||||||
|
Assert((n >= k) && (k > 0)); |
||||||
|
|
||||||
|
/* allocate the generator state as a single chunk of memory */ |
||||||
|
state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator)); |
||||||
|
|
||||||
|
state->ncombinations = n_choose_k(n, k); |
||||||
|
|
||||||
|
/* pre-allocate space for all combinations*/ |
||||||
|
state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations); |
||||||
|
|
||||||
|
state->current = 0; |
||||||
|
state->k = k; |
||||||
|
state->n = n; |
||||||
|
|
||||||
|
/* now actually pre-generate all the combinations of K elements */ |
||||||
|
generate_combinations(state); |
||||||
|
|
||||||
|
/* make sure we got the expected number of combinations */ |
||||||
|
Assert(state->current == state->ncombinations); |
||||||
|
|
||||||
|
/* reset the number, so we start with the first one */ |
||||||
|
state->current = 0; |
||||||
|
|
||||||
|
return state; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* generator_next |
||||||
|
* returns the next combination from the prebuilt list |
||||||
|
* |
||||||
|
* Returns a combination of K array indexes (0 .. N), as specified to |
||||||
|
* generator_init), or NULL when there are no more combination. |
||||||
|
*/ |
||||||
|
static int * |
||||||
|
generator_next(CombinationGenerator *state) |
||||||
|
{ |
||||||
|
if (state->current == state->ncombinations) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
return &state->combinations[state->k * state->current++]; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* generator_free |
||||||
|
* free the internal state of the generator |
||||||
|
* |
||||||
|
* Releases the generator internal state (pre-built combinations). |
||||||
|
*/ |
||||||
|
static void |
||||||
|
generator_free(CombinationGenerator *state) |
||||||
|
{ |
||||||
|
pfree(state->combinations); |
||||||
|
pfree(state); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* generate_combinations_recurse |
||||||
|
* given a prefix, generate all possible combinations |
||||||
|
* |
||||||
|
* Given a prefix (first few elements of the combination), generate following |
||||||
|
* elements recursively. We generate the combinations in lexicographic order, |
||||||
|
* which eliminates permutations of the same combination. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
generate_combinations_recurse(CombinationGenerator *state, |
||||||
|
int index, int start, int *current) |
||||||
|
{ |
||||||
|
/* If we haven't filled all the elements, simply recurse. */ |
||||||
|
if (index < state->k) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
|
||||||
|
/*
|
||||||
|
* The values have to be in ascending order, so make sure we start |
||||||
|
* with the value passed by parameter. |
||||||
|
*/ |
||||||
|
|
||||||
|
for (i = start; i < state->n; i++) |
||||||
|
{ |
||||||
|
current[index] = i; |
||||||
|
generate_combinations_recurse(state, (index + 1), (i + 1), current); |
||||||
|
} |
||||||
|
|
||||||
|
return; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
/* we got a valid combination, add it to the array */ |
||||||
|
memcpy(&state->combinations[(state->k * state->current)], |
||||||
|
current, state->k * sizeof(int)); |
||||||
|
state->current++; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* generate_combinations |
||||||
|
* generate all k-combinations of N elements |
||||||
|
*/ |
||||||
|
static void |
||||||
|
generate_combinations(CombinationGenerator *state) |
||||||
|
{ |
||||||
|
int *current = (int *) palloc0(sizeof(int) * state->k); |
||||||
|
|
||||||
|
generate_combinations_recurse(state, 0, 0, current); |
||||||
|
|
||||||
|
pfree(current); |
||||||
|
} |
@ -0,0 +1,75 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* pg_statistic_ext.h |
||||||
|
* definition of the system "extended statistic" relation (pg_statistic_ext) |
||||||
|
* along with the relation's initial contents. |
||||||
|
* |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* src/include/catalog/pg_statistic_ext.h |
||||||
|
* |
||||||
|
* NOTES |
||||||
|
* the genbki.pl script reads this file and generates .bki |
||||||
|
* information from the DATA() statements. |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#ifndef PG_STATISTIC_EXT_H |
||||||
|
#define PG_STATISTIC_EXT_H |
||||||
|
|
||||||
|
#include "catalog/genbki.h" |
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* pg_statistic_ext definition. cpp turns this into |
||||||
|
* typedef struct FormData_pg_statistic_ext |
||||||
|
* ---------------- |
||||||
|
*/ |
||||||
|
#define StatisticExtRelationId 3381 |
||||||
|
|
||||||
|
CATALOG(pg_statistic_ext,3381) |
||||||
|
{ |
||||||
|
/* These fields form the unique key for the entry: */ |
||||||
|
Oid starelid; /* relation containing attributes */ |
||||||
|
NameData staname; /* statistics name */ |
||||||
|
Oid stanamespace; /* OID of namespace containing this statistics */ |
||||||
|
Oid staowner; /* statistics owner */ |
||||||
|
|
||||||
|
/*
|
||||||
|
* variable-length fields start here, but we allow direct access to |
||||||
|
* stakeys |
||||||
|
*/ |
||||||
|
int2vector stakeys; /* array of column keys */ |
||||||
|
|
||||||
|
#ifdef CATALOG_VARLEN |
||||||
|
char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types
|
||||||
|
* requested to build */ |
||||||
|
pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */ |
||||||
|
#endif |
||||||
|
|
||||||
|
} FormData_pg_statistic_ext; |
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* Form_pg_statistic_ext corresponds to a pointer to a tuple with |
||||||
|
* the format of pg_statistic_ext relation. |
||||||
|
* ---------------- |
||||||
|
*/ |
||||||
|
typedef FormData_pg_statistic_ext *Form_pg_statistic_ext; |
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* compiler constants for pg_statistic_ext |
||||||
|
* ---------------- |
||||||
|
*/ |
||||||
|
#define Natts_pg_statistic_ext 7 |
||||||
|
#define Anum_pg_statistic_ext_starelid 1 |
||||||
|
#define Anum_pg_statistic_ext_staname 2 |
||||||
|
#define Anum_pg_statistic_ext_stanamespace 3 |
||||||
|
#define Anum_pg_statistic_ext_staowner 4 |
||||||
|
#define Anum_pg_statistic_ext_stakeys 5 |
||||||
|
#define Anum_pg_statistic_ext_staenabled 6 |
||||||
|
#define Anum_pg_statistic_ext_standistinct 7 |
||||||
|
|
||||||
|
#define STATS_EXT_NDISTINCT 'd' |
||||||
|
|
||||||
|
#endif /* PG_STATISTIC_EXT_H */ |
@ -0,0 +1,64 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* extended_stats_internal.h |
||||||
|
* POSTGRES extended statistics internal declarations |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* src/include/statistics/extended_stats_internal.h |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#ifndef EXTENDED_STATS_INTERNAL_H |
||||||
|
#define EXTENDED_STATS_INTERNAL_H |
||||||
|
|
||||||
|
#include "utils/sortsupport.h" |
||||||
|
#include "statistics/statistics.h" |
||||||
|
|
||||||
|
|
||||||
|
typedef struct |
||||||
|
{ |
||||||
|
Oid eqopr; /* '=' operator for datatype, if any */ |
||||||
|
Oid eqfunc; /* and associated function */ |
||||||
|
Oid ltopr; /* '<' operator for datatype, if any */ |
||||||
|
} StdAnalyzeData; |
||||||
|
|
||||||
|
typedef struct |
||||||
|
{ |
||||||
|
Datum value; /* a data value */ |
||||||
|
int tupno; /* position index for tuple it came from */ |
||||||
|
} ScalarItem; |
||||||
|
|
||||||
|
/* multi-sort */ |
||||||
|
typedef struct MultiSortSupportData |
||||||
|
{ |
||||||
|
int ndims; /* number of dimensions supported by the */ |
||||||
|
SortSupportData ssup[1]; /* sort support data for each dimension */ |
||||||
|
} MultiSortSupportData; |
||||||
|
|
||||||
|
typedef MultiSortSupportData *MultiSortSupport; |
||||||
|
|
||||||
|
typedef struct SortItem |
||||||
|
{ |
||||||
|
Datum *values; |
||||||
|
bool *isnull; |
||||||
|
} SortItem; |
||||||
|
|
||||||
|
extern MVNDistinct *statext_ndistinct_build(double totalrows, |
||||||
|
int numrows, HeapTuple *rows, |
||||||
|
Bitmapset *attrs, VacAttrStats **stats); |
||||||
|
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); |
||||||
|
extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); |
||||||
|
|
||||||
|
extern MultiSortSupport multi_sort_init(int ndims); |
||||||
|
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, |
||||||
|
Oid oper); |
||||||
|
extern int multi_sort_compare(const void *a, const void *b, void *arg); |
||||||
|
extern int multi_sort_compare_dim(int dim, const SortItem * a, |
||||||
|
const SortItem * b, MultiSortSupport mss); |
||||||
|
extern int multi_sort_compare_dims(int start, int end, const SortItem * a, |
||||||
|
const SortItem * b, MultiSortSupport mss); |
||||||
|
|
||||||
|
#endif /* EXTENDED_STATS_INTERNAL_H */ |
@ -0,0 +1,47 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* statistics.h |
||||||
|
* Extended statistics and selectivity estimation functions. |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* src/include/statistics/statistics.h |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#ifndef STATISTICS_H |
||||||
|
#define STATISTICS_H |
||||||
|
|
||||||
|
#include "commands/vacuum.h" |
||||||
|
|
||||||
|
#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ |
||||||
|
|
||||||
|
/* Multivariate distinct coefficients */ |
||||||
|
#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */ |
||||||
|
#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */ |
||||||
|
|
||||||
|
/* MVDistinctItem represents a single combination of columns */ |
||||||
|
typedef struct MVNDistinctItem |
||||||
|
{ |
||||||
|
double ndistinct; /* ndistinct value for this combination */ |
||||||
|
Bitmapset *attrs; /* attr numbers of items */ |
||||||
|
} MVNDistinctItem; |
||||||
|
|
||||||
|
/* A MVNDistinct object, comprising all possible combinations of columns */ |
||||||
|
typedef struct MVNDistinct |
||||||
|
{ |
||||||
|
uint32 magic; /* magic constant marker */ |
||||||
|
uint32 type; /* type of ndistinct (BASIC) */ |
||||||
|
uint32 nitems; /* number of items in the statistic */ |
||||||
|
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; |
||||||
|
} MVNDistinct; |
||||||
|
|
||||||
|
extern MVNDistinct *statext_ndistinct_load(Oid mvoid); |
||||||
|
|
||||||
|
extern void BuildRelationExtStatistics(Relation onerel, double totalrows, |
||||||
|
int numrows, HeapTuple *rows, |
||||||
|
int natts, VacAttrStats **vacattrstats); |
||||||
|
extern bool statext_is_kind_built(HeapTuple htup, char kind); |
||||||
|
|
||||||
|
#endif /* STATISTICS_H */ |
@ -0,0 +1,155 @@ |
|||||||
|
-- Generic extended statistics support |
||||||
|
-- Ensure stats are dropped sanely |
||||||
|
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
DROP STATISTICS ab1_a_b_stats; |
||||||
|
CREATE SCHEMA regress_schema_2; |
||||||
|
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
DROP STATISTICS regress_schema_2.ab1_a_b_stats; |
||||||
|
-- Ensure statistics are dropped when columns are |
||||||
|
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; |
||||||
|
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
ALTER TABLE ab1 DROP COLUMN a; |
||||||
|
\d ab1 |
||||||
|
Table "public.ab1" |
||||||
|
Column | Type | Collation | Nullable | Default |
||||||
|
--------+---------+-----------+----------+--------- |
||||||
|
b | integer | | | |
||||||
|
c | integer | | | |
||||||
|
Statistics: |
||||||
|
"public.ab1_b_c_stats" WITH (ndistinct) ON (b, c) |
||||||
|
|
||||||
|
DROP TABLE ab1; |
||||||
|
-- Ensure things work sanely with SET STATISTICS 0 |
||||||
|
CREATE TABLE ab1 (a INTEGER, b INTEGER); |
||||||
|
ALTER TABLE ab1 ALTER a SET STATISTICS 0; |
||||||
|
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
ANALYZE ab1; |
||||||
|
ERROR: extended statistics could not be collected for column "a" of relation public.ab1 |
||||||
|
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1 |
||||||
|
ALTER TABLE ab1 ALTER a SET STATISTICS -1; |
||||||
|
ANALYZE ab1; |
||||||
|
DROP TABLE ab1; |
||||||
|
-- n-distinct tests |
||||||
|
CREATE TABLE ndistinct ( |
||||||
|
filler1 TEXT, |
||||||
|
filler2 NUMERIC, |
||||||
|
a INT, |
||||||
|
b INT, |
||||||
|
filler3 DATE, |
||||||
|
c INT, |
||||||
|
d INT |
||||||
|
); |
||||||
|
-- unknown column |
||||||
|
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; |
||||||
|
ERROR: column "unknown_column" referenced in statistics does not exist |
||||||
|
-- single column |
||||||
|
CREATE STATISTICS s10 ON (a) FROM ndistinct; |
||||||
|
ERROR: statistics require at least 2 columns |
||||||
|
-- single column, duplicated |
||||||
|
CREATE STATISTICS s10 ON (a,a) FROM ndistinct; |
||||||
|
ERROR: duplicate column name in statistics definition |
||||||
|
-- two columns, one duplicated |
||||||
|
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; |
||||||
|
ERROR: duplicate column name in statistics definition |
||||||
|
-- correct command |
||||||
|
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; |
||||||
|
-- perfectly correlated groups |
||||||
|
INSERT INTO ndistinct (a, b, c, filler1) |
||||||
|
SELECT i/100, i/100, i/100, cash_words(i::money) |
||||||
|
FROM generate_series(1,10000) s(i); |
||||||
|
ANALYZE ndistinct; |
||||||
|
SELECT staenabled, standistinct |
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; |
||||||
|
staenabled | standistinct |
||||||
|
------------+------------------------------------------------------------------------------------------------ |
||||||
|
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}] |
||||||
|
(1 row) |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b; |
||||||
|
QUERY PLAN |
||||||
|
----------------------------- |
||||||
|
HashAggregate |
||||||
|
Group Key: a, b |
||||||
|
-> Seq Scan on ndistinct |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; |
||||||
|
QUERY PLAN |
||||||
|
----------------------------- |
||||||
|
HashAggregate |
||||||
|
Group Key: a, b, c |
||||||
|
-> Seq Scan on ndistinct |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; |
||||||
|
QUERY PLAN |
||||||
|
----------------------------- |
||||||
|
HashAggregate |
||||||
|
Group Key: a, b, c, d |
||||||
|
-> Seq Scan on ndistinct |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
TRUNCATE TABLE ndistinct; |
||||||
|
-- partially correlated groups |
||||||
|
INSERT INTO ndistinct (a, b, c) |
||||||
|
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); |
||||||
|
ANALYZE ndistinct; |
||||||
|
SELECT staenabled, standistinct |
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; |
||||||
|
staenabled | standistinct |
||||||
|
------------+------------------------------------------------------------------------------------------------ |
||||||
|
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}] |
||||||
|
(1 row) |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b; |
||||||
|
QUERY PLAN |
||||||
|
--------------------------------------------------------------------- |
||||||
|
HashAggregate (cost=230.00..232.01 rows=201 width=16) |
||||||
|
Group Key: a, b |
||||||
|
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; |
||||||
|
QUERY PLAN |
||||||
|
---------------------------------------------------------------------- |
||||||
|
HashAggregate (cost=255.00..257.01 rows=201 width=20) |
||||||
|
Group Key: a, b, c |
||||||
|
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; |
||||||
|
QUERY PLAN |
||||||
|
---------------------------------------------------------------------- |
||||||
|
HashAggregate (cost=280.00..290.00 rows=1000 width=24) |
||||||
|
Group Key: a, b, c, d |
||||||
|
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16) |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; |
||||||
|
QUERY PLAN |
||||||
|
---------------------------------------------------------------------- |
||||||
|
HashAggregate (cost=255.00..265.00 rows=1000 width=20) |
||||||
|
Group Key: b, c, d |
||||||
|
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d; |
||||||
|
QUERY PLAN |
||||||
|
--------------------------------------------------------------------- |
||||||
|
HashAggregate (cost=230.00..240.00 rows=1000 width=16) |
||||||
|
Group Key: a, d |
||||||
|
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) |
||||||
|
(3 rows) |
||||||
|
|
||||||
|
DROP TABLE ndistinct; |
@ -0,0 +1,102 @@ |
|||||||
|
-- Generic extended statistics support |
||||||
|
|
||||||
|
-- Ensure stats are dropped sanely |
||||||
|
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
DROP STATISTICS ab1_a_b_stats; |
||||||
|
|
||||||
|
CREATE SCHEMA regress_schema_2; |
||||||
|
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
DROP STATISTICS regress_schema_2.ab1_a_b_stats; |
||||||
|
|
||||||
|
-- Ensure statistics are dropped when columns are |
||||||
|
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; |
||||||
|
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
ALTER TABLE ab1 DROP COLUMN a; |
||||||
|
\d ab1 |
||||||
|
DROP TABLE ab1; |
||||||
|
|
||||||
|
-- Ensure things work sanely with SET STATISTICS 0 |
||||||
|
CREATE TABLE ab1 (a INTEGER, b INTEGER); |
||||||
|
ALTER TABLE ab1 ALTER a SET STATISTICS 0; |
||||||
|
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; |
||||||
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; |
||||||
|
ANALYZE ab1; |
||||||
|
ALTER TABLE ab1 ALTER a SET STATISTICS -1; |
||||||
|
ANALYZE ab1; |
||||||
|
DROP TABLE ab1; |
||||||
|
|
||||||
|
|
||||||
|
-- n-distinct tests |
||||||
|
CREATE TABLE ndistinct ( |
||||||
|
filler1 TEXT, |
||||||
|
filler2 NUMERIC, |
||||||
|
a INT, |
||||||
|
b INT, |
||||||
|
filler3 DATE, |
||||||
|
c INT, |
||||||
|
d INT |
||||||
|
); |
||||||
|
|
||||||
|
-- unknown column |
||||||
|
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; |
||||||
|
|
||||||
|
-- single column |
||||||
|
CREATE STATISTICS s10 ON (a) FROM ndistinct; |
||||||
|
|
||||||
|
-- single column, duplicated |
||||||
|
CREATE STATISTICS s10 ON (a,a) FROM ndistinct; |
||||||
|
|
||||||
|
-- two columns, one duplicated |
||||||
|
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; |
||||||
|
|
||||||
|
-- correct command |
||||||
|
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; |
||||||
|
|
||||||
|
-- perfectly correlated groups |
||||||
|
INSERT INTO ndistinct (a, b, c, filler1) |
||||||
|
SELECT i/100, i/100, i/100, cash_words(i::money) |
||||||
|
FROM generate_series(1,10000) s(i); |
||||||
|
|
||||||
|
ANALYZE ndistinct; |
||||||
|
|
||||||
|
SELECT staenabled, standistinct |
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b; |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; |
||||||
|
|
||||||
|
EXPLAIN (COSTS off) |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; |
||||||
|
|
||||||
|
TRUNCATE TABLE ndistinct; |
||||||
|
|
||||||
|
-- partially correlated groups |
||||||
|
INSERT INTO ndistinct (a, b, c) |
||||||
|
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); |
||||||
|
|
||||||
|
ANALYZE ndistinct; |
||||||
|
|
||||||
|
SELECT staenabled, standistinct |
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b; |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; |
||||||
|
|
||||||
|
EXPLAIN |
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d; |
||||||
|
|
||||||
|
DROP TABLE ndistinct; |
Loading…
Reference in new issue