mirror of https://github.com/postgres/postgres
Uriy Zhuravlev and Alexander Korotkov, reviewed by Jeff Janes, some cleanup by me.pull/14/head
parent
434873806a
commit
c6fbe6d6fb
@ -0,0 +1,341 @@ |
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* |
||||||
|
* _int_selfuncs.c |
||||||
|
* Functions for selectivity estimation of intarray operators |
||||||
|
* |
||||||
|
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group |
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California |
||||||
|
* |
||||||
|
* |
||||||
|
* IDENTIFICATION |
||||||
|
* contrib/intarray/_int_selfuncs.c |
||||||
|
* |
||||||
|
*------------------------------------------------------------------------- |
||||||
|
*/ |
||||||
|
#include "postgres.h" |
||||||
|
#include "_int.h" |
||||||
|
|
||||||
|
#include "access/htup_details.h" |
||||||
|
#include "catalog/pg_operator.h" |
||||||
|
#include "catalog/pg_statistic.h" |
||||||
|
#include "catalog/pg_type.h" |
||||||
|
#include "utils/selfuncs.h" |
||||||
|
#include "utils/syscache.h" |
||||||
|
#include "utils/lsyscache.h" |
||||||
|
#include "miscadmin.h" |
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(_int_overlap_sel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_contains_sel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_contained_sel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_overlap_joinsel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_contains_joinsel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_contained_joinsel); |
||||||
|
PG_FUNCTION_INFO_V1(_int_matchsel); |
||||||
|
|
||||||
|
Datum _int_overlap_sel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_contains_sel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_contained_sel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_overlap_joinsel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_contains_joinsel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_contained_joinsel(PG_FUNCTION_ARGS); |
||||||
|
Datum _int_matchsel(PG_FUNCTION_ARGS); |
||||||
|
|
||||||
|
|
||||||
|
/*
 * Forward declarations of file-local helpers.
 *
 * Parameter names match the definitions further down in this file
 * (mcelems / mcefreqs / nmcelems), so the prototype and the implementation
 * describe the same contract.
 */

/* Recursively estimates the selectivity of one node of a query_int tree. */
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems,
					float4 *mcefreqs, int nmcelems,
					float4 minfreq);

/* bsearch() comparator: an int32 key against one Datum array element. */
static int	compare_val_int4(const void *a, const void *b);
||||||
|
|
||||||
|
/*
|
||||||
|
* Wrappers around the default array selectivity estimation functions. |
||||||
|
* |
||||||
|
* The default array selectivity operators for the @>, && and <@ operators
||||||
|
* work fine for integer arrays. However, if we tried to just use arraycontsel |
||||||
|
* and arraycontjoinsel directly as the cost estimator functions for our
||||||
|
* operators, they would not work as intended, because they look at the |
||||||
|
* operator's OID. Our operators behave exactly like the built-in anyarray |
||||||
|
* versions, but we must tell the cost estimator functions which built-in |
||||||
|
* operators they correspond to. These wrappers just replace the operator |
||||||
|
* OID with the corresponding built-in operator's OID, and call the built-in |
||||||
|
* function. |
||||||
|
*/ |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_overlap_sel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3))); |
||||||
|
} |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_contains_sel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3))); |
||||||
|
} |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_contained_sel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3))); |
||||||
|
} |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_overlap_joinsel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3), |
||||||
|
PG_GETARG_DATUM(4))); |
||||||
|
} |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_contains_joinsel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3), |
||||||
|
PG_GETARG_DATUM(4))); |
||||||
|
} |
||||||
|
|
||||||
|
Datum |
||||||
|
_int_contained_joinsel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel, |
||||||
|
PG_GETARG_DATUM(0), |
||||||
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP), |
||||||
|
PG_GETARG_DATUM(2), |
||||||
|
PG_GETARG_DATUM(3), |
||||||
|
PG_GETARG_DATUM(4))); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* _int_matchsel -- restriction selectivity function for intarray @@ query_int |
||||||
|
*/ |
||||||
|
Datum |
||||||
|
_int_matchsel(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); |
||||||
|
|
||||||
|
List *args = (List *) PG_GETARG_POINTER(2); |
||||||
|
int varRelid = PG_GETARG_INT32(3); |
||||||
|
VariableStatData vardata; |
||||||
|
Node *other; |
||||||
|
bool varonleft; |
||||||
|
Selectivity selec; |
||||||
|
QUERYTYPE *query; |
||||||
|
Datum *mcelems = NULL; |
||||||
|
float4 *mcefreqs = NULL; |
||||||
|
int nmcelems = 0; |
||||||
|
float4 minfreq = 0.0; |
||||||
|
float4 nullfrac = 0.0; |
||||||
|
Form_pg_statistic stats; |
||||||
|
Datum *values = NULL; |
||||||
|
int nvalues = 0; |
||||||
|
float4 *numbers = NULL; |
||||||
|
int nnumbers = 0; |
||||||
|
|
||||||
|
/*
|
||||||
|
* If expression is not "variable @@ something" or "something @@ variable" |
||||||
|
* then punt and return a default estimate. |
||||||
|
*/ |
||||||
|
if (!get_restriction_variable(root, args, varRelid, |
||||||
|
&vardata, &other, &varonleft)) |
||||||
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Variable should be int[]. We don't support cases where variable is |
||||||
|
* query_int. |
||||||
|
*/ |
||||||
|
if (vardata.vartype != INT4ARRAYOID) |
||||||
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Can't do anything useful if the something is not a constant, either. |
||||||
|
*/ |
||||||
|
if (!IsA(other, Const)) |
||||||
|
{ |
||||||
|
ReleaseVariableStats(vardata); |
||||||
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* The "@@" operator is strict, so we can cope with NULL right away. |
||||||
|
*/ |
||||||
|
if (((Const *) other)->constisnull) |
||||||
|
{ |
||||||
|
ReleaseVariableStats(vardata); |
||||||
|
PG_RETURN_FLOAT8(0.0); |
||||||
|
} |
||||||
|
|
||||||
|
/* The caller made sure the const is a query, so get it now */ |
||||||
|
query = DatumGetQueryTypeP(((Const *) other)->constvalue); |
||||||
|
|
||||||
|
/* Empty query matches nothing */ |
||||||
|
if (query->size == 0) |
||||||
|
{ |
||||||
|
ReleaseVariableStats(vardata); |
||||||
|
return (Selectivity) 0.0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the statistics for the intarray column. |
||||||
|
* |
||||||
|
* We're interested in the Most-Common-Elements list, and the NULL |
||||||
|
* fraction. |
||||||
|
*/ |
||||||
|
if (HeapTupleIsValid(vardata.statsTuple)) |
||||||
|
{ |
||||||
|
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); |
||||||
|
nullfrac = stats->stanullfrac; |
||||||
|
|
||||||
|
/*
|
||||||
|
* For an int4 array, the default array type analyze function will |
||||||
|
* collect a Most Common Elements list, which is an array of int4s. |
||||||
|
*/ |
||||||
|
if (get_attstatsslot(vardata.statsTuple, |
||||||
|
INT4OID, -1, |
||||||
|
STATISTIC_KIND_MCELEM, InvalidOid, |
||||||
|
NULL, |
||||||
|
&values, &nvalues, |
||||||
|
&numbers, &nnumbers)) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* There should be three more Numbers than Values, because the |
||||||
|
* last three (for intarray) cells are taken for minimal, maximal |
||||||
|
* and nulls frequency. Punt if not. |
||||||
|
*/ |
||||||
|
if (nnumbers == nvalues + 3) |
||||||
|
{ |
||||||
|
/* Grab the lowest frequency. */ |
||||||
|
minfreq = numbers[nnumbers - (nnumbers - nvalues)]; |
||||||
|
|
||||||
|
mcelems = values; |
||||||
|
mcefreqs = numbers; |
||||||
|
nmcelems = nvalues; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Process the logical expression in the query, using the stats */ |
||||||
|
selec = int_query_opr_selec(GETQUERY(query) + query->size - 1, |
||||||
|
mcelems, mcefreqs, nmcelems, minfreq); |
||||||
|
|
||||||
|
/* MCE stats count only non-null rows, so adjust for null rows. */ |
||||||
|
selec *= (1.0 - nullfrac); |
||||||
|
|
||||||
|
free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers); |
||||||
|
ReleaseVariableStats(vardata); |
||||||
|
|
||||||
|
CLAMP_PROBABILITY(selec); |
||||||
|
|
||||||
|
PG_RETURN_FLOAT8((float8) selec); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Estimate selectivity of single intquery operator |
||||||
|
*/ |
||||||
|
static Selectivity |
||||||
|
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs, |
||||||
|
int nmcelems, float4 minfreq) |
||||||
|
{ |
||||||
|
Selectivity selec; |
||||||
|
|
||||||
|
/* since this function recurses, it could be driven to stack overflow */ |
||||||
|
check_stack_depth(); |
||||||
|
|
||||||
|
if (item->type == VAL) |
||||||
|
{ |
||||||
|
Datum *searchres; |
||||||
|
|
||||||
|
if (mcelems == NULL) |
||||||
|
return (Selectivity) DEFAULT_EQ_SEL; |
||||||
|
|
||||||
|
searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems, |
||||||
|
sizeof(Datum), compare_val_int4); |
||||||
|
if (searchres) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* The element is in MCELEM. Return precise selectivity (or at |
||||||
|
* least as precise as ANALYZE could find out). |
||||||
|
*/ |
||||||
|
selec = mcefreqs[searchres - mcelems]; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* The element is not in MCELEM. Punt, but assume that the |
||||||
|
* selectivity cannot be more than minfreq / 2. |
||||||
|
*/ |
||||||
|
selec = Min(DEFAULT_EQ_SEL, minfreq / 2); |
||||||
|
} |
||||||
|
} |
||||||
|
else if (item->type == OPR) |
||||||
|
{ |
||||||
|
/* Current query node is an operator */ |
||||||
|
Selectivity s1, |
||||||
|
s2; |
||||||
|
|
||||||
|
s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems, |
||||||
|
minfreq); |
||||||
|
switch (item->val) |
||||||
|
{ |
||||||
|
case (int32) '!': |
||||||
|
selec = 1.0 - s1; |
||||||
|
break; |
||||||
|
|
||||||
|
case (int32) '&': |
||||||
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs, |
||||||
|
nmcelems, minfreq); |
||||||
|
selec = s1 * s2; |
||||||
|
break; |
||||||
|
|
||||||
|
case (int32) '|': |
||||||
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs, |
||||||
|
nmcelems, minfreq); |
||||||
|
selec = s1 + s2 - s1 * s2; |
||||||
|
break; |
||||||
|
|
||||||
|
default: |
||||||
|
elog(ERROR, "unrecognized operator: %d", item->val); |
||||||
|
selec = 0; /* keep compiler quiet */ |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
elog(ERROR, "unrecognized int query item type: %u", item->type); |
||||||
|
selec = 0; /* keep compiler quiet */ |
||||||
|
} |
||||||
|
|
||||||
|
/* Clamp intermediate results to stay sane despite roundoff error */ |
||||||
|
CLAMP_PROBABILITY(selec); |
||||||
|
|
||||||
|
return selec; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Comparison function for binary search in mcelem array. |
||||||
|
*/ |
||||||
|
static int |
||||||
|
compare_val_int4(const void *a, const void *b) |
||||||
|
{ |
||||||
|
int32 key = *(int32 *) a; |
||||||
|
const Datum *t = (const Datum *) b; |
||||||
|
|
||||||
|
return key - DatumGetInt32(*t); |
||||||
|
} |
@ -0,0 +1,49 @@ |
|||||||
|
/* contrib/intarray/intarray--1.0--1.1.sql */

-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION intarray UPDATE TO '1.1'" to load this file. \quit

--
-- Step 1: create the selectivity estimator functions.
--

-- restriction estimator for matching an int[] against a query_int
CREATE FUNCTION _int_matchsel(internal, oid, internal, integer)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

-- restriction estimators for the int[] / int[] operators
CREATE FUNCTION _int_overlap_sel(internal, oid, internal, integer)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION _int_contains_sel(internal, oid, internal, integer)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION _int_contained_sel(internal, oid, internal, integer)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

-- join estimators for the int[] / int[] operators
CREATE FUNCTION _int_overlap_joinsel(internal, oid, internal, smallint, internal)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION _int_contains_joinsel(internal, oid, internal, smallint, internal)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION _int_contained_joinsel(internal, oid, internal, smallint, internal)
    RETURNS float8
    LANGUAGE C STRICT STABLE
    AS 'MODULE_PATHNAME';

--
-- Step 2: attach the estimators to the existing operators.
--

-- query matching, in both argument orders
ALTER OPERATOR @@ (_int4, query_int) SET (RESTRICT = _int_matchsel);
ALTER OPERATOR ~~ (query_int, _int4) SET (RESTRICT = _int_matchsel);

-- overlap / contains / contained
ALTER OPERATOR && (_int4, _int4) SET (RESTRICT = _int_overlap_sel, JOIN = _int_overlap_joinsel);
ALTER OPERATOR @> (_int4, _int4) SET (RESTRICT = _int_contains_sel, JOIN = _int_contains_joinsel);
ALTER OPERATOR <@ (_int4, _int4) SET (RESTRICT = _int_contained_sel, JOIN = _int_contained_joinsel);

-- the @ and ~ spellings use the same estimators as @> and <@
ALTER OPERATOR @ (_int4, _int4) SET (RESTRICT = _int_contains_sel, JOIN = _int_contains_joinsel);
ALTER OPERATOR ~ (_int4, _int4) SET (RESTRICT = _int_contained_sel, JOIN = _int_contained_joinsel);
@ -1,5 +1,5 @@ |
|||||||
# intarray extension |
# intarray extension |
||||||
comment = 'functions, operators, and index support for 1-D arrays of integers' |
comment = 'functions, operators, and index support for 1-D arrays of integers' |
||||||
default_version = '1.0' |
default_version = '1.1' |
||||||
module_pathname = '$libdir/_int' |
module_pathname = '$libdir/_int' |
||||||
relocatable = true |
relocatable = true |
||||||
|
Loading…
Reference in new issue