postgres/contrib/intarray/_int_selfuncs.c

/*-------------------------------------------------------------------------
*
* _int_selfuncs.c
* Functions for selectivity estimation of intarray operators
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* contrib/intarray/_int_selfuncs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "_int.h"
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
PG_FUNCTION_INFO_V1(_int_overlap_sel);
PG_FUNCTION_INFO_V1(_int_contains_sel);
PG_FUNCTION_INFO_V1(_int_contained_sel);
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
PG_FUNCTION_INFO_V1(_int_matchsel);
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
int nmcelems, float4 minfreq);
static int compare_val_int4(const void *a, const void *b);
/*
* Wrappers around the default array selectivity estimation functions.
*
* The default array selectivity operators for the @>, && and @< operators
* work fine for integer arrays. However, if we tried to just use arraycontsel
* and arraycontjoinsel directly as the cost estimator functions for our
* operators, they would not work as intended, because they look at the
* operator's OID. Our operators behave exactly like the built-in anyarray
* versions, but we must tell the cost estimator functions which built-in
* operators they correspond to. These wrappers just replace the operator
* OID with the corresponding built-in operator's OID, and call the built-in
* function.
*/
Datum
_int_overlap_sel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3)));
}
Datum
_int_contains_sel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3)));
}
Datum
_int_contained_sel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3)));
}
Datum
_int_overlap_joinsel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3),
PG_GETARG_DATUM(4)));
}
Datum
_int_contains_joinsel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3),
PG_GETARG_DATUM(4)));
}
Datum
_int_contained_joinsel(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
PG_GETARG_DATUM(0),
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
PG_GETARG_DATUM(2),
PG_GETARG_DATUM(3),
PG_GETARG_DATUM(4)));
}
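/*
 * For context: the extension's SQL install script attaches these
 * estimators to the intarray operators through the RESTRICT and JOIN
 * clauses of CREATE OPERATOR, along the lines of the paraphrased sketch
 * below (the intarray SQL scripts contain the authoritative definitions):
 *
 *	CREATE OPERATOR && (
 *		LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_overlap,
 *		COMMUTATOR = '&&',
 *		RESTRICT = _int_overlap_sel, JOIN = _int_overlap_joinsel
 *	);
 */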
/*
* _int_matchsel -- restriction selectivity function for intarray @@ query_int
*/
Datum
_int_matchsel(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
Selectivity selec;
QUERYTYPE *query;
Datum *mcelems = NULL;
float4 *mcefreqs = NULL;
int nmcelems = 0;
float4 minfreq = 0.0;
float4 nullfrac = 0.0;
AttStatsSlot sslot;
/*
* If expression is not "variable @@ something" or "something @@ variable"
* then punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
/*
* Variable should be int[]. We don't support cases where variable is
* query_int.
*/
if (vardata.vartype != INT4ARRAYOID)
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
/*
* Can't do anything useful if the something is not a constant, either.
*/
if (!IsA(other, Const))
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
}
/*
* The "@@" operator is strict, so we can cope with NULL right away.
*/
if (((Const *) other)->constisnull)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
/* The caller made sure the const is a query, so get it now */
query = DatumGetQueryTypeP(((Const *) other)->constvalue);
/* Empty query matches nothing */
if (query->size == 0)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
/*
* Get the statistics for the intarray column.
*
* We're interested in the Most-Common-Elements list, and the NULL
* fraction.
*/
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
nullfrac = stats->stanullfrac;
/*
* For an int4 array, the default array type analyze function will
* collect a Most Common Elements list, which is an array of int4s.
*/
if (get_attstatsslot(&sslot, vardata.statsTuple,
STATISTIC_KIND_MCELEM, InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
Assert(sslot.valuetype == INT4OID);
/*
* There should be three more Numbers than Values, because the
* last three (for intarray) cells are taken for minimal, maximal
* and nulls frequency. Punt if not.
*/
if (sslot.nnumbers == sslot.nvalues + 3)
{
/* Grab the minimal MCE frequency. */
minfreq = sslot.numbers[sslot.nvalues];
mcelems = sslot.values;
mcefreqs = sslot.numbers;
nmcelems = sslot.nvalues;
}
}
}
else
memset(&sslot, 0, sizeof(sslot));
/* Process the logical expression in the query, using the stats */
selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
mcelems, mcefreqs, nmcelems, minfreq);
/* MCE stats count only non-null rows, so adjust for null rows. */
selec *= (1.0 - nullfrac);
free_attstatsslot(&sslot);
ReleaseVariableStats(vardata);
CLAMP_PROBABILITY(selec);
PG_RETURN_FLOAT8((float8) selec);
}
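/*
 * Illustration (hypothetical table and data, not from this file): the
 * effect of _int_matchsel shows up in the planner's row estimate once the
 * column has been analyzed, e.g.
 *
 *	CREATE EXTENSION intarray;
 *	CREATE TABLE docs (tags int[]);
 *	-- populate docs, run ANALYZE docs, then:
 *	EXPLAIN SELECT * FROM docs WHERE tags @@ '1&(2|3)'::query_int;
 *
 * The estimated row count of the scan on docs reflects the selectivity
 * computed above from the column's most-common-elements statistics.
 */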
/*
* Estimate selectivity of single intquery operator
*/
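/*
 * The query items form a postfix sequence with the root node stored last,
 * which is why the caller starts at GETQUERY(query) + query->size - 1.
 * For an OPR node, one operand is reached via "item - 1" and the other
 * via "item + item->left" (a relative offset kept in the ITEM), so the
 * recursion below descends through the whole expression tree.
 */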
static Selectivity
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
int nmcelems, float4 minfreq)
{
Selectivity selec;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (item->type == VAL)
{
Datum *searchres;
if (mcelems == NULL)
return (Selectivity) DEFAULT_EQ_SEL;
searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
sizeof(Datum), compare_val_int4);
if (searchres)
{
/*
* The element is in MCELEM. Return precise selectivity (or at
* least as precise as ANALYZE could find out).
*/
selec = mcefreqs[searchres - mcelems];
}
else
{
/*
* The element is not in MCELEM. Estimate its frequency as half
* that of the least-frequent MCE. (We know it cannot be more
* than minfreq, and it could be a great deal less. Half seems
* like a good compromise.) For probably-historical reasons,
* clamp to not more than DEFAULT_EQ_SEL.
*/
selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
}
}
else if (item->type == OPR)
{
/* Current query node is an operator */
Selectivity s1,
s2;
s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
minfreq);
switch (item->val)
{
case (int32) '!':
selec = 1.0 - s1;
break;
case (int32) '&':
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
nmcelems, minfreq);
selec = s1 * s2;
break;
case (int32) '|':
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
nmcelems, minfreq);
selec = s1 + s2 - s1 * s2;
break;
default:
elog(ERROR, "unrecognized operator: %d", item->val);
selec = 0; /* keep compiler quiet */
break;
}
}
else
{
elog(ERROR, "unrecognized int query item type: %u", item->type);
selec = 0; /* keep compiler quiet */
}
/* Clamp intermediate results to stay sane despite roundoff error */
CLAMP_PROBABILITY(selec);
return selec;
}
/*
* Comparison function for binary search in mcelem array.
*/
static int
compare_val_int4(const void *a, const void *b)
{
int32 key = *(int32 *) a;
const Datum *t = (const Datum *) b;
return key - DatumGetInt32(*t);
}