Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
8 years ago
/*-------------------------------------------------------------------------
 *
 * partbounds.h
 *
 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
 *
 * src/include/partitioning/partbounds.h
 *
 *-------------------------------------------------------------------------
 */

#ifndef PARTBOUNDS_H
#define PARTBOUNDS_H

#include "fmgr.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"
#include "partitioning/partdefs.h"
#include "utils/relcache.h"

/*
 * PartitionBoundInfoData encapsulates a set of partition bounds.  It is
 * usually associated with partitioned tables as part of their partition
 * descriptors, but may also be used to represent a virtual partitioned
 * table such as a partitioned joinrel within the planner.
 *
 * A list partition datum that is known to be NULL is never put into the
 * datums array.  Instead, it is tracked using the null_index field.
 *
 * In the case of range partitioning, ndatums will typically be far less than
 * 2 * nparts, because a partition's upper bound and the next partition's
 * lower bound are the same in most common cases, and we only store one of
 * them (the upper bound).  In the case of hash partitioning, ndatums will be
 * the same as the number of partitions.
 *
 * For range and list partitioned tables, datums is an array of datum-tuples
 * with key->partnatts datums each.  For hash partitioned tables, it is an
 * array of datum-tuples with 2 datums, modulus and remainder, corresponding
 * to a given partition.
 *
 * The datums in the datums array are arranged in increasing order as defined
 * by the functions qsort_partition_rbound_cmp(),
 * qsort_partition_list_value_cmp() and qsort_partition_hbound_cmp() for
 * range, list and hash partitioned tables respectively.  For range and list
 * partitions, this simply means that the datums in the datums array are
 * arranged in increasing order as defined by the partition key's operator
 * classes and collations.
 *
 * In the case of list partitioning, the indexes array stores one entry for
 * every datum, which is the index of the partition that accepts a given
 * datum.  In the case of range partitioning, it stores one entry per
 * distinct range datum, which is the index of the partition for which a
 * given datum is an upper bound.  In the case of hash partitioning, the
 * number of entries in the indexes array is the same as the greatest modulus
 * amongst all partitions.  For a given partition key datum-tuple, the index
 * of the partition that would accept it is given by the entry pointed to by
 * the remainder produced when the hash value of the datum-tuple is divided
 * by the greatest modulus.
 */

typedef struct PartitionBoundInfoData
{
	char		strategy;		/* hash, list or range? */
	int			ndatums;		/* Length of the following datums array */
	Datum	  **datums;
	PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
									 * NULL for hash and list partitioned
									 * tables */
	int		   *indexes;		/* Partition indexes */
	int			null_index;		/* Index of the null-accepting partition; -1
								 * if there isn't one */
	int			default_index;	/* Index of the default partition; -1 if
								 * there isn't one */
} PartitionBoundInfoData;

#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
#define partition_bound_has_default(bi) ((bi)->default_index != -1)

extern int	get_hash_partition_greatest_modulus(PartitionBoundInfo b);
extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
										   Oid *partcollation,
										   Datum *values, bool *isnull);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
									 PartitionBoundSpec *spec);
extern PartitionBoundInfo partition_bounds_create(PartitionBoundSpec **boundspecs,
												  int nparts, PartitionKey key,
												  int **mapping);
extern bool partition_bounds_equal(int partnatts, int16 *parttyplen,
								   bool *parttypbyval, PartitionBoundInfo b1,
								   PartitionBoundInfo b2);
extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src,
												PartitionKey key);
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after starting up just
the first child node, not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
7 years ago

extern bool partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts);
extern void check_new_partition_bound(char *relname, Relation parent,
									  PartitionBoundSpec *spec);
extern void check_default_partition_contents(Relation parent,
											 Relation defaultRel,
											 PartitionBoundSpec *new_spec);
extern int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
										Oid *partcollation,
										Datum *rb_datums,
										PartitionRangeDatumKind *rb_kind,
										Datum *tuple_datums,
										int n_tuple_datums);
extern int	partition_list_bsearch(FmgrInfo *partsupfunc,
								   Oid *partcollation,
								   PartitionBoundInfo boundinfo,
								   Datum value, bool *is_equal);
extern int	partition_range_datum_bsearch(FmgrInfo *partsupfunc,
										  Oid *partcollation,
										  PartitionBoundInfo boundinfo,
										  int nvalues, Datum *values,
										  bool *is_equal);
extern int	partition_hash_bsearch(PartitionBoundInfo boundinfo,
								   int modulus, int remainder);

#endif							/* PARTBOUNDS_H */