@ -36,6 +36,61 @@
# include "utils/ruleutils.h"
# include "utils/syscache.h"
/*
* When qsort ' ing partition bounds after reading from the catalog , each bound
* is represented with one of the following structs .
*/
/* One bound of a hash partition */
typedef struct PartitionHashBound
{
int modulus ;
int remainder ;
int index ;
} PartitionHashBound ;
/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
int index ;
Datum value ;
} PartitionListValue ;
/* One bound of a range partition */
typedef struct PartitionRangeBound
{
int index ;
Datum * datums ; /* range bound datums */
PartitionRangeDatumKind * kind ; /* the kind of each datum */
bool lower ; /* this is the lower (vs upper) bound */
} PartitionRangeBound ;
static int32 qsort_partition_hbound_cmp ( const void * a , const void * b ) ;
static int32 qsort_partition_list_value_cmp ( const void * a , const void * b ,
void * arg ) ;
static int32 qsort_partition_rbound_cmp ( const void * a , const void * b ,
void * arg ) ;
static PartitionBoundInfo create_hash_bounds ( List * boundspecs ,
PartitionKey key ,
int * * mapping ) ;
static PartitionBoundInfo create_list_bounds ( List * boundspecs ,
PartitionKey key ,
int * * mapping ) ;
static PartitionBoundInfo create_range_bounds ( List * boundspecs ,
PartitionKey key ,
int * * mapping ) ;
static PartitionRangeBound * make_one_partition_rbound ( PartitionKey key , int index ,
List * datums , bool lower ) ;
static int32 partition_hbound_cmp ( int modulus1 , int remainder1 , int modulus2 ,
int remainder2 ) ;
static int32 partition_rbound_cmp ( int partnatts , FmgrInfo * partsupfunc ,
Oid * partcollation , Datum * datums1 ,
PartitionRangeDatumKind * kind1 , bool lower1 ,
PartitionRangeBound * b2 ) ;
static int partition_range_bsearch ( int partnatts , FmgrInfo * partsupfunc ,
Oid * partcollation ,
PartitionBoundInfo boundinfo ,
PartitionRangeBound * probe , bool * is_equal ) ;
static int get_partition_bound_num_indexes ( PartitionBoundInfo b ) ;
static Expr * make_partition_op_expr ( PartitionKey key , int keynum ,
uint16 strategy , Expr * arg1 , Expr * arg2 ) ;
@ -92,6 +147,521 @@ get_qual_from_partbound(Relation rel, Relation parent,
return my_qual ;
}
/*
* partition_bounds_create
* Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
* nodes
*
* This function creates a PartitionBoundInfo and fills the values of its
* various members based on the input list . Importantly , ' datums ' array will
* contain Datum representation of individual bounds ( possibly after
* de - duplication as in case of range bounds ) , sorted in a canonical order
* defined by qsort_partition_ * functions of respective partitioning methods .
* ' indexes ' array will contain as many elements as there are bounds ( specific
* exceptions to this rule are listed in the function body ) , which represent
* the 0 - based canonical positions of partitions .
*
* Upon return from this function , * mapping is set to an array of
* list_length ( boundspecs ) elements , each of which maps the original index of
* a partition to its canonical index .
*
* Note : The objects returned by this function are wholly allocated in the
* current memory context .
*/
PartitionBoundInfo
partition_bounds_create ( List * boundspecs , PartitionKey key , int * * mapping )
{
int nparts = list_length ( boundspecs ) ;
int i ;
Assert ( nparts > 0 ) ;
/*
* For each partitioning method , we first convert the partition bounds
* from their parser node representation to the internal representation ,
* along with any additional preprocessing ( such as de - duplicating range
* bounds ) . Resulting bound datums are then added to the ' datums ' array
* in PartitionBoundInfo . For each datum added , an integer indicating the
* canonical partition index is added to the ' indexes ' array .
*
* For each bound , we remember its partition ' s position ( 0 - based ) in the
* original list to later map it to the canonical index .
*/
/*
* Initialize mapping array with invalid values , this is filled within
* each sub - routine below depending on the bound type .
*/
* mapping = ( int * ) palloc ( sizeof ( int ) * nparts ) ;
for ( i = 0 ; i < nparts ; i + + )
( * mapping ) [ i ] = - 1 ;
switch ( key - > strategy )
{
case PARTITION_STRATEGY_HASH :
return create_hash_bounds ( boundspecs , key , mapping ) ;
case PARTITION_STRATEGY_LIST :
return create_list_bounds ( boundspecs , key , mapping ) ;
case PARTITION_STRATEGY_RANGE :
return create_range_bounds ( boundspecs , key , mapping ) ;
default :
elog ( ERROR , " unexpected partition strategy: %d " ,
( int ) key - > strategy ) ;
break ;
}
Assert ( false ) ;
return NULL ; /* keep compiler quiet */
}
/*
* create_hash_bounds
* Create a PartitionBoundInfo for a hash partitioned table
*/
static PartitionBoundInfo
create_hash_bounds ( List * boundspecs , PartitionKey key , int * * mapping )
{
PartitionBoundInfo boundinfo ;
PartitionHashBound * * hbounds = NULL ;
ListCell * cell ;
int i ,
nparts = list_length ( boundspecs ) ;
int ndatums = 0 ;
int greatest_modulus ;
boundinfo = ( PartitionBoundInfoData * )
palloc0 ( sizeof ( PartitionBoundInfoData ) ) ;
boundinfo - > strategy = key - > strategy ;
/* No special hash partitions. */
boundinfo - > null_index = - 1 ;
boundinfo - > default_index = - 1 ;
ndatums = nparts ;
hbounds = ( PartitionHashBound * * )
palloc ( nparts * sizeof ( PartitionHashBound * ) ) ;
/* Convert from node to the internal representation */
i = 0 ;
foreach ( cell , boundspecs )
{
PartitionBoundSpec * spec = castNode ( PartitionBoundSpec , lfirst ( cell ) ) ;
if ( spec - > strategy ! = PARTITION_STRATEGY_HASH )
elog ( ERROR , " invalid strategy in partition bound spec " ) ;
hbounds [ i ] = ( PartitionHashBound * ) palloc ( sizeof ( PartitionHashBound ) ) ;
hbounds [ i ] - > modulus = spec - > modulus ;
hbounds [ i ] - > remainder = spec - > remainder ;
hbounds [ i ] - > index = i ;
i + + ;
}
/* Sort all the bounds in ascending order */
qsort ( hbounds , nparts , sizeof ( PartitionHashBound * ) ,
qsort_partition_hbound_cmp ) ;
/* After sorting, moduli are now stored in ascending order. */
greatest_modulus = hbounds [ ndatums - 1 ] - > modulus ;
boundinfo - > ndatums = ndatums ;
boundinfo - > datums = ( Datum * * ) palloc0 ( ndatums * sizeof ( Datum * ) ) ;
boundinfo - > indexes = ( int * ) palloc ( greatest_modulus * sizeof ( int ) ) ;
for ( i = 0 ; i < greatest_modulus ; i + + )
boundinfo - > indexes [ i ] = - 1 ;
/*
* For hash partitioning , there are as many datums ( modulus and remainder
* pairs ) as there are partitions . Indexes are simply values ranging from
* 0 to ( nparts - 1 ) .
*/
for ( i = 0 ; i < nparts ; i + + )
{
int modulus = hbounds [ i ] - > modulus ;
int remainder = hbounds [ i ] - > remainder ;
boundinfo - > datums [ i ] = ( Datum * ) palloc ( 2 * sizeof ( Datum ) ) ;
boundinfo - > datums [ i ] [ 0 ] = Int32GetDatum ( modulus ) ;
boundinfo - > datums [ i ] [ 1 ] = Int32GetDatum ( remainder ) ;
while ( remainder < greatest_modulus )
{
/* overlap? */
Assert ( boundinfo - > indexes [ remainder ] = = - 1 ) ;
boundinfo - > indexes [ remainder ] = i ;
remainder + = modulus ;
}
( * mapping ) [ hbounds [ i ] - > index ] = i ;
pfree ( hbounds [ i ] ) ;
}
pfree ( hbounds ) ;
return boundinfo ;
}
/*
* create_list_bounds
* Create a PartitionBoundInfo for a list partitioned table
*/
static PartitionBoundInfo
create_list_bounds ( List * boundspecs , PartitionKey key , int * * mapping )
{
PartitionBoundInfo boundinfo ;
PartitionListValue * * all_values = NULL ;
ListCell * cell ;
int i = 0 ;
int ndatums = 0 ;
int next_index = 0 ;
int default_index = - 1 ;
int null_index = - 1 ;
List * non_null_values = NIL ;
boundinfo = ( PartitionBoundInfoData * )
palloc0 ( sizeof ( PartitionBoundInfoData ) ) ;
boundinfo - > strategy = key - > strategy ;
/* Will be set correctly below. */
boundinfo - > null_index = - 1 ;
boundinfo - > default_index = - 1 ;
/* Create a unified list of non-null values across all partitions. */
foreach ( cell , boundspecs )
{
PartitionBoundSpec * spec = castNode ( PartitionBoundSpec , lfirst ( cell ) ) ;
ListCell * c ;
if ( spec - > strategy ! = PARTITION_STRATEGY_LIST )
elog ( ERROR , " invalid strategy in partition bound spec " ) ;
/*
* Note the index of the partition bound spec for the default
* partition . There ' s no datum to add to the list on non - null datums
* for this partition .
*/
if ( spec - > is_default )
{
default_index = i ;
i + + ;
continue ;
}
foreach ( c , spec - > listdatums )
{
Const * val = castNode ( Const , lfirst ( c ) ) ;
PartitionListValue * list_value = NULL ;
if ( ! val - > constisnull )
{
list_value = ( PartitionListValue * )
palloc0 ( sizeof ( PartitionListValue ) ) ;
list_value - > index = i ;
list_value - > value = val - > constvalue ;
}
else
{
/*
* Never put a null into the values array , flag instead for
* the code further down below where we construct the actual
* relcache struct .
*/
if ( null_index ! = - 1 )
elog ( ERROR , " found null more than once " ) ;
null_index = i ;
}
if ( list_value )
non_null_values = lappend ( non_null_values , list_value ) ;
}
i + + ;
}
ndatums = list_length ( non_null_values ) ;
/*
* Collect all list values in one array . Alongside the value , we also save
* the index of partition the value comes from .
*/
all_values = ( PartitionListValue * * )
palloc ( ndatums * sizeof ( PartitionListValue * ) ) ;
i = 0 ;
foreach ( cell , non_null_values )
{
PartitionListValue * src = lfirst ( cell ) ;
all_values [ i ] = ( PartitionListValue * )
palloc ( sizeof ( PartitionListValue ) ) ;
all_values [ i ] - > value = src - > value ;
all_values [ i ] - > index = src - > index ;
i + + ;
}
qsort_arg ( all_values , ndatums , sizeof ( PartitionListValue * ) ,
qsort_partition_list_value_cmp , ( void * ) key ) ;
boundinfo - > ndatums = ndatums ;
boundinfo - > datums = ( Datum * * ) palloc0 ( ndatums * sizeof ( Datum * ) ) ;
boundinfo - > indexes = ( int * ) palloc ( ndatums * sizeof ( int ) ) ;
/*
* Copy values . Canonical indexes are values ranging from 0 to ( nparts -
* 1 ) assigned to each partition such that all datums of a given partition
* receive the same value . The value for a given partition is the index of
* that partition ' s smallest datum in the all_values [ ] array .
*/
for ( i = 0 ; i < ndatums ; i + + )
{
int orig_index = all_values [ i ] - > index ;
boundinfo - > datums [ i ] = ( Datum * ) palloc ( sizeof ( Datum ) ) ;
boundinfo - > datums [ i ] [ 0 ] = datumCopy ( all_values [ i ] - > value ,
key - > parttypbyval [ 0 ] ,
key - > parttyplen [ 0 ] ) ;
/* If the old index has no mapping, assign one */
if ( ( * mapping ) [ orig_index ] = = - 1 )
( * mapping ) [ orig_index ] = next_index + + ;
boundinfo - > indexes [ i ] = ( * mapping ) [ orig_index ] ;
}
/*
* Set the canonical value for null_index , if any .
*
* It is possible that the null - accepting partition has not been assigned
* an index yet , which could happen if such partition accepts only null
* and hence not handled in the above loop which only looked at non - null
* values .
*/
if ( null_index ! = - 1 )
{
Assert ( null_index > = 0 ) ;
if ( ( * mapping ) [ null_index ] = = - 1 )
( * mapping ) [ null_index ] = next_index + + ;
boundinfo - > null_index = ( * mapping ) [ null_index ] ;
}
/* Set the canonical value for default_index, if any. */
if ( default_index ! = - 1 )
{
/*
* The default partition accepts any value not specified in the lists
* of other partitions , hence it should not get mapped index while
* assigning those for non - null datums .
*/
Assert ( default_index > = 0 ) ;
Assert ( ( * mapping ) [ default_index ] = = - 1 ) ;
( * mapping ) [ default_index ] = next_index + + ;
boundinfo - > default_index = ( * mapping ) [ default_index ] ;
}
/* All partition must now have been assigned canonical indexes. */
Assert ( next_index = = list_length ( boundspecs ) ) ;
return boundinfo ;
}
/*
* create_range_bounds
* Create a PartitionBoundInfo for a range partitioned table
*/
static PartitionBoundInfo
create_range_bounds ( List * boundspecs , PartitionKey key , int * * mapping )
{
PartitionBoundInfo boundinfo ;
PartitionRangeBound * * rbounds = NULL ;
PartitionRangeBound * * all_bounds ,
* prev ;
ListCell * cell ;
int i ,
k ,
nparts = list_length ( boundspecs ) ;
int ndatums = 0 ;
int default_index = - 1 ;
int next_index = 0 ;
boundinfo = ( PartitionBoundInfoData * )
palloc0 ( sizeof ( PartitionBoundInfoData ) ) ;
boundinfo - > strategy = key - > strategy ;
/* There is no special null-accepting range partition. */
boundinfo - > null_index = - 1 ;
/* Will be set correctly below. */
boundinfo - > default_index = - 1 ;
all_bounds = ( PartitionRangeBound * * )
palloc0 ( 2 * nparts * sizeof ( PartitionRangeBound * ) ) ;
/* Create a unified list of range bounds across all the partitions. */
i = ndatums = 0 ;
foreach ( cell , boundspecs )
{
PartitionBoundSpec * spec = castNode ( PartitionBoundSpec , lfirst ( cell ) ) ;
PartitionRangeBound * lower ,
* upper ;
if ( spec - > strategy ! = PARTITION_STRATEGY_RANGE )
elog ( ERROR , " invalid strategy in partition bound spec " ) ;
/*
* Note the index of the partition bound spec for the default
* partition . There ' s no datum to add to the all_bounds array for
* this partition .
*/
if ( spec - > is_default )
{
default_index = i + + ;
continue ;
}
lower = make_one_partition_rbound ( key , i , spec - > lowerdatums , true ) ;
upper = make_one_partition_rbound ( key , i , spec - > upperdatums , false ) ;
all_bounds [ ndatums + + ] = lower ;
all_bounds [ ndatums + + ] = upper ;
i + + ;
}
Assert ( ndatums = = nparts * 2 | |
( default_index ! = - 1 & & ndatums = = ( nparts - 1 ) * 2 ) ) ;
/* Sort all the bounds in ascending order */
qsort_arg ( all_bounds , ndatums ,
sizeof ( PartitionRangeBound * ) ,
qsort_partition_rbound_cmp ,
( void * ) key ) ;
/* Save distinct bounds from all_bounds into rbounds. */
rbounds = ( PartitionRangeBound * * )
palloc ( ndatums * sizeof ( PartitionRangeBound * ) ) ;
k = 0 ;
prev = NULL ;
for ( i = 0 ; i < ndatums ; i + + )
{
PartitionRangeBound * cur = all_bounds [ i ] ;
bool is_distinct = false ;
int j ;
/* Is the current bound distinct from the previous one? */
for ( j = 0 ; j < key - > partnatts ; j + + )
{
Datum cmpval ;
if ( prev = = NULL | | cur - > kind [ j ] ! = prev - > kind [ j ] )
{
is_distinct = true ;
break ;
}
/*
* If the bounds are both MINVALUE or MAXVALUE , stop now and treat
* them as equal , since any values after this point must be
* ignored .
*/
if ( cur - > kind [ j ] ! = PARTITION_RANGE_DATUM_VALUE )
break ;
cmpval = FunctionCall2Coll ( & key - > partsupfunc [ j ] ,
key - > partcollation [ j ] ,
cur - > datums [ j ] ,
prev - > datums [ j ] ) ;
if ( DatumGetInt32 ( cmpval ) ! = 0 )
{
is_distinct = true ;
break ;
}
}
/*
* Only if the bound is distinct save it into a temporary array , i . e ,
* rbounds which is later copied into boundinfo datums array .
*/
if ( is_distinct )
rbounds [ k + + ] = all_bounds [ i ] ;
prev = cur ;
}
/* Update ndatums to hold the count of distinct datums. */
ndatums = k ;
/*
* Add datums to boundinfo . Canonical indexes are values ranging from 0
* to nparts - 1 , assigned in that order to each partition ' s upper bound .
* For ' datums ' elements that are lower bounds , there is - 1 in the
* ' indexes ' array to signify that no partition exists for the values less
* than such a bound and greater than or equal to the previous upper
* bound .
*/
boundinfo - > ndatums = ndatums ;
boundinfo - > datums = ( Datum * * ) palloc0 ( ndatums * sizeof ( Datum * ) ) ;
boundinfo - > kind = ( PartitionRangeDatumKind * * )
palloc ( ndatums *
sizeof ( PartitionRangeDatumKind * ) ) ;
/*
* For range partitioning , an additional value of - 1 is stored as the last
* element .
*/
boundinfo - > indexes = ( int * ) palloc ( ( ndatums + 1 ) * sizeof ( int ) ) ;
for ( i = 0 ; i < ndatums ; i + + )
{
int j ;
boundinfo - > datums [ i ] = ( Datum * ) palloc ( key - > partnatts *
sizeof ( Datum ) ) ;
boundinfo - > kind [ i ] = ( PartitionRangeDatumKind * )
palloc ( key - > partnatts *
sizeof ( PartitionRangeDatumKind ) ) ;
for ( j = 0 ; j < key - > partnatts ; j + + )
{
if ( rbounds [ i ] - > kind [ j ] = = PARTITION_RANGE_DATUM_VALUE )
boundinfo - > datums [ i ] [ j ] =
datumCopy ( rbounds [ i ] - > datums [ j ] ,
key - > parttypbyval [ j ] ,
key - > parttyplen [ j ] ) ;
boundinfo - > kind [ i ] [ j ] = rbounds [ i ] - > kind [ j ] ;
}
/*
* There is no mapping for invalid indexes .
*
* Any lower bounds in the rbounds array have invalid indexes
* assigned , because the values between the previous bound ( if there
* is one ) and this ( lower ) bound are not part of the range of any
* existing partition .
*/
if ( rbounds [ i ] - > lower )
boundinfo - > indexes [ i ] = - 1 ;
else
{
int orig_index = rbounds [ i ] - > index ;
/* If the old index has no mapping, assign one */
if ( ( * mapping ) [ orig_index ] = = - 1 )
( * mapping ) [ orig_index ] = next_index + + ;
boundinfo - > indexes [ i ] = ( * mapping ) [ orig_index ] ;
}
}
/* Set the canonical value for default_index, if any. */
if ( default_index ! = - 1 )
{
Assert ( default_index > = 0 & & ( * mapping ) [ default_index ] = = - 1 ) ;
( * mapping ) [ default_index ] = next_index + + ;
boundinfo - > default_index = ( * mapping ) [ default_index ] ;
}
/* The extra -1 element. */
Assert ( i = = ndatums ) ;
boundinfo - > indexes [ i ] = - 1 ;
/* All partition must now have been assigned canonical indexes. */
Assert ( next_index = = nparts ) ;
return boundinfo ;
}
/*
* Are two partition bound collections logically equal ?
*
@ -763,7 +1333,7 @@ get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
* and a flag telling whether the bound is lower or not . Made into a function
* because there are multiple sites that want to use this facility .
*/
PartitionRangeBound *
static PartitionRangeBound *
make_one_partition_rbound ( PartitionKey key , int index , List * datums , bool lower )
{
PartitionRangeBound * bound ;
@ -819,7 +1389,7 @@ make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
* structure , which only stores the upper bound of a common boundary between
* two contiguous partitions .
*/
int32
static int32
partition_rbound_cmp ( int partnatts , FmgrInfo * partsupfunc ,
Oid * partcollation ,
Datum * datums1 , PartitionRangeDatumKind * kind1 ,
@ -914,7 +1484,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
*
* Compares modulus first , then remainder if modulus is equal .
*/
int32
static int32
partition_hbound_cmp ( int modulus1 , int remainder1 , int modulus2 , int remainder2 )
{
if ( modulus1 < modulus2 )
@ -977,7 +1547,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* * is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
int
static int
partition_range_bsearch ( int partnatts , FmgrInfo * partsupfunc ,
Oid * partcollation ,
PartitionBoundInfo boundinfo ,
@ -1101,6 +1671,55 @@ partition_hash_bsearch(PartitionBoundInfo boundinfo,
return lo ;
}
/*
* qsort_partition_hbound_cmp
*
* Hash bounds are sorted by modulus , then by remainder .
*/
static int32
qsort_partition_hbound_cmp ( const void * a , const void * b )
{
PartitionHashBound * h1 = ( * ( PartitionHashBound * const * ) a ) ;
PartitionHashBound * h2 = ( * ( PartitionHashBound * const * ) b ) ;
return partition_hbound_cmp ( h1 - > modulus , h1 - > remainder ,
h2 - > modulus , h2 - > remainder ) ;
}
/*
* qsort_partition_list_value_cmp
*
* Compare two list partition bound datums .
*/
static int32
qsort_partition_list_value_cmp ( const void * a , const void * b , void * arg )
{
Datum val1 = ( * ( const PartitionListValue * * ) a ) - > value ,
val2 = ( * ( const PartitionListValue * * ) b ) - > value ;
PartitionKey key = ( PartitionKey ) arg ;
return DatumGetInt32 ( FunctionCall2Coll ( & key - > partsupfunc [ 0 ] ,
key - > partcollation [ 0 ] ,
val1 , val2 ) ) ;
}
/*
* qsort_partition_rbound_cmp
*
* Used when sorting range bounds across all range partitions .
*/
static int32
qsort_partition_rbound_cmp ( const void * a , const void * b , void * arg )
{
PartitionRangeBound * b1 = ( * ( PartitionRangeBound * const * ) a ) ;
PartitionRangeBound * b2 = ( * ( PartitionRangeBound * const * ) b ) ;
PartitionKey key = ( PartitionKey ) arg ;
return partition_rbound_cmp ( key - > partnatts , key - > partsupfunc ,
key - > partcollation , b1 - > datums , b1 - > kind ,
b1 - > lower , b2 ) ;
}
/*
* get_partition_bound_num_indexes
*