Further cleanup of gistsplit.c.

After further reflection I was unconvinced that the existing coding is
guaranteed to return valid union datums in every code path for multi-column
indexes.  Fix that by forcing a gistunionsubkey() call at the end of the
recursion.  Having done that, we can remove some clearly-redundant calls
elsewhere.  This should be a little faster for multi-column indexes (since
the previous coding would uselessly do such a call for each column while
unwinding the recursion), as well as much harder to break.

Also, simplify the handling of cases where one side or the other of a
primary split contains only don't-care tuples.  The previous coding used a
very ugly hack in removeDontCares() that essentially forced one random
tuple to be treated as non-don't-care, providing a random initial choice of
seed datum for the secondary split.  It seems unlikely that that method
will give better-than-random splits.  Instead, treat such a split as
degenerate and just let the next column determine the split, the same way
that we handle fully degenerate cases where the two sides produce identical
union datums.
REL8_4_STABLE
Tom Lane 13 years ago
parent 754083fc5a
commit 06af3f9656
  1. 157
      src/backend/access/gist/gistsplit.c

@ -163,23 +163,16 @@ findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
* Remove tuples that are marked don't-cares from the tuple index array a[] * Remove tuples that are marked don't-cares from the tuple index array a[]
* of length *len. This is applied separately to the spl_left and spl_right * of length *len. This is applied separately to the spl_left and spl_right
* arrays. * arrays.
*
* Corner case: we do not wish to reduce the index array to zero length.
* (If we did, then the union key for this side would be null, and having just
* one of spl_ldatum_exists and spl_rdatum_exists be TRUE might confuse
* user-defined PickSplit methods.) To avoid that, we'll forcibly redefine
* one tuple as non-don't-care if necessary. Hence, we must be able to adjust
* caller's NumDontCare count.
*/ */
static void static void
removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare) removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
{ {
int origlen, int origlen,
curlen, newlen,
i; i;
OffsetNumber *curwpos; OffsetNumber *curwpos;
origlen = curlen = *len; origlen = newlen = *len;
curwpos = a; curwpos = a;
for (i = 0; i < origlen; i++) for (i = 0; i < origlen; i++)
{ {
@ -191,18 +184,11 @@ removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare)
*curwpos = ai; *curwpos = ai;
curwpos++; curwpos++;
} }
else if (curlen == 1)
{
/* corner case: don't let array become empty */
dontcare[ai] = FALSE; /* mark item as non-dont-care */
*NumDontCare -= 1;
i--; /* reprocess item on next iteration */
}
else else
curlen--; newlen--;
} }
*len = curlen; *len = newlen;
} }
/* /*
@ -305,8 +291,14 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
penalty2; penalty2;
/* /*
* there is only one previously defined union, so we just choose swap * There is only one previously defined union, so we just choose swap
* or not by lowest penalty * or not by lowest penalty for that side. We can only get here if a
* secondary split happened to have all NULLs in its column in the
* tuples that the outer recursion level had assigned to one side.
* (Note that the null checks in gistSplitByKey don't prevent the
* case, because they'll only be checking tuples that were considered
* don't-cares at the outer recursion level, not the tuples that went
* into determining the passed-down left and right union keys.)
*/ */
penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false); penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false); penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
@ -404,13 +396,19 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
* two vectors. * two vectors.
* *
* Returns FALSE if split is complete (there are no more index columns, or * Returns FALSE if split is complete (there are no more index columns, or
* there is no need to consider them). Note that in this case the union * there is no need to consider them because split is optimal already).
* keys for all columns must be computed here. *
* Returns TRUE and v->spl_dontcare = NULL if left and right unions of attno * Returns TRUE and v->spl_dontcare = NULL if the picksplit result is
* column are the same, so we should split on next column instead. * degenerate (all tuples seem to be don't-cares), so we should just
* disregard this column and split on the next column(s) instead.
*
* Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples * Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
* that could be relocated based on the next column(s). The don't-care * that could be relocated based on the next column(s). The don't-care
* tuples have been removed from the split and must be reinserted by caller. * tuples have been removed from the split and must be reinserted by caller.
* There is at least one non-don't-care tuple on each side of the split,
* and union keys for all columns are updated to include just those tuples.
*
* A TRUE result implies there is at least one more index column.
*/ */
static bool static bool
gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v, gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
@ -481,8 +479,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
/* /*
* If index columns remain, then consider whether we can improve the split * If index columns remain, then consider whether we can improve the split
* by using them. Even if we can't, we must compute union keys for those * by using them.
* columns before we can return FALSE.
*/ */
v->spl_dontcare = NULL; v->spl_dontcare = NULL;
@ -490,40 +487,49 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
{ {
int NumDontCare; int NumDontCare;
/*
* Make a quick check to see if left and right union keys are equal;
* if so, the split is certainly degenerate, so tell caller to
* re-split with the next column.
*/
if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum)) if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
{
/*
* Left and right union keys are equal, so we can get better split
* by considering next column.
*/
return true; return true;
}
/* /*
* Locate don't-care tuples, if any * Locate don't-care tuples, if any. If there are none, the split is
* optimal, so just fall out and return false.
*/ */
v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1)); v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno); NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
if (NumDontCare == 0) if (NumDontCare > 0)
{ {
/* /*
* There are no don't-cares, so just compute the union keys for * Remove don't-cares from spl_left[] and spl_right[].
* remaining columns and we're done.
*/ */
gistunionsubkey(giststate, itup, v); removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
} removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
else
{
/* /*
* Remove don't-cares from spl_left[] and spl_right[]. NOTE: this * If all tuples on either side were don't-cares, the split is
* could reduce NumDontCare to zero. * degenerate, and we're best off to ignore it and split on the
* next column. (We used to try to press on with a secondary
* split by forcing a random tuple on each side to be treated as
* non-don't-care, but it seems unlikely that that technique
* really gives a better result. Note that we don't want to try a
* secondary split with empty left or right primary split sides,
* because then there is no union key on that side for the
* PickSplit function to try to expand, so it can have no good
* figure of merit for what it's doing. Also note that this check
* ensures we can't produce a bogus one-side-only split in the
* NumDontCare == 1 special case below.)
*/ */
removeDontCares(sv->spl_left, &sv->spl_nleft, if (sv->spl_nleft == 0 || sv->spl_nright == 0)
v->spl_dontcare, &NumDontCare); {
removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare = NULL;
v->spl_dontcare, &NumDontCare); return true;
}
/* /*
* Recompute union keys, considering only non-don't-care tuples. * Recompute union keys, considering only non-don't-care tuples.
@ -539,7 +545,9 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
/* /*
* If there's only one don't-care tuple then we can't do a * If there's only one don't-care tuple then we can't do a
* PickSplit on it, so just choose whether to send it left or * PickSplit on it, so just choose whether to send it left or
* right by comparing penalties. * right by comparing penalties. We needed the
* gistunionsubkey step anyway so that we have appropriate
* union keys for figuring the penalties.
*/ */
OffsetNumber toMove; OffsetNumber toMove;
@ -554,13 +562,14 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
/* ... and assign it to cheaper side */ /* ... and assign it to cheaper side */
placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1); placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
/* recompute the union keys including this tuple */ /*
v->spl_dontcare = NULL; * At this point the union keys are wrong, but we don't care
gistunionsubkey(giststate, itup, v); * because we're done splitting. The outermost recursion
* level of gistSplitByKey will fix things before returning.
*/
} }
else if (NumDontCare > 1) else
return true; return true;
/* else NumDontCare is now zero; handle same as above */
} }
} }
@ -706,7 +715,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
*/ */
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE; v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
if (attno + 1 < r->rd_att->natts) if (attno + 1 < giststate->tupdesc->natts)
gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
else else
gistSplitHalf(&v->splitVector, len); gistSplitHalf(&v->splitVector, len);
@ -731,14 +740,17 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
else else
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i; v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
/* Must compute union keys for this and any following columns */ /* Compute union keys, unless outer recursion level will handle it */
v->spl_dontcare = NULL; if (attno == 0 && giststate->tupdesc->natts == 1)
gistunionsubkey(giststate, itup, v); {
v->spl_dontcare = NULL;
gistunionsubkey(giststate, itup, v);
}
} }
else else
{ {
/* /*
* all keys are not-null, so apply user-defined PickSplit method * All keys are not-null, so apply user-defined PickSplit method
*/ */
if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate)) if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
{ {
@ -746,13 +758,13 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
* Splitting on attno column is not optimal, so consider * Splitting on attno column is not optimal, so consider
* redistributing don't-care tuples according to the next column * redistributing don't-care tuples according to the next column
*/ */
Assert(attno + 1 < r->rd_att->natts); Assert(attno + 1 < giststate->tupdesc->natts);
if (v->spl_dontcare == NULL) if (v->spl_dontcare == NULL)
{ {
/* /*
* Simple case: left and right keys for attno column are * This split was actually degenerate, so ignore it altogether
* equal, so just split according to the next column. * and just split according to the next column.
*/ */
gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
} }
@ -799,10 +811,27 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1]; backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
v->splitVector = backupSplit; v->splitVector = backupSplit;
/* recompute left and right union datums */
gistunionsubkey(giststate, itup, v);
} }
} }
} }
/*
* If we're handling a multicolumn index, at the end of the recursion
* recompute the left and right union datums for all index columns. This
* makes sure we hand back correct union datums in all corner cases,
* including when we haven't processed all columns to start with, or when
* a secondary split moved "don't care" tuples from one side to the other
* (we really shouldn't assume that that didn't change the union datums).
*
* Note: when we're in an internal recursion (attno > 0), we do not worry
* about whether the union datums we return with are sensible, since
* calling levels won't care. Also, in a single-column index, we expect
* that PickSplit (or the special cases above) produced correct union
* datums.
*/
if (attno == 0 && giststate->tupdesc->natts > 1)
{
v->spl_dontcare = NULL;
gistunionsubkey(giststate, itup, v);
}
} }

Loading…
Cancel
Save