Speed up in-memory tuplesorting.

Per recent work by Peter Geoghegan, it's significantly faster to
tuplesort on a single sortkey if ApplySortComparator is inlined into
quicksort rather than reached via a function pointer.  It's also faster
in general to have a version of quicksort which is specialized for
sorting SortTuple objects rather than objects of arbitrary size and
type.  This requires a couple of additional copies of the quicksort
logic, which in this patch are generated using a Perl script.  There
might be some benefit in adding further specializations here too,
but thus far it's not clear that those gains are worth their weight
in code footprint.
pull/1/head
Robert Haas 14 years ago
parent ac9100f8cf
commit 337b6f5ecf
  1. 4
      src/backend/Makefile
  2. 1
      src/backend/utils/sort/.gitignore
  3. 8
      src/backend/utils/sort/Makefile
  4. 232
      src/backend/utils/sort/gen_qsort_tuple.pl
  5. 68
      src/backend/utils/sort/tuplesort.c
  6. 2
      src/port/qsort.c
  7. 2
      src/port/qsort_arg.c
  8. 8
      src/tools/msvc/Solution.pm

@ -202,6 +202,7 @@ distprep:
$(MAKE) -C replication repl_gram.c repl_scanner.c
$(MAKE) -C utils fmgrtab.c fmgroids.h errcodes.h
$(MAKE) -C utils/misc guc-file.c
$(MAKE) -C utils/sort qsort_tuple.c
##########################################################################
@ -315,7 +316,8 @@ maintainer-clean: distclean
utils/fmgroids.h \
utils/fmgrtab.c \
utils/errcodes.h \
utils/misc/guc-file.c
utils/misc/guc-file.c \
utils/misc/qsort_tuple.c
##########################################################################

@ -0,0 +1 @@
/qsort_tuple.c

@ -14,4 +14,12 @@ include $(top_builddir)/src/Makefile.global
OBJS = logtape.o sortsupport.o tuplesort.o tuplestore.o
tuplesort.o: qsort_tuple.c
qsort_tuple.c: gen_qsort_tuple.pl
$(PERL) $(srcdir)/gen_qsort_tuple.pl $< > $@
include $(top_srcdir)/src/backend/common.mk
maintainer-clean:
rm -f qsort_tuple.c

@ -0,0 +1,232 @@
#!/usr/bin/perl -w
#
# gen_qsort_tuple.pl
#
# This script generates specialized versions of the quicksort algorithm for
# tuple sorting. The quicksort code is derived from the NetBSD code. The
# code generated by this script runs significantly faster than vanilla qsort
# when used to sort tuples. This speedup comes from a number of places.
# The major effects are (1) inlining simple tuple comparators is much faster
# than jumping through a function pointer and (2) swap and vecswap operations
# specialized to the particular data type of interest (in this case, SortTuple)
# are faster than the generic routines.
#
# Modifications from vanilla NetBSD source:
# Add do ... while() macro fix
# Remove __inline, _DIAGASSERTs, __P
# Remove ill-considered "swap_cnt" switch to insertion sort,
# in favor of a simple check for presorted input.
# Instead of sorting arbitrary objects, we're always sorting SortTuples
# Add CHECK_FOR_INTERRUPTS()
#
# CAUTION: if you change this file, see also qsort.c and qsort_arg.c
#
use strict;
# Template parameters consumed by emit_qsort_implementation().  They are
# file-scoped lexicals rather than sub arguments, so each must be assigned
# before every call to that sub:
#   $SUFFIX      - appended to the generated med3_/qsort_/cmp_ identifiers
#   $EXTRAARGS   - extra C parameter declarations for the generated functions
#   $EXTRAPARAMS - the matching extra arguments when those functions are called
#   $CMPPARAMS   - extra arguments passed to each cmp_<suffix>() invocation
my ($SUFFIX, $EXTRAARGS, $EXTRAPARAMS, $CMPPARAMS);

# Shared banner, license, swapfunc() and the swap/vecswap macros come first.
emit_qsort_boilerplate();

# Variant 1: qsort_tuple(), which compares through a caller-supplied
# SortTupleComparator function pointer named cmp_tuple.
($SUFFIX, $EXTRAARGS, $EXTRAPARAMS, $CMPPARAMS) = (
	'tuple',
	', SortTupleComparator cmp_tuple, Tuplesortstate *state',
	', cmp_tuple, state',
	', state',
);
emit_qsort_implementation();

# Variant 2: qsort_ssup(), which compares only the leading datum.  cmp_ssup
# is emitted as a macro around ApplySortComparator just before the
# implementation that uses it.
($SUFFIX, $EXTRAARGS, $EXTRAPARAMS, $CMPPARAMS) = (
	'ssup',
	', SortSupport ssup',
	', ssup',
	', ssup',
);
print <<'EOM';
#define cmp_ssup(a, b, ssup) \
ApplySortComparator((a)->datum1, (a)->isnull1, \
(b)->datum1, (b)->isnull1, ssup)
EOM
emit_qsort_implementation();
# emit_qsort_boilerplate
#
# Print, to the currently selected output handle, the C text shared by every
# generated quicksort variant: the "autogenerated" banner, the NetBSD/BSD
# license notice, the swapfunc() helper, and the swap()/vecswap() macros.
# Takes no arguments.  Call it before emitting any qsort implementation,
# because the generated functions use swap() and vecswap().
#
# The heredoc is single-quoted so that the literal "$NetBSD: ... $" tag and
# the C text are emitted verbatim, without Perl interpolation.
#
# The swap() macro is wrapped in do { ... } while (0) and deliberately has NO
# trailing semicolon: the semicolon is supplied at each use site.  A
# semicolon inside the macro would make "if (x) swap(a, b); else ..." a
# syntax error, which is exactly what the do/while(0) idiom exists to avoid.
sub emit_qsort_boilerplate
{
print <<'EOM';
/*
* autogenerated by src/backend/utils/sort/gen_qsort_tuple.pl, do not edit
* This file is included by tuplesort.c, rather than compiled separately.
*/
/* $NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $ */
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Qsort routine based on J. L. Bentley and M. D. McIlroy,
* "Engineering a sort function",
* Software--Practice and Experience 23 (1993) 1249-1265.
* We have modified their original by adding a check for already-sorted input,
* which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
*/
static void
swapfunc(SortTuple *a, SortTuple *b, size_t n)
{
do
{
SortTuple t = *a;
*a++ = *b;
*b++ = t;
} while (--n > 0);
}
#define swap(a, b) \
do { \
SortTuple t = *(a); \
*(a) = *(b); \
*(b) = t; \
} while (0)
#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n))
EOM
}
# emit_qsort_implementation
#
# Print one specialized quicksort, consisting of the C functions
# med3_<suffix>() and qsort_<suffix>(), to the currently selected output
# handle.  Takes no arguments; the template is filled in from the file-scoped
# variables $SUFFIX, $EXTRAARGS, $EXTRAPARAMS and $CMPPARAMS, which the
# caller must set beforehand.
#
# The generated C calls cmp_<suffix>(), swap(), vecswap(), Min() and
# CHECK_FOR_INTERRUPTS(), so those names must be visible wherever the output
# is compiled.
#
# The heredoc is unquoted on purpose: the four template variables are
# interpolated into the C text.
sub emit_qsort_implementation
{
my $c_source = <<EOM;
static SortTuple *
med3_$SUFFIX(SortTuple *a, SortTuple *b, SortTuple *c$EXTRAARGS)
{
return cmp_$SUFFIX(a, b$CMPPARAMS) < 0 ?
(cmp_$SUFFIX(b, c$CMPPARAMS) < 0 ? b :
(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? c : a))
: (cmp_$SUFFIX(b, c$CMPPARAMS) > 0 ? b :
(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? a : c));
}
static void
qsort_$SUFFIX(SortTuple *a, size_t n$EXTRAARGS)
{
SortTuple *pa,
*pb,
*pc,
*pd,
*pl,
*pm,
*pn;
int d,
r,
presorted;
loop:
CHECK_FOR_INTERRUPTS();
if (n < 7)
{
for (pm = a + 1; pm < a + n; pm++)
for (pl = pm; pl > a && cmp_$SUFFIX(pl - 1, pl$CMPPARAMS) > 0; pl--)
swap(pl, pl - 1);
return;
}
presorted = 1;
for (pm = a + 1; pm < a + n; pm++)
{
CHECK_FOR_INTERRUPTS();
if (cmp_$SUFFIX(pm - 1, pm$CMPPARAMS) > 0)
{
presorted = 0;
break;
}
}
if (presorted)
return;
pm = a + (n / 2);
if (n > 7)
{
pl = a;
pn = a + (n - 1);
if (n > 40)
{
d = (n / 8);
pl = med3_$SUFFIX(pl, pl + d, pl + 2 * d$EXTRAPARAMS);
pm = med3_$SUFFIX(pm - d, pm, pm + d$EXTRAPARAMS);
pn = med3_$SUFFIX(pn - 2 * d, pn - d, pn$EXTRAPARAMS);
}
pm = med3_$SUFFIX(pl, pm, pn$EXTRAPARAMS);
}
swap(a, pm);
pa = pb = a + 1;
pc = pd = a + (n - 1);
for (;;)
{
while (pb <= pc && (r = cmp_$SUFFIX(pb, a$CMPPARAMS)) <= 0)
{
CHECK_FOR_INTERRUPTS();
if (r == 0)
{
swap(pa, pb);
pa++;
}
pb++;
}
while (pb <= pc && (r = cmp_$SUFFIX(pc, a$CMPPARAMS)) >= 0)
{
CHECK_FOR_INTERRUPTS();
if (r == 0)
{
swap(pc, pd);
pd--;
}
pc--;
}
if (pb > pc)
break;
swap(pb, pc);
pb++;
pc--;
}
pn = a + n;
r = Min(pa - a, pb - pa);
vecswap(a, pb - r, r);
r = Min(pd - pc, pn - pd - 1);
vecswap(pb, pn - r, r);
if ((r = pb - pa) > 1)
qsort_$SUFFIX(a, r$EXTRAPARAMS);
if ((r = pd - pc) > 1)
{
/* Iterate rather than recurse to save stack space */
a = pn - r;
n = r;
goto loop;
}
/* qsort_$SUFFIX(pn - r, r$EXTRAPARAMS);*/
}
EOM
print $c_source;
}

@ -195,6 +195,9 @@ typedef enum
#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3)
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
/*
* Private state of a Tuplesort operation.
*/
@ -223,8 +226,7 @@ struct Tuplesortstate
* <0, 0, >0 according as a<b, a=b, a>b. The API must match
* qsort_arg_comparator.
*/
int (*comparetup) (const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
SortTupleComparator comparetup;
/*
* Function to copy a supplied input tuple into palloc'd space and set up
@ -363,12 +365,14 @@ struct Tuplesortstate
/* These are specific to the index_hash subcase: */
uint32 hash_mask; /* mask for sortable part of hash code */
/* This is initialized when, and only when, there's just one key. */
SortSupport onlyKey;
/*
* These variables are specific to the Datum case; they are set by
* tuplesort_begin_datum and used only by the DatumTuple routines.
*/
Oid datumType;
SortSupport datumKey;
/* we need typelen and byval in order to know how to copy the Datums. */
int datumTypeLen;
bool datumTypeByVal;
@ -492,6 +496,11 @@ static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
static void reversedirection_datum(Tuplesortstate *state);
static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);
/*
* Special version of qsort, just for SortTuple objects.
*/
#include "qsort_tuple.c"
/*
* tuplesort_begin_xxx
@ -631,6 +640,9 @@ tuplesort_begin_heap(TupleDesc tupDesc,
PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
}
if (nkeys == 1)
state->onlyKey = state->sortKeys;
MemoryContextSwitchTo(oldcontext);
return state;
@ -809,13 +821,13 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
state->datumType = datumType;
/* Prepare SortSupport data */
state->datumKey = (SortSupport) palloc0(sizeof(SortSupportData));
state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData));
state->datumKey->ssup_cxt = CurrentMemoryContext;
state->datumKey->ssup_collation = sortCollation;
state->datumKey->ssup_nulls_first = nullsFirstFlag;
state->onlyKey->ssup_cxt = CurrentMemoryContext;
state->onlyKey->ssup_collation = sortCollation;
state->onlyKey->ssup_nulls_first = nullsFirstFlag;
PrepareSortSupportFromOrderingOp(sortOperator, state->datumKey);
PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
/* lookup necessary attributes of the datum type */
get_typlenbyval(datumType, &typlen, &typbyval);
@ -1222,11 +1234,16 @@ tuplesort_performsort(Tuplesortstate *state)
* amount of memory. Just qsort 'em and we're done.
*/
if (state->memtupcount > 1)
qsort_arg((void *) state->memtuples,
state->memtupcount,
sizeof(SortTuple),
(qsort_arg_comparator) state->comparetup,
(void *) state);
{
if (state->onlyKey != NULL)
qsort_ssup(state->memtuples, state->memtupcount,
state->onlyKey);
else
qsort_tuple(state->memtuples,
state->memtupcount,
state->comparetup,
state);
}
state->current = 0;
state->eof_reached = false;
state->markpos_offset = 0;
@ -2660,9 +2677,6 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
int nkey;
int32 compare;
/* Allow interrupting long sorts */
CHECK_FOR_INTERRUPTS();
/* Compare the leading sort key */
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
@ -2804,9 +2818,6 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
int nkey;
int32 compare;
/* Allow interrupting long sorts */
CHECK_FOR_INTERRUPTS();
/* Compare the leading sort key, if it's simple */
if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
{
@ -2995,9 +3006,6 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
int nkey;
int32 compare;
/* Allow interrupting long sorts */
CHECK_FOR_INTERRUPTS();
/* Compare the leading sort key */
compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags,
scanKey->sk_collation,
@ -3102,9 +3110,6 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b,
IndexTuple tuple1;
IndexTuple tuple2;
/* Allow interrupting long sorts */
CHECK_FOR_INTERRUPTS();
/*
* Fetch hash keys and mask off bits we don't want to sort by. We know
* that the first column of the index tuple is the hash key.
@ -3231,12 +3236,9 @@ reversedirection_index_hash(Tuplesortstate *state)
static int
comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{
/* Allow interrupting long sorts */
CHECK_FOR_INTERRUPTS();
return ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
state->datumKey);
/* Not currently needed */
elog(ERROR, "comparetup_datum() should not be called");
return 0;
}
static void
@ -3328,8 +3330,8 @@ readtup_datum(Tuplesortstate *state, SortTuple *stup,
static void
reversedirection_datum(Tuplesortstate *state)
{
state->datumKey->ssup_reverse = !state->datumKey->ssup_reverse;
state->datumKey->ssup_nulls_first = !state->datumKey->ssup_nulls_first;
state->onlyKey->ssup_reverse = !state->onlyKey->ssup_reverse;
state->onlyKey->ssup_nulls_first = !state->onlyKey->ssup_nulls_first;
}
/*

@ -7,7 +7,7 @@
* Remove ill-considered "swap_cnt" switch to insertion sort,
* in favor of a simple check for presorted input.
*
* CAUTION: if you change this file, see also qsort_arg.c
* CAUTION: if you change this file, see also qsort_arg.c, gen_qsort_tuple.pl
*
* src/port/qsort.c
*/

@ -7,7 +7,7 @@
* Remove ill-considered "swap_cnt" switch to insertion sort,
* in favor of a simple check for presorted input.
*
* CAUTION: if you change this file, see also qsort.c
* CAUTION: if you change this file, see also qsort.c, gen_qsort_tuple.pl
*
* src/port/qsort_arg.c
*/

@ -287,6 +287,14 @@ s{PG_VERSION_STR "[^"]+"}{__STRINGIFY(x) #x\n#define __STRINGIFY2(z) __STRINGIFY
);
}
if (IsNewer('src\backend\utils\sort\qsort_tuple.c','src\backend\utils\sort\gen_qsort_tuple.pl'))
{
print "Generating qsort_tuple.c...\n";
system(
'perl src\backend\utils\sort\gen_qsort_tuple.pl > src\backend\utils\sort\qsort_tuple.c'
);
}
if (IsNewer('src\interfaces\libpq\libpq.rc','src\interfaces\libpq\libpq.rc.in'))
{
print "Generating libpq.rc...\n";

Loading…
Cancel
Save