|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
|
*
|
|
|
|
|
* gininsert.c
|
|
|
|
|
* insert routines for the postgres inverted index access method.
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
|
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
|
*
|
|
|
|
|
* IDENTIFICATION
|
|
|
|
|
* src/backend/access/gin/gininsert.c
|
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
#include "access/gin_private.h"
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
#include "access/gin_tuple.h"
|
|
|
|
|
#include "access/parallel.h"
|
|
|
|
|
#include "access/table.h"
|
|
|
|
|
#include "access/tableam.h"
|
|
|
|
|
#include "access/xloginsert.h"
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
#include "catalog/index.h"
|
|
|
|
|
#include "catalog/pg_collation.h"
|
|
|
|
|
#include "commands/progress.h"
|
|
|
|
|
#include "miscadmin.h"
|
|
|
|
|
#include "nodes/execnodes.h"
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
#include "pgstat.h"
|
|
|
|
|
#include "storage/bufmgr.h"
|
|
|
|
|
#include "storage/predicate.h"
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
#include "tcop/tcopprot.h" /* pgrminclude ignore */
|
|
|
|
|
#include "utils/datum.h"
|
|
|
|
|
#include "utils/memutils.h"
|
|
|
|
|
#include "utils/rel.h"
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Magic numbers for parallel state sharing */
|
|
|
|
|
#define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001)
|
|
|
|
|
#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002)
|
|
|
|
|
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003)
|
|
|
|
|
#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004)
|
|
|
|
|
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Status for index builds performed in parallel. This is allocated in a
|
|
|
|
|
* dynamic shared memory segment.
|
|
|
|
|
*/
|
|
|
|
|
typedef struct GinBuildShared
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* These fields are not modified during the build. They primarily exist
|
|
|
|
|
* for the benefit of worker processes that need to create state
|
|
|
|
|
* corresponding to that used by the leader.
|
|
|
|
|
*/
|
|
|
|
|
Oid heaprelid;
|
|
|
|
|
Oid indexrelid;
|
|
|
|
|
bool isconcurrent;
|
|
|
|
|
int scantuplesortstates;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* workersdonecv is used to monitor the progress of workers. All parallel
|
|
|
|
|
* participants must indicate that they are done before leader can use
|
|
|
|
|
* results built by the workers (and before leader can write the data into
|
|
|
|
|
* the index).
|
|
|
|
|
*/
|
|
|
|
|
ConditionVariable workersdonecv;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* mutex protects all following fields
|
|
|
|
|
*
|
|
|
|
|
* These fields contain status information of interest to GIN index builds
|
|
|
|
|
* that must work just the same when an index is built in parallel.
|
|
|
|
|
*/
|
|
|
|
|
slock_t mutex;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Mutable state that is maintained by workers, and reported back to
|
|
|
|
|
* leader at end of the scans.
|
|
|
|
|
*
|
|
|
|
|
* nparticipantsdone is number of worker processes finished.
|
|
|
|
|
*
|
|
|
|
|
* reltuples is the total number of input heap tuples.
|
|
|
|
|
*
|
|
|
|
|
* indtuples is the total number of tuples that made it into the index.
|
|
|
|
|
*/
|
|
|
|
|
int nparticipantsdone;
|
|
|
|
|
double reltuples;
|
|
|
|
|
double indtuples;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ParallelTableScanDescData data follows. Can't directly embed here, as
|
|
|
|
|
* implementations of the parallel table scan desc interface might need
|
|
|
|
|
* stronger alignment.
|
|
|
|
|
*/
|
|
|
|
|
} GinBuildShared;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Return pointer to a GinBuildShared's parallel table scan.
|
|
|
|
|
*
|
|
|
|
|
* c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
|
|
|
|
|
* MAXALIGN.
|
|
|
|
|
*/
|
|
|
|
|
#define ParallelTableScanFromGinBuildShared(shared) \
|
|
|
|
|
(ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Status for leader in parallel index build.
|
|
|
|
|
*/
|
|
|
|
|
typedef struct GinLeader
|
|
|
|
|
{
|
|
|
|
|
/* parallel context itself */
|
|
|
|
|
ParallelContext *pcxt;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* nparticipanttuplesorts is the exact number of worker processes
|
|
|
|
|
* successfully launched, plus one leader process if it participates as a
|
|
|
|
|
* worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
|
|
|
|
|
* participating as a worker).
|
|
|
|
|
*/
|
|
|
|
|
int nparticipanttuplesorts;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Leader process convenience pointers to shared state (leader avoids TOC
|
|
|
|
|
* lookups).
|
|
|
|
|
*
|
|
|
|
|
* GinBuildShared is the shared state for entire build. sharedsort is the
|
|
|
|
|
* shared, tuplesort-managed state passed to each process tuplesort.
|
|
|
|
|
* snapshot is the snapshot used by the scan iff an MVCC snapshot is
|
|
|
|
|
* required.
|
|
|
|
|
*/
|
|
|
|
|
GinBuildShared *ginshared;
|
|
|
|
|
Sharedsort *sharedsort;
|
|
|
|
|
Snapshot snapshot;
|
|
|
|
|
WalUsage *walusage;
|
|
|
|
|
BufferUsage *bufferusage;
|
|
|
|
|
} GinLeader;
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
GinState ginstate;
|
|
|
|
|
double indtuples;
|
|
|
|
|
GinStatsData buildStats;
|
|
|
|
|
MemoryContext tmpCtx;
|
|
|
|
|
MemoryContext funcCtx;
|
|
|
|
|
BuildAccumulator accum;
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
ItemPointerData tid;
|
|
|
|
|
int work_mem;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* bs_leader is only present when a parallel index build is performed, and
|
|
|
|
|
* only in the leader process.
|
|
|
|
|
*/
|
|
|
|
|
GinLeader *bs_leader;
|
|
|
|
|
int bs_worker_id;
|
|
|
|
|
|
|
|
|
|
/* used to pass information from workers to leader */
|
|
|
|
|
double bs_numtuples;
|
|
|
|
|
double bs_reltuples;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The sortstate is used by workers (including the leader). It has to be
|
|
|
|
|
* part of the build state, because that's the only thing passed to the
|
|
|
|
|
* build callback etc.
|
|
|
|
|
*/
|
|
|
|
|
Tuplesortstate *bs_sortstate;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The sortstate used only within a single worker for the first merge pass
|
|
|
|
|
* happening there. In principle it doesn't need to be part of the build
|
|
|
|
|
* state and we could pass it around directly, but it's more convenient
|
|
|
|
|
* this way. And it's part of the build state, after all.
|
|
|
|
|
*/
|
|
|
|
|
Tuplesortstate *bs_worker_sort;
|
|
|
|
|
} GinBuildState;
|
|
|
|
|
|
|
|
|
|
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
/* parallel index builds */
|
|
|
|
|
static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
|
|
|
|
|
bool isconcurrent, int request);
|
|
|
|
|
static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
|
|
|
|
|
static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
|
|
|
|
|
static double _gin_parallel_heapscan(GinBuildState *buildstate);
|
|
|
|
|
static double _gin_parallel_merge(GinBuildState *buildstate);
|
|
|
|
|
static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
|
|
|
|
|
Relation heap, Relation index);
|
|
|
|
|
static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
|
|
|
|
|
GinBuildShared *ginshared,
|
|
|
|
|
Sharedsort *sharedsort,
|
|
|
|
|
Relation heap, Relation index,
|
|
|
|
|
int sortmem, bool progress);
|
|
|
|
|
|
|
|
|
|
static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items);
|
|
|
|
|
static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
|
|
|
|
|
Datum key, int16 typlen, bool typbyval,
|
|
|
|
|
ItemPointerData *items, uint32 nitems,
|
|
|
|
|
Size *len);
|
|
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
* Adds array of item pointers to tuple's posting list, or
|
|
|
|
|
* creates posting tree and tuple pointing to tree in case
|
|
|
|
|
* of not enough space. Max size of tuple is defined in
|
|
|
|
|
* GinFormTuple(). Returns a new, modified index tuple.
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
* items[] must be in sorted order with no duplicates.
|
|
|
|
|
*/
|
|
|
|
|
static IndexTuple
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
addItemPointersToLeafTuple(GinState *ginstate,
|
|
|
|
|
IndexTuple old,
|
|
|
|
|
ItemPointerData *items, uint32 nitem,
|
|
|
|
|
GinStatsData *buildStats, Buffer buffer)
|
|
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
OffsetNumber attnum;
|
|
|
|
|
Datum key;
|
|
|
|
|
GinNullCategory category;
|
|
|
|
|
IndexTuple res;
|
|
|
|
|
ItemPointerData *newItems,
|
|
|
|
|
*oldItems;
|
|
|
|
|
int oldNPosting,
|
|
|
|
|
newNPosting;
|
|
|
|
|
GinPostingList *compressedList;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
|
|
|
|
Assert(!GinIsPostingTree(old));
|
|
|
|
|
|
|
|
|
|
attnum = gintuple_get_attrnum(ginstate, old);
|
|
|
|
|
key = gintuple_get_key(ginstate, old, &category);
|
|
|
|
|
|
|
|
|
|
/* merge the old and new posting lists */
|
|
|
|
|
oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
|
|
|
|
|
|
|
|
|
|
newItems = ginMergeItemPointers(items, nitem,
|
|
|
|
|
oldItems, oldNPosting,
|
|
|
|
|
&newNPosting);
|
|
|
|
|
|
|
|
|
|
/* Compress the posting list, and try to build a tuple with room for it */
|
|
|
|
|
res = NULL;
|
|
|
|
|
compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize,
|
|
|
|
|
NULL);
|
|
|
|
|
pfree(newItems);
|
|
|
|
|
if (compressedList)
|
|
|
|
|
{
|
|
|
|
|
res = GinFormTuple(ginstate, attnum, key, category,
|
|
|
|
|
(char *) compressedList,
|
|
|
|
|
SizeOfGinPostingList(compressedList),
|
|
|
|
|
newNPosting,
|
|
|
|
|
false);
|
|
|
|
|
pfree(compressedList);
|
|
|
|
|
}
|
|
|
|
|
if (!res)
|
|
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* posting list would be too big, convert to posting tree */
|
|
|
|
|
BlockNumber postingRoot;
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/*
|
|
|
|
|
* Initialize posting tree with the old tuple's posting list. It's
|
|
|
|
|
* surely small enough to fit on one posting-tree page, and should
|
|
|
|
|
* already be in order with no duplicates.
|
|
|
|
|
*/
|
|
|
|
|
postingRoot = createPostingTree(ginstate->index,
|
|
|
|
|
oldItems,
|
|
|
|
|
oldNPosting,
|
|
|
|
|
buildStats,
|
|
|
|
|
buffer);
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* Now insert the TIDs-to-be-added into the posting tree */
|
|
|
|
|
ginInsertItemPointers(ginstate->index, postingRoot,
|
|
|
|
|
items, nitem,
|
|
|
|
|
buildStats);
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* And build a new posting-tree-only result tuple */
|
|
|
|
|
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
GinSetPostingTree(res, postingRoot);
|
|
|
|
|
}
|
|
|
|
|
pfree(oldItems);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Build a fresh leaf tuple, either posting-list or posting-tree format
|
|
|
|
|
* depending on whether the given items list will fit.
|
|
|
|
|
* items[] must be in sorted order with no duplicates.
|
|
|
|
|
*
|
|
|
|
|
* This is basically the same logic as in addItemPointersToLeafTuple,
|
|
|
|
|
* but working from slightly different input.
|
|
|
|
|
*/
|
|
|
|
|
static IndexTuple
|
|
|
|
|
buildFreshLeafTuple(GinState *ginstate,
|
|
|
|
|
OffsetNumber attnum, Datum key, GinNullCategory category,
|
|
|
|
|
ItemPointerData *items, uint32 nitem,
|
|
|
|
|
GinStatsData *buildStats, Buffer buffer)
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
{
|
|
|
|
|
IndexTuple res = NULL;
|
|
|
|
|
GinPostingList *compressedList;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
|
|
|
|
/* try to build a posting list tuple with all the items */
|
|
|
|
|
compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
|
|
|
|
|
if (compressedList)
|
|
|
|
|
{
|
|
|
|
|
res = GinFormTuple(ginstate, attnum, key, category,
|
|
|
|
|
(char *) compressedList,
|
|
|
|
|
SizeOfGinPostingList(compressedList),
|
|
|
|
|
nitem, false);
|
|
|
|
|
pfree(compressedList);
|
|
|
|
|
}
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
if (!res)
|
|
|
|
|
{
|
|
|
|
|
/* posting list would be too big, build posting tree */
|
|
|
|
|
BlockNumber postingRoot;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Build posting-tree-only result tuple. We do this first so as to
|
|
|
|
|
* fail quickly if the key is too big.
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
*/
|
|
|
|
|
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize a new posting tree with the TIDs.
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
*/
|
|
|
|
|
postingRoot = createPostingTree(ginstate->index, items, nitem,
|
|
|
|
|
buildStats, buffer);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
|
|
|
|
/* And save the root link in the result tuple */
|
|
|
|
|
GinSetPostingTree(res, postingRoot);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
* Insert one or more heap TIDs associated with the given key value.
|
|
|
|
|
* This will either add a single key entry, or enlarge a pre-existing entry.
|
|
|
|
|
*
|
|
|
|
|
* During an index build, buildStats is non-null and the counters
|
|
|
|
|
* it contains should be incremented as needed.
|
|
|
|
|
*/
|
|
|
|
|
void
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginEntryInsert(GinState *ginstate,
|
|
|
|
|
OffsetNumber attnum, Datum key, GinNullCategory category,
|
|
|
|
|
ItemPointerData *items, uint32 nitem,
|
|
|
|
|
GinStatsData *buildStats)
|
|
|
|
|
{
|
|
|
|
|
GinBtreeData btree;
|
|
|
|
|
GinBtreeEntryInsertData insertdata;
|
|
|
|
|
GinBtreeStack *stack;
|
|
|
|
|
IndexTuple itup;
|
|
|
|
|
Page page;
|
|
|
|
|
|
|
|
|
|
insertdata.isDelete = false;
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
|
Generate less WAL during GiST, GIN and SP-GiST index build.
Instead of WAL-logging every modification during the build separately,
first build the index without any WAL-logging, and make a separate pass
through the index at the end, to write all pages to the WAL. This
significantly reduces the amount of WAL generated, and is usually also
faster, despite the extra I/O needed for the extra scan through the index.
WAL generated this way is also faster to replay.
For GiST, the LSN-NSN interlock makes this a little tricky. All pages must
be marked with a valid (i.e. non-zero) LSN, so that the parent-child
LSN-NSN interlock works correctly. We now use magic value 1 for that during
index build. Change the fake LSN counter to begin from 1000, so that 1 is
safely smaller than any real or fake LSN. 2 would've been enough for our
purposes, but let's reserve a bigger range, in case we need more special
values in the future.
Author: Anastasia Lubennikova, Andrey V. Lepikhov
Reviewed-by: Heikki Linnakangas, Dmitry Dolgov
7 years ago
|
|
|
btree.isBuild = (buildStats != NULL);
|
|
|
|
|
|
|
|
|
|
stack = ginFindLeafPage(&btree, false, false);
|
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
|
|
|
|
|
|
|
|
if (btree.findItem(&btree, stack))
|
|
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* found pre-existing entry */
|
|
|
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
|
|
|
|
|
|
|
|
|
if (GinIsPostingTree(itup))
|
|
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* add entries to existing posting tree */
|
|
|
|
|
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
|
|
|
|
|
|
|
|
|
/* release all stack */
|
|
|
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
|
|
|
freeGinBtreeStack(stack);
|
|
|
|
|
|
|
|
|
|
/* insert into posting tree */
|
|
|
|
|
ginInsertItemPointers(ginstate->index, rootPostingTree,
|
|
|
|
|
items, nitem,
|
|
|
|
|
buildStats);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CheckForSerializableConflictIn(ginstate->index, NULL,
|
|
|
|
|
BufferGetBlockNumber(stack->buffer));
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* modify an existing leaf entry */
|
|
|
|
|
itup = addItemPointersToLeafTuple(ginstate, itup,
|
|
|
|
|
items, nitem, buildStats, stack->buffer);
|
|
|
|
|
|
|
|
|
|
insertdata.isDelete = true;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
CheckForSerializableConflictIn(ginstate->index, NULL,
|
|
|
|
|
BufferGetBlockNumber(stack->buffer));
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* no match, so construct a new leaf entry */
|
|
|
|
|
itup = buildFreshLeafTuple(ginstate, attnum, key, category,
|
|
|
|
|
items, nitem, buildStats, stack->buffer);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* nEntries counts leaf tuples, so increment it only when we make a
|
|
|
|
|
* new one.
|
|
|
|
|
*/
|
|
|
|
|
if (buildStats)
|
|
|
|
|
buildStats->nEntries++;
|
|
|
|
|
}
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
/* Insert the new or modified leaf tuple */
|
|
|
|
|
insertdata.entry = itup;
|
|
|
|
|
ginInsertValue(&btree, stack, &insertdata, buildStats);
|
|
|
|
|
pfree(itup);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
* Extract index entries for a single indexable item, and add them to the
|
|
|
|
|
* BuildAccumulator's state.
|
|
|
|
|
*
|
|
|
|
|
* This function is used only during initial index creation.
|
|
|
|
|
*/
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
static void
|
|
|
|
|
ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
|
|
|
|
|
Datum value, bool isNull,
|
|
|
|
|
ItemPointer heapptr)
|
|
|
|
|
{
|
|
|
|
|
Datum *entries;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
GinNullCategory *categories;
|
|
|
|
|
int32 nentries;
|
|
|
|
|
MemoryContext oldCtx;
|
|
|
|
|
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
|
|
|
|
|
value, isNull,
|
|
|
|
|
&nentries, &categories);
|
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
|
|
|
|
|
entries, categories, nentries);
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
buildstate->indtuples += nentries;
|
|
|
|
|
|
|
|
|
|
MemoryContextReset(buildstate->funcCtx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
|
|
|
|
|
bool *isnull, bool tupleIsAlive, void *state)
|
|
|
|
|
{
|
|
|
|
|
GinBuildState *buildstate = (GinBuildState *) state;
|
|
|
|
|
MemoryContext oldCtx;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
|
|
|
|
|
values[i], isnull[i], tid);
|
|
|
|
|
|
|
|
|
|
/* If we've maxed out our available memory, dump everything to the index */
|
|
|
|
|
if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
|
|
|
|
|
{
|
|
|
|
|
ItemPointerData *list;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
Datum key;
|
|
|
|
|
GinNullCategory category;
|
|
|
|
|
uint32 nlist;
|
|
|
|
|
OffsetNumber attnum;
|
|
|
|
|
|
|
|
|
|
ginBeginBAScan(&buildstate->accum);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
while ((list = ginGetBAEntry(&buildstate->accum,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
9 years ago
|
|
|
&attnum, &key, &category, &nlist)) != NULL)
|
|
|
|
|
{
|
|
|
|
|
/* there could be many entries, so be willing to abort here */
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginEntryInsert(&buildstate->ginstate, attnum, key, category,
|
|
|
|
|
list, nlist, &buildstate->buildStats);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MemoryContextReset(buildstate->tmpCtx);
|
|
|
|
|
ginInitBA(&buildstate->accum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
}
|
|
|
|
|
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
/*
|
|
|
|
|
* ginFlushBuildState
|
|
|
|
|
* Write all data from BuildAccumulator into the tuplesort.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
ginFlushBuildState(GinBuildState *buildstate, Relation index)
|
|
|
|
|
{
|
|
|
|
|
ItemPointerData *list;
|
|
|
|
|
Datum key;
|
|
|
|
|
GinNullCategory category;
|
|
|
|
|
uint32 nlist;
|
|
|
|
|
OffsetNumber attnum;
|
|
|
|
|
TupleDesc tdesc = RelationGetDescr(index);
|
|
|
|
|
|
|
|
|
|
ginBeginBAScan(&buildstate->accum);
|
|
|
|
|
while ((list = ginGetBAEntry(&buildstate->accum,
|
|
|
|
|
&attnum, &key, &category, &nlist)) != NULL)
|
|
|
|
|
{
|
|
|
|
|
/* information about the key */
|
|
|
|
|
Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1));
|
|
|
|
|
|
|
|
|
|
/* GIN tuple and tuple length */
|
|
|
|
|
GinTuple *tup;
|
|
|
|
|
Size tuplen;
|
|
|
|
|
|
|
|
|
|
/* there could be many entries, so be willing to abort here */
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
|
|
|
|
|
tup = _gin_build_tuple(attnum, category,
|
|
|
|
|
key, attr->attlen, attr->attbyval,
|
|
|
|
|
list, nlist, &tuplen);
|
|
|
|
|
|
|
|
|
|
tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
|
|
|
|
|
|
|
|
|
|
pfree(tup);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MemoryContextReset(buildstate->tmpCtx);
|
|
|
|
|
ginInitBA(&buildstate->accum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ginBuildCallbackParallel
|
|
|
|
|
* Callback for the parallel index build.
|
|
|
|
|
*
|
|
|
|
|
* This is similar to the serial build callback ginBuildCallback, but
|
|
|
|
|
* instead of writing the accumulated entries into the index, each worker
|
|
|
|
|
* writes them into a (local) tuplesort.
|
|
|
|
|
*
|
|
|
|
|
* The worker then sorts and combines these entries, before writing them
|
|
|
|
|
* into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
|
|
|
|
|
* for the whole process).
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
|
|
|
|
|
bool *isnull, bool tupleIsAlive, void *state)
|
|
|
|
|
{
|
|
|
|
|
GinBuildState *buildstate = (GinBuildState *) state;
|
|
|
|
|
MemoryContext oldCtx;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* if scan wrapped around - flush accumulated entries and start anew
|
|
|
|
|
*
|
|
|
|
|
* With parallel scans, we don't have a guarantee the scan does not start
|
|
|
|
|
* half-way through the relation (serial builds disable sync scans and
|
|
|
|
|
* always start from block 0, parallel scans require allow_sync=true).
|
|
|
|
|
*
|
|
|
|
|
* Building the posting lists assumes the TIDs are monotonic and never go
|
|
|
|
|
* back, and the wrap around would break that. We handle that by detecting
|
|
|
|
|
* the wraparound, and flushing all entries. This means we'll later see
|
|
|
|
|
* two separate entries with non-overlapping TID lists (which can be
|
|
|
|
|
* combined by merge sort).
|
|
|
|
|
*
|
|
|
|
|
* To detect a wraparound, we remember the last TID seen by each worker
|
|
|
|
|
* (for any key). If the next TID seen by the worker is lower, the scan
|
|
|
|
|
* must have wrapped around.
|
|
|
|
|
*/
|
|
|
|
|
if (ItemPointerCompare(tid, &buildstate->tid) < 0)
|
|
|
|
|
ginFlushBuildState(buildstate, index);
|
|
|
|
|
|
|
|
|
|
/* remember the TID we're about to process */
|
|
|
|
|
buildstate->tid = *tid;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
|
|
|
|
|
ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
|
|
|
|
|
values[i], isnull[i], tid);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If we've maxed out our available memory, dump everything to the
|
|
|
|
|
* tuplesort. We use half the per-worker fraction of maintenance_work_mem,
|
|
|
|
|
* the other half is used for the tuplesort.
|
|
|
|
|
*/
|
|
|
|
|
if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
|
|
|
|
|
ginFlushBuildState(buildstate, index);
|
|
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
}
|
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
10 years ago
|
|
|
IndexBuildResult *
|
|
|
|
|
ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
|
|
|
|
{
|
|
|
|
|
IndexBuildResult *result;
|
|
|
|
|
double reltuples;
|
|
|
|
|
GinBuildState buildstate;
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
GinBuildState *state = &buildstate;
|
|
|
|
|
Buffer RootBuffer,
|
|
|
|
|
MetaBuffer;
|
|
|
|
|
ItemPointerData *list;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
Datum key;
|
|
|
|
|
GinNullCategory category;
|
|
|
|
|
uint32 nlist;
|
|
|
|
|
MemoryContext oldCtx;
|
|
|
|
|
OffsetNumber attnum;
|
|
|
|
|
|
|
|
|
|
if (RelationGetNumberOfBlocks(index) != 0)
|
|
|
|
|
elog(ERROR, "index \"%s\" already contains data",
|
|
|
|
|
RelationGetRelationName(index));
|
|
|
|
|
|
|
|
|
|
initGinState(&buildstate.ginstate, index);
|
|
|
|
|
buildstate.indtuples = 0;
|
|
|
|
|
memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
|
|
|
|
|
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
/* Initialize fields for parallel build too. */
|
|
|
|
|
buildstate.bs_numtuples = 0;
|
|
|
|
|
buildstate.bs_reltuples = 0;
|
|
|
|
|
buildstate.bs_leader = NULL;
|
|
|
|
|
memset(&buildstate.tid, 0, sizeof(ItemPointerData));
|
|
|
|
|
|
|
|
|
|
/* initialize the meta page */
|
|
|
|
|
MetaBuffer = GinNewBuffer(index);
|
|
|
|
|
|
|
|
|
|
/* initialize the root page */
|
|
|
|
|
RootBuffer = GinNewBuffer(index);
|
|
|
|
|
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
GinInitMetabuffer(MetaBuffer);
|
|
|
|
|
MarkBufferDirty(MetaBuffer);
|
|
|
|
|
GinInitBuffer(RootBuffer, GIN_LEAF);
|
|
|
|
|
MarkBufferDirty(RootBuffer);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
UnlockReleaseBuffer(MetaBuffer);
|
|
|
|
|
UnlockReleaseBuffer(RootBuffer);
|
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
|
|
|
|
|
/* count the root as first entry page */
|
|
|
|
|
buildstate.buildStats.nEntryPages++;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* create a temporary memory context that is used to hold data not yet
|
|
|
|
|
* dumped out to the index
|
|
|
|
|
*/
|
|
|
|
|
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
|
"Gin build temporary context",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
9 years ago
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* create a temporary memory context that is used for calling
|
|
|
|
|
* ginExtractEntries(), and can be reset after each tuple
|
|
|
|
|
*/
|
|
|
|
|
buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
9 years ago
|
|
|
"Gin build temporary context for user-defined function",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
9 years ago
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
|
|
buildstate.accum.ginstate = &buildstate.ginstate;
|
|
|
|
|
ginInitBA(&buildstate.accum);
|
|
|
|
|
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
/* Report table scan phase started */
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
|
|
|
|
|
PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
|
|
|
|
|
|
|
|
|
|
/*
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
* Attempt to launch parallel worker scan when required
|
|
|
|
|
*
|
|
|
|
|
* XXX plan_create_index_workers makes the number of workers dependent on
|
|
|
|
|
* maintenance_work_mem, requiring 32MB for each worker. For GIN that's
|
|
|
|
|
* reasonable too, because we sort the data just like btree. It does
|
|
|
|
|
* ignore the memory used to accumulate data in memory (set by work_mem),
|
|
|
|
|
* but there is no way to communicate that to plan_create_index_workers.
|
|
|
|
|
*/
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
if (indexInfo->ii_ParallelWorkers > 0)
|
|
|
|
|
_gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
|
|
|
|
|
indexInfo->ii_ParallelWorkers);
|
|
|
|
|
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
/*
|
|
|
|
|
* If parallel build requested and at least one worker process was
|
|
|
|
|
* successfully launched, set up coordination state, wait for workers to
|
|
|
|
|
* complete. Then read all tuples from the shared tuplesort and insert
|
|
|
|
|
* them into the index.
|
|
|
|
|
*
|
|
|
|
|
* In serial mode, simply scan the table and build the index one index
|
|
|
|
|
* tuple at a time.
|
|
|
|
|
*/
|
|
|
|
|
if (state->bs_leader)
|
|
|
|
|
{
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
SortCoordinate coordinate;
|
|
|
|
|
|
|
|
|
|
coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
|
|
|
|
|
coordinate->isWorker = false;
|
|
|
|
|
coordinate->nParticipants =
|
|
|
|
|
state->bs_leader->nparticipanttuplesorts;
|
|
|
|
|
coordinate->sharedsort = state->bs_leader->sharedsort;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Begin leader tuplesort.
|
|
|
|
|
*
|
|
|
|
|
* In cases where parallelism is involved, the leader receives the
|
|
|
|
|
* same share of maintenance_work_mem as a serial sort (it is
|
|
|
|
|
* generally treated in the same way as a serial sort once we return).
|
|
|
|
|
* Parallel worker Tuplesortstates will have received only a fraction
|
|
|
|
|
* of maintenance_work_mem, though.
|
|
|
|
|
*
|
|
|
|
|
* We rely on the lifetime of the Leader Tuplesortstate almost not
|
|
|
|
|
* overlapping with any worker Tuplesortstate's lifetime. There may
|
|
|
|
|
* be some small overlap, but that's okay because we rely on leader
|
|
|
|
|
* Tuplesortstate only allocating a small, fixed amount of memory
|
|
|
|
|
* here. When its tuplesort_performsort() is called (by our caller),
|
|
|
|
|
* and significant amounts of memory are likely to be used, all
|
|
|
|
|
* workers must have already freed almost all memory held by their
|
|
|
|
|
* Tuplesortstates (they are about to go away completely, too). The
|
|
|
|
|
* overall effect is that maintenance_work_mem always represents an
|
|
|
|
|
* absolute high watermark on the amount of memory used by a CREATE
|
|
|
|
|
* INDEX operation, regardless of the use of parallelism or any other
|
|
|
|
|
* factor.
|
|
|
|
|
*/
|
|
|
|
|
state->bs_sortstate =
|
|
|
|
|
tuplesort_begin_index_gin(heap, index,
|
|
|
|
|
maintenance_work_mem, coordinate,
|
|
|
|
|
TUPLESORT_NONE);
|
|
|
|
|
|
|
|
|
|
/* scan the relation in parallel and merge per-worker results */
|
|
|
|
|
reltuples = _gin_parallel_merge(state);
|
|
|
|
|
|
|
|
|
|
_gin_end_parallel(state->bs_leader, state);
|
|
|
|
|
}
|
|
|
|
|
else /* no parallel index build */
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* Do the heap scan. We disallow sync scan here because
|
|
|
|
|
* dataPlaceToPage prefers to receive tuples in TID order.
|
|
|
|
|
*/
|
|
|
|
|
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
|
|
|
|
|
ginBuildCallback, &buildstate, NULL);
|
|
|
|
|
|
|
|
|
|
/* dump remaining entries to the index */
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
|
|
|
|
|
ginBeginBAScan(&buildstate.accum);
|
|
|
|
|
while ((list = ginGetBAEntry(&buildstate.accum,
|
|
|
|
|
&attnum, &key, &category, &nlist)) != NULL)
|
|
|
|
|
{
|
|
|
|
|
/* there could be many entries, so be willing to abort here */
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
ginEntryInsert(&buildstate.ginstate, attnum, key, category,
|
|
|
|
|
list, nlist, &buildstate.buildStats);
|
|
|
|
|
}
|
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MemoryContextDelete(buildstate.funcCtx);
|
|
|
|
|
MemoryContextDelete(buildstate.tmpCtx);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Update metapage stats
|
|
|
|
|
*/
|
|
|
|
|
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
|
Generate less WAL during GiST, GIN and SP-GiST index build.
Instead of WAL-logging every modification during the build separately,
first build the index without any WAL-logging, and make a separate pass
through the index at the end, to write all pages to the WAL. This
significantly reduces the amount of WAL generated, and is usually also
faster, despite the extra I/O needed for the extra scan through the index.
WAL generated this way is also faster to replay.
For GiST, the LSN-NSN interlock makes this a little tricky. All pages must
be marked with a valid (i.e. non-zero) LSN, so that the parent-child
LSN-NSN interlock works correctly. We now use magic value 1 for that during
index build. Change the fake LSN counter to begin from 1000, so that 1 is
safely smaller than any real or fake LSN. 2 would've been enough for our
purposes, but let's reserve a bigger range, in case we need more special
values in the future.
Author: Anastasia Lubennikova, Andrey V. Lepikhov
Reviewed-by: Heikki Linnakangas, Dmitry Dolgov
7 years ago
|
|
|
ginUpdateStats(index, &buildstate.buildStats, true);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We didn't write WAL records as we built the index, so if WAL-logging is
|
|
|
|
|
* required, write all pages to the WAL now.
|
|
|
|
|
*/
|
|
|
|
|
if (RelationNeedsWAL(index))
|
|
|
|
|
{
|
|
|
|
|
log_newpage_range(index, MAIN_FORKNUM,
|
|
|
|
|
0, RelationGetNumberOfBlocks(index),
|
|
|
|
|
true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Return statistics
|
|
|
|
|
*/
|
|
|
|
|
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
|
|
|
|
|
|
|
|
|
|
result->heap_tuples = reltuples;
|
|
|
|
|
result->index_tuples = buildstate.indtuples;
|
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
10 years ago
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ginbuildempty() -- build an empty gin index in the initialization fork
|
|
|
|
|
*/
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
10 years ago
|
|
|
void
|
|
|
|
|
ginbuildempty(Relation index)
|
|
|
|
|
{
|
|
|
|
|
Buffer RootBuffer,
|
|
|
|
|
MetaBuffer;
|
|
|
|
|
|
|
|
|
|
/* An empty GIN index has two pages. */
|
|
|
|
|
MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
|
|
|
|
|
EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
|
|
|
|
|
RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
|
|
|
|
|
EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
|
|
|
|
|
|
|
|
|
|
/* Initialize and xlog metabuffer and root buffer. */
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
GinInitMetabuffer(MetaBuffer);
|
|
|
|
|
MarkBufferDirty(MetaBuffer);
|
Set the metapage's pd_lower correctly in brin, gin, and spgist indexes.
Previously, these index types left the pd_lower field set to the default
SizeOfPageHeaderData, which is really a lie because it ought to point past
whatever space is being used for metadata. The coding accidentally failed
to fail because we never told xlog.c that the metapage is of standard
format --- but that's not very good, because it impedes WAL consistency
checking, and in some cases prevents compression of full-page images.
To fix, ensure that we set pd_lower correctly, not only when creating a
metapage but whenever we write it out (these apparently redundant steps are
needed to cope with pg_upgrade'd indexes that don't yet contain the right
value). This allows telling xlog.c that the page is of standard format.
The WAL consistency check mask functions are made to mask only if pd_lower
appears valid, which I think is likely unnecessary complication, since
any metapage appearing in a v11 WAL stream should contain valid pd_lower.
But it doesn't cost much to be paranoid.
Amit Langote, reviewed by Michael Paquier and Amit Kapila
Discussion: https://postgr.es/m/0d273805-0e9e-ec1a-cb84-d4da400b8f85@lab.ntt.co.jp
8 years ago
|
|
|
log_newpage_buffer(MetaBuffer, true);
|
|
|
|
|
GinInitBuffer(RootBuffer, GIN_LEAF);
|
|
|
|
|
MarkBufferDirty(RootBuffer);
|
|
|
|
|
log_newpage_buffer(RootBuffer, false);
|
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
|
|
|
|
|
/* Unlock and release the buffers. */
|
|
|
|
|
UnlockReleaseBuffer(MetaBuffer);
|
|
|
|
|
UnlockReleaseBuffer(RootBuffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
* Insert index entries for a single indexable item during "normal"
|
|
|
|
|
* (non-fast-update) insertion
|
|
|
|
|
*/
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
static void
|
|
|
|
|
ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
|
|
|
|
|
Datum value, bool isNull,
|
|
|
|
|
ItemPointer item)
|
|
|
|
|
{
|
|
|
|
|
Datum *entries;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
GinNullCategory *categories;
|
|
|
|
|
int32 i,
|
|
|
|
|
nentries;
|
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
entries = ginExtractEntries(ginstate, attnum, value, isNull,
|
|
|
|
|
&nentries, &categories);
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < nentries; i++)
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
ginEntryInsert(ginstate, attnum, entries[i], categories[i],
|
|
|
|
|
item, 1, NULL);
|
|
|
|
|
}
|
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
10 years ago
|
|
|
bool
|
|
|
|
|
gininsert(Relation index, Datum *values, bool *isnull,
|
|
|
|
|
ItemPointer ht_ctid, Relation heapRel,
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
IndexUniqueCheck checkUnique,
|
|
|
|
|
bool indexUnchanged,
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
IndexInfo *indexInfo)
|
|
|
|
|
{
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
|
|
|
|
|
MemoryContext oldCtx;
|
|
|
|
|
MemoryContext insertCtx;
|
|
|
|
|
int i;
|
|
|
|
|
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
/* Initialize GinState cache if first call in this statement */
|
|
|
|
|
if (ginstate == NULL)
|
|
|
|
|
{
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
|
|
|
|
|
ginstate = (GinState *) palloc(sizeof(GinState));
|
|
|
|
|
initGinState(ginstate, index);
|
|
|
|
|
indexInfo->ii_AmCache = ginstate;
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
|
"Gin insert temporary context",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
9 years ago
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(insertCtx);
|
|
|
|
|
|
|
|
|
|
if (GinGetUseFastUpdate(index))
|
|
|
|
|
{
|
|
|
|
|
GinTupleCollector collector;
|
|
|
|
|
|
|
|
|
|
memset(&collector, 0, sizeof(GinTupleCollector));
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
for (i = 0; i < ginstate->origTupdesc->natts; i++)
|
|
|
|
|
ginHeapTupleFastCollect(ginstate, &collector,
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
(OffsetNumber) (i + 1),
|
|
|
|
|
values[i], isnull[i],
|
|
|
|
|
ht_ctid);
|
|
|
|
|
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
ginHeapTupleFastInsert(ginstate, &collector);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
Allow index AMs to cache data across aminsert calls within a SQL command.
It's always been possible for index AMs to cache data across successive
amgettuple calls within a single SQL command: the IndexScanDesc.opaque
field is meant for precisely that. However, no comparable facility
exists for amortizing setup work across successive aminsert calls.
This patch adds such a feature and teaches GIN, GIST, and BRIN to use it
to amortize catalog lookups they'd previously been doing on every call.
(The other standard index AMs keep everything they need in the relcache,
so there's little to improve there.)
For GIN, the overall improvement in a statement that inserts many rows
can be as much as 10%, though it seems a bit less for the other two.
In addition, this makes a really significant difference in runtime
for CLOBBER_CACHE_ALWAYS tests, since in those builds the repeated
catalog lookups are vastly more expensive.
The reason this has been hard up to now is that the aminsert function is
not passed any useful place to cache per-statement data. What I chose to
do is to add suitable fields to struct IndexInfo and pass that to aminsert.
That's not widening the index AM API very much because IndexInfo is already
within the ken of ambuild; in fact, by passing the same info to aminsert
as to ambuild, this is really removing an inconsistency in the AM API.
Discussion: https://postgr.es/m/27568.1486508680@sss.pgh.pa.us
9 years ago
|
|
|
for (i = 0; i < ginstate->origTupdesc->natts; i++)
|
|
|
|
|
ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
15 years ago
|
|
|
values[i], isnull[i],
|
|
|
|
|
ht_ctid);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
|
MemoryContextDelete(insertCtx);
|
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
10 years ago
|
|
|
return false;
|
|
|
|
|
}
|
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
10 months ago
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Create parallel context, and launch workers for leader.
|
|
|
|
|
*
|
|
|
|
|
* buildstate argument should be initialized (with the exception of the
|
|
|
|
|
* tuplesort states, which may later be created based on shared
|
|
|
|
|
* state initially set up here).
|
|
|
|
|
*
|
|
|
|
|
* isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
|
|
|
|
|
*
|
|
|
|
|
* request is the target number of parallel worker processes to launch.
|
|
|
|
|
*
|
|
|
|
|
* Sets buildstate's GinLeader, which caller must use to shut down parallel
|
|
|
|
|
* mode by passing it to _gin_end_parallel() at the very end of its index
|
|
|
|
|
* build. If not even a single worker process can be launched, this is
|
|
|
|
|
* never set, and caller should proceed with a serial index build.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
|
|
|
|
|
bool isconcurrent, int request)
|
|
|
|
|
{
|
|
|
|
|
ParallelContext *pcxt;
|
|
|
|
|
int scantuplesortstates;
|
|
|
|
|
Snapshot snapshot;
|
|
|
|
|
Size estginshared;
|
|
|
|
|
Size estsort;
|
|
|
|
|
GinBuildShared *ginshared;
|
|
|
|
|
Sharedsort *sharedsort;
|
|
|
|
|
GinLeader *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));
|
|
|
|
|
WalUsage *walusage;
|
|
|
|
|
BufferUsage *bufferusage;
|
|
|
|
|
bool leaderparticipates = true;
|
|
|
|
|
int querylen;
|
|
|
|
|
|
|
|
|
|
#ifdef DISABLE_LEADER_PARTICIPATION
|
|
|
|
|
leaderparticipates = false;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Enter parallel mode, and create context for parallel build of gin index
|
|
|
|
|
*/
|
|
|
|
|
EnterParallelMode();
|
|
|
|
|
Assert(request > 0);
|
|
|
|
|
pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
|
|
|
|
|
request);
|
|
|
|
|
|
|
|
|
|
scantuplesortstates = leaderparticipates ? request + 1 : request;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Prepare for scan of the base relation. In a normal index build, we use
|
|
|
|
|
* SnapshotAny because we must retrieve all tuples and do our own time
|
|
|
|
|
* qual checks (because we have to index RECENTLY_DEAD tuples). In a
|
|
|
|
|
* concurrent build, we take a regular MVCC snapshot and index whatever's
|
|
|
|
|
* live according to that.
|
|
|
|
|
*/
|
|
|
|
|
if (!isconcurrent)
|
|
|
|
|
snapshot = SnapshotAny;
|
|
|
|
|
else
|
|
|
|
|
snapshot = RegisterSnapshot(GetTransactionSnapshot());
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
|
|
|
|
|
*/
|
|
|
|
|
estginshared = _gin_parallel_estimate_shared(heap, snapshot);
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
|
|
|
|
|
estsort = tuplesort_estimate_shared(scantuplesortstates);
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator, estsort);
|
|
|
|
|
|
|
|
|
|
shm_toc_estimate_keys(&pcxt->estimator, 2);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
|
|
|
|
|
* and PARALLEL_KEY_BUFFER_USAGE.
|
|
|
|
|
*
|
|
|
|
|
* If there are no extensions loaded that care, we could skip this. We
|
|
|
|
|
* have no way of knowing whether anyone's looking at pgWalUsage or
|
|
|
|
|
* pgBufferUsage, so do it unconditionally.
|
|
|
|
|
*/
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator,
|
|
|
|
|
mul_size(sizeof(WalUsage), pcxt->nworkers));
|
|
|
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator,
|
|
|
|
|
mul_size(sizeof(BufferUsage), pcxt->nworkers));
|
|
|
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
|
|
|
|
|
|
|
|
/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
|
|
|
|
|
if (debug_query_string)
|
|
|
|
|
{
|
|
|
|
|
querylen = strlen(debug_query_string);
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
|
|
|
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
querylen = 0; /* keep compiler quiet */
|
|
|
|
|
|
|
|
|
|
/* Everyone's had a chance to ask for space, so now create the DSM */
|
|
|
|
|
InitializeParallelDSM(pcxt);
|
|
|
|
|
|
|
|
|
|
/* If no DSM segment was available, back out (do serial build) */
|
|
|
|
|
if (pcxt->seg == NULL)
|
|
|
|
|
{
|
|
|
|
|
if (IsMVCCSnapshot(snapshot))
|
|
|
|
|
UnregisterSnapshot(snapshot);
|
|
|
|
|
DestroyParallelContext(pcxt);
|
|
|
|
|
ExitParallelMode();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Store shared build state, for which we reserved space */
|
|
|
|
|
ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
|
|
|
|
|
/* Initialize immutable state */
|
|
|
|
|
ginshared->heaprelid = RelationGetRelid(heap);
|
|
|
|
|
ginshared->indexrelid = RelationGetRelid(index);
|
|
|
|
|
ginshared->isconcurrent = isconcurrent;
|
|
|
|
|
ginshared->scantuplesortstates = scantuplesortstates;
|
|
|
|
|
|
|
|
|
|
ConditionVariableInit(&ginshared->workersdonecv);
|
|
|
|
|
SpinLockInit(&ginshared->mutex);
|
|
|
|
|
|
|
|
|
|
/* Initialize mutable state */
|
|
|
|
|
ginshared->nparticipantsdone = 0;
|
|
|
|
|
ginshared->reltuples = 0.0;
|
|
|
|
|
ginshared->indtuples = 0.0;
|
|
|
|
|
|
|
|
|
|
table_parallelscan_initialize(heap,
|
|
|
|
|
ParallelTableScanFromGinBuildShared(ginshared),
|
|
|
|
|
snapshot);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Store shared tuplesort-private state, for which we reserved space.
|
|
|
|
|
* Then, initialize opaque state using tuplesort routine.
|
|
|
|
|
*/
|
|
|
|
|
sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
|
|
|
|
|
tuplesort_initialize_shared(sharedsort, scantuplesortstates,
|
|
|
|
|
pcxt->seg);
|
|
|
|
|
|
|
|
|
|
shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
|
|
|
|
|
shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
|
|
|
|
|
|
|
|
|
|
/* Store query string for workers */
|
|
|
|
|
if (debug_query_string)
|
|
|
|
|
{
|
|
|
|
|
char *sharedquery;
|
|
|
|
|
|
|
|
|
|
sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
|
|
|
|
|
memcpy(sharedquery, debug_query_string, querylen + 1);
|
|
|
|
|
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Allocate space for each worker's WalUsage and BufferUsage; no need to
|
|
|
|
|
* initialize.
|
|
|
|
|
*/
|
|
|
|
|
walusage = shm_toc_allocate(pcxt->toc,
|
|
|
|
|
mul_size(sizeof(WalUsage), pcxt->nworkers));
|
|
|
|
|
shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
|
|
|
|
|
bufferusage = shm_toc_allocate(pcxt->toc,
|
|
|
|
|
mul_size(sizeof(BufferUsage), pcxt->nworkers));
|
|
|
|
|
shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
|
|
|
|
|
|
|
|
|
|
/* Launch workers, saving status for leader/caller */
|
|
|
|
|
LaunchParallelWorkers(pcxt);
|
|
|
|
|
ginleader->pcxt = pcxt;
|
|
|
|
|
ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
|
|
|
|
|
if (leaderparticipates)
|
|
|
|
|
ginleader->nparticipanttuplesorts++;
|
|
|
|
|
ginleader->ginshared = ginshared;
|
|
|
|
|
ginleader->sharedsort = sharedsort;
|
|
|
|
|
ginleader->snapshot = snapshot;
|
|
|
|
|
ginleader->walusage = walusage;
|
|
|
|
|
ginleader->bufferusage = bufferusage;
|
|
|
|
|
|
|
|
|
|
/* If no workers were successfully launched, back out (do serial build) */
|
|
|
|
|
if (pcxt->nworkers_launched == 0)
|
|
|
|
|
{
|
|
|
|
|
_gin_end_parallel(ginleader, NULL);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Save leader state now that it's clear build will be parallel */
|
|
|
|
|
buildstate->bs_leader = ginleader;
|
|
|
|
|
|
|
|
|
|
/* Join heap scan ourselves */
|
|
|
|
|
if (leaderparticipates)
|
|
|
|
|
_gin_leader_participate_as_worker(buildstate, heap, index);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Caller needs to wait for all launched workers when we return. Make
|
|
|
|
|
* sure that the failure-to-start case will not hang forever.
|
|
|
|
|
*/
|
|
|
|
|
WaitForParallelWorkersToAttach(pcxt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Shut down workers, destroy parallel context, and end parallel mode.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
_gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
/* Shutdown worker processes */
|
|
|
|
|
WaitForParallelWorkersToFinish(ginleader->pcxt);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Next, accumulate WAL usage. (This must wait for the workers to finish,
|
|
|
|
|
* or we might get incomplete data.)
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
|
|
|
|
|
InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
|
|
|
|
|
|
|
|
|
|
/* Free last reference to MVCC snapshot, if one was used */
|
|
|
|
|
if (IsMVCCSnapshot(ginleader->snapshot))
|
|
|
|
|
UnregisterSnapshot(ginleader->snapshot);
|
|
|
|
|
DestroyParallelContext(ginleader->pcxt);
|
|
|
|
|
ExitParallelMode();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Within leader, wait for end of heap scan.
|
|
|
|
|
*
|
|
|
|
|
* When called, parallel heap scan started by _gin_begin_parallel() will
|
|
|
|
|
* already be underway within worker processes (when leader participates
|
|
|
|
|
* as a worker, we should end up here just as workers are finishing).
|
|
|
|
|
*
|
|
|
|
|
* Returns the total number of heap tuples scanned.
|
|
|
|
|
*/
|
|
|
|
|
static double
|
|
|
|
|
_gin_parallel_heapscan(GinBuildState *state)
|
|
|
|
|
{
|
|
|
|
|
GinBuildShared *ginshared = state->bs_leader->ginshared;
|
|
|
|
|
int nparticipanttuplesorts;
|
|
|
|
|
|
|
|
|
|
nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
|
|
|
|
|
for (;;)
|
|
|
|
|
{
|
|
|
|
|
SpinLockAcquire(&ginshared->mutex);
|
|
|
|
|
if (ginshared->nparticipantsdone == nparticipanttuplesorts)
|
|
|
|
|
{
|
|
|
|
|
/* copy the data into leader state */
|
|
|
|
|
state->bs_reltuples = ginshared->reltuples;
|
|
|
|
|
state->bs_numtuples = ginshared->indtuples;
|
|
|
|
|
|
|
|
|
|
SpinLockRelease(&ginshared->mutex);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
SpinLockRelease(&ginshared->mutex);
|
|
|
|
|
|
|
|
|
|
ConditionVariableSleep(&ginshared->workersdonecv,
|
|
|
|
|
WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ConditionVariableCancelSleep();
|
|
|
|
|
|
|
|
|
|
return state->bs_reltuples;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Buffer used to accumulate TIDs from multiple GinTuples for the same key
|
|
|
|
|
* (we read these from the tuplesort, sorted by the key).
|
|
|
|
|
*
|
|
|
|
|
* This is similar to BuildAccumulator in that it's used to collect TIDs
|
|
|
|
|
* in memory before inserting them into the index, but it's much simpler
|
|
|
|
|
* as it only deals with a single index key at a time.
|
|
|
|
|
*
|
|
|
|
|
* When adding TIDs to the buffer, we make sure to keep them sorted, both
|
|
|
|
|
* during the initial table scan (and detecting when the scan wraps around),
|
|
|
|
|
* and during merging (where we do mergesort).
|
|
|
|
|
*/
|
|
|
|
|
typedef struct GinBuffer
|
|
|
|
|
{
|
|
|
|
|
OffsetNumber attnum;
|
|
|
|
|
GinNullCategory category;
|
|
|
|
|
Datum key; /* 0 if no key (and keylen == 0) */
|
|
|
|
|
Size keylen; /* number of bytes (not typlen) */
|
|
|
|
|
|
|
|
|
|
/* type info */
|
|
|
|
|
int16 typlen;
|
|
|
|
|
bool typbyval;
|
|
|
|
|
|
|
|
|
|
/* array of TID values */
|
|
|
|
|
int nitems;
|
|
|
|
|
SortSupport ssup; /* for sorting/comparing keys */
|
|
|
|
|
ItemPointerData *items;
|
|
|
|
|
} GinBuffer;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check that TID array contains valid values, and that it's sorted (if we
|
|
|
|
|
* expect it to be).
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
AssertCheckItemPointers(GinBuffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
|
|
|
/* we should not have a buffer with no TIDs to sort */
|
|
|
|
|
Assert(buffer->items != NULL);
|
|
|
|
|
Assert(buffer->nitems > 0);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < buffer->nitems; i++)
|
|
|
|
|
{
|
|
|
|
|
Assert(ItemPointerIsValid(&buffer->items[i]));
|
|
|
|
|
|
|
|
|
|
/* don't check ordering for the first TID item */
|
|
|
|
|
if (i == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBuffer checks
|
|
|
|
|
*
|
|
|
|
|
* Make sure the nitems/items fields are consistent (either the array is empty
|
|
|
|
|
* or not empty, the fields need to agree). If there are items, check ordering.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
AssertCheckGinBuffer(GinBuffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
|
|
|
/* if we have any items, the array must exist */
|
|
|
|
|
Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The buffer may be empty, in which case we must not call the check of
|
|
|
|
|
* item pointers, because that assumes non-emptiness.
|
|
|
|
|
*/
|
|
|
|
|
if (buffer->nitems == 0)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Make sure the item pointers are valid and sorted. */
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferInit
|
|
|
|
|
* Initialize buffer to store tuples for a GIN index.
|
|
|
|
|
*
|
|
|
|
|
* Initialize the buffer used to accumulate TID for a single key at a time
|
|
|
|
|
* (we process the data sorted), so we know when we received all data for
|
|
|
|
|
* a given key.
|
|
|
|
|
*
|
|
|
|
|
* Initializes sort support procedures for all index attributes.
|
|
|
|
|
*/
|
|
|
|
|
static GinBuffer *
|
|
|
|
|
GinBufferInit(Relation index)
|
|
|
|
|
{
|
|
|
|
|
GinBuffer *buffer = palloc0(sizeof(GinBuffer));
|
|
|
|
|
int i,
|
|
|
|
|
nKeys;
|
|
|
|
|
TupleDesc desc = RelationGetDescr(index);
|
|
|
|
|
|
|
|
|
|
nKeys = IndexRelationGetNumberOfKeyAttributes(index);
|
|
|
|
|
|
|
|
|
|
buffer->ssup = palloc0(sizeof(SortSupportData) * nKeys);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Lookup ordering operator for the index key data type, and initialize
|
|
|
|
|
* the sort support function.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < nKeys; i++)
|
|
|
|
|
{
|
|
|
|
|
Oid cmpFunc;
|
|
|
|
|
SortSupport sortKey = &buffer->ssup[i];
|
|
|
|
|
Form_pg_attribute att = TupleDescAttr(desc, i);
|
|
|
|
|
|
|
|
|
|
sortKey->ssup_cxt = CurrentMemoryContext;
|
|
|
|
|
sortKey->ssup_collation = index->rd_indcollation[i];
|
|
|
|
|
|
|
|
|
|
if (!OidIsValid(sortKey->ssup_collation))
|
|
|
|
|
sortKey->ssup_collation = DEFAULT_COLLATION_OID;
|
|
|
|
|
|
|
|
|
|
sortKey->ssup_nulls_first = false;
|
|
|
|
|
sortKey->ssup_attno = i + 1;
|
|
|
|
|
sortKey->abbreviate = false;
|
|
|
|
|
|
|
|
|
|
Assert(sortKey->ssup_attno != 0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the compare proc isn't specified in the opclass definition, look
|
|
|
|
|
* up the index key type's default btree comparator.
|
|
|
|
|
*/
|
|
|
|
|
cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
|
|
|
|
|
if (cmpFunc == InvalidOid)
|
|
|
|
|
{
|
|
|
|
|
TypeCacheEntry *typentry;
|
|
|
|
|
|
|
|
|
|
typentry = lookup_type_cache(att->atttypid,
|
|
|
|
|
TYPECACHE_CMP_PROC_FINFO);
|
|
|
|
|
if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
|
|
|
|
|
ereport(ERROR,
|
|
|
|
|
(errcode(ERRCODE_UNDEFINED_FUNCTION),
|
|
|
|
|
errmsg("could not identify a comparison function for type %s",
|
|
|
|
|
format_type_be(att->atttypid))));
|
|
|
|
|
|
|
|
|
|
cmpFunc = typentry->cmp_proc_finfo.fn_oid;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PrepareSortSupportComparisonShim(cmpFunc, sortKey);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return buffer;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Is the buffer empty, i.e. has no TID values in the array? */
|
|
|
|
|
static bool
|
|
|
|
|
GinBufferIsEmpty(GinBuffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
return (buffer->nitems == 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferKeyEquals
|
|
|
|
|
* Can the buffer store TIDs for the provided GIN tuple (same key)?
|
|
|
|
|
*
|
|
|
|
|
* Compare if the tuple matches the already accumulated data in the GIN
|
|
|
|
|
* buffer. Compare scalar fields first, before the actual key.
|
|
|
|
|
*
|
|
|
|
|
* Returns true if the key matches, and the TID belonds to the buffer, or
|
|
|
|
|
* false if the key does not match.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
|
|
|
|
|
{
|
|
|
|
|
int r;
|
|
|
|
|
Datum tupkey;
|
|
|
|
|
|
|
|
|
|
AssertCheckGinBuffer(buffer);
|
|
|
|
|
|
|
|
|
|
if (tup->attrnum != buffer->attnum)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* same attribute should have the same type info */
|
|
|
|
|
Assert(tup->typbyval == buffer->typbyval);
|
|
|
|
|
Assert(tup->typlen == buffer->typlen);
|
|
|
|
|
|
|
|
|
|
if (tup->category != buffer->category)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For NULL/empty keys, this means equality, for normal keys we need to
|
|
|
|
|
* compare the actual key value.
|
|
|
|
|
*/
|
|
|
|
|
if (buffer->category != GIN_CAT_NORM_KEY)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For the tuple, get either the first sizeof(Datum) bytes for byval
|
|
|
|
|
* types, or a pointer to the beginning of the data array.
|
|
|
|
|
*/
|
|
|
|
|
tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
|
|
|
|
|
|
|
|
|
|
r = ApplySortComparator(buffer->key, false,
|
|
|
|
|
tupkey, false,
|
|
|
|
|
&buffer->ssup[buffer->attnum - 1]);
|
|
|
|
|
|
|
|
|
|
return (r == 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferStoreTuple
|
|
|
|
|
* Add data (especially TID list) from a GIN tuple to the buffer.
|
|
|
|
|
*
|
|
|
|
|
* The buffer is expected to be empty (in which case it's initialized), or
|
|
|
|
|
* having the same key. The TID values from the tuple are combined with the
|
|
|
|
|
* stored values using a merge sort.
|
|
|
|
|
*
|
|
|
|
|
* The tuples (for the same key) are expected to be sorted by first TID. But
|
|
|
|
|
* this does not guarantee the lists do not overlap, especially in the leader,
|
|
|
|
|
* because the workers process interleaving data. There should be no overlaps
|
|
|
|
|
* in a single worker - it could happen when the parallel scan wraps around,
|
|
|
|
|
* but we detect that and flush the data (see ginBuildCallbackParallel).
|
|
|
|
|
*
|
|
|
|
|
* By sorting the GinTuple not only by key, but also by the first TID, we make
|
|
|
|
|
* it more less likely the lists will overlap during merge. We merge them using
|
|
|
|
|
* mergesort, but it's cheaper to just append one list to the other.
|
|
|
|
|
*
|
|
|
|
|
* How often can the lists overlap? There should be no overlaps in workers,
|
|
|
|
|
* and in the leader we can see overlaps between lists built by different
|
|
|
|
|
* workers. But the workers merge the items as much as possible, so there
|
|
|
|
|
* should not be too many.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
|
|
|
|
|
{
|
|
|
|
|
ItemPointerData *items;
|
|
|
|
|
Datum key;
|
|
|
|
|
|
|
|
|
|
AssertCheckGinBuffer(buffer);
|
|
|
|
|
|
|
|
|
|
key = _gin_parse_tuple(tup, &items);
|
|
|
|
|
|
|
|
|
|
/* if the buffer is empty, set the fields (and copy the key) */
|
|
|
|
|
if (GinBufferIsEmpty(buffer))
|
|
|
|
|
{
|
|
|
|
|
buffer->category = tup->category;
|
|
|
|
|
buffer->keylen = tup->keylen;
|
|
|
|
|
buffer->attnum = tup->attrnum;
|
|
|
|
|
|
|
|
|
|
buffer->typlen = tup->typlen;
|
|
|
|
|
buffer->typbyval = tup->typbyval;
|
|
|
|
|
|
|
|
|
|
if (tup->category == GIN_CAT_NORM_KEY)
|
|
|
|
|
buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
|
|
|
|
|
else
|
|
|
|
|
buffer->key = (Datum) 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* add the new TIDs into the buffer, combine using merge-sort */
|
|
|
|
|
{
|
|
|
|
|
int nnew;
|
|
|
|
|
ItemPointer new;
|
|
|
|
|
|
|
|
|
|
new = ginMergeItemPointers(buffer->items, buffer->nitems,
|
|
|
|
|
items, tup->nitems, &nnew);
|
|
|
|
|
|
|
|
|
|
Assert(nnew == buffer->nitems + tup->nitems);
|
|
|
|
|
|
|
|
|
|
if (buffer->items)
|
|
|
|
|
pfree(buffer->items);
|
|
|
|
|
|
|
|
|
|
buffer->items = new;
|
|
|
|
|
buffer->nitems = nnew;
|
|
|
|
|
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferReset
|
|
|
|
|
* Reset the buffer into a state as if it contains no data.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
GinBufferReset(GinBuffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
Assert(!GinBufferIsEmpty(buffer));
|
|
|
|
|
|
|
|
|
|
/* release byref values, do nothing for by-val ones */
|
|
|
|
|
if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
|
|
|
|
|
pfree(DatumGetPointer(buffer->key));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Not required, but makes it more likely to trigger NULL derefefence if
|
|
|
|
|
* using the value incorrectly, etc.
|
|
|
|
|
*/
|
|
|
|
|
buffer->key = (Datum) 0;
|
|
|
|
|
|
|
|
|
|
buffer->attnum = 0;
|
|
|
|
|
buffer->category = 0;
|
|
|
|
|
buffer->keylen = 0;
|
|
|
|
|
buffer->nitems = 0;
|
|
|
|
|
|
|
|
|
|
buffer->typlen = 0;
|
|
|
|
|
buffer->typbyval = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferFree
|
|
|
|
|
* Release memory associated with the GinBuffer (including TID array).
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
GinBufferFree(GinBuffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
if (buffer->items)
|
|
|
|
|
pfree(buffer->items);
|
|
|
|
|
|
|
|
|
|
/* release byref values, do nothing for by-val ones */
|
|
|
|
|
if (!GinBufferIsEmpty(buffer) &&
|
|
|
|
|
(buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
|
|
|
|
|
pfree(DatumGetPointer(buffer->key));
|
|
|
|
|
|
|
|
|
|
pfree(buffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* GinBufferCanAddKey
|
|
|
|
|
* Check if a given GIN tuple can be added to the current buffer.
|
|
|
|
|
*
|
|
|
|
|
* Returns true if the buffer is either empty or for the same index key.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
|
|
|
|
|
{
|
|
|
|
|
/* empty buffer can accept data for any key */
|
|
|
|
|
if (GinBufferIsEmpty(buffer))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/* otherwise just data for the same key */
|
|
|
|
|
return GinBufferKeyEquals(buffer, tup);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Within leader, wait for end of heap scan and merge per-worker results.
|
|
|
|
|
*
|
|
|
|
|
* After waiting for all workers to finish, merge the per-worker results into
|
|
|
|
|
* the complete index. The results from each worker are sorted by block number
|
|
|
|
|
* (start of the page range). While combinig the per-worker results we merge
|
|
|
|
|
* summaries for the same page range, and also fill-in empty summaries for
|
|
|
|
|
* ranges without any tuples.
|
|
|
|
|
*
|
|
|
|
|
* Returns the total number of heap tuples scanned.
|
|
|
|
|
*/
|
|
|
|
|
static double
|
|
|
|
|
_gin_parallel_merge(GinBuildState *state)
|
|
|
|
|
{
|
|
|
|
|
GinTuple *tup;
|
|
|
|
|
Size tuplen;
|
|
|
|
|
double reltuples = 0;
|
|
|
|
|
GinBuffer *buffer;
|
|
|
|
|
|
|
|
|
|
/* GIN tuples from workers, merged by leader */
|
|
|
|
|
double numtuples = 0;
|
|
|
|
|
|
|
|
|
|
/* wait for workers to scan table and produce partial results */
|
|
|
|
|
reltuples = _gin_parallel_heapscan(state);
|
|
|
|
|
|
|
|
|
|
/* Execute the sort */
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
|
|
|
|
|
PROGRESS_GIN_PHASE_PERFORMSORT_2);
|
|
|
|
|
|
|
|
|
|
/* do the actual sort in the leader */
|
|
|
|
|
tuplesort_performsort(state->bs_sortstate);
|
|
|
|
|
|
|
|
|
|
/* initialize buffer to combine entries for the same key */
|
|
|
|
|
buffer = GinBufferInit(state->ginstate.index);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set the progress target for the next phase. Reset the block number
|
|
|
|
|
* values set by table_index_build_scan
|
|
|
|
|
*/
|
|
|
|
|
{
|
|
|
|
|
const int progress_index[] = {
|
|
|
|
|
PROGRESS_CREATEIDX_SUBPHASE,
|
|
|
|
|
PROGRESS_CREATEIDX_TUPLES_TOTAL,
|
|
|
|
|
PROGRESS_SCAN_BLOCKS_TOTAL,
|
|
|
|
|
PROGRESS_SCAN_BLOCKS_DONE
|
|
|
|
|
};
|
|
|
|
|
const int64 progress_vals[] = {
|
|
|
|
|
PROGRESS_GIN_PHASE_MERGE_2,
|
|
|
|
|
state->bs_numtuples,
|
|
|
|
|
0, 0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
pgstat_progress_update_multi_param(4, progress_index, progress_vals);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Read the GIN tuples from the shared tuplesort, sorted by category and
|
|
|
|
|
* key. That probably gives us order matching how data is organized in the
|
|
|
|
|
* index.
|
|
|
|
|
*
|
|
|
|
|
* We don't insert the GIN tuples right away, but instead accumulate as
|
|
|
|
|
* many TIDs for the same key as possible, and then insert that at once.
|
|
|
|
|
* This way we don't need to decompress/recompress the posting lists, etc.
|
|
|
|
|
*/
|
|
|
|
|
while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
|
|
|
|
|
{
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the buffer can accept the new GIN tuple, just store it there and
|
|
|
|
|
* we're done. If it's a different key (or maybe too much data) flush
|
|
|
|
|
* the current contents into the index first.
|
|
|
|
|
*/
|
|
|
|
|
if (!GinBufferCanAddKey(buffer, tup))
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* Buffer is not empty and it's storing a different key - flush
|
|
|
|
|
* the data into the insert, and start a new entry for current
|
|
|
|
|
* GinTuple.
|
|
|
|
|
*/
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
|
|
|
|
|
ginEntryInsert(&state->ginstate,
|
|
|
|
|
buffer->attnum, buffer->key, buffer->category,
|
|
|
|
|
buffer->items, buffer->nitems, &state->buildStats);
|
|
|
|
|
|
|
|
|
|
/* discard the existing data */
|
|
|
|
|
GinBufferReset(buffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remember data for the current tuple (either remember the new key,
|
|
|
|
|
* or append if to the existing data).
|
|
|
|
|
*/
|
|
|
|
|
GinBufferStoreTuple(buffer, tup);
|
|
|
|
|
|
|
|
|
|
/* Report progress */
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
|
|
|
|
|
++numtuples);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* flush data remaining in the buffer (for the last key) */
|
|
|
|
|
if (!GinBufferIsEmpty(buffer))
|
|
|
|
|
{
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
|
|
|
|
|
ginEntryInsert(&state->ginstate,
|
|
|
|
|
buffer->attnum, buffer->key, buffer->category,
|
|
|
|
|
buffer->items, buffer->nitems, &state->buildStats);
|
|
|
|
|
|
|
|
|
|
/* discard the existing data */
|
|
|
|
|
GinBufferReset(buffer);
|
|
|
|
|
|
|
|
|
|
/* Report progress */
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
|
|
|
|
|
++numtuples);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* relase all the memory */
|
|
|
|
|
GinBufferFree(buffer);
|
|
|
|
|
|
|
|
|
|
tuplesort_end(state->bs_sortstate);
|
|
|
|
|
|
|
|
|
|
return reltuples;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Returns size of shared memory required to store state for a parallel
|
|
|
|
|
* gin index build based on the snapshot its parallel scan will use.
|
|
|
|
|
*/
|
|
|
|
|
static Size
|
|
|
|
|
_gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
|
|
|
|
|
{
|
|
|
|
|
/* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
|
|
|
|
|
return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
|
|
|
|
|
table_parallelscan_estimate(heap, snapshot));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Within leader, participate as a parallel worker.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
_gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
|
|
|
|
|
{
|
|
|
|
|
GinLeader *ginleader = buildstate->bs_leader;
|
|
|
|
|
int sortmem;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Might as well use reliable figure when doling out maintenance_work_mem
|
|
|
|
|
* (when requested number of workers were not launched, this will be
|
|
|
|
|
* somewhat higher than it is for other workers).
|
|
|
|
|
*/
|
|
|
|
|
sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
|
|
|
|
|
|
|
|
|
|
/* Perform work common to all participants */
|
|
|
|
|
_gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
|
|
|
|
|
ginleader->sharedsort, heap, index,
|
|
|
|
|
sortmem, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* _gin_process_worker_data
|
|
|
|
|
* First phase of the key merging, happening in the worker.
|
|
|
|
|
*
|
|
|
|
|
* Depending on the number of distinct keys, the TID lists produced by the
|
|
|
|
|
* callback may be very short (due to frequent evictions in the callback).
|
|
|
|
|
* But combining many tiny lists is expensive, so we try to do as much as
|
|
|
|
|
* possible in the workers and only then pass the results to the leader.
|
|
|
|
|
*
|
|
|
|
|
* We read the tuples sorted by the key, and merge them into larger lists.
|
|
|
|
|
* At the moment there's no memory limit, so this will just produce one
|
|
|
|
|
* huge (sorted) list per key in each worker. Which means the leader will
|
|
|
|
|
* do a very limited number of mergesorts, which is good.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
|
|
|
|
|
bool progress)
|
|
|
|
|
{
|
|
|
|
|
GinTuple *tup;
|
|
|
|
|
Size tuplen;
|
|
|
|
|
|
|
|
|
|
GinBuffer *buffer;
|
|
|
|
|
|
|
|
|
|
/* initialize buffer to combine entries for the same key */
|
|
|
|
|
buffer = GinBufferInit(state->ginstate.index);
|
|
|
|
|
|
|
|
|
|
/* sort the raw per-worker data */
|
|
|
|
|
if (progress)
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
|
|
|
|
|
PROGRESS_GIN_PHASE_PERFORMSORT_1);
|
|
|
|
|
|
|
|
|
|
tuplesort_performsort(state->bs_worker_sort);
|
|
|
|
|
|
|
|
|
|
/* reset the number of GIN tuples produced by this worker */
|
|
|
|
|
state->bs_numtuples = 0;
|
|
|
|
|
|
|
|
|
|
if (progress)
|
|
|
|
|
pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
|
|
|
|
|
PROGRESS_GIN_PHASE_MERGE_1);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Read the GIN tuples from the shared tuplesort, sorted by the key, and
|
|
|
|
|
* merge them into larger chunks for the leader to combine.
|
|
|
|
|
*/
|
|
|
|
|
while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the buffer can accept the new GIN tuple, just store it there and
|
|
|
|
|
* we're done. If it's a different key (or maybe too much data) flush
|
|
|
|
|
* the current contents into the index first.
|
|
|
|
|
*/
|
|
|
|
|
if (!GinBufferCanAddKey(buffer, tup))
|
|
|
|
|
{
|
|
|
|
|
GinTuple *ntup;
|
|
|
|
|
Size ntuplen;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Buffer is not empty and it's storing a different key - flush
|
|
|
|
|
* the data into the insert, and start a new entry for current
|
|
|
|
|
* GinTuple.
|
|
|
|
|
*/
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
|
|
|
|
|
ntup = _gin_build_tuple(buffer->attnum, buffer->category,
|
|
|
|
|
buffer->key, buffer->typlen, buffer->typbyval,
|
|
|
|
|
buffer->items, buffer->nitems, &ntuplen);
|
|
|
|
|
|
|
|
|
|
tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
|
|
|
|
|
state->bs_numtuples++;
|
|
|
|
|
|
|
|
|
|
pfree(ntup);
|
|
|
|
|
|
|
|
|
|
/* discard the existing data */
|
|
|
|
|
GinBufferReset(buffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remember data for the current tuple (either remember the new key,
|
|
|
|
|
* or append if to the existing data).
|
|
|
|
|
*/
|
|
|
|
|
GinBufferStoreTuple(buffer, tup);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* flush data remaining in the buffer (for the last key) */
|
|
|
|
|
if (!GinBufferIsEmpty(buffer))
|
|
|
|
|
{
|
|
|
|
|
GinTuple *ntup;
|
|
|
|
|
Size ntuplen;
|
|
|
|
|
|
|
|
|
|
AssertCheckItemPointers(buffer);
|
|
|
|
|
|
|
|
|
|
ntup = _gin_build_tuple(buffer->attnum, buffer->category,
|
|
|
|
|
buffer->key, buffer->typlen, buffer->typbyval,
|
|
|
|
|
buffer->items, buffer->nitems, &ntuplen);
|
|
|
|
|
|
|
|
|
|
tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
|
|
|
|
|
state->bs_numtuples++;
|
|
|
|
|
|
|
|
|
|
pfree(ntup);
|
|
|
|
|
|
|
|
|
|
/* discard the existing data */
|
|
|
|
|
GinBufferReset(buffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* relase all the memory */
|
|
|
|
|
GinBufferFree(buffer);
|
|
|
|
|
|
|
|
|
|
tuplesort_end(worker_sort);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Perform a worker's portion of a parallel GIN index build sort.
|
|
|
|
|
*
|
|
|
|
|
* This generates a tuplesort for the worker portion of the table.
|
|
|
|
|
*
|
|
|
|
|
* sortmem is the amount of working memory to use within each worker,
|
|
|
|
|
* expressed in KBs.
|
|
|
|
|
*
|
|
|
|
|
* When this returns, workers are done, and need only release resources.
|
|
|
|
|
*
|
|
|
|
|
* Before feeding data into a shared tuplesort (for the leader process),
|
|
|
|
|
* the workers process data in two phases.
|
|
|
|
|
*
|
|
|
|
|
* 1) A worker reads a portion of rows from the table, accumulates entries
|
|
|
|
|
* in memory, and flushes them into a private tuplesort (e.g. because of
|
|
|
|
|
* using too much memory).
|
|
|
|
|
*
|
|
|
|
|
* 2) The private tuplesort gets sorted (by key and TID), the worker reads
|
|
|
|
|
* the data again, and combines the entries as much as possible. This has
|
|
|
|
|
* to happen eventually, and this way it's done in workers in parallel.
|
|
|
|
|
*
|
|
|
|
|
* Finally, the combined entries are written into the shared tuplesort, so
|
|
|
|
|
* that the leader can process them.
|
|
|
|
|
*
|
|
|
|
|
* How well this works (compared to just writing entries into the shared
|
|
|
|
|
* tuplesort) depends on the data set. For large tables with many distinct
|
|
|
|
|
* keys this helps a lot. With many distinct keys it's likely the buffers has
|
|
|
|
|
* to be flushed often, generating many entries with the same key and short
|
|
|
|
|
* TID lists. These entries need to be sorted and merged at some point,
|
|
|
|
|
* before writing them to the index. The merging is quite expensive, it can
|
|
|
|
|
* easily be ~50% of a serial build, and doing as much of it in the workers
|
|
|
|
|
* means it's parallelized. The leader still has to merge results from the
|
|
|
|
|
* workers, but it's much more efficient to merge few large entries than
|
|
|
|
|
* many tiny ones.
|
|
|
|
|
*
|
|
|
|
|
* This also reduces the amount of data the workers pass to the leader through
|
|
|
|
|
* the shared tuplesort. OTOH the workers need more space for the private sort,
|
|
|
|
|
* possibly up to 2x of the data, if no entries be merged in a worker. But this
|
|
|
|
|
* is very unlikely, and the only consequence is inefficiency, so we ignore it.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
_gin_parallel_scan_and_build(GinBuildState *state,
|
|
|
|
|
GinBuildShared *ginshared, Sharedsort *sharedsort,
|
|
|
|
|
Relation heap, Relation index,
|
|
|
|
|
int sortmem, bool progress)
|
|
|
|
|
{
|
|
|
|
|
SortCoordinate coordinate;
|
|
|
|
|
TableScanDesc scan;
|
|
|
|
|
double reltuples;
|
|
|
|
|
IndexInfo *indexInfo;
|
|
|
|
|
|
|
|
|
|
/* Initialize local tuplesort coordination state */
|
|
|
|
|
coordinate = palloc0(sizeof(SortCoordinateData));
|
|
|
|
|
coordinate->isWorker = true;
|
|
|
|
|
coordinate->nParticipants = -1;
|
|
|
|
|
coordinate->sharedsort = sharedsort;
|
|
|
|
|
|
|
|
|
|
/* remember how much space is allowed for the accumulated entries */
|
|
|
|
|
state->work_mem = (sortmem / 2);
|
|
|
|
|
|
|
|
|
|
/* Begin "partial" tuplesort */
|
|
|
|
|
state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
|
|
|
|
|
state->work_mem,
|
|
|
|
|
coordinate,
|
|
|
|
|
TUPLESORT_NONE);
|
|
|
|
|
|
|
|
|
|
/* Local per-worker sort of raw-data */
|
|
|
|
|
state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
|
|
|
|
|
state->work_mem,
|
|
|
|
|
NULL,
|
|
|
|
|
TUPLESORT_NONE);
|
|
|
|
|
|
|
|
|
|
/* Join parallel scan */
|
|
|
|
|
indexInfo = BuildIndexInfo(index);
|
|
|
|
|
indexInfo->ii_Concurrent = ginshared->isconcurrent;
|
|
|
|
|
|
|
|
|
|
scan = table_beginscan_parallel(heap,
|
|
|
|
|
ParallelTableScanFromGinBuildShared(ginshared));
|
|
|
|
|
|
|
|
|
|
reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
|
|
|
|
|
ginBuildCallbackParallel, state, scan);
|
|
|
|
|
|
|
|
|
|
/* write remaining accumulated entries */
|
|
|
|
|
ginFlushBuildState(state, index);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Do the first phase of in-worker processing - sort the data produced by
|
|
|
|
|
* the callback, and combine them into much larger chunks and place that
|
|
|
|
|
* into the shared tuplestore for leader to process.
|
|
|
|
|
*/
|
|
|
|
|
_gin_process_worker_data(state, state->bs_worker_sort, progress);
|
|
|
|
|
|
|
|
|
|
/* sort the GIN tuples built by this worker */
|
|
|
|
|
tuplesort_performsort(state->bs_sortstate);
|
|
|
|
|
|
|
|
|
|
state->bs_reltuples += reltuples;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Done. Record ambuild statistics.
|
|
|
|
|
*/
|
|
|
|
|
SpinLockAcquire(&ginshared->mutex);
|
|
|
|
|
ginshared->nparticipantsdone++;
|
|
|
|
|
ginshared->reltuples += state->bs_reltuples;
|
|
|
|
|
ginshared->indtuples += state->bs_numtuples;
|
|
|
|
|
SpinLockRelease(&ginshared->mutex);
|
|
|
|
|
|
|
|
|
|
/* Notify leader */
|
|
|
|
|
ConditionVariableSignal(&ginshared->workersdonecv);
|
|
|
|
|
|
|
|
|
|
tuplesort_end(state->bs_sortstate);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Perform work within a launched parallel process.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
_gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
|
|
|
|
|
{
|
|
|
|
|
char *sharedquery;
|
|
|
|
|
GinBuildShared *ginshared;
|
|
|
|
|
Sharedsort *sharedsort;
|
|
|
|
|
GinBuildState buildstate;
|
|
|
|
|
Relation heapRel;
|
|
|
|
|
Relation indexRel;
|
|
|
|
|
LOCKMODE heapLockmode;
|
|
|
|
|
LOCKMODE indexLockmode;
|
|
|
|
|
WalUsage *walusage;
|
|
|
|
|
BufferUsage *bufferusage;
|
|
|
|
|
int sortmem;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The only possible status flag that can be set to the parallel worker is
|
|
|
|
|
* PROC_IN_SAFE_IC.
|
|
|
|
|
*/
|
|
|
|
|
Assert((MyProc->statusFlags == 0) ||
|
|
|
|
|
(MyProc->statusFlags == PROC_IN_SAFE_IC));
|
|
|
|
|
|
|
|
|
|
/* Set debug_query_string for individual workers first */
|
|
|
|
|
sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
|
|
|
|
|
debug_query_string = sharedquery;
|
|
|
|
|
|
|
|
|
|
/* Report the query string from leader */
|
|
|
|
|
pgstat_report_activity(STATE_RUNNING, debug_query_string);
|
|
|
|
|
|
|
|
|
|
/* Look up gin shared state */
|
|
|
|
|
ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
|
|
|
|
|
|
|
|
|
|
/* Open relations using lock modes known to be obtained by index.c */
|
|
|
|
|
if (!ginshared->isconcurrent)
|
|
|
|
|
{
|
|
|
|
|
heapLockmode = ShareLock;
|
|
|
|
|
indexLockmode = AccessExclusiveLock;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
heapLockmode = ShareUpdateExclusiveLock;
|
|
|
|
|
indexLockmode = RowExclusiveLock;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Open relations within worker */
|
|
|
|
|
heapRel = table_open(ginshared->heaprelid, heapLockmode);
|
|
|
|
|
indexRel = index_open(ginshared->indexrelid, indexLockmode);
|
|
|
|
|
|
|
|
|
|
/* initialize the GIN build state */
|
|
|
|
|
initGinState(&buildstate.ginstate, indexRel);
|
|
|
|
|
buildstate.indtuples = 0;
|
|
|
|
|
memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
|
|
|
|
|
memset(&buildstate.tid, 0, sizeof(ItemPointerData));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* create a temporary memory context that is used to hold data not yet
|
|
|
|
|
* dumped out to the index
|
|
|
|
|
*/
|
|
|
|
|
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
|
"Gin build temporary context",
|
|
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* create a temporary memory context that is used for calling
|
|
|
|
|
* ginExtractEntries(), and can be reset after each tuple
|
|
|
|
|
*/
|
|
|
|
|
buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
|
"Gin build temporary context for user-defined function",
|
|
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
|
|
buildstate.accum.ginstate = &buildstate.ginstate;
|
|
|
|
|
ginInitBA(&buildstate.accum);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Look up shared state private to tuplesort.c */
|
|
|
|
|
sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
|
|
|
|
|
tuplesort_attach_shared(sharedsort, seg);
|
|
|
|
|
|
|
|
|
|
/* Prepare to track buffer usage during parallel execution */
|
|
|
|
|
InstrStartParallelQuery();
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Might as well use reliable figure when doling out maintenance_work_mem
|
|
|
|
|
* (when requested number of workers were not launched, this will be
|
|
|
|
|
* somewhat higher than it is for other workers).
|
|
|
|
|
*/
|
|
|
|
|
sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
|
|
|
|
|
|
|
|
|
|
_gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
|
|
|
|
|
heapRel, indexRel, sortmem, false);
|
|
|
|
|
|
|
|
|
|
/* Report WAL/buffer usage during parallel execution */
|
|
|
|
|
bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
|
|
|
|
|
walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
|
|
|
|
|
InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
|
|
|
|
|
&walusage[ParallelWorkerNumber]);
|
|
|
|
|
|
|
|
|
|
index_close(indexRel, indexLockmode);
|
|
|
|
|
table_close(heapRel, heapLockmode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* _gin_build_tuple
|
|
|
|
|
* Serialize the state for an index key into a tuple for tuplesort.
|
|
|
|
|
*
|
|
|
|
|
* The tuple has a number of scalar fields (mostly matching the build state),
|
|
|
|
|
* and then a data array that stores the key first, and then the TID list.
|
|
|
|
|
*
|
|
|
|
|
* For by-reference data types, we store the actual data. For by-val types
|
|
|
|
|
* we simply copy the whole Datum, so that we don't have to care about stuff
|
|
|
|
|
* like endianness etc. We could make it a little bit smaller, but it's not
|
|
|
|
|
* worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
|
|
|
|
|
* start of the TID list anyway. So we wouldn't save anything.
|
|
|
|
|
*/
|
|
|
|
|
static GinTuple *
|
|
|
|
|
_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
|
|
|
|
|
Datum key, int16 typlen, bool typbyval,
|
|
|
|
|
ItemPointerData *items, uint32 nitems,
|
|
|
|
|
Size *len)
|
|
|
|
|
{
|
|
|
|
|
GinTuple *tuple;
|
|
|
|
|
char *ptr;
|
|
|
|
|
|
|
|
|
|
Size tuplen;
|
|
|
|
|
int keylen;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
|
|
|
|
|
* have actual non-empty key. We include varlena headers and \0 bytes for
|
|
|
|
|
* strings, to make it easier to access the data in-line.
|
|
|
|
|
*
|
|
|
|
|
* For byval types we simply copy the whole Datum. We could store just the
|
|
|
|
|
* necessary bytes, but this is simpler to work with and not worth the
|
|
|
|
|
* extra complexity. Moreover we still need to do the MAXALIGN to allow
|
|
|
|
|
* direct access to items pointers.
|
|
|
|
|
*
|
|
|
|
|
* XXX Note that for byval types we store the whole datum, no matter what
|
|
|
|
|
* the typlen value is.
|
|
|
|
|
*/
|
|
|
|
|
if (category != GIN_CAT_NORM_KEY)
|
|
|
|
|
keylen = 0;
|
|
|
|
|
else if (typbyval)
|
|
|
|
|
keylen = sizeof(Datum);
|
|
|
|
|
else if (typlen > 0)
|
|
|
|
|
keylen = typlen;
|
|
|
|
|
else if (typlen == -1)
|
|
|
|
|
keylen = VARSIZE_ANY(key);
|
|
|
|
|
else if (typlen == -2)
|
|
|
|
|
keylen = strlen(DatumGetPointer(key)) + 1;
|
|
|
|
|
else
|
|
|
|
|
elog(ERROR, "unexpected typlen value (%d)", typlen);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Determine GIN tuple length with all the data included. Be careful about
|
|
|
|
|
* alignment, to allow direct access to item pointers.
|
|
|
|
|
*/
|
|
|
|
|
tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) +
|
|
|
|
|
(sizeof(ItemPointerData) * nitems);
|
|
|
|
|
|
|
|
|
|
*len = tuplen;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Allocate space for the whole GIN tuple.
|
|
|
|
|
*
|
|
|
|
|
* The palloc0 is needed - writetup_index_gin will write the whole tuple
|
|
|
|
|
* to disk, so we need to make sure the padding bytes are defined
|
|
|
|
|
* (otherwise valgrind would report this).
|
|
|
|
|
*/
|
|
|
|
|
tuple = palloc0(tuplen);
|
|
|
|
|
|
|
|
|
|
tuple->tuplen = tuplen;
|
|
|
|
|
tuple->attrnum = attrnum;
|
|
|
|
|
tuple->category = category;
|
|
|
|
|
tuple->keylen = keylen;
|
|
|
|
|
tuple->nitems = nitems;
|
|
|
|
|
|
|
|
|
|
/* key type info */
|
|
|
|
|
tuple->typlen = typlen;
|
|
|
|
|
tuple->typbyval = typbyval;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Copy the key and items into the tuple. First the key value, which we
|
|
|
|
|
* can simply copy right at the beginning of the data array.
|
|
|
|
|
*/
|
|
|
|
|
if (category == GIN_CAT_NORM_KEY)
|
|
|
|
|
{
|
|
|
|
|
if (typbyval)
|
|
|
|
|
{
|
|
|
|
|
memcpy(tuple->data, &key, sizeof(Datum));
|
|
|
|
|
}
|
|
|
|
|
else if (typlen > 0) /* byref, fixed length */
|
|
|
|
|
{
|
|
|
|
|
memcpy(tuple->data, DatumGetPointer(key), typlen);
|
|
|
|
|
}
|
|
|
|
|
else if (typlen == -1)
|
|
|
|
|
{
|
|
|
|
|
memcpy(tuple->data, DatumGetPointer(key), keylen);
|
|
|
|
|
}
|
|
|
|
|
else if (typlen == -2)
|
|
|
|
|
{
|
|
|
|
|
memcpy(tuple->data, DatumGetPointer(key), keylen);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* finally, copy the TIDs into the array */
|
|
|
|
|
ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
|
|
|
|
|
|
|
|
|
|
memcpy(ptr, items, sizeof(ItemPointerData) * nitems);
|
|
|
|
|
|
|
|
|
|
return tuple;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* _gin_parse_tuple
|
|
|
|
|
* Deserialize the tuple from the tuplestore representation.
|
|
|
|
|
*
|
|
|
|
|
* Most of the fields are actually directly accessible, the only thing that
|
|
|
|
|
* needs more care is the key and the TID list.
|
|
|
|
|
*
|
|
|
|
|
* For the key, this returns a regular Datum representing it. It's either the
|
|
|
|
|
* actual key value, or a pointer to the beginning of the data array (which is
|
|
|
|
|
* where the data was copied by _gin_build_tuple).
|
|
|
|
|
*
|
|
|
|
|
* The pointer to the TID list is returned through 'items' (which is simply
|
|
|
|
|
* a pointer to the data array).
|
|
|
|
|
*/
|
|
|
|
|
static Datum
|
|
|
|
|
_gin_parse_tuple(GinTuple *a, ItemPointerData **items)
|
|
|
|
|
{
|
|
|
|
|
Datum key;
|
|
|
|
|
|
|
|
|
|
if (items)
|
|
|
|
|
{
|
|
|
|
|
char *ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
|
|
|
|
|
|
|
|
|
|
*items = (ItemPointerData *) ptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (a->category != GIN_CAT_NORM_KEY)
|
|
|
|
|
return (Datum) 0;
|
|
|
|
|
|
|
|
|
|
if (a->typbyval)
|
|
|
|
|
{
|
|
|
|
|
memcpy(&key, a->data, a->keylen);
|
|
|
|
|
return key;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return PointerGetDatum(a->data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* _gin_compare_tuples
|
|
|
|
|
* Compare GIN tuples, used by tuplesort during parallel index build.
|
|
|
|
|
*
|
|
|
|
|
* The scalar fields (attrnum, category) are compared first, the key value is
|
|
|
|
|
* compared last. The comparisons are done using type-specific sort support
|
|
|
|
|
* functions.
|
|
|
|
|
*
|
|
|
|
|
* If the key value matches, we compare the first TID value in the TID list,
|
|
|
|
|
* which means the tuples are merged in an order in which they are most
|
|
|
|
|
* likely to be simply concatenated. (This "first" TID will also allow us
|
|
|
|
|
* to determine a point up to which the list is fully determined and can be
|
|
|
|
|
* written into the index to enforce a memory limit etc.)
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
_gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
|
|
|
|
|
{
|
|
|
|
|
int r;
|
|
|
|
|
Datum keya,
|
|
|
|
|
keyb;
|
|
|
|
|
|
|
|
|
|
if (a->attrnum < b->attrnum)
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
|
if (a->attrnum > b->attrnum)
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
if (a->category < b->category)
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
|
if (a->category > b->category)
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
if (a->category == GIN_CAT_NORM_KEY)
|
|
|
|
|
{
|
|
|
|
|
keya = _gin_parse_tuple(a, NULL);
|
|
|
|
|
keyb = _gin_parse_tuple(b, NULL);
|
|
|
|
|
|
|
|
|
|
r = ApplySortComparator(keya, false,
|
|
|
|
|
keyb, false,
|
|
|
|
|
&ssup[a->attrnum - 1]);
|
|
|
|
|
|
|
|
|
|
/* if the key is the same, consider the first TID in the array */
|
|
|
|
|
return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
|
|
|
|
|
GinTupleGetFirst(b));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ItemPointerCompare(GinTupleGetFirst(a),
|
|
|
|
|
GinTupleGetFirst(b));
|
|
|
|
|
}
|