|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* datum.c
|
|
|
|
* POSTGRES Datum (abstract data type) manipulation routines.
|
|
|
|
*
|
|
|
|
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/utils/adt/datum.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
|
|
|
|
/*
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* In the implementation of these routines we assume the following:
|
|
|
|
*
|
|
|
|
* A) if a type is "byVal" then all the information is stored in the
|
|
|
|
* Datum itself (i.e. no pointers involved!). In this case the
|
|
|
|
* length of the type is always greater than zero and not more than
|
|
|
|
* "sizeof(Datum)"
|
|
|
|
*
|
|
|
|
* B) if a type is not "byVal" and it has a fixed length (typlen > 0),
|
|
|
|
* then the "Datum" always contains a pointer to a stream of bytes.
|
|
|
|
* The number of significant bytes is always equal to the typlen.
|
|
|
|
*
|
|
|
|
* C) if a type is not "byVal" and has typlen == -1,
|
|
|
|
* then the "Datum" always points to a "struct varlena".
|
|
|
|
* This varlena structure has information about the actual length of this
|
|
|
|
* particular instance of the type and about its value.
|
|
|
|
*
|
|
|
|
* D) if a type is not "byVal" and has typlen == -2,
|
|
|
|
* then the "Datum" always points to a null-terminated C string.
|
|
|
|
*
|
|
|
|
* Note that we do not treat "toasted" datums specially; therefore what
|
|
|
|
* will be copied or compared is the compressed data or toast reference.
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* An exception is made for datumCopy() of an expanded object, however,
|
|
|
|
* because most callers expect to get a simple contiguous (and pfree'able)
|
|
|
|
* result from datumCopy(). See also datumTransfer().
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "utils/datum.h"
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
#include "utils/expandeddatum.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumGetSize
|
|
|
|
*
|
|
|
|
* Find the "real" size of a datum, given the datum value,
|
|
|
|
* whether it is a "by value", and the declared type length.
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* (For TOAST pointer datums, this is the size of the pointer datum.)
|
|
|
|
*
|
|
|
|
* This is essentially an out-of-line version of the att_addlength_datum()
|
|
|
|
* macro in access/tupmacs.h. We do a tad more error checking though.
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
Size
|
|
|
|
datumGetSize(Datum value, bool typByVal, int typLen)
|
|
|
|
{
|
|
|
|
Size size;
|
|
|
|
|
|
|
|
if (typByVal)
|
|
|
|
{
|
|
|
|
/* Pass-by-value types are always fixed-length */
|
|
|
|
Assert(typLen > 0 && typLen <= sizeof(Datum));
|
|
|
|
size = (Size) typLen;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (typLen > 0)
|
|
|
|
{
|
|
|
|
/* Fixed-length pass-by-ref type */
|
|
|
|
size = (Size) typLen;
|
|
|
|
}
|
|
|
|
else if (typLen == -1)
|
|
|
|
{
|
|
|
|
/* It is a varlena datatype */
|
|
|
|
struct varlena *s = (struct varlena *) DatumGetPointer(value);
|
|
|
|
|
|
|
|
if (!PointerIsValid(s))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
|
|
|
errmsg("invalid Datum pointer")));
|
|
|
|
|
|
|
|
size = (Size) VARSIZE_ANY(s);
|
|
|
|
}
|
|
|
|
else if (typLen == -2)
|
|
|
|
{
|
|
|
|
/* It is a cstring datatype */
|
|
|
|
char *s = (char *) DatumGetPointer(value);
|
|
|
|
|
|
|
|
if (!PointerIsValid(s))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
|
|
|
errmsg("invalid Datum pointer")));
|
|
|
|
|
|
|
|
size = (Size) (strlen(s) + 1);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
elog(ERROR, "invalid typLen: %d", typLen);
|
|
|
|
size = 0; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumCopy
|
|
|
|
*
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* Make a copy of a non-NULL datum.
|
|
|
|
*
|
|
|
|
* If the datatype is pass-by-reference, memory is obtained with palloc().
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
*
|
|
|
|
* If the value is a reference to an expanded object, we flatten into memory
|
|
|
|
* obtained with palloc(). We need to copy because one of the main uses of
|
|
|
|
* this function is to copy a datum out of a transient memory context that's
|
|
|
|
* about to be destroyed, and the expanded object is probably in a child
|
|
|
|
* context that will also go away. Moreover, many callers assume that the
|
|
|
|
* result is a single pfree-able chunk.
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
datumCopy(Datum value, bool typByVal, int typLen)
|
|
|
|
{
|
|
|
|
Datum res;
|
|
|
|
|
|
|
|
if (typByVal)
|
|
|
|
res = value;
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
else if (typLen == -1)
|
|
|
|
{
|
|
|
|
/* It is a varlena datatype */
|
|
|
|
struct varlena *vl = (struct varlena *) DatumGetPointer(value);
|
|
|
|
|
|
|
|
if (VARATT_IS_EXTERNAL_EXPANDED(vl))
|
|
|
|
{
|
|
|
|
/* Flatten into the caller's memory context */
|
|
|
|
ExpandedObjectHeader *eoh = DatumGetEOHP(value);
|
|
|
|
Size resultsize;
|
|
|
|
char *resultptr;
|
|
|
|
|
|
|
|
resultsize = EOH_get_flat_size(eoh);
|
|
|
|
resultptr = (char *) palloc(resultsize);
|
|
|
|
EOH_flatten_into(eoh, (void *) resultptr, resultsize);
|
|
|
|
res = PointerGetDatum(resultptr);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Otherwise, just copy the varlena datum verbatim */
|
|
|
|
Size realSize;
|
|
|
|
char *resultptr;
|
|
|
|
|
|
|
|
realSize = (Size) VARSIZE_ANY(vl);
|
|
|
|
resultptr = (char *) palloc(realSize);
|
|
|
|
memcpy(resultptr, vl, realSize);
|
|
|
|
res = PointerGetDatum(resultptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
/* Pass by reference, but not varlena, so not toasted */
|
|
|
|
Size realSize;
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
char *resultptr;
|
|
|
|
|
|
|
|
realSize = datumGetSize(value, typByVal, typLen);
|
|
|
|
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
resultptr = (char *) palloc(realSize);
|
|
|
|
memcpy(resultptr, DatumGetPointer(value), realSize);
|
|
|
|
res = PointerGetDatum(resultptr);
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* datumTransfer
|
|
|
|
*
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* Transfer a non-NULL datum into the current memory context.
|
|
|
|
*
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
* This is equivalent to datumCopy() except when the datum is a read-write
|
|
|
|
* pointer to an expanded object. In that case we merely reparent the object
|
|
|
|
* into the current context, and return its standard R/W pointer (in case the
|
|
|
|
* given one is a transient pointer of shorter lifespan).
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
Datum
|
|
|
|
datumTransfer(Datum value, bool typByVal, int typLen)
|
|
|
|
{
|
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
10 years ago
|
|
|
if (!typByVal && typLen == -1 &&
|
|
|
|
VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
|
|
|
|
value = TransferExpandedObject(value, CurrentMemoryContext);
|
|
|
|
else
|
|
|
|
value = datumCopy(value, typByVal, typLen);
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumIsEqual
|
|
|
|
*
|
|
|
|
* Return true if two datums are equal, false otherwise
|
|
|
|
*
|
|
|
|
* NOTE: XXX!
|
|
|
|
* We just compare the bytes of the two values, one by one.
|
|
|
|
* This routine will return false if there are 2 different
|
|
|
|
* representations of the same value (something along the lines
|
|
|
|
* of say the representation of zero in one's complement arithmetic).
|
|
|
|
* Also, it will probably not give the answer you want if either
|
|
|
|
* datum has been "toasted".
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
|
|
|
|
{
|
|
|
|
bool res;
|
|
|
|
|
|
|
|
if (typByVal)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* just compare the two datums. NOTE: just comparing "len" bytes will
|
|
|
|
* not do the work, because we do not know how these bytes are aligned
|
|
|
|
* inside the "Datum". We assume instead that any given datatype is
|
|
|
|
* consistent about how it fills extraneous bits in the Datum.
|
|
|
|
*/
|
|
|
|
res = (value1 == value2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Size size1,
|
|
|
|
size2;
|
|
|
|
char *s1,
|
|
|
|
*s2;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compare the bytes pointed by the pointers stored in the datums.
|
|
|
|
*/
|
|
|
|
size1 = datumGetSize(value1, typByVal, typLen);
|
|
|
|
size2 = datumGetSize(value2, typByVal, typLen);
|
|
|
|
if (size1 != size2)
|
|
|
|
return false;
|
|
|
|
s1 = (char *) DatumGetPointer(value1);
|
|
|
|
s2 = (char *) DatumGetPointer(value2);
|
|
|
|
res = (memcmp(s1, s2, size1) == 0);
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumEstimateSpace
|
|
|
|
*
|
|
|
|
* Compute the amount of space that datumSerialize will require for a
|
|
|
|
* particular Datum.
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
Size
|
|
|
|
datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
|
|
|
|
{
|
|
|
|
Size sz = sizeof(int);
|
|
|
|
|
|
|
|
if (!isnull)
|
|
|
|
{
|
|
|
|
/* no need to use add_size, can't overflow */
|
|
|
|
if (typByVal)
|
|
|
|
sz += sizeof(Datum);
|
Fix problems with ParamListInfo serialization mechanism.
Commit d1b7c1ffe72e86932b5395f29e006c3f503bc53d introduced a mechanism
for serializing a ParamListInfo structure to be passed to a parallel
worker. However, this mechanism failed to handle external expanded
values, as pointed out by Noah Misch. Repair.
Moreover, plpgsql_param_fetch requires adjustment because the
serialization mechanism needs it to skip evaluating unused parameters
just as we would do when it is called from copyParamList, but params
== estate->paramLI in that case. To fix, make the bms_is_member test
in that function unconditional.
Finally, have setup_param_list set a new ParamListInfo field,
paramMask, to the parameters actually used in the expression, so that
we don't try to fetch those that are not needed when serializing a
parameter list. This isn't necessary for correctness, but it makes
the performance of the parallel executor code comparable to what we
do for cases involving cursors.
Design suggestions and extensive review by Noah Misch. Patch by me.
10 years ago
|
|
|
else if (VARATT_IS_EXTERNAL_EXPANDED(value))
|
|
|
|
{
|
|
|
|
ExpandedObjectHeader *eoh = DatumGetEOHP(value);
|
|
|
|
sz += EOH_get_flat_size(eoh);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
sz += datumGetSize(value, typByVal, typLen);
|
|
|
|
}
|
|
|
|
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumSerialize
|
|
|
|
*
|
|
|
|
* Serialize a possibly-NULL datum into caller-provided storage.
|
|
|
|
*
|
|
|
|
* The format is as follows: first, we write a 4-byte header word, which
|
|
|
|
* is either the length of a pass-by-reference datum, -1 for a
|
|
|
|
* pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing
|
|
|
|
* further is written. If it is pass-by-value, sizeof(Datum) bytes
|
|
|
|
* follow. Otherwise, the number of bytes indicated by the header word
|
|
|
|
* follow. The caller is responsible for ensuring that there is enough
|
|
|
|
* storage to store the number of bytes that will be written; use
|
|
|
|
* datumEstimateSpace() to find out how many will be needed.
|
|
|
|
* *start_address is updated to point to the byte immediately following
|
|
|
|
* those written.
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
|
|
|
|
char **start_address)
|
|
|
|
{
|
Fix problems with ParamListInfo serialization mechanism.
Commit d1b7c1ffe72e86932b5395f29e006c3f503bc53d introduced a mechanism
for serializing a ParamListInfo structure to be passed to a parallel
worker. However, this mechanism failed to handle external expanded
values, as pointed out by Noah Misch. Repair.
Moreover, plpgsql_param_fetch requires adjustment because the
serialization mechanism needs it to skip evaluating unused parameters
just as we would do when it is called from copyParamList, but params
== estate->paramLI in that case. To fix, make the bms_is_member test
in that function unconditional.
Finally, have setup_param_list set a new ParamListInfo field,
paramMask, to the parameters actually used in the expression, so that
we don't try to fetch those that are not needed when serializing a
parameter list. This isn't necessary for correctness, but it makes
the performance of the parallel executor code comparable to what we
do for cases involving cursors.
Design suggestions and extensive review by Noah Misch. Patch by me.
10 years ago
|
|
|
ExpandedObjectHeader *eoh = NULL;
|
|
|
|
int header;
|
|
|
|
|
|
|
|
/* Write header word. */
|
|
|
|
if (isnull)
|
|
|
|
header = -2;
|
|
|
|
else if (typByVal)
|
|
|
|
header = -1;
|
Fix problems with ParamListInfo serialization mechanism.
Commit d1b7c1ffe72e86932b5395f29e006c3f503bc53d introduced a mechanism
for serializing a ParamListInfo structure to be passed to a parallel
worker. However, this mechanism failed to handle external expanded
values, as pointed out by Noah Misch. Repair.
Moreover, plpgsql_param_fetch requires adjustment because the
serialization mechanism needs it to skip evaluating unused parameters
just as we would do when it is called from copyParamList, but params
== estate->paramLI in that case. To fix, make the bms_is_member test
in that function unconditional.
Finally, have setup_param_list set a new ParamListInfo field,
paramMask, to the parameters actually used in the expression, so that
we don't try to fetch those that are not needed when serializing a
parameter list. This isn't necessary for correctness, but it makes
the performance of the parallel executor code comparable to what we
do for cases involving cursors.
Design suggestions and extensive review by Noah Misch. Patch by me.
10 years ago
|
|
|
else if (VARATT_IS_EXTERNAL_EXPANDED(value))
|
|
|
|
{
|
|
|
|
eoh = DatumGetEOHP(value);
|
|
|
|
header = EOH_get_flat_size(eoh);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
header = datumGetSize(value, typByVal, typLen);
|
|
|
|
memcpy(*start_address, &header, sizeof(int));
|
|
|
|
*start_address += sizeof(int);
|
|
|
|
|
|
|
|
/* If not null, write payload bytes. */
|
|
|
|
if (!isnull)
|
|
|
|
{
|
|
|
|
if (typByVal)
|
|
|
|
{
|
|
|
|
memcpy(*start_address, &value, sizeof(Datum));
|
|
|
|
*start_address += sizeof(Datum);
|
|
|
|
}
|
Fix problems with ParamListInfo serialization mechanism.
Commit d1b7c1ffe72e86932b5395f29e006c3f503bc53d introduced a mechanism
for serializing a ParamListInfo structure to be passed to a parallel
worker. However, this mechanism failed to handle external expanded
values, as pointed out by Noah Misch. Repair.
Moreover, plpgsql_param_fetch requires adjustment because the
serialization mechanism needs it to skip evaluating unused parameters
just as we would do when it is called from copyParamList, but params
== estate->paramLI in that case. To fix, make the bms_is_member test
in that function unconditional.
Finally, have setup_param_list set a new ParamListInfo field,
paramMask, to the parameters actually used in the expression, so that
we don't try to fetch those that are not needed when serializing a
parameter list. This isn't necessary for correctness, but it makes
the performance of the parallel executor code comparable to what we
do for cases involving cursors.
Design suggestions and extensive review by Noah Misch. Patch by me.
10 years ago
|
|
|
else if (eoh)
|
|
|
|
{
|
|
|
|
EOH_flatten_into(eoh, (void *) *start_address, header);
|
|
|
|
*start_address += header;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
memcpy(*start_address, DatumGetPointer(value), header);
|
|
|
|
*start_address += header;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* datumRestore
|
|
|
|
*
|
|
|
|
* Restore a possibly-NULL datum previously serialized by datumSerialize.
|
|
|
|
* *start_address is updated according to the number of bytes consumed.
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
datumRestore(char **start_address, bool *isnull)
|
|
|
|
{
|
|
|
|
int header;
|
|
|
|
void *d;
|
|
|
|
|
|
|
|
/* Read header word. */
|
|
|
|
memcpy(&header, *start_address, sizeof(int));
|
|
|
|
*start_address += sizeof(int);
|
|
|
|
|
|
|
|
/* If this datum is NULL, we can stop here. */
|
|
|
|
if (header == -2)
|
|
|
|
{
|
|
|
|
*isnull = true;
|
|
|
|
return (Datum) 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* OK, datum is not null. */
|
|
|
|
*isnull = false;
|
|
|
|
|
|
|
|
/* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
|
|
|
|
if (header == -1)
|
|
|
|
{
|
|
|
|
Datum val;
|
|
|
|
|
|
|
|
memcpy(&val, *start_address, sizeof(Datum));
|
|
|
|
*start_address += sizeof(Datum);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Pass-by-reference case; copy indicated number of bytes. */
|
|
|
|
Assert(header > 0);
|
|
|
|
d = palloc(header);
|
|
|
|
memcpy(d, *start_address, header);
|
|
|
|
*start_address += header;
|
|
|
|
return PointerGetDatum(d);
|
|
|
|
}
|