You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
postgres/contrib/ltree/ltree_op.c

661 lines
12 KiB

/*
23 years ago
* op function for ltree
* Teodor Sigaev <teodor@stack.net>
* contrib/ltree/ltree_op.c
*/
#include "postgres.h"
#include <ctype.h>
#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/selfuncs.h"
#include "varatt.h"
PG_MODULE_MAGIC_EXT(
.name = "ltree",
.version = PG_VERSION
);
/* compare functions */
PG_FUNCTION_INFO_V1(ltree_cmp);
PG_FUNCTION_INFO_V1(ltree_lt);
PG_FUNCTION_INFO_V1(ltree_le);
PG_FUNCTION_INFO_V1(ltree_eq);
PG_FUNCTION_INFO_V1(ltree_ne);
PG_FUNCTION_INFO_V1(ltree_ge);
PG_FUNCTION_INFO_V1(ltree_gt);
PG_FUNCTION_INFO_V1(hash_ltree);
PG_FUNCTION_INFO_V1(hash_ltree_extended);
PG_FUNCTION_INFO_V1(nlevel);
PG_FUNCTION_INFO_V1(ltree_isparent);
PG_FUNCTION_INFO_V1(ltree_risparent);
PG_FUNCTION_INFO_V1(subltree);
PG_FUNCTION_INFO_V1(subpath);
PG_FUNCTION_INFO_V1(ltree_index);
PG_FUNCTION_INFO_V1(ltree_addltree);
PG_FUNCTION_INFO_V1(ltree_addtext);
PG_FUNCTION_INFO_V1(ltree_textadd);
PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
int
ltree_compare(const ltree *a, const ltree *b)
23 years ago
{
ltree_level *al = LTREE_FIRST(a);
ltree_level *bl = LTREE_FIRST(b);
23 years ago
int an = a->numlevel;
int bn = b->numlevel;
while (an > 0 && bn > 0)
{
int res;
if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
23 years ago
{
if (al->len != bl->len)
return (al->len - bl->len) * 10 * (an + 1);
}
else
{
if (res < 0)
res = -1;
else
res = 1;
23 years ago
return res * 10 * (an + 1);
}
23 years ago
an--;
bn--;
al = LEVEL_NEXT(al);
bl = LEVEL_NEXT(bl);
}
23 years ago
return (a->numlevel - b->numlevel) * 10 * (an + 1);
}
23 years ago
#define RUNCMP \
ltree *a = PG_GETARG_LTREE_P(0); \
ltree *b = PG_GETARG_LTREE_P(1); \
int res = ltree_compare(a,b); \
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1)
Datum
23 years ago
ltree_cmp(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_INT32(res);
}
Datum
23 years ago
ltree_lt(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res < 0);
}
Datum
23 years ago
ltree_le(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res <= 0);
}
Datum
23 years ago
ltree_eq(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res == 0);
}
Datum
23 years ago
ltree_ge(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res >= 0);
}
Datum
23 years ago
ltree_gt(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res > 0);
}
Datum
23 years ago
ltree_ne(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL(res != 0);
}
/* Compute a hash for the ltree */
Datum
hash_ltree(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
uint32 result = 1;
int an = a->numlevel;
ltree_level *al = LTREE_FIRST(a);
while (an > 0)
{
uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len));
/*
* Combine hash values of successive elements by multiplying the
* current value by 31 and adding on the new element's hash value.
*
* This method is borrowed from hash_array(), which see for further
* commentary.
*/
result = (result << 5) - result + levelHash;
an--;
al = LEVEL_NEXT(al);
}
PG_FREE_IF_COPY(a, 0);
PG_RETURN_UINT32(result);
}
/* Compute an extended hash for the ltree */
Datum
hash_ltree_extended(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
const uint64 seed = PG_GETARG_INT64(1);
uint64 result = 1;
int an = a->numlevel;
ltree_level *al = LTREE_FIRST(a);
/*
* If the path has length zero, return 1 + seed to ensure that the low 32
* bits of the result match hash_ltree when the seed is 0, as required by
* the hash index support functions, but to also return a different value
* when there is a seed.
*/
if (an == 0)
{
PG_FREE_IF_COPY(a, 0);
PG_RETURN_UINT64(result + seed);
}
while (an > 0)
{
uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed));
result = (result << 5) - result + levelHash;
an--;
al = LEVEL_NEXT(al);
}
PG_FREE_IF_COPY(a, 0);
PG_RETURN_UINT64(result);
}
Datum
23 years ago
nlevel(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
23 years ago
int res = a->numlevel;
PG_FREE_IF_COPY(a, 0);
PG_RETURN_INT32(res);
}
bool
inner_isparent(const ltree *c, const ltree *p)
23 years ago
{
ltree_level *cl = LTREE_FIRST(c);
ltree_level *pl = LTREE_FIRST(p);
23 years ago
int pn = p->numlevel;
23 years ago
if (pn > c->numlevel)
return false;
23 years ago
while (pn > 0)
{
if (cl->len != pl->len)
return false;
if (memcmp(cl->name, pl->name, cl->len) != 0)
return false;
pn--;
23 years ago
cl = LEVEL_NEXT(cl);
pl = LEVEL_NEXT(pl);
}
return true;
}
23 years ago
Datum
ltree_isparent(PG_FUNCTION_ARGS)
{
ltree *c = PG_GETARG_LTREE_P(1);
ltree *p = PG_GETARG_LTREE_P(0);
23 years ago
bool res = inner_isparent(c, p);
PG_FREE_IF_COPY(c, 1);
PG_FREE_IF_COPY(p, 0);
PG_RETURN_BOOL(res);
}
23 years ago
Datum
ltree_risparent(PG_FUNCTION_ARGS)
{
ltree *c = PG_GETARG_LTREE_P(0);
ltree *p = PG_GETARG_LTREE_P(1);
23 years ago
bool res = inner_isparent(c, p);
PG_FREE_IF_COPY(c, 0);
PG_FREE_IF_COPY(p, 1);
PG_RETURN_BOOL(res);
}
23 years ago
static ltree *
inner_subltree(ltree *t, int32 startpos, int32 endpos)
23 years ago
{
char *start = NULL,
*end = NULL;
ltree_level *ptr = LTREE_FIRST(t);
23 years ago
ltree *res;
int i;
if (startpos < 0 || endpos < 0 || startpos >= t->numlevel || startpos > endpos)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid positions")));
23 years ago
if (endpos > t->numlevel)
endpos = t->numlevel;
start = end = (char *) ptr;
23 years ago
for (i = 0; i < endpos; i++)
{
if (i == startpos)
start = (char *) ptr;
if (i == endpos - 1)
{
end = (char *) LEVEL_NEXT(ptr);
break;
}
23 years ago
ptr = LEVEL_NEXT(ptr);
}
res = (ltree *) palloc0(LTREE_HDRSIZE + (end - start));
SET_VARSIZE(res, LTREE_HDRSIZE + (end - start));
23 years ago
res->numlevel = endpos - startpos;
memcpy(LTREE_FIRST(res), start, end - start);
return res;
}
Datum
23 years ago
subltree(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE_P(0);
23 years ago
ltree *res = inner_subltree(t, PG_GETARG_INT32(1), PG_GETARG_INT32(2));
23 years ago
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
23 years ago
Datum
subpath(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE_P(0);
int32 start = PG_GETARG_INT32(1);
int32 len = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int32 end;
23 years ago
ltree *res;
end = start + len;
if (start < 0)
{
start = t->numlevel + start;
23 years ago
end = start + len;
}
23 years ago
if (start < 0)
{ /* start > t->numlevel */
start = t->numlevel + start;
23 years ago
end = start + len;
}
23 years ago
if (len < 0)
end = t->numlevel + len;
23 years ago
else if (len == 0)
end = (fcinfo->nargs == 3) ? start : 0xffff;
23 years ago
res = inner_subltree(t, start, end);
23 years ago
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
23 years ago
static ltree *
ltree_concat(ltree *a, ltree *b)
23 years ago
{
ltree *r;
int numlevel = (int) a->numlevel + b->numlevel;
if (numlevel > LTREE_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
numlevel, LTREE_MAX_LEVELS)));
23 years ago
r = (ltree *) palloc0(VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
SET_VARSIZE(r, VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
r->numlevel = (uint16) numlevel;
23 years ago
memcpy(LTREE_FIRST(r), LTREE_FIRST(a), VARSIZE(a) - LTREE_HDRSIZE);
memcpy(((char *) LTREE_FIRST(r)) + VARSIZE(a) - LTREE_HDRSIZE,
LTREE_FIRST(b),
VARSIZE(b) - LTREE_HDRSIZE);
23 years ago
return r;
}
Datum
ltree_addltree(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
ltree *b = PG_GETARG_LTREE_P(1);
23 years ago
ltree *r;
r = ltree_concat(a, b);
23 years ago
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_POINTER(r);
}
Datum
23 years ago
ltree_addtext(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
text *b = PG_GETARG_TEXT_PP(1);
23 years ago
char *s;
ltree *r,
*tmp;
s = text_to_cstring(b);
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
pfree(s);
23 years ago
r = ltree_concat(a, tmp);
23 years ago
pfree(tmp);
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_POINTER(r);
}
Datum
ltree_index(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
ltree *b = PG_GETARG_LTREE_P(1);
22 years ago
int start = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int i,
j;
ltree_level *startptr,
*aptr,
*bptr;
bool found = false;
if (start < 0)
{
if (-start >= a->numlevel)
start = 0;
else
start = (int) (a->numlevel) + start;
}
22 years ago
if (a->numlevel - start < b->numlevel || a->numlevel == 0 || b->numlevel == 0)
{
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(-1);
}
22 years ago
startptr = LTREE_FIRST(a);
for (i = 0; i <= a->numlevel - b->numlevel; i++)
{
if (i >= start)
{
aptr = startptr;
bptr = LTREE_FIRST(b);
for (j = 0; j < b->numlevel; j++)
{
if (!(aptr->len == bptr->len && memcmp(aptr->name, bptr->name, aptr->len) == 0))
22 years ago
break;
aptr = LEVEL_NEXT(aptr);
bptr = LEVEL_NEXT(bptr);
}
22 years ago
if (j == b->numlevel)
{
found = true;
break;
}
}
22 years ago
startptr = LEVEL_NEXT(startptr);
}
22 years ago
if (!found)
i = -1;
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(i);
}
Datum
23 years ago
ltree_textadd(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(1);
text *b = PG_GETARG_TEXT_PP(0);
23 years ago
char *s;
ltree *r,
*tmp;
s = text_to_cstring(b);
23 years ago
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
23 years ago
pfree(s);
r = ltree_concat(tmp, a);
pfree(tmp);
PG_FREE_IF_COPY(a, 1);
PG_FREE_IF_COPY(b, 0);
PG_RETURN_POINTER(r);
}
/*
* Common code for variants of lca(), find longest common ancestor of inputs
*
* Returns NULL if there is no common ancestor, ie, the longest common
* prefix is empty.
*/
23 years ago
ltree *
lca_inner(ltree **a, int len)
23 years ago
{
int tmp,
num,
i,
reslen;
ltree **ptr;
23 years ago
ltree_level *l1,
*l2;
ltree *res;
if (len <= 0)
return NULL; /* no inputs? */
23 years ago
if ((*a)->numlevel == 0)
return NULL; /* any empty input means NULL result */
/* num is the length of the longest common ancestor so far */
num = (*a)->numlevel - 1;
/* Compare each additional input to *a */
ptr = a + 1;
23 years ago
while (ptr - a < len)
{
if ((*ptr)->numlevel == 0)
return NULL;
23 years ago
else if ((*ptr)->numlevel == 1)
num = 0;
else
{
l1 = LTREE_FIRST(*a);
l2 = LTREE_FIRST(*ptr);
tmp = Min(num, (*ptr)->numlevel - 1);
23 years ago
num = 0;
for (i = 0; i < tmp; i++)
23 years ago
{
if (l1->len == l2->len &&
memcmp(l1->name, l2->name, l1->len) == 0)
23 years ago
num = i + 1;
else
break;
23 years ago
l1 = LEVEL_NEXT(l1);
l2 = LEVEL_NEXT(l2);
}
}
ptr++;
}
/* Now compute size of result ... */
reslen = LTREE_HDRSIZE;
l1 = LTREE_FIRST(*a);
23 years ago
for (i = 0; i < num; i++)
{
reslen += MAXALIGN(l1->len + LEVEL_HDRSIZE);
23 years ago
l1 = LEVEL_NEXT(l1);
}
/* ... and construct it by copying from *a */
res = (ltree *) palloc0(reslen);
SET_VARSIZE(res, reslen);
res->numlevel = num;
l1 = LTREE_FIRST(*a);
l2 = LTREE_FIRST(res);
23 years ago
for (i = 0; i < num; i++)
{
memcpy(l2, l1, MAXALIGN(l1->len + LEVEL_HDRSIZE));
l1 = LEVEL_NEXT(l1);
l2 = LEVEL_NEXT(l2);
}
return res;
}
Datum
23 years ago
lca(PG_FUNCTION_ARGS)
{
int i;
ltree **a,
*res;
a = (ltree **) palloc(sizeof(ltree *) * fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
a[i] = PG_GETARG_LTREE_P(i);
23 years ago
res = lca_inner(a, (int) fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
PG_FREE_IF_COPY(a[i], i);
pfree(a);
23 years ago
if (res)
PG_RETURN_POINTER(res);
else
PG_RETURN_NULL();
}
Datum
text2ltree(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_PP(0);
char *s;
22 years ago
ltree *out;
s = text_to_cstring(in);
out = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
pfree(s);
22 years ago
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(out);
}
Datum
ltree2text(PG_FUNCTION_ARGS)
{
ltree *in = PG_GETARG_LTREE_P(0);
22 years ago
char *ptr;
int i;
ltree_level *curlevel;
22 years ago
text *out;
out = (text *) palloc(VARSIZE(in) + VARHDRSZ);
22 years ago
ptr = VARDATA(out);
curlevel = LTREE_FIRST(in);
22 years ago
for (i = 0; i < in->numlevel; i++)
{
if (i != 0)
{
*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
ptr += curlevel->len;
curlevel = LEVEL_NEXT(curlevel);
}
22 years ago
SET_VARSIZE(out, ptr - ((char *) out));
PG_FREE_IF_COPY(in, 0);
22 years ago
PG_RETURN_POINTER(out);
}
/*
* ltreeparentsel - Selectivity of parent relationship for ltree data types.
Improve selectivity estimation for assorted match-style operators. Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
6 years ago
*
* This function is not used anymore, if the ltree extension has been
* updated to 1.2 or later.
*/
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
double selec;
Improve selectivity estimation for assorted match-style operators. Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
6 years ago
/* Use generic restriction selectivity logic, with default 0.001. */
Use query collation, not column's collation, while examining statistics. Commit 5e0928005 changed the planner so that, instead of blindly using DEFAULT_COLLATION_OID when invoking operators for selectivity estimation, it would use the collation of the column whose statistics we're considering. This was recognized as still being not quite the right thing, but it seemed like a good incremental improvement. However, shortly thereafter we introduced nondeterministic collations, and that creates cases where operators can fail if they're passed the wrong collation. We don't want planning to fail in cases where the query itself would work, so this means that we *must* use the query's collation when invoking operators for estimation purposes. The only real problem this creates is in ineq_histogram_selectivity, where the binary search might produce a garbage answer if we perform comparisons using a different collation than the column's histogram is ordered with. However, when the query's collation is significantly different from the column's default collation, the estimate we previously generated would be pretty irrelevant anyway; so it's not clear that this will result in noticeably worse estimates in practice. (A follow-on patch will improve this situation in HEAD, but it seems too invasive for back-patch.) The patch requires changing the signatures of mcv_selectivity and allied functions, which are exported and very possibly are used by extensions. In HEAD, I just did that, but an API/ABI break of this sort isn't acceptable in stable branches. Therefore, in v12 the patch introduces "mcv_selectivity_ext" and so on, with signatures matching HEAD, and makes the old functions into wrappers that assume DEFAULT_COLLATION_OID should be used. That does not match the prior behavior, but it should avoid risk of failure in most cases. (In practice, I think most extension datatypes aren't collation-aware, so the change probably doesn't matter to them.) Per report from James Lucas. Back-patch to v12 where the problem was introduced. Discussion: https://postgr.es/m/CAAFmbbOvfi=wMM=3qRsPunBSLb8BFREno2oOzSBS=mzfLPKABw@mail.gmail.com
5 years ago
selec = generic_restriction_selectivity(root, operator, InvalidOid,
Improve selectivity estimation for assorted match-style operators. Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
6 years ago
args, varRelid,
0.001);
PG_RETURN_FLOAT8((float8) selec);
}