/*
 * in/out function for ltree and lquery
 * Teodor Sigaev <teodor@stack.net>
 * contrib/ltree/ltree_io.c
 */
#include "postgres.h"
#include <ctype.h>
#include "crc32.h"
#include "ltree.h"
#include "utils/memutils.h"

/* Function-manager declarations for the SQL-callable I/O functions below */
PG_FUNCTION_INFO_V1(ltree_in);
PG_FUNCTION_INFO_V1(ltree_out);
PG_FUNCTION_INFO_V1(lquery_in);
PG_FUNCTION_INFO_V1(lquery_out);

/*
 * Scratch record describing one label (ltree_in) or one variant (lquery_in)
 * located in the input string while it is being parsed.
 */
typedef struct
{
	char	   *start;			/* first byte of the label inside the input */
	int			len;			/* length in bytes */
	int			flag;			/* LVAR_xxx flags; only lquery_in uses this */
	int			wlen;			/* length in characters */
} nodeitem;

/* Parser states used by ltree_in */
#define LTPRS_WAITNAME	0		/* expecting the first character of a label */
#define LTPRS_WAITDELIM 1		/* inside a label: more label chars or '.' */

Datum
ltree_in(PG_FUNCTION_ARGS)
{
char *buf = (char *) PG_GETARG_POINTER(0);
char *ptr;
nodeitem *list,
*lptr;
int num = 0,
totallen = 0;
int state = LTPRS_WAITNAME;
ltree *result;
ltree_level *curlevel;
int charlen;
int pos = 1; /* character position for error messages */
#define UNCHAR ereport(ERROR, \
errcode(ERRCODE_SYNTAX_ERROR), \
errmsg("ltree syntax error at character %d", \
pos))
23 years ago
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
if (charlen == 1 && t_iseq(ptr, '.'))
num++;
ptr += charlen;
}
if (num + 1 > LTREE_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
num + 1, LTREE_MAX_LEVELS)));
23 years ago
list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
23 years ago
if (state == LTPRS_WAITNAME)
{
if (ISALNUM(ptr))
23 years ago
{
lptr->start = ptr;
lptr->wlen = 0;
state = LTPRS_WAITDELIM;
23 years ago
}
else
UNCHAR;
23 years ago
}
else if (state == LTPRS_WAITDELIM)
{
if (charlen == 1 && t_iseq(ptr, '.'))
23 years ago
{
lptr->len = ptr - lptr->start;
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
state = LTPRS_WAITNAME;
23 years ago
}
else if (!ISALNUM(ptr))
UNCHAR;
23 years ago
}
else
/* internal error */
elog(ERROR, "internal error in parser");
ptr += charlen;
lptr->wlen++;
pos++;
}
23 years ago
if (state == LTPRS_WAITDELIM)
{
lptr->len = ptr - lptr->start;
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
23 years ago
}
else if (!(state == LTPRS_WAITNAME && lptr == list))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("ltree syntax error"),
errdetail("Unexpected end of input.")));
result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
23 years ago
result->numlevel = lptr - list;
curlevel = LTREE_FIRST(result);
23 years ago
lptr = list;
while (lptr - list < result->numlevel)
{
curlevel->len = (uint16) lptr->len;
23 years ago
memcpy(curlevel->name, lptr->start, lptr->len);
curlevel = LEVEL_NEXT(curlevel);
lptr++;
}
pfree(list);
PG_RETURN_POINTER(result);
#undef UNCHAR
}
23 years ago
Datum
ltree_out(PG_FUNCTION_ARGS)
{
ltree *in = PG_GETARG_LTREE_P(0);
23 years ago
char *buf,
*ptr;
int i;
ltree_level *curlevel;
ptr = buf = (char *) palloc(VARSIZE(in));
curlevel = LTREE_FIRST(in);
23 years ago
for (i = 0; i < in->numlevel; i++)
{
if (i != 0)
{
*ptr = '.';
ptr++;
}
23 years ago
memcpy(ptr, curlevel->name, curlevel->len);
ptr += curlevel->len;
curlevel = LEVEL_NEXT(curlevel);
}
23 years ago
*ptr = '\0';
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(buf);
}
23 years ago
/* Parser states used by lquery_in */
#define LQPRS_WAITLEVEL 0		/* at the start of a level: label, '!' or '*' */
#define LQPRS_WAITDELIM 1		/* inside a variant: label chars, flags, '|',
								 * '{' or '.' */
#define LQPRS_WAITOPEN	2		/* after '*': expecting '{' or '.' */
#define LQPRS_WAITFNUM	3		/* after '{': expecting low bound or ',' */
#define LQPRS_WAITSNUM	4		/* after ',': expecting high bound or '}' */
#define LQPRS_WAITND	5		/* inside low bound: digits, ',' or '}' */
#define LQPRS_WAITCLOSE 6		/* inside high bound: digits or '}' */
#define LQPRS_WAITEND	7		/* after '}': expecting '.' */
#define LQPRS_WAITVAR	8		/* after '|': expecting the next variant */

/*
 * While parsing, lquery_in keeps one fixed-size temporary lquery_level per
 * level.  Instead of variants, the space following the level header holds a
 * pointer to that level's array of nodeitems.
 */
#define GETVAR(x) ( *((nodeitem**)LQL_FIRST(x)) )
#define ITEMSIZE	MAXALIGN(LQL_HDRSIZE+sizeof(nodeitem*))
#define NEXTLEV(x) ( (lquery_level*)( ((char*)(x)) + ITEMSIZE) )

Datum
lquery_in(PG_FUNCTION_ARGS)
{
char *buf = (char *) PG_GETARG_POINTER(0);
char *ptr;
int num = 0,
totallen = 0,
numOR = 0;
int state = LQPRS_WAITLEVEL;
lquery *result;
nodeitem *lptr = NULL;
lquery_level *cur,
*curqlevel,
*tmpql;
lquery_variant *lrptr = NULL;
bool hasnot = false;
bool wasbad = false;
int charlen;
int pos = 1; /* character position for error messages */
#define UNCHAR ereport(ERROR, \
errcode(ERRCODE_SYNTAX_ERROR), \
errmsg("lquery syntax error at character %d", \
pos))
23 years ago
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
if (charlen == 1)
{
if (t_iseq(ptr, '.'))
num++;
else if (t_iseq(ptr, '|'))
numOR++;
}
ptr += charlen;
}
23 years ago
num++;
if (num > LQUERY_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
num, LQUERY_MAX_LEVELS)));
curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
23 years ago
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
23 years ago
if (state == LQPRS_WAITLEVEL)
{
if (ISALNUM(ptr))
23 years ago
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
23 years ago
}
else if (charlen == 1 && t_iseq(ptr, '!'))
23 years ago
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
23 years ago
lptr->start = ptr + 1;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
curqlevel->flag |= LQL_NOT;
23 years ago
hasnot = true;
}
else if (charlen == 1 && t_iseq(ptr, '*'))
state = LQPRS_WAITOPEN;
23 years ago
else
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITVAR)
{
if (ISALNUM(ptr))
23 years ago
{
lptr++;
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar++;
23 years ago
}
else
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITDELIM)
{
if (charlen == 1 && t_iseq(ptr, '@'))
23 years ago
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_INCASE;
curqlevel->flag |= LVAR_INCASE;
23 years ago
}
else if (charlen == 1 && t_iseq(ptr, '*'))
23 years ago
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_ANYEND;
curqlevel->flag |= LVAR_ANYEND;
23 years ago
}
else if (charlen == 1 && t_iseq(ptr, '%'))
23 years ago
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_SUBLEXEME;
curqlevel->flag |= LVAR_SUBLEXEME;
23 years ago
}
else if (charlen == 1 && t_iseq(ptr, '|'))
23 years ago
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
23 years ago
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
state = LQPRS_WAITVAR;
23 years ago
}
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
else if (charlen == 1 && t_iseq(ptr, '{'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
curqlevel->flag |= LQL_COUNT;
state = LQPRS_WAITFNUM;
}
else if (charlen == 1 && t_iseq(ptr, '.'))
23 years ago
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
23 years ago
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
23 years ago
}
else if (ISALNUM(ptr))
23 years ago
{
if (lptr->flag)
UNCHAR;
23 years ago
}
else
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITOPEN)
{
if (charlen == 1 && t_iseq(ptr, '{'))
state = LQPRS_WAITFNUM;
else if (charlen == 1 && t_iseq(ptr, '.'))
23 years ago
{
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
/* We only get here for '*', so these are correct defaults */
23 years ago
curqlevel->low = 0;
curqlevel->high = LTREE_MAX_LEVELS;
curqlevel = NEXTLEV(curqlevel);
state = LQPRS_WAITLEVEL;
23 years ago
}
else
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITFNUM)
{
if (charlen == 1 && t_iseq(ptr, ','))
23 years ago
state = LQPRS_WAITSNUM;
else if (t_isdigit(ptr))
23 years ago
{
int low = atoi(ptr);
if (low < 0 || low > LTREE_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("lquery syntax error"),
errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
low, LTREE_MAX_LEVELS, pos)));
curqlevel->low = (uint16) low;
state = LQPRS_WAITND;
23 years ago
}
else
UNCHAR;
}
else if (state == LQPRS_WAITSNUM)
{
if (t_isdigit(ptr))
23 years ago
{
int high = atoi(ptr);
if (high < 0 || high > LTREE_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("lquery syntax error"),
errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
high, LTREE_MAX_LEVELS, pos)));
else if (curqlevel->low > high)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
curqlevel->low, high, pos)));
curqlevel->high = (uint16) high;
23 years ago
state = LQPRS_WAITCLOSE;
}
else if (charlen == 1 && t_iseq(ptr, '}'))
23 years ago
{
curqlevel->high = LTREE_MAX_LEVELS;
state = LQPRS_WAITEND;
23 years ago
}
else
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITCLOSE)
{
if (charlen == 1 && t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
else if (!t_isdigit(ptr))
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITND)
{
if (charlen == 1 && t_iseq(ptr, '}'))
23 years ago
{
curqlevel->high = curqlevel->low;
state = LQPRS_WAITEND;
23 years ago
}
else if (charlen == 1 && t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
else if (!t_isdigit(ptr))
UNCHAR;
23 years ago
}
else if (state == LQPRS_WAITEND)
{
if (charlen == 1 && t_iseq(ptr, '.'))
23 years ago
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
23 years ago
}
else
UNCHAR;
23 years ago
}
else
/* internal error */
elog(ERROR, "internal error in parser");
ptr += charlen;
if (state == LQPRS_WAITDELIM)
lptr->wlen++;
pos++;
}
23 years ago
if (state == LQPRS_WAITDELIM)
{
if (lptr->start == ptr)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Unexpected end of input.")));
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
23 years ago
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->len == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Unexpected end of input.")));
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
23 years ago
}
else if (state == LQPRS_WAITOPEN)
curqlevel->high = LTREE_MAX_LEVELS;
23 years ago
else if (state != LQPRS_WAITEND)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Unexpected end of input.")));
23 years ago
curqlevel = tmpql;
23 years ago
totallen = LQUERY_HDRSIZE;
while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
{
totallen += LQL_HDRSIZE;
if (curqlevel->numvar)
{
lptr = GETVAR(curqlevel);
23 years ago
while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
{
totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
lptr++;
}
23 years ago
}
curqlevel = NEXTLEV(curqlevel);
}
result = (lquery *) palloc0(totallen);
SET_VARSIZE(result, totallen);
result->numlevel = num;
result->firstgood = 0;
23 years ago
result->flag = 0;
if (hasnot)
result->flag |= LQUERY_HASNOT;
cur = LQUERY_FIRST(result);
curqlevel = tmpql;
23 years ago
while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
{
memcpy(cur, curqlevel, LQL_HDRSIZE);
cur->totallen = LQL_HDRSIZE;
if (curqlevel->numvar)
{
lrptr = LQL_FIRST(cur);
lptr = GETVAR(curqlevel);
23 years ago
while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
{
cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
23 years ago
lrptr->len = lptr->len;
lrptr->flag = lptr->flag;
lrptr->val = ltree_crc32_sz(lptr->start, lptr->len);
23 years ago
memcpy(lrptr->name, lptr->start, lptr->len);
lptr++;
23 years ago
lrptr = LVAR_NEXT(lrptr);
}
23 years ago
pfree(GETVAR(curqlevel));
if (cur->numvar > 1 || cur->flag != 0)
{
/* Not a simple match */
23 years ago
wasbad = true;
}
23 years ago
else if (wasbad == false)
{
/* count leading simple matches */
23 years ago
(result->firstgood)++;
}
23 years ago
}
else
{
/* '*', so this isn't a simple match */
23 years ago
wasbad = true;
}
curqlevel = NEXTLEV(curqlevel);
cur = LQL_NEXT(cur);
}
pfree(tmpql);
PG_RETURN_POINTER(result);
#undef UNCHAR
}
23 years ago
Datum
lquery_out(PG_FUNCTION_ARGS)
{
lquery *in = PG_GETARG_LQUERY_P(0);
23 years ago
char *buf,
*ptr;
int i,
j,
totallen = 1;
23 years ago
lquery_level *curqlevel;
lquery_variant *curtlevel;
curqlevel = LQUERY_FIRST(in);
23 years ago
for (i = 0; i < in->numlevel; i++)
{
totallen++;
23 years ago
if (curqlevel->numvar)
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
{
totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
if (curqlevel->flag & LQL_COUNT)
totallen += 2 * 11 + 3;
}
else
totallen += 2 * 11 + 4;
curqlevel = LQL_NEXT(curqlevel);
}
23 years ago
ptr = buf = (char *) palloc(totallen);
curqlevel = LQUERY_FIRST(in);
23 years ago
for (i = 0; i < in->numlevel; i++)
{
if (i != 0)
{
*ptr = '.';
ptr++;
}
23 years ago
if (curqlevel->numvar)
{
if (curqlevel->flag & LQL_NOT)
{
*ptr = '!';
ptr++;
}
curtlevel = LQL_FIRST(curqlevel);
23 years ago
for (j = 0; j < curqlevel->numvar; j++)
{
if (j != 0)
{
*ptr = '|';
ptr++;
}
23 years ago
memcpy(ptr, curtlevel->name, curtlevel->len);
ptr += curtlevel->len;
if ((curtlevel->flag & LVAR_SUBLEXEME))
23 years ago
{
*ptr = '%';
ptr++;
}
23 years ago
if ((curtlevel->flag & LVAR_INCASE))
{
*ptr = '@';
ptr++;
}
23 years ago
if ((curtlevel->flag & LVAR_ANYEND))
{
*ptr = '*';
ptr++;
}
curtlevel = LVAR_NEXT(curtlevel);
}
23 years ago
}
else
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
{
*ptr = '*';
ptr++;
}
if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
23 years ago
{
if (curqlevel->low == curqlevel->high)
{
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
sprintf(ptr, "{%d}", curqlevel->low);
23 years ago
}
else if (curqlevel->low == 0)
{
if (curqlevel->high == LTREE_MAX_LEVELS)
23 years ago
{
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
if (curqlevel->numvar == 0)
{
/* This is default for '*', so print nothing */
*ptr = '\0';
}
else
sprintf(ptr, "{,}");
23 years ago
}
else
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
sprintf(ptr, "{,%d}", curqlevel->high);
23 years ago
}
else if (curqlevel->high == LTREE_MAX_LEVELS)
23 years ago
{
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
sprintf(ptr, "{%d,}", curqlevel->low);
23 years ago
}
else
Fix lquery's NOT handling, and add ability to quantify non-'*' items. The existing implementation of the ltree ~ lquery match operator is sufficiently complex and undocumented that it's hard to tell exactly what it does. But one thing it clearly gets wrong is the combination of NOT symbols (!) and '*' symbols. A pattern such as '*.!foo.*' should, by any ordinary understanding of regular expression behavior, match any ltree that has at least one label that's not "foo". As best we can tell by experimentation, what it's actually matching is any ltree in which *no* label is "foo". That's surprising, and not at all what the documentation says. Now, that's arguably a useful behavior, so if we rewrite to fix the bug we should provide some other way to get it. To do so, add the ability to attach lquery quantifiers to non-'*' items as well as '*'s. Then the pattern '!foo{,}' expresses "any ltree in which no label is foo". For backwards compatibility, the default quantifier for non-'*' items has to be "{1}", although the default for '*' items is '{,}'. I wouldn't have done it like that in a green field, but it's not totally horrible. Armed with that, rewrite checkCond() from scratch. Treating '*' and non-'*' items alike makes it simpler, not more complicated, so that the function actually gets a lot shorter than it was. Filip Rembiałkowski, Tom Lane, Nikita Glukhov, per a very ancient bug report from M. Palm Discussion: https://postgr.es/m/CAP_rww=waX2Oo6q+MbMSiZ9ktdj6eaJj0cQzNu=Ry2cCDij5fw@mail.gmail.com
6 years ago
sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
23 years ago
ptr = strchr(ptr, '\0');
}
curqlevel = LQL_NEXT(curqlevel);
}
23 years ago
*ptr = '\0';
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(buf);
}