@ -4,6 +4,9 @@
* jsonpath_scan.l
* jsonpath_scan.l
* Lexical parser for jsonpath datatype
* Lexical parser for jsonpath datatype
*
*
* Splits jsonpath string into tokens represented as JsonPathString structs.
* Decodes unicode and hex escaped strings.
*
* Copyright (c) 2019, PostgreSQL Global Development Group
* Copyright (c) 2019, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* IDENTIFICATION
@ -19,9 +22,6 @@
static JsonPathString scanstring;
static JsonPathString scanstring;
/* No reason to constrain amount of data slurped */
/* #define YY_READ_BUF_SIZE 16777216 */
/* Handles to the buffer that the lexer uses internally */
/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
static char *scanbuf;
@ -29,9 +29,7 @@ static int scanbuflen;
static void addstring(bool init, char *s, int l);
static void addstring(bool init, char *s, int l);
static void addchar(bool init, char s);
static void addchar(bool init, char s);
static int checkSpecialVal(void); /* examine scanstring for the special
static enum yytokentype checkKeyword(void);
* value */
static void parseUnicode(char *s, int l);
static void parseUnicode(char *s, int l);
static void parseHexChars(char *s, int l);
static void parseHexChars(char *s, int l);
@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg)
%option noyyrealloc
%option noyyrealloc
%option noyyfree
%option noyyfree
%x xQUOTED
/*
%x xNONQUOTED
* We use exclusive states for quoted, single-quoted and non-quoted strings,
%x xVARQUOTED
* quoted variable names and C-style comments.
%x xSINGLEQUOTED
* Exclusive states:
%x xCOMMENT
* <xq> - quoted strings
* <xnq> - non-quoted strings
* <xvq> - quoted variable names
* <xsq> - single-quoted strings
* <xc> - C-style comment
*/
%x xq
%x xnq
%x xvq
%x xsq
%x xc
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
@ -73,189 +82,188 @@ hex_dig [0-9A-Fa-f]
unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
hex_char \\x{hex_dig}{2}
hex_char \\x{hex_dig}{2}
%%
%%
<INITIAL>\&\& { return AND_P; }
<xnq>{any}+ {
addstring(false, yytext, yyleng);
<INITIAL>\|\| { return OR_P; }
}
<INITIAL>\! { return NOT_P; }
<INITIAL>\*\* { return ANY_P; }
<INITIAL>\< { return LESS_P; }
<INITIAL>\<\= { return LESSEQUAL_P; }
<INITIAL>\=\= { return EQUAL_P; }
<INITIAL>\<\> { return NOTEQUAL_P; }
<INITIAL>\!\= { return NOTEQUAL_P; }
<xnq>{blank}+ {
yylval->str = scanstring;
BEGIN INITIAL;
return checkKeyword();
}
<INITIAL>\>\= { return GREATEREQUAL_P; }
<INITIAL>\> { return GREATER_P; }
<xnq>\/\* {
yylval->str = scanstring;
BEGIN xc;
}
<INITIAL>\${any}+ {
<xnq>({special}|\"|\') {
addstring(true, yytext + 1, yyleng - 1);
addchar(false, '\0');
yylval->str = scanstring;
yylval->str = scanstring;
return VARIABLE_P;
yyless(0);
BEGIN INITIAL;
return checkKeyword();
}
}
<INITIAL>\$\" {
<xnq><<EOF>> {
addchar(true, '\0');
yylval->str = scanstring;
BEGIN xVARQUOTED;
BEGIN INITIAL;
return checkKeyword();
}
}
<INITIAL>{special} { return *yytext ; }
<xnq,xq,xvq,xsq>\\[\"\'\\] { addchar(false, yytext[1]) ; }
<INITIAL>{blank}+ { /* ignore */ }
<xnq,xq,xvq,xsq>\\b { addchar(false, '\b'); }
<INITIAL>\/\* {
<xnq,xq,xvq,xsq>\\f { addchar(false, '\f'); }
addchar(true, '\0');
BEGIN xCOMMENT;
}
<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ {
<xnq,xq,xvq,xsq>\\n { addchar(false, '\n'); }
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>\.[0-9]+[eE][+-]?[0-9]+ /* float */ {
<xnq,xq,xvq,xsq>\\r { addchar(false, '\r'); }
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>([0-9]+)?\.[0-9]+ {
<xnq,xq,xvq,xsq>\\t { addchar(false, '\t'); }
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>[0-9]+ {
<xnq,xq,xvq,xsq>\\v { addchar(false, '\v'); }
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return INT_P;
}
<INITIAL>{any}+ {
<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); }
addstring(true, yytext, yyleng);
BEGIN xNONQUOTED;
}
<INITIAL>\" {
<xnq,xq,xvq,xsq>{hex_char}+ { parseHexChars(yytext, yyleng); }
addchar(true, '\0');
BEGIN xQUOTED;
}
<INITIAL>\' {
<xnq,xq,xvq,xsq>\\x { yyerror(NULL, "Hex character sequence is invalid"); }
addchar(true, '\0');
BEGIN xSINGLEQUOTED;
}
<INITIAL>\\ {
<xnq,xq,xvq,xsq>\\u { yyerror(NULL, "Unicode sequence is invalid"); }
yyless(0);
addchar(true, '\0');
BEGIN xNONQUOTED;
}
<xNONQUOTED>{any}+ {
<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "Escape sequence is invalid"); }
addstring(false, yytext, yyleng);
}
<xNONQUOTED>{blank}+ {
<xnq,xq,xvq,xsq>\\ { yyerror(NULL, "Unexpected end after backslash"); }
yylval->str = scanstring;
BEGIN INITIAL;
return checkSpecialVal();
}
<xq,xvq,xsq><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); }
<xNONQUOTED>\/\* {
<xq>\" {
yylval->str = scanstring;
yylval->str = scanstring;
BEGIN xCOMMENT;
BEGIN INITIAL;
return STRING_P;
}
}
<xNONQUOTED>({special}|\"|\') {
<xvq>\" {
yylval->str = scanstring;
yylval->str = scanstring;
yyless(0);
BEGIN INITIAL;
BEGIN INITIAL;
return checkSpecialVal() ;
return VARIABLE_P ;
}
}
<xNONQUOTED><<EOF>> {
<xsq>\' {
yylval->str = scanstring;
yylval->str = scanstring;
BEGIN INITIAL;
BEGIN INITIAL;
return checkSpecialVal() ;
return STRING_P ;
}
}
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\] { addchar(false, yytext[1]); }
<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
<xsq>[^\\\']+ { addstring(false, yytext, yyleng); }
<xc>\*\/ { BEGIN INITIAL; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b { addchar(false, '\b'); }
<xc>[^\*]+ { }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f { addchar(false, '\f'); }
<xc>\* { }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n { addchar(false, '\n' ); }
<xc><<EOF>> { yyerror(NULL, "Unexpected end of comment" ); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r { addchar(false, '\r') ; }
\&\& { return AND_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t { addchar(false, '\t') ; }
\|\| { return OR_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v { addchar(false, '\v') ; }
\! { return NOT_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+ { parseUnicode(yytext, yyleng) ; }
\*\* { return ANY_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+ { parseHexChars(yytext, yyleng) ; }
\< { return LESS_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x { yyerror(NULL, "Hex character sequence is invalid") ; }
\<\= { return LESSEQUAL_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u { yyerror(NULL, "Unicode sequence is invalid") ; }
\=\= { return EQUAL_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\. { yyerror(NULL, "Escape sequence is invalid") ; }
\<\> { return NOTEQUAL_P ; }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\ { yyerror(NULL, "Unexpected end after backslash") ; }
\!\= { return NOTEQUAL_P ; }
<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>> { yyerror(NULL, "Unexpected end of quoted string") ; }
\>\= { return GREATEREQUAL_P ; }
<xQUOTED>\" {
\> { return GREATER_P; }
\${any}+ {
addstring(true, yytext + 1, yyleng - 1);
addchar(false, '\0');
yylval->str = scanstring;
yylval->str = scanstring;
BEGIN INITIAL;
return VARIABLE_P;
return STRING_P;
}
\$\" {
addchar(true, '\0');
BEGIN xvq;
}
{special} { return *yytext; }
{blank}+ { /* ignore */ }
\/\* {
addchar(true, '\0');
BEGIN xc;
}
}
<xVARQUOTED>\" {
[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
yylval->str = scanstring;
BEGIN INITIAL;
return NUMERIC_P;
return VARIABLE_P;
}
}
<xSINGLEQUOTED>\' {
\.[0-9]+[eE][+-]?[0-9]+ { /* float */
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
yylval->str = scanstring;
BEGIN INITIAL;
return NUMERIC_P;
return STRING_P;
}
}
<xQUOTED,xVARQUOTED>[^\\\"]+ { addstring(false, yytext, yyleng); }
([0-9]+)?\.[0-9]+ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<xSINGLEQUOTED>[^\\\']+ { addstring(false, yytext, yyleng); }
[0-9]+ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return INT_P;
}
<INITIAL><<EOF>> { yyterminate(); }
{any}+ {
addstring(true, yytext, yyleng);
BEGIN xnq;
}
<xCOMMENT>\*\/ { BEGIN INITIAL; }
\" {
addchar(true, '\0');
BEGIN xq;
}
<xCOMMENT>[^\*]+ { }
\' {
addchar(true, '\0');
BEGIN xsq;
}
<xCOMMENT>\* { }
\\ {
yyless(0);
addchar(true, '\0');
BEGIN xnq;
}
<xCOMMENT><<EOF>> { yyerror(NULL, "Unexpected end of comment"); }
<<EOF>> { yyterminate( ); }
%%
%%
@ -292,7 +300,6 @@ typedef struct JsonPathKeyword
* Array of key words should be sorted by length and then
* Array of key words should be sorted by length and then
* alphabetical order
* alphabetical order
*/
*/
static const JsonPathKeyword keywords[] = {
static const JsonPathKeyword keywords[] = {
{ 2, false, IS_P, "is"},
{ 2, false, IS_P, "is"},
{ 2, false, TO_P, "to"},
{ 2, false, TO_P, "to"},
@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = {
{ 10,false, LIKE_REGEX_P, "like_regex"},
{ 10,false, LIKE_REGEX_P, "like_regex"},
};
};
static int
/* Check if current scanstring value is a keyword */
checkSpecialVal()
static enum yytokentype
checkKeyword()
{
{
int res = IDENT_P;
int res = IDENT_P;
int diff;
int diff;
@ -397,49 +405,50 @@ jsonpath_scanner_finish(void)
pfree(scanbuf);
pfree(scanbuf);
}
}
/*
* Resize scanstring so that it can append string of given length.
* Reinitialize if required.
*/
static void
static void
addstring(bool init, char *s, int l)
resizeString(bool init, int appendLen )
{
{
if (init)
if (init)
{
{
scanstring.total = 32;
scanstring.total = Max( 32, appendLen) ;
scanstring.val = palloc(scanstring.total);
scanstring.val = (char *) palloc(scanstring.total);
scanstring.len = 0;
scanstring.len = 0;
}
}
else
if (s && l)
{
{
while(scanstring.len + l + 1 >= scanstring.total)
if (scanstring.len + appendLen >= scanstring.total)
{
{
while (scanstring.len + appendLen >= scanstring.total)
scanstring.total *= 2;
scanstring.total *= 2;
scanstring.val = repalloc(scanstring.val, scanstring.total);
scanstring.val = repalloc(scanstring.val, scanstring.total);
}
}
memcpy(scanstring.val + scanstring.len, s, l);
scanstring.len += l;
}
}
}
}
/* Add set of bytes at "s" of length "l" to scanstring */
static void
static void
addchar(bool init, char s)
addstring(bool init, char *s, int l)
{
if (init)
{
scanstring.total = 32;
scanstring.val = palloc(scanstring.total);
scanstring.len = 0;
}
else if(scanstring.len + 1 >= scanstring.total)
{
{
scanstring.total *= 2;
resizeString(init, l + 1);
scanstring.val = repalloc(scanstring.val, scanstring.total);
memcpy(scanstring.val + scanstring.len, s, l);
scanstring.len += l;
}
}
scanstring.val[ scanstring.len ] = s;
/* Add single byte "c" to scanstring */
if (s != '\0')
static void
addchar(bool init, char c)
{
resizeString(init, 1);
scanstring.val[scanstring.len] = c;
if (c != '\0')
scanstring.len++;
scanstring.len++;
}
}
/* Interface to jsonpath parser */
JsonPathParseResult *
JsonPathParseResult *
parsejsonpath(const char *str, int len)
parsejsonpath(const char *str, int len)
{
{
@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len)
return parseresult;
return parseresult;
}
}
/* Turn hex character into integer */
static int
static int
hexval(char c)
hexval(char c)
{
{
@ -468,6 +478,7 @@ hexval(char c)
return 0; /* not reached */
return 0; /* not reached */
}
}
/* Add given unicode character to scanstring */
static void
static void
addUnicodeChar(int ch)
addUnicodeChar(int ch)
{
{
@ -515,6 +526,7 @@ addUnicodeChar(int ch)
}
}
}
}
/* Add unicode character and process its hi surrogate */
static void
static void
addUnicode(int ch, int *hi_surrogate)
addUnicode(int ch, int *hi_surrogate)
{
{
@ -592,6 +604,7 @@ parseUnicode(char *s, int l)
}
}
}
}
/* Parse sequence of hex-encoded characters */
static void
static void
parseHexChars(char *s, int l)
parseHexChars(char *s, int l)
{
{
@ -601,7 +614,8 @@ parseHexChars(char *s, int l)
for (i = 0; i < l / 4; i++)
for (i = 0; i < l / 4; i++)
{
{
int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]);
int ch = (hexval(s[i * 4 + 2]) << 4) |
hexval(s[i * 4 + 3]);
addUnicodeChar(ch);
addUnicodeChar(ch);
}
}