Cosmetic changes for jsonpath_gram.y and jsonpath_scan.l

This commit includes formatting improvements, renamings and comments. Also, it makes jsonpath_scan.l more uniform with our other lexers. Firstly, state names are renamed to shorter alternatives. Secondly, the <INITIAL> prefix is removed from the rules. The corresponding rules are moved to the tail, so they still match only in the initial state. Author: Alexander Korotkov Reviewed-by: John Naylor
6 years ago · 8b17298f0b
parent d303122eab
commit 8b17298f0b
2 changed files with 223 additions and 189 deletions
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@ -4,6 +4,8 @@
 * jsonpath_gram.y
 *	 Grammar definitions for jsonpath datatype
 *
 * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
 *
 * Copyright (c) 2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
@ -37,15 +39,17 @@ int	jsonpath_yylex(union YYSTYPE *yylval_param);
 int	jsonpath_yyparse(JsonPathParseResult **result);
 void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
-static JsonPathParseItem *makeItemType(int type);
+static JsonPathParseItem *makeItemType(JsonPathItemType type);
 static JsonPathParseItem *makeItemString(JsonPathString *s);
 static JsonPathParseItem *makeItemVariable(JsonPathString *s);
 static JsonPathParseItem *makeItemKey(JsonPathString *s);
 static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
 static JsonPathParseItem *makeItemBool(bool val);
-static JsonPathParseItem *makeItemBinary(int type, JsonPathParseItem *la,
+static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
 										 JsonPathParseItem *la,
 										 JsonPathParseItem *ra);
-static JsonPathParseItem *makeItemUnary(int type, JsonPathParseItem *a);
+static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
 										JsonPathParseItem *a);
 static JsonPathParseItem *makeItemList(List *list);
 static JsonPathParseItem *makeIndexArray(List *list);
 static JsonPathParseItem *makeAny(int first, int last);
@ -170,7 +174,8 @@ predicate:
 	| predicate AND_P predicate		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
 	| predicate OR_P predicate		{ $$ = makeItemBinary(jpiOr, $1, $3); }
 	| NOT_P delimited_predicate 	{ $$ = makeItemUnary(jpiNot, $2); }
-	| '(' predicate ')' IS_P UNKNOWN_P	{ $$ = makeItemUnary(jpiIsUnknown, $2); }
+	| '(' predicate ')' IS_P UNKNOWN_P
 									{ $$ = makeItemUnary(jpiIsUnknown, $2); }
 	| expr STARTS_P WITH_P starts_with_initial
 									{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
 	| expr LIKE_REGEX_P STRING_P 	{ $$ = makeItemLikeRegex($1, &$3, NULL); }
@ -232,7 +237,8 @@ any_level:
 any_path:
 	ANY_P							{ $$ = makeAny(0, -1); }
 	| ANY_P '{' any_level '}'		{ $$ = makeAny($3, $3); }
-	| ANY_P '{' any_level TO_P any_level '}'	{ $$ = makeAny($3, $5); }
+	| ANY_P '{' any_level TO_P any_level '}'
 									{ $$ = makeAny($3, $5); }
 	;
 accessor_op:
@ -285,8 +291,13 @@ method:
 	;
 %%
 /*
 * The helper functions below allocate and fill JsonPathParseItem's of various
 * types.
 */
 static JsonPathParseItem *
-makeItemType(int type)
+makeItemType(JsonPathItemType type)
 {
 	JsonPathParseItem  *v = palloc(sizeof(*v));
@ -364,7 +375,7 @@ makeItemBool(bool val)
 }
 static JsonPathParseItem *
-makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra)
+makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
 {
 	JsonPathParseItem  *v = makeItemType(type);
@ -375,7 +386,7 @@ makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra)
 }
 static JsonPathParseItem *
-makeItemUnary(int type, JsonPathParseItem* a)
+makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
 {
 	JsonPathParseItem  *v;
@ -401,7 +412,8 @@ makeItemUnary(int type, JsonPathParseItem* a)
 static JsonPathParseItem *
 makeItemList(List *list)
 {
-	JsonPathParseItem *head, *end;
+	JsonPathParseItem  *head,
 					   *end;
 	ListCell		   *cell = list_head(list);
 	head = end = (JsonPathParseItem *) lfirst(cell);
@ -510,4 +522,12 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
 	return v;
 }
 /*
 * jsonpath_scan.l is compiled as part of jsonpath_gram.y.  Currently, this is
 * unavoidable because jsonpath_gram does not create a .h file to export its
 * token symbols.  If these files ever grow large enough to be worth compiling
 * separately, that could be fixed; but for now it seems like useless
 * complication.
 */
 #include "jsonpath_scan.c"
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@ -4,6 +4,9 @@
 * jsonpath_scan.l
 *	Lexical parser for jsonpath datatype
 *
 * Splits jsonpath string into tokens represented as JsonPathString structs.
 * Decodes unicode and hex escaped strings.
 *
 * Copyright (c) 2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
@ -19,9 +22,6 @@
 static JsonPathString scanstring;
 /* No reason to constrain amount of data slurped */
 /* #define YY_READ_BUF_SIZE 16777216 */
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
@ -29,9 +29,7 @@ static int	scanbuflen;
 static void addstring(bool init, char *s, int l);
 static void addchar(bool init, char s);
-static int checkSpecialVal(void); /* examine scanstring for the special
+static enum yytokentype checkKeyword(void);
 								   * value */
 static void parseUnicode(char *s, int l);
 static void parseHexChars(char *s, int l);
@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 %option noyyrealloc
 %option noyyfree
-%x xQUOTED
+/*
-%x xNONQUOTED
+ * We use exclusive states for quoted, single-quoted and non-quoted strings,
-%x xVARQUOTED
+ * quoted variable names and C-style comments.
-%x xSINGLEQUOTED
+ * Exclusive states:
-%x xCOMMENT
+ *  <xq> - quoted strings
 *  <xnq> - non-quoted strings
 *  <xvq> - quoted variable names
 *  <xsq> - single-quoted strings
 *  <xc> - C-style comment
 */
 %x xq
 %x xnq
 %x xvq
 %x xsq
 %x xc
 special		 [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
 any			[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
@ -73,189 +82,188 @@ hex_dig		[0-9A-Fa-f]
 unicode		\\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
 hex_char	\\x{hex_dig}{2}
 %%
-<INITIAL>\&\&					{ return AND_P; }
+<xnq>{any}+						{
-
+									addstring(false, yytext, yyleng);
-<INITIAL>\|\|					{ return OR_P; }
+								}
 <INITIAL>\!						{ return NOT_P; }
 <INITIAL>\*\*					{ return ANY_P; }
 <INITIAL>\<						{ return LESS_P; }
 <INITIAL>\<\=					{ return LESSEQUAL_P; }
 <INITIAL>\=\=					{ return EQUAL_P; }
 <INITIAL>\<\>					{ return NOTEQUAL_P; }
-<INITIAL>\!\=					{ return NOTEQUAL_P; }
+<xnq>{blank}+					{
 									yylval->str = scanstring;
 									BEGIN INITIAL;
 									return checkKeyword();
 								}
 <INITIAL>\>\=					{ return GREATEREQUAL_P; }
-<INITIAL>\>						{ return GREATER_P; }
+<xnq>\/\*						{
 									yylval->str = scanstring;
 									BEGIN xc;
 								}
-<INITIAL>\${any}+				{
+<xnq>({special}|\"|\')			{
 									addstring(true, yytext + 1, yyleng - 1);
 									addchar(false, '\0');
 									yylval->str = scanstring;
-									return VARIABLE_P;
+									yyless(0);
 									BEGIN INITIAL;
 									return checkKeyword();
 								}
-<INITIAL>\$\"					{
+<xnq><<EOF>>					{
-									addchar(true, '\0');
+									yylval->str = scanstring;
-									BEGIN xVARQUOTED;
+									BEGIN INITIAL;
 									return checkKeyword();
 								}
-<INITIAL>{special}				{ return *yytext; }
+<xnq,xq,xvq,xsq>\\[\"\'\\]		{ addchar(false, yytext[1]); }
-<INITIAL>{blank}+				{ /* ignore */ }
+<xnq,xq,xvq,xsq>\\b				{ addchar(false, '\b'); }
-<INITIAL>\/\*					{
+<xnq,xq,xvq,xsq>\\f				{ addchar(false, '\f'); }
 									addchar(true, '\0');
 									BEGIN xCOMMENT;
 								}
-<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+  /* float */  {
+<xnq,xq,xvq,xsq>\\n				{ addchar(false, '\n'); }
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return NUMERIC_P;
 								}
-<INITIAL>\.[0-9]+[eE][+-]?[0-9]+  /* float */  {
+<xnq,xq,xvq,xsq>\\r				{ addchar(false, '\r'); }
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return NUMERIC_P;
 								}
-<INITIAL>([0-9]+)?\.[0-9]+		{
+<xnq,xq,xvq,xsq>\\t				{ addchar(false, '\t'); }
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return NUMERIC_P;
 								}
-<INITIAL>[0-9]+					{
+<xnq,xq,xvq,xsq>\\v				{ addchar(false, '\v'); }
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return INT_P;
 								}
-<INITIAL>{any}+					{
+<xnq,xq,xvq,xsq>{unicode}+		{ parseUnicode(yytext, yyleng); }
 									addstring(true, yytext, yyleng);
 									BEGIN xNONQUOTED;
 								}
-<INITIAL>\"						{
+<xnq,xq,xvq,xsq>{hex_char}+		{ parseHexChars(yytext, yyleng); }
 									addchar(true, '\0');
 									BEGIN xQUOTED;
 								}
-<INITIAL>\'						{
+<xnq,xq,xvq,xsq>\\x				{ yyerror(NULL, "Hex character sequence is invalid"); }
 									addchar(true, '\0');
 									BEGIN xSINGLEQUOTED;
 								}
-<INITIAL>\\						{
+<xnq,xq,xvq,xsq>\\u				{ yyerror(NULL, "Unicode sequence is invalid"); }
 									yyless(0);
 									addchar(true, '\0');
 									BEGIN xNONQUOTED;
 								}
-<xNONQUOTED>{any}+				{
+<xnq,xq,xvq,xsq>\\.				{ yyerror(NULL, "Escape sequence is invalid"); }
 									addstring(false, yytext, yyleng);
 								}
-<xNONQUOTED>{blank}+			{
+<xnq,xq,xvq,xsq>\\				{ yyerror(NULL, "Unexpected end after backslash"); }
 									yylval->str = scanstring;
 									BEGIN INITIAL;
 									return checkSpecialVal();
 								}
 <xq,xvq,xsq><<EOF>>				{ yyerror(NULL, "Unexpected end of quoted string"); }
-<xNONQUOTED>\/\*				{
+<xq>\"							{
 									yylval->str = scanstring;
-									BEGIN xCOMMENT;
+									BEGIN INITIAL;
 									return STRING_P;
 								}
-<xNONQUOTED>({special}|\"|\')	{
+<xvq>\"							{
 									yylval->str = scanstring;
 									yyless(0);
 									BEGIN INITIAL;
-									return checkSpecialVal();
+									return VARIABLE_P;
 								}
-<xNONQUOTED><<EOF>>				{
+<xsq>\'							{
 									yylval->str = scanstring;
 									BEGIN INITIAL;
-									return checkSpecialVal();
+									return STRING_P;
 								}
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\]	{ addchar(false, yytext[1]); }
+<xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng); }
 <xsq>[^\\\']+					{ addstring(false, yytext, yyleng); }
 <xc>\*\/						{ BEGIN INITIAL; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b	{ addchar(false, '\b'); }
+<xc>[^\*]+						{ }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f	{ addchar(false, '\f'); }
+<xc>\*							{ }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n	{ addchar(false, '\n'); }
+<xc><<EOF>>						{ yyerror(NULL, "Unexpected end of comment"); }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r	{ addchar(false, '\r'); }
+\&\&							{ return AND_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t	{ addchar(false, '\t'); }
+\|\|							{ return OR_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v	{ addchar(false, '\v'); }
+\!								{ return NOT_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+		{ parseUnicode(yytext, yyleng); }
+\*\*							{ return ANY_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+	{ parseHexChars(yytext, yyleng); }
+\<								{ return LESS_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x	{ yyerror(NULL, "Hex character sequence is invalid"); }
+\<\=							{ return LESSEQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u	{ yyerror(NULL, "Unicode sequence is invalid"); }
+\=\=							{ return EQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\.	{ yyerror(NULL, "Escape sequence is invalid"); }
+\<\>							{ return NOTEQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\		{ yyerror(NULL, "Unexpected end after backslash"); }
+\!\=							{ return NOTEQUAL_P; }
-<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>>			{ yyerror(NULL, "Unexpected end of quoted string"); }
+\>\=							{ return GREATEREQUAL_P; }
-<xQUOTED>\"						{
+\>								{ return GREATER_P; }
 \${any}+						{
 									addstring(true, yytext + 1, yyleng - 1);
 									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
+									return VARIABLE_P;
-									return STRING_P;
+								}
 \$\"							{
 									addchar(true, '\0');
 									BEGIN xvq;
 								}
 {special}						{ return *yytext; }
 {blank}+						{ /* ignore */ }
 \/\*							{
 									addchar(true, '\0');
 									BEGIN xc;
 								}
-<xVARQUOTED>\"					{
+[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
+									return NUMERIC_P;
 									return VARIABLE_P;
 								}
-<xSINGLEQUOTED>\'				{
+\.[0-9]+[eE][+-]?[0-9]+			{ /* float */
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
+									return NUMERIC_P;
 									return STRING_P;
 								}
-<xQUOTED,xVARQUOTED>[^\\\"]+	{ addstring(false, yytext, yyleng); }
+([0-9]+)?\.[0-9]+				{
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return NUMERIC_P;
 								}
-<xSINGLEQUOTED>[^\\\']+			{ addstring(false, yytext, yyleng); }
+[0-9]+							{
 									addstring(true, yytext, yyleng);
 									addchar(false, '\0');
 									yylval->str = scanstring;
 									return INT_P;
 								}
-<INITIAL><<EOF>>				{ yyterminate(); }
+{any}+							{
 									addstring(true, yytext, yyleng);
 									BEGIN xnq;
 								}
-<xCOMMENT>\*\/					{ BEGIN INITIAL; }
+\"								{
 									addchar(true, '\0');
 									BEGIN xq;
 								}
-<xCOMMENT>[^\*]+				{ }
+\'								{
 									addchar(true, '\0');
 									BEGIN xsq;
 								}
-<xCOMMENT>\*					{ }
+\\								{
 									yyless(0);
 									addchar(true, '\0');
 									BEGIN xnq;
 								}
-<xCOMMENT><<EOF>>				{ yyerror(NULL, "Unexpected end of comment"); }
+<<EOF>>							{ yyterminate(); }
 %%
@ -292,7 +300,6 @@ typedef struct JsonPathKeyword
 * Array of key words should be sorted by length and then
 * alphabetical order
 */
 static const JsonPathKeyword keywords[] = {
 	{ 2, false,	IS_P,		"is"},
 	{ 2, false,	TO_P,		"to"},
@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = {
 	{ 10,false, LIKE_REGEX_P, "like_regex"},
 };
-static int
+/* Check if current scanstring value is a keyword */
-checkSpecialVal()
+static enum yytokentype
 checkKeyword()
 {
 	int						res = IDENT_P;
 	int						diff;
@ -397,49 +405,50 @@ jsonpath_scanner_finish(void)
 	pfree(scanbuf);
 }
 /*
 * Resize scanstring so that it can append string of given length.
 * Reinitialize if required.
 */
 static void
-addstring(bool init, char *s, int l)
+resizeString(bool init, int appendLen)
 {
 	if (init)
 	{
-		scanstring.total = 32;
+		scanstring.total = Max(32, appendLen);
-		scanstring.val = palloc(scanstring.total);
+		scanstring.val = (char *) palloc(scanstring.total);
 		scanstring.len = 0;
 	}
-
+	else
 	if (s && l)
 	{
-		while(scanstring.len + l + 1 >= scanstring.total)
+		if (scanstring.len + appendLen >= scanstring.total)
 		{
 			while (scanstring.len + appendLen >= scanstring.total)
 				scanstring.total *= 2;
 			scanstring.val = repalloc(scanstring.val, scanstring.total);
 		}
 		memcpy(scanstring.val + scanstring.len, s, l);
 		scanstring.len += l;
 	}
 }
 /* Add set of bytes at "s" of length "l" to scanstring */
 static void
-addchar(bool init, char s)
+addstring(bool init, char *s, int l)
 {
 	if (init)
 	{
 		scanstring.total = 32;
 		scanstring.val = palloc(scanstring.total);
 		scanstring.len = 0;
 	}
 	else if(scanstring.len + 1 >= scanstring.total)
 {
-		scanstring.total *= 2;
+	resizeString(init, l + 1);
-		scanstring.val = repalloc(scanstring.val, scanstring.total);
+	memcpy(scanstring.val + scanstring.len, s, l);
 	scanstring.len += l;
 }
-	scanstring.val[ scanstring.len ] = s;
+/* Add single byte "c" to scanstring */
-	if (s != '\0')
+static void
 addchar(bool init, char c)
 {
 	resizeString(init, 1);
 	scanstring.val[scanstring.len] = c;
 	if (c != '\0')
 		scanstring.len++;
 }
 /* Interface to jsonpath parser */
 JsonPathParseResult *
 parsejsonpath(const char *str, int len)
 {
@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len)
 	return parseresult;
 }
 /* Turn hex character into integer */
 static int
 hexval(char c)
 {
@ -468,6 +478,7 @@ hexval(char c)
 	return 0; /* not reached */
 }
 /* Add given unicode character to scanstring */
 static void
 addUnicodeChar(int ch)
 {
@ -515,6 +526,7 @@ addUnicodeChar(int ch)
 	}
 }
 /* Add unicode character and process its hi surrogate */
 static void
 addUnicode(int ch, int *hi_surrogate)
 {
@ -592,6 +604,7 @@ parseUnicode(char *s, int l)
 	}
 }
 /* Parse sequence of hex-encoded characters */
 static void
 parseHexChars(char *s, int l)
 {
@ -601,7 +614,8 @@ parseHexChars(char *s, int l)
 	for (i = 0; i < l / 4; i++)
 	{
-		int			ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]);
+		int			ch = (hexval(s[i * 4 + 2]) << 4) |
 						  hexval(s[i * 4 + 3]);
 		addUnicodeChar(ch);
 	}