@ -4,13 +4,27 @@
* scan.l
* lexical scanner for PostgreSQL
*
* XXX The rules in this file must be kept in sync with psql's lexer!!!
* NOTE NOTE NOTE:
*
* The rules in this file must be kept in sync with psql's lexer!!!
*
* The rules are designed so that the scanner never has to backtrack,
* in the sense that there is always a rule that can match the input
* consumed so far (the rule action may internally throw back some input
* with yyless(), however). As explained in the flex manual, this makes
* for a useful speed increase --- about a third faster than a plain -CF
* lexer, in simple testing. The extra complexity is mostly in the rules
* for handling float numbers and continued string literals. If you change
* the lexical rules, verify that you haven't broken the no-backtrack
* property by running flex with the "-b" option and checking that the
* resulting "lex.backup" file says that no backing up is needed.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.121 2005/03/11 19:13:42 momjian Exp $
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.122 2005/05/26 01:24:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -138,6 +152,20 @@ special_whitespace ({space}+|{comment}{newline})
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
/*
* To ensure that {quotecontinue} can be scanned without having to back up
* if the full pattern isn't matched, we include trailing whitespace in
* {quotestop}. This matches all cases where {quotecontinue} fails to match,
* except for {quote} followed by whitespace and just one "-" (not two,
* which would start a {comment}). To cover that we have {quotefail}.
* The actions for {quotestop} and {quotefail} must throw back characters
* beyond the quote proper.
*
* Summary of the patterns defined here:
*   quote          a single-quote character
*   quotestop      a quote followed by zero or more {whitespace}
*   quotecontinue  a quote, whitespace that includes a newline, and
*                  another quote
*   quotefail      a quote, zero or more {whitespace}, and a "-"
*/
quote '
quotestop {quote}{whitespace}*
quotecontinue {quote}{whitespace_with_newline}{quote}
quotefail {quote}{whitespace}*"-"
/* Bit string
* It is tempting to scan the string for only those characters
* which are allowed. However, this leads to silently swallowed
@ -148,16 +176,12 @@ whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
* validate the contents.
*/
/* Patterns for bit string literals:
*   xbstart   "b" or "B" immediately followed by a quote
*   xbstop    the closing quote
*   xbinside  zero or more characters other than a quote
*   xbcat     a quote, whitespace that includes a newline, and another
*             quote
*/
xbstart [bB]{quote}
xbstop {quote}
xbinside [^']*
xbcat {quote}{whitespace_with_newline}{quote}
/* Hexadecimal number
*   xhstart   "x" or "X" immediately followed by a quote
*   xhstop    the closing quote
*   xhinside  zero or more characters other than a quote
*   xhcat     a quote, whitespace that includes a newline, and another
*             quote
*/
xhstart [xX]{quote}
xhstop {quote}
xhinside [^']*
xhcat {quote}{whitespace_with_newline}{quote}
/* National character
*/
@ -165,26 +189,26 @@ xnstart [nN]{quote}
/* Extended quote
* xqdouble implements embedded quote
* xqcat allows strings to cross input lines
* xqinside matches a non-empty run of characters that are neither a
*   backslash nor a quote
* xqescape matches a backslash followed by one character that is not an
*   octal digit
* xqoctesc matches a backslash followed by one to three octal digits
*/
quote '
xqstart {quote}
xqstop {quote}
xqdouble {quote}{quote}
xqinside [^\\']+
xqescape [\\][^0-7]
xqoctesc [\\][0-7]{1,3}
xqcat {quote}{whitespace_with_newline}{quote}
/* $foo$ style quotes ("dollar quoting")
* The quoted string starts with $foo$ where "foo" is an optional string
* in the form of an identifier, except that it may not contain "$",
* and extends to the first occurrence of an identical string.
* There is *no* processing of the quoted text.
*
* {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
* fails to match its trailing "$".
*
* {dolq_start} is the set of characters allowed first in the tag (ASCII
* letters, bytes \200-\377, and underscore); {dolq_cont} additionally
* allows digits. {dolqinside} matches a non-empty run of characters
* other than "$".
*/
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
dolqfailed \${dolq_start}{dolq_cont}*
dolqinside [^$]+
/* Double quote
@ -242,12 +266,17 @@ operator {op_chars}+
/* We no longer allow unary minus in numbers.
* Instead we pass it separately to the parser, where it gets
* coerced via doNegate() -- Leon aug 20 1999
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely: they match a
* mantissa followed by [Ee] alone, or by [Ee] and a sign, with no
* exponent digits.
*
* NOTE(review): {real} is defined twice below. Both forms describe a
* mantissa ({integer} or {decimal}) followed by an exponent; confirm
* which of the two lines is meant to be kept.
*/
integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
real ({integer}|{decimal})[Ee][-+]?{digit}+
realfail1 ({integer}|{decimal})[Ee]
realfail2 ({integer}|{decimal})[Ee][-+]
param \${integer}
@ -310,6 +339,10 @@ other .
/* ignore */
}
<xc>\*+ {
/* a run of "*" seen in the xc state is discarded; nothing is returned */
}
<xc><<EOF>> { yyerror("unterminated /* comment"); }
{xbstart} {
@ -324,7 +357,9 @@ other .
startlit();
addlitchar('b');
}
<xb>{xbstop} {
<xb>{quotestop} |
<xb>{quotefail} {
yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return BCONST;
@ -333,8 +368,8 @@ other .
<xb>{xbinside} {
/*
* Body text of the bit string: hand the matched run of non-quote
* characters to addlit() (presumably appends to the literal being
* accumulated since startlit() -- confirm against its definition).
*/
addlit(yytext, yyleng);
}
<xh>{xhcat} |
<xb>{xbcat} {
<xh>{quotecontinue} |
<xb>{quotecontinue} {
/*
* A quote, whitespace including a newline, and another quote:
* the literal continues on a later line. Nothing is added to
* the literal and the start state is left unchanged.
*/
}
<xb><<EOF>> { yyerror("unterminated bit string literal"); }
@ -351,7 +386,9 @@ other .
startlit();
addlitchar('x');
}
<xh>{xhstop} {
<xh>{quotestop} |
<xh>{quotefail} {
yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return XCONST;
@ -365,13 +402,11 @@ other .
*/
const ScanKeyword *keyword;
/* This had better be a keyword! */
yyless(1); /* eat only 'n' this time */
/* nchar had better be a keyword! */
keyword = ScanKeywordLookup("nchar");
Assert(keyword != NULL);
yylval.keyword = keyword->name;
token_start = yytext;
BEGIN(xq);
startlit();
return keyword->value;
}
@ -380,7 +415,9 @@ other .
BEGIN(xq);
startlit();
}
<xq>{xqstop} {
<xq>{quotestop} |
<xq>{quotefail} {
yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return SCONST;
@ -398,7 +435,7 @@ other .
unsigned char c = strtoul(yytext+1, NULL, 8);
addlitchar(c);
}
<xq>{xqcat} {
<xq>{quotecontinue} {
/*
* A quote, whitespace including a newline, and another quote:
* the string continues on a later line. Nothing is added to
* the literal and the start state is left unchanged.
*/
}
<xq>. {
@ -413,6 +450,12 @@ other .
BEGIN(xdolq);
startlit();
}
{dolqfailed} {
/*
* Matched "$" plus identifier characters, but no closing "$", so
* this is not a dollar-quote delimiter. Having this rule means
* the scanner need not back up in that case.
*/
/* throw back all but the initial "$" */
yyless(1);
/* and treat it as {other}: return the "$" character itself */
return yytext[0];
}
<xdolq>{dolqdelim} {
if (strcmp(yytext, dolqstart) == 0)
{
@ -435,6 +478,9 @@ other .
<xdolq>{dolqinside} {
/* a run of non-"$" characters inside the quoted text: pass it to addlit() */
addlit(yytext, yyleng);
}
<xdolq>{dolqfailed} {
/*
* "$" plus identifier characters with no closing "$" is not a
* delimiter, so inside the quoted text it is ordinary content.
* Matching it here avoids backing up when {dolqdelim} fails.
*/
addlit(yytext, yyleng);
}
<xdolq>. {
/* This is only needed for $ inside the quoted text */
addlitchar(yytext[0]);
@ -576,6 +622,23 @@ other .
yylval.str = pstrdup(yytext);
return FCONST;
}
{realfail1} {
/*
* throw back the [Ee], and treat as {decimal}. Note
* that it is possible the input is actually {integer},
* but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish.
*
* yyless() must come first: it shortens the token, so
* that yytext holds only the retained mantissa when it is
* passed to pstrdup() below.
*/
yyless(yyleng-1);
yylval.str = pstrdup(yytext);
return FCONST;
}
{realfail2} {
/*
* Mantissa followed by [Ee] and a sign but no exponent digits:
* throw back the [Ee][+-] (two characters) and return the
* mantissa as FCONST, as is done for a bare trailing [Ee].
*/
yyless(yyleng-2);
yylval.str = pstrdup(yytext);
return FCONST;
}
{identifier} {