@ -1,15 +1,14 @@
%{
/*-------------------------------------------------------------------------
*
* scan.l - Scanner for the PL/pgSQL
* procedural language
* scan.l - Scanner for the PL/pgSQL procedural language
*
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.74 2009/11/07 00:52:26 tgl Exp $
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.75 2009/11/09 00:26:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -24,27 +23,32 @@
#define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
/*
* Each call to yylex must set yylloc to the location of the found token
* (expressed as a byte offset from the start of the input text).
* When we parse a token that requires multiple lexer rules to process,
* remember the token's starting position this way.
* this should be done in the first such rule, else yylloc will point
* into the middle of the token.
*/
#define SAVE_TOKEN_START() \
( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
#define SET_YYLLOC() (yylloc = yytext - scanbuf)
/* Handles to the buffer that the lexer uses internally */
/* scanbufhandle is the value returned by yy_scan_buffer() in plpgsql_scanner_init */
static YY_BUFFER_STATE scanbufhandle;
/* scanbuf is the palloc'd, specially terminated copy of the input that flex scans */
static char *scanbuf;
static const char *scanstr ; /* original input string */
static const char *scanorig ; /* original input string */
/*
 * One-token pushback state; have_pushback_token is cleared by
 * plpgsql_scanner_init.  Presumably consumed by plpgsql_yylex and set by
 * plpgsql_push_back_token -- their bodies are not visible here.
 */
static int pushback_token;
static bool have_pushback_token;
/*
 * Line-tracking state: start of the tracked line, the next '\n' after it
 * (NULL if there is none), and the line number assigned to that line.
 */
static const char *cur_line_start;
static const char *cur_line_end;
static int cur_line_num;
static int xcdepth = 0; /* depth of nesting in slash-star comments */
static char *dolqstart; /* current $foo$ quote start string */
/* when false, the word rules return T_WORD/T_DBLWORD/T_TRIPWORD without calling plpgsql_parse_*() */
bool plpgsql_LookupIdentifiers = true;
/* set by the whitespace/comment rules; cleared on entry to yylex() and in plpgsql_scanner_init */
bool plpgsql_SpaceScanned = false;
/* forward declaration: (re)initializes the cur_line_* state above */
static void location_lineno_init(void);
%}
%option 8bit
@ -53,6 +57,10 @@ bool plpgsql_SpaceScanned = false;
%option noinput
%option nounput
%option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option warn
%option prefix="plpgsql_base_yy"
%option case-insensitive
@ -126,133 +134,117 @@ param \${digit}+
%%
/* ----------
* Local variables in scanner to remember where
* a string or comment started
* ----------
*/
int start_lineno = 0;
char *start_charpos = NULL;
/* ----------
* Reset the state when entering the scanner
* Reset the state when entering yylex()
* ----------
*/
BEGIN(INITIAL);
plpgsql_SpaceScanned = false;
/* ----------
* The keyword rules
* ----------
*/
:= { return K_ASSIGN; }
= { return K_ASSIGN; }
\.\. { return K_DOTDOT; }
alias { return K_ALIAS; }
all { return K_ALL; }
begin { return K_BEGIN; }
by { return K_BY; }
case { return K_CASE; }
close { return K_CLOSE; }
constant { return K_CONSTANT; }
continue { return K_CONTINUE; }
cursor { return K_CURSOR; }
declare { return K_DECLARE; }
default { return K_DEFAULT; }
diagnostics { return K_DIAGNOSTICS; }
else { return K_ELSE; }
elseif { return K_ELSIF; }
elsif { return K_ELSIF; }
end { return K_END; }
exception { return K_EXCEPTION; }
execute { return K_EXECUTE; }
exit { return K_EXIT; }
fetch { return K_FETCH; }
for { return K_FOR; }
from { return K_FROM; }
get { return K_GET; }
if { return K_IF; }
in { return K_IN; }
insert { return K_INSERT; }
into { return K_INTO; }
is { return K_IS; }
loop { return K_LOOP; }
move { return K_MOVE; }
no{space}+scroll { return K_NOSCROLL; }
not { return K_NOT; }
null { return K_NULL; }
open { return K_OPEN; }
or { return K_OR; }
perform { return K_PERFORM; }
raise { return K_RAISE; }
result_oid { return K_RESULT_OID; }
return { return K_RETURN; }
reverse { return K_REVERSE; }
row_count { return K_ROW_COUNT; }
scroll { return K_SCROLL; }
strict { return K_STRICT; }
then { return K_THEN; }
to { return K_TO; }
type { return K_TYPE; }
using { return K_USING; }
when { return K_WHEN; }
while { return K_WHILE; }
^#option { return O_OPTION; }
dump { return O_DUMP; }
:= { SET_YYLLOC(); return K_ASSIGN; }
= { SET_YYLLOC(); return K_ASSIGN; }
\.\. { SET_YYLLOC(); return K_DOTDOT; }
alias { SET_YYLLOC(); return K_ALIAS; }
all { SET_YYLLOC(); return K_ALL; }
begin { SET_YYLLOC(); return K_BEGIN; }
by { SET_YYLLOC(); return K_BY; }
case { SET_YYLLOC(); return K_CASE; }
close { SET_YYLLOC(); return K_CLOSE; }
constant { SET_YYLLOC(); return K_CONSTANT; }
continue { SET_YYLLOC(); return K_CONTINUE; }
cursor { SET_YYLLOC(); return K_CURSOR; }
declare { SET_YYLLOC(); return K_DECLARE; }
default { SET_YYLLOC(); return K_DEFAULT; }
diagnostics { SET_YYLLOC(); return K_DIAGNOSTICS; }
else { SET_YYLLOC(); return K_ELSE; }
elseif { SET_YYLLOC(); return K_ELSIF; }
elsif { SET_YYLLOC(); return K_ELSIF; }
end { SET_YYLLOC(); return K_END; }
exception { SET_YYLLOC(); return K_EXCEPTION; }
execute { SET_YYLLOC(); return K_EXECUTE; }
exit { SET_YYLLOC(); return K_EXIT; }
fetch { SET_YYLLOC(); return K_FETCH; }
for { SET_YYLLOC(); return K_FOR; }
from { SET_YYLLOC(); return K_FROM; }
get { SET_YYLLOC(); return K_GET; }
if { SET_YYLLOC(); return K_IF; }
in { SET_YYLLOC(); return K_IN; }
insert { SET_YYLLOC(); return K_INSERT; }
into { SET_YYLLOC(); return K_INTO; }
is { SET_YYLLOC(); return K_IS; }
loop { SET_YYLLOC(); return K_LOOP; }
move { SET_YYLLOC(); return K_MOVE; }
no{space}+scroll { SET_YYLLOC(); return K_NOSCROLL; }
not { SET_YYLLOC(); return K_NOT; }
null { SET_YYLLOC(); return K_NULL; }
open { SET_YYLLOC(); return K_OPEN; }
or { SET_YYLLOC(); return K_OR; }
perform { SET_YYLLOC(); return K_PERFORM; }
raise { SET_YYLLOC(); return K_RAISE; }
result_oid { SET_YYLLOC(); return K_RESULT_OID; }
return { SET_YYLLOC(); return K_RETURN; }
reverse { SET_YYLLOC(); return K_REVERSE; }
row_count { SET_YYLLOC(); return K_ROW_COUNT; }
scroll { SET_YYLLOC(); return K_SCROLL; }
strict { SET_YYLLOC(); return K_STRICT; }
then { SET_YYLLOC(); return K_THEN; }
to { SET_YYLLOC(); return K_TO; }
type { SET_YYLLOC(); return K_TYPE; }
using { SET_YYLLOC(); return K_USING; }
when { SET_YYLLOC(); return K_WHEN; }
while { SET_YYLLOC(); return K_WHILE; }
^#option { SET_YYLLOC(); return O_OPTION; }
dump { SET_YYLLOC(); return O_DUMP; }
/* ----------
* Special word rules
*
* We set plpgsql_error_lineno in each rule so that errors reported
* in the pl_comp.c subroutines will point to the right place.
* ----------
*/
{identifier} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_WORD;
return plpgsql_parse_word(yytext); }
{identifier}{space}*\.{space}*{identifier} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_DBLWORD;
return plpgsql_parse_dblword(yytext); }
{identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_TRIPWORD;
return plpgsql_parse_tripword(yytext); }
{param} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_WORD;
return plpgsql_parse_word(yytext); }
{param}{space}*\.{space}*{identifier} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_DBLWORD;
return plpgsql_parse_dblword(yytext); }
{param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier} {
plpgsql_error_lineno = plpgsql_scanner_lineno ();
SET_YYLLOC ();
if (!plpgsql_LookupIdentifiers) return T_TRIPWORD;
return plpgsql_parse_tripword(yytext); }
{digit}+ { return T_NUMBER; }
\". { yyerror("unterminated quoted identifier"); }
{digit}+ { SET_YYLLOC(); return T_NUMBER; }
/* ----------
* Ignore whitespace (including comments) but remember this happened
* ----------
*/
{whitespace} { plpgsql_SpaceScanned = true; }
\". { SET_YYLLOC(); yyerror("unterminated quoted identifier"); }
/* ----------
* Comment and literal handling is mostly copied from the core lexer
* ----------
*/
{whitespace} {
/* ignore */
}
{xcstart} {
/* Set location in case of syntax error in comment */
SAVE_TOKEN_START();
SET_YYLLOC();
xcdepth = 0;
BEGIN(xc);
plpgsql_SpaceScanned = true;
}
<xc>{xcstart} {
@ -281,14 +273,14 @@ dump { return O_DUMP; }
<xc><<EOF>> { yyerror("unterminated /* comment"); }
{xqstart} {
SAVE_TOKEN_START ();
SET_YYLLOC ();
if (standard_conforming_strings)
BEGIN(xq);
else
BEGIN(xe);
}
{xestart} {
SAVE_TOKEN_START ();
SET_YYLLOC ();
BEGIN(xe);
}
<xq,xe>{quotestop} |
@ -296,8 +288,8 @@ dump { return O_DUMP; }
yyless(1);
BEGIN(INITIAL);
/* adjust yytext/yyleng to describe whole string token */
yyleng += (yytext - start_charpos );
yytext = start_charpos ;
yyleng += (yytext - (scanbuf + yylloc) );
yytext = scanbuf + yylloc ;
return T_STRING;
}
<xq,xe>{xqdouble} {
@ -317,7 +309,7 @@ dump { return O_DUMP; }
<xq,xe><<EOF>> { yyerror("unterminated quoted string"); }
{dolqdelim} {
SAVE_TOKEN_START ();
SET_YYLLOC ();
dolqstart = pstrdup(yytext);
BEGIN(xdolq);
}
@ -325,7 +317,7 @@ dump { return O_DUMP; }
/* throw back all but the initial "$" */
yyless(1);
/* and treat it as {other} */
return yytext[0];
SET_YYLLOC(); return yytext[0];
}
<xdolq>{dolqdelim} {
if (strcmp(yytext, dolqstart) == 0)
@ -333,8 +325,8 @@ dump { return O_DUMP; }
pfree(dolqstart);
BEGIN(INITIAL);
/* adjust yytext/yyleng to describe whole string */
yyleng += (yytext - start_charpos );
yytext = start_charpos ;
yyleng += (yytext - (scanbuf + yylloc) );
yytext = scanbuf + yylloc ;
return T_STRING;
}
else
@ -361,7 +353,7 @@ dump { return O_DUMP; }
* ----------
*/
. {
return yytext[0];
SET_YYLLOC(); return yytext[0];
}
%%
@ -369,9 +361,10 @@ dump { return O_DUMP; }
/*
* This is the yylex routine called from outside. It exists to provide
* a one-token pushback facility. Beware of trying to make it do more:
* for the most part, plpgsql's gram.y assumes that yytext is in step
* with the "current token".
* a one-token pushback facility. Beware of trying to push back more;
* for the most part, plpgsql's gram.y assumes that yytext and yylloc
* are in step with the "current token". In particular it is assumed that
* those are in step with the result immediately after any yylex() call.
*/
int
plpgsql_yylex(void)
@ -387,7 +380,8 @@ plpgsql_yylex(void)
/*
* Push back a single token to be re-read by next plpgsql_yylex() call.
*
* NOTE: this does not cause yytext to "back up".
* NOTE: this does not cause yytext or yylloc to "back up". Also, it
* is not a good idea to push back a token other than what you read.
*/
void
plpgsql_push_back_token(int token)
@ -399,18 +393,61 @@ plpgsql_push_back_token(int token)
}
/*
* Report a syntax error.
* Append the function text starting at startlocation and extending to
* (not including) endlocation onto the existing contents of "buf".
*/
void
plpgsql_yyerror(const char *message)
plpgsql_append_source_text(StringInfo buf,
int startlocation, int endlocation)
{
/* a reversed range would produce a negative length below */
Assert(startlocation <= endlocation);
/* copy bytes [startlocation, endlocation) of the original source text onto buf */
appendBinaryStringInfo(buf, scanorig + startlocation,
endlocation - startlocation);
}
/*
* plpgsql_scanner_errposition
* Report an error cursor position, if possible.
*
* This is expected to be used within an ereport() call. The return value
* is a dummy (always 0, in fact).
*
* Note that this can only be used for messages emitted during initial
* parsing of a plpgsql function, since it requires the scanorig string
* to still be available.
*/
int
plpgsql_scanner_errposition(int location)
{
/* NOTE(review): loc and cursorpos are assigned but not otherwise used in this function as shown */
const char *loc = yytext;
int cursorpos;
int pos;
plpgsql_error_lineno = plpgsql_scanner_lineno();
/* location is applied below as a byte offset into scanorig; bail out if either is unusable */
if (location < 0 || scanorig == NULL)
return 0; /* no-op if location is unknown */
/* in multibyte encodings, return index in characters not bytes */
cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
/* Convert byte offset to character number */
/* NOTE(review): the "+ 1" presumably makes the position 1-based -- confirm against internalerrposition() */
pos = pg_mbstrlen_with_len(scanorig, location) + 1;
/* And pass it to the ereport mechanism */
(void) internalerrposition(pos);
/* Also pass the function body string */
/* the function's result is whatever internalerrquery() returns */
return internalerrquery(scanorig);
}
/*
* plpgsql_yyerror
* Report a lexer or grammar error.
*
* The message's cursor position is whatever YYLLOC was last set to,
* i.e., the start of the current token if called within yylex(), or the
* most recently lexed token if called from the grammar.
* This is OK for syntax error messages from the Bison parser, because Bison
* parsers report an error as soon as the first unparsable token is reached.
* Beware of using yyerror for other purposes, as the cursor position might
* be misleading!
*/
void
plpgsql_yyerror(const char *message)
{
const char *loc = scanbuf + yylloc;
if (*loc == YY_END_OF_BUFFER_CHAR)
{
@ -418,8 +455,7 @@ plpgsql_yyerror(const char *message)
(errcode(ERRCODE_SYNTAX_ERROR),
/* translator: %s is typically the translation of "syntax error" */
errmsg("%s at end of input", _(message)),
internalerrposition(cursorpos),
internalerrquery(scanstr)));
plpgsql_scanner_errposition(yylloc)));
}
else
{
@ -427,33 +463,72 @@ plpgsql_yyerror(const char *message)
(errcode(ERRCODE_SYNTAX_ERROR),
/* translator: first %s is typically the translation of "syntax error" */
errmsg("%s at or near \"%s\"", _(message), loc),
internalerrposition(cursorpos),
internalerrquery(scanstr)));
plpgsql_scanner_errposition(yylloc)));
}
}
/*
* Get the line number at which the current token ends. This substitutes
* for flex's very poorly implemented yylineno facility .
* Given a location (a byte offset in the function source text),
* return a line number .
*
* We assume that flex has written a '\0' over the character following the
* current token in scanbuf. So, we just have to count the '\n' characters
* before that. We optimize this a little by keeping track of the last
* '\n' seen so far.
* We expect that this is typically called for a sequence of increasing
* location values, so optimize accordingly by tracking the endpoints
* of the "current" line.
*/
int
plpgsql_scanner_lineno(void )
plpgsql_location_to_lineno(int location )
{
const char *c;
const char *loc;
/* a negative offset or a missing source string cannot be mapped to a line */
if (location < 0 || scanorig == NULL)
return 0; /* garbage in, garbage out */
/* turn the byte offset into a pointer into the original source text */
loc = scanorig + location;
/* be correct, but not fast, if input location goes backwards */
if (loc < cur_line_start)
location_lineno_init();
/* advance one line per iteration while loc lies beyond the tracked line's newline */
while ((c = strchr(cur_line_start, '\n')) != NULL)
while (cur_line_end != NULL && loc > cur_line_end )
{
cur_line_start = c + 1;
cur_line_start = cur_line_end + 1;
cur_line_num++;
/* strchr yields NULL when no further newline exists, which ends the loop */
cur_line_end = strchr(cur_line_start, '\n');
}
return cur_line_num;
}
/*
 * location_lineno_init
 *		Set up (or rewind) the line-tracking state consulted by
 *		plpgsql_location_to_lineno.
 *
 * Tracking restarts at the beginning of scanorig with line number 1;
 * scanorig must already point at the source text when this is called.
 */
static void
location_lineno_init(void)
{
	const char *first_line = scanorig;

	/*----------
	 * Hack: step over one leading newline (optionally preceded by a
	 * carriage return), so that in the common coding layout
	 *		CREATE FUNCTION ... AS $$
	 *			code body
	 *		$$ LANGUAGE plpgsql;
	 * "line 1" is the line the programmer thinks of as line 1.
	 *----------
	 */
	if (*first_line == '\r')
		first_line++;
	if (*first_line == '\n')
		first_line++;

	cur_line_start = first_line;
	/* NULL here means the text has no further newline */
	cur_line_end = strchr(first_line, '\n');
	cur_line_num = 1;
}
/*
 * plpgsql_latest_lineno
 *		Return the current value of the line counter without recomputing
 *		anything: the line number last arrived at by
 *		plpgsql_location_to_lineno, or 1 right after the tracking state
 *		was (re)initialized.
 */
int
plpgsql_latest_lineno(void)
{
	return cur_line_num;
}
/*
* Called before any actual parsing is done
*
@ -464,48 +539,37 @@ plpgsql_scanner_lineno(void)
void
plpgsql_scanner_init(const char *str)
{
Size slen;
slen = strlen(str);
Size slen = strlen(str);
/*
* Might be left over after ereport()
* Reset flex internal state. Whatever data it might think it has
* has long since been pfree'd.
*/
if (YY_CURRENT_BUFFER)
yy_delete_buffer(YY_CURRENT_BUFFER);
yy_init_globals();
/*
* Make a scan buffer with special termination needed by flex.
*/
/* slen + 2: the input bytes plus the two terminator bytes stored below */
scanbuf = palloc(slen + 2);
scanbuf = (char *) palloc(slen + 2);
memcpy(scanbuf, str, slen);
scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
/* hand the prepared buffer to flex; the handle is kept for plpgsql_scanner_finish */
scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
/* Other setup */
scanstr = str;
/*
* scanorig points to the original string, which unlike scanbuf won't
* be modified on-the-fly by flex. Notice that although yytext points
* into scanbuf, we rely on being able to apply locations (offsets from
* string start) to scanorig as well.
*/
/* str is stored, not copied, so the caller's string must stay valid while scanorig is in use */
scanorig = str;
/* Other setup */
have_pushback_token = false;
cur_line_start = scanbuf;
cur_line_num = 1;
/*----------
* Hack: skip any initial newline, so that in the common coding layout
* CREATE FUNCTION ... AS '
* code body
* ' LANGUAGE plpgsql;
* we will think "line 1" is what the programmer thinks of as line 1.
*----------
*/
if (*cur_line_start == '\r')
cur_line_start++;
if (*cur_line_start == '\n')
cur_line_start++;
/* start line tracking from the beginning of scanorig */
location_lineno_init();
/* put the lexer in its initial start condition and restore the scanner flags to their defaults */
BEGIN(INITIAL);
plpgsql_LookupIdentifiers = true;
plpgsql_SpaceScanned = false;
}
/*
@ -514,6 +578,38 @@ plpgsql_scanner_init(const char *str)
/*
 * plpgsql_scanner_finish
 *		Release the scan buffer and its flex buffer handle, and clear the
 *		file-level pointers that referred to them and to the source text.
 */
void
plpgsql_scanner_finish(void)
{
	/* drop flex's buffer descriptor, then forget the handle */
	yy_delete_buffer(scanbufhandle);
	scanbufhandle = NULL;

	/* free our private copy of the input, then forget it */
	pfree(scanbuf);
	scanbuf = NULL;

	/* scanorig is only cleared here, not freed, so it cannot dangle */
	scanorig = NULL;
}
/*
* Interface functions to make flex use palloc() instead of malloc().
* It'd be better to make these static, but flex insists otherwise.
*/
/* flex allocation hook: obtain "bytes" bytes from palloc() */
void *
plpgsql_base_yyalloc(yy_size_t bytes)
{
	return palloc(bytes);
}
/*
 * flex reallocation hook: resize "ptr" to "bytes" bytes with repalloc(),
 * or make a fresh palloc() allocation when there is no existing chunk.
 */
void *
plpgsql_base_yyrealloc(void *ptr, yy_size_t bytes)
{
	/* nothing to resize: behave like a plain allocation */
	if (ptr == NULL)
		return palloc(bytes);

	return repalloc(ptr, bytes);
}
/*
 * flex deallocation hook: release "ptr" with pfree().
 *
 * A null pointer is ignored rather than handed to pfree().
 * NOTE(review): pfree() presumably does not accept NULL -- confirm before
 * removing this guard.
 */
void
plpgsql_base_yyfree(void *ptr)
{
	if (ptr == NULL)
		return;

	pfree(ptr);
}