Implement hex literal conversion to bit string literal.

This may not be the long-term solution (discussion with Peter E. is still
 ongoing), but it is better than the current behavior of converting hex
 literals to an integer, which I put in years ago before we had any bit
 string types at all.
Hex string input is already supported by the bit string implementation elsewhere.
REL7_3_STABLE
Thomas G. Lockhart 23 years ago
parent ce5dc562e6
commit 043f9eb90a
  1. 74
      src/backend/parser/scan.l

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.97 2002/06/22 02:04:45 thomas Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.98 2002/08/04 06:36:18 thomas Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -60,7 +60,7 @@ static char *litbufdup(void);
* When we parse a token that requires multiple lexer rules to process, * When we parse a token that requires multiple lexer rules to process,
* we set token_start to point at the true start of the token, for use * we set token_start to point at the true start of the token, for use
* by yyerror(). yytext will point at just the text consumed by the last * by yyerror(). yytext will point at just the text consumed by the last
* rule, so it's not very helpful (eg, it might contain just the last * rule, so it's not very helpful (e.g., it might contain just the last
* quote mark of a quoted identifier). But to avoid cluttering every rule * quote mark of a quoted identifier). But to avoid cluttering every rule
* with setting token_start, we allow token_start = NULL to denote that * with setting token_start, we allow token_start = NULL to denote that
* it's okay to use yytext. * it's okay to use yytext.
@ -93,10 +93,10 @@ unsigned char unescape_single_char(unsigned char c);
* and to eliminate parsing troubles for numeric strings. * and to eliminate parsing troubles for numeric strings.
* Exclusive states: * Exclusive states:
* <xb> bit string literal * <xb> bit string literal
* <xc> extended C-style comments - thomas 1997-07-12 * <xc> extended C-style comments
* <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 * <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string - thomas 1997-11-16 * <xh> hexadecimal numeric string
* <xq> quoted strings - thomas 1997-07-30 * <xq> quoted strings
*/ */
%x xb %x xb
@ -106,6 +106,13 @@ unsigned char unescape_single_char(unsigned char c);
%x xq %x xq
/* Bit string /* Bit string
* It is tempting to scan the string for only those characters
* which are allowed. However, this leads to silently swallowed
* characters if illegal characters are included in the string.
* For example, if xbinside is [01] then B'ABCD' is interpreted
* as a zero-length string, and the ABCD' is lost!
* Better to pass the string forward and let the input routines
* validate the contents.
*/ */
xbstart [bB]{quote} xbstart [bB]{quote}
xbstop {quote} xbstop {quote}
@ -116,7 +123,7 @@ xbcat {quote}{whitespace_with_newline}{quote}
*/ */
xhstart [xX]{quote} xhstart [xX]{quote}
xhstop {quote} xhstop {quote}
xhinside [^']+ xhinside [^']*
xhcat {quote}{whitespace_with_newline}{quote} xhcat {quote}{whitespace_with_newline}{quote}
/* National character /* National character
@ -244,7 +251,7 @@ other .
* style of two adjacent single quotes "''" and in the Postgres/Java style * style of two adjacent single quotes "''" and in the Postgres/Java style
* of escaped-quote "\'". * of escaped-quote "\'".
* Other embedded escaped characters are matched explicitly and the leading * Other embedded escaped characters are matched explicitly and the leading
* backslash is dropped from the string. - thomas 1997-09-24 * backslash is dropped from the string.
* Note that xcstart must appear before operator, as explained above! * Note that xcstart must appear before operator, as explained above!
* Also whitespace (comment) must appear before operator. * Also whitespace (comment) must appear before operator.
*/ */
@ -291,8 +298,10 @@ other .
{xbstart} { {xbstart} {
/* Binary bit type. /* Binary bit type.
* Should be passing the type forward into the parser * At some point we should simply pass the string
* rather than trying to embed it into the string. * forward to the parser and label it there.
* In the meantime, place a leading "b" on the string
* to mark it for the input routine as a binary string.
*/ */
token_start = yytext; token_start = yytext;
BEGIN(xb); BEGIN(xb);
@ -301,10 +310,8 @@ other .
} }
<xb>{xbstop} { <xb>{xbstop} {
BEGIN(INITIAL); BEGIN(INITIAL);
if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
yyerror("invalid bit string input");
yylval.str = litbufdup(); yylval.str = litbufdup();
return BITCONST; return BCONST;
} }
<xh>{xhinside} | <xh>{xhinside} |
<xb>{xbinside} { <xb>{xbinside} {
@ -314,44 +321,43 @@ other .
<xb>{xbcat} { <xb>{xbcat} {
/* ignore */ /* ignore */
} }
<xb><<EOF>> { yyerror("unterminated bit string literal"); } <xb><<EOF>> {
yyerror("unterminated bit string literal");
}
{xhstart} { {xhstart} {
/* Hexadecimal bit type. /* Hexadecimal bit type.
* Should be passing the type forward into the parser * At some point we should simply pass the string
* rather than trying to embed it into the string. * forward to the parser and label it there.
* In the meantime, place a leading "x" on the string
* to mark it for the input routine as a hex string.
*/ */
token_start = yytext; token_start = yytext;
BEGIN(xh); BEGIN(xh);
startlit(); startlit();
addlitchar('x');
} }
<xh>{xhstop} { <xh>{xhstop} {
long val;
char* endptr;
BEGIN(INITIAL); BEGIN(INITIAL);
errno = 0; yylval.str = litbufdup();
val = strtol(literalbuf, &endptr, 16); return XCONST;
if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64
/* if long > 32 bits, check for overflow of int4 */
|| val != (long) ((int32) val)
#endif
)
yyerror("bad hexadecimal integer input");
yylval.ival = val;
return ICONST;
} }
<xh><<EOF>> { yyerror("unterminated hexadecimal integer"); } <xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); }
{xnstart} { {xnstart} {
/* National character. /* National character.
* Need to remember type info to flow it forward into the parser. * We will pass this along as a normal character string,
* Not yet implemented. - thomas 2002-06-17 * but preceded with an internally-generated "NCHAR".
*/ */
const ScanKeyword *keyword;
/* This had better be a keyword! */
keyword = ScanKeywordLookup("nchar");
Assert(keyword != NULL);
yylval.keyword = keyword->name;
token_start = yytext; token_start = yytext;
BEGIN(xq); BEGIN(xq);
startlit(); startlit();
return keyword->value;
} }

Loading…
Cancel
Save