|
|
|
/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgsql, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
|
|
|
|
#ifndef SCANNER_H
|
|
|
|
#define SCANNER_H
|
|
|
|
|
|
|
|
#include "parser/keywords.h"
|
|
|
|
|
|
|
|
/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Only one member is meaningful for any given token; which one depends on
 * the token's declared semantic type (<ival>, <str>, or a keyword).
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;
|
|
|
|
|
|
|
|
/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 */
#define YYLTYPE  int
|
|
|
|
|
|
|
|
/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The YY_EXTRA data that a flex scanner allows us to pass around.
|
|
|
|
* Private state needed by the core scanner goes here. Note that the actual
|
|
|
|
* yy_extra struct may be larger and have this as its first component, thus
|
|
|
|
* allowing the calling parser to keep some fields of its own in YY_EXTRA.
|
|
|
|
*/
|
|
|
|
typedef struct core_yy_extra_type
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The string the scanner is physically scanning. We keep this mainly so
|
|
|
|
* that we can cheaply compute the offset of the current token (yytext).
|
|
|
|
*/
|
|
|
|
char *scanbuf;
|
|
|
|
Size scanbuflen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The keyword list to use.
|
|
|
|
*/
|
|
|
|
const ScanKeyword *keywords;
|
|
|
|
int num_keywords;
|
|
|
|
|
Prevent duplicate escape-string warnings when using pg_stat_statements.
contrib/pg_stat_statements will sometimes run the core lexer a second time
on submitted statements. Formerly, if you had standard_conforming_strings
turned off, this led to sometimes getting two copies of any warnings
enabled by escape_string_warning. While this is probably no longer a big
deal in the field, it's a pain for regression testing.
To fix, change the lexer so it doesn't consult the escape_string_warning
GUC variable directly, but looks at a copy in the core_yy_extra_type state
struct. Then, pg_stat_statements can change that copy to disable warnings
while it's redoing the lexing.
It seemed like a good idea to make this happen for all three of the GUCs
consulted by the lexer, not just escape_string_warning. There's not an
immediate use-case for callers to adjust the other two AFAIK, but making
it possible is easy enough and seems like good future-proofing.
Arguably this is a bug fix, but there doesn't seem to be enough interest to
justify a back-patch. We'd not be able to back-patch exactly as-is anyway,
for fear of breaking ABI compatibility of the struct. (We could perhaps
back-patch the addition of only escape_string_warning by adding it at the
end of the struct, where there's currently alignment padding space.)
11 years ago
|
|
|
/*
|
|
|
|
* Scanner settings to use. These are initialized from the corresponding
|
|
|
|
* GUC variables by scanner_init(). Callers can modify them after
|
|
|
|
* scanner_init() if they don't want the scanner's behavior to follow the
|
|
|
|
* prevailing GUC settings.
|
|
|
|
*/
|
|
|
|
int backslash_quote;
|
|
|
|
bool escape_string_warning;
|
|
|
|
bool standard_conforming_strings;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* literalbuf is used to accumulate literal values when multiple rules are
|
|
|
|
* needed to parse a single literal. Call startlit() to reset buffer to
|
|
|
|
* empty, addlit() to add text. NOTE: the string in literalbuf is NOT
|
|
|
|
* necessarily null-terminated, but there always IS room to add a trailing
|
|
|
|
* null at offset literallen. We store a null only when we need it.
|
|
|
|
*/
|
|
|
|
char *literalbuf; /* palloc'd expandable buffer */
|
|
|
|
int literallen; /* actual current string length */
|
|
|
|
int literalalloc; /* current allocated buffer size */
|
|
|
|
|
|
|
|
int xcdepth; /* depth of nesting in slash-star comments */
|
|
|
|
char *dolqstart; /* current $foo$ quote start string */
|
|
|
|
|
|
|
|
/* first part of UTF16 surrogate pair for Unicode escapes */
|
|
|
|
int32 utf16_first_part;
|
|
|
|
|
|
|
|
/* state variables for literal-lexing warnings */
|
|
|
|
bool warn_on_first_escape;
|
|
|
|
bool saw_non_ascii;
|
|
|
|
} core_yy_extra_type;
|
|
|
|
|
|
|
|
/*
 * The type of yyscanner is opaque outside scan.l.
 * Callers only pass the handle around; they must not dereference it.
 */
typedef void *core_yyscan_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* Entry points in parser/scan.l */
|
|
|
|
extern core_yyscan_t scanner_init(const char *str,
|
|
|
|
core_yy_extra_type *yyext,
|
|
|
|
const ScanKeyword *keywords,
|
|
|
|
int num_keywords);
|
|
|
|
extern void scanner_finish(core_yyscan_t yyscanner);
|
|
|
|
extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
|
|
|
|
core_yyscan_t yyscanner);
|
|
|
|
extern int scanner_errposition(int location, core_yyscan_t yyscanner);
|
|
|
|
extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) __attribute__((noreturn));
|
|
|
|
|
|
|
|
#endif /* SCANNER_H */
|