mirror of https://github.com/postgres/postgres
This gets us to a point where psqlscan.l can be used by other frontend programs for the same purpose psql uses it for, ie to detect when it's collected a complete SQL command from input that is divided across line boundaries. Moreover, other programs can supply their own lexers for backslash commands of their own choosing. A follow-on patch will use this in pgbench. The end result here is roughly the same as in Kyotaro Horiguchi's 0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although the details of the method for switching between lexers are quite different. Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE stack, *and* yyscan_t between different lexers. The only thing we need to do to switch to a different lexer is to make sure the start_state is valid for the new lexer. This works because flex doesn't keep any other persistent state that depends on the specific lexing tables generated for a particular .l file. (We are assuming that both lexers are built with the same flex version, or at least versions that are compatible with respect to the contents of yyscan_t; but that doesn't seem likely to be a big problem in practice, considering how slowly flex changes.) Aside from being more efficient than Horiguchi-san's original solution, this avoids possible corner-case changes in semantics: the original code was capable of popping the input buffer stack while still staying in backslash-related parsing states. I'm not sure that that equates to any useful user-visible behaviors, but I'm not sure it doesn't either, so I'm loath to assume that we only need to consider the topmost buffer when parsing a backslash command. I've attempted to update the MSVC build scripts for the added .l file, but will rely on the buildfarm to see if I missed anything. Kyotaro Horiguchi and Tom Lane
pull/11/head
parent
27199058d9
commit
0ea9efbe9e
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,129 @@ |
||||
/*
|
||||
* psqlscan_int.h |
||||
* lexical scanner internal declarations |
||||
* |
||||
* This file declares the PsqlScanStateData structure used by psqlscan.l |
||||
* and shared by other lexers compatible with it, such as psqlscanslash.l. |
||||
* |
||||
* One difficult aspect of this code is that we need to work in multibyte |
||||
* encodings that are not ASCII-safe. A "safe" encoding is one in which each |
||||
* byte of a multibyte character has the high bit set (it's >= 0x80). Since |
||||
* all our lexing rules treat all high-bit-set characters alike, we don't |
||||
* really need to care whether such a byte is part of a sequence or not. |
||||
* In an "unsafe" encoding, we still expect the first byte of a multibyte |
||||
* sequence to be >= 0x80, but later bytes might not be. If we scan such |
||||
* a sequence as-is, the lexing rules could easily be fooled into matching |
||||
* such bytes to ordinary ASCII characters. Our solution for this is to |
||||
* substitute 0xFF for each non-first byte within the data presented to flex. |
||||
* The flex rules will then pass the FF's through unmolested. The |
||||
* psqlscan_emit() subroutine is responsible for looking back to the original |
||||
* string and replacing FF's with the corresponding original bytes. |
||||
* |
||||
* Another interesting thing we do here is scan different parts of the same |
||||
* input with physically separate flex lexers (ie, lexers written in separate |
||||
* .l files). We can get away with this because the only part of the |
||||
* persistent state of a flex lexer that depends on its parsing rule tables |
||||
* is the start state number, which is easy enough to manage --- usually, |
||||
* in fact, we just need to set it to INITIAL when changing lexers. But to |
||||
* make that work at all, we must use re-entrant lexers, so that all the |
||||
* relevant state is in the yyscan_t attached to the PsqlScanState; |
||||
* if we were using lexers with separate static state we would soon end up |
||||
* with dangling buffer pointers in one or the other. Also note that this |
||||
* is unlikely to work very nicely if the lexers aren't all built with the |
||||
* same flex version. |
||||
* |
||||
* Copyright (c) 2000-2016, PostgreSQL Global Development Group |
||||
* |
||||
* src/bin/psql/psqlscan_int.h |
||||
*/ |
||||
#ifndef PSQLSCAN_INT_H |
||||
#define PSQLSCAN_INT_H |
||||
|
||||
#include "psqlscan.h" |
||||
|
||||
/*
 * Supply the YY_BUFFER_STATE typedef ourselves when nothing included so far
 * has provided it; this is only here so that the header can be compiled on
 * its own.
 */
#if !defined(YY_TYPEDEF_YY_BUFFER_STATE)
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif   /* YY_TYPEDEF_YY_BUFFER_STATE */
||||
|
||||
/*
|
||||
* We use a stack of flex buffers to handle substitution of psql variables. |
||||
* Each stacked buffer contains the as-yet-unread text from one psql variable. |
||||
* When we pop the stack all the way, we resume reading from the outer buffer |
||||
* identified by scanbufhandle. |
||||
*/ |
||||
typedef struct StackElem |
||||
{ |
||||
YY_BUFFER_STATE buf; /* flex input control structure */ |
||||
char *bufstring; /* data actually being scanned by flex */ |
||||
char *origstring; /* copy of original data, if needed */ |
||||
char *varname; /* name of variable providing data, or NULL */ |
||||
struct StackElem *next; |
||||
} StackElem; |
||||
|
||||
/*
|
||||
* All working state of the lexer must be stored in PsqlScanStateData |
||||
* between calls. This allows us to have multiple open lexer operations, |
||||
* which is needed for nested include files. The lexer itself is not |
||||
* recursive, but it must be re-entrant. |
||||
*/ |
||||
typedef struct PsqlScanStateData |
||||
{ |
||||
yyscan_t scanner; /* Flex's state for this PsqlScanState */ |
||||
|
||||
PQExpBuffer output_buf; /* current output buffer */ |
||||
|
||||
StackElem *buffer_stack; /* stack of variable expansion buffers */ |
||||
|
||||
/*
|
||||
* These variables always refer to the outer buffer, never to any stacked |
||||
* variable-expansion buffer. |
||||
*/ |
||||
YY_BUFFER_STATE scanbufhandle; |
||||
char *scanbuf; /* start of outer-level input buffer */ |
||||
const char *scanline; /* current input line at outer level */ |
||||
|
||||
/* safe_encoding, curline, refline are used by emit() to replace FFs */ |
||||
int encoding; /* encoding being used now */ |
||||
bool safe_encoding; /* is current encoding "safe"? */ |
||||
bool std_strings; /* are string literals standard? */ |
||||
const char *curline; /* actual flex input string for cur buf */ |
||||
const char *refline; /* original data for cur buffer */ |
||||
|
||||
/*
|
||||
* All this state lives across successive input lines, until explicitly |
||||
* reset by psql_scan_reset. start_state is adopted by yylex() on entry, |
||||
* and updated with its finishing state on exit. |
||||
*/ |
||||
int start_state; /* yylex's starting/finishing state */ |
||||
int paren_depth; /* depth of nesting in parentheses */ |
||||
int xcdepth; /* depth of nesting in slash-star comments */ |
||||
char *dolqstart; /* current $foo$ quote start string */ |
||||
|
||||
/*
|
||||
* Callback functions provided by the program making use of the lexer. |
||||
*/ |
||||
const PsqlScanCallbacks *callbacks; |
||||
} PsqlScanStateData; |
||||
|
||||
|
||||
/*
|
||||
* Functions exported by psqlscan.l, but only meant for use within |
||||
* compatible lexers. |
||||
*/ |
||||
extern void psqlscan_push_new_buffer(PsqlScanState state, |
||||
const char *newstr, const char *varname); |
||||
extern void psqlscan_pop_buffer_stack(PsqlScanState state); |
||||
extern void psqlscan_select_top_buffer(PsqlScanState state); |
||||
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, |
||||
const char *txt, int len, |
||||
char **txtcopy); |
||||
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); |
||||
extern char *psqlscan_extract_substring(PsqlScanState state, |
||||
const char *txt, int len); |
||||
extern void psqlscan_escape_variable(PsqlScanState state, |
||||
const char *txt, int len, |
||||
bool as_ident); |
||||
|
||||
#endif /* PSQLSCAN_INT_H */ |
||||
@ -0,0 +1,35 @@ |
||||
/*
|
||||
* psql - the PostgreSQL interactive terminal |
||||
* |
||||
* Copyright (c) 2000-2016, PostgreSQL Global Development Group |
||||
* |
||||
* src/bin/psql/psqlscanslash.h |
||||
*/ |
||||
#ifndef PSQLSCANSLASH_H |
||||
#define PSQLSCANSLASH_H |
||||
|
||||
#include "psqlscan.h" |
||||
|
||||
|
||||
/*
 * The ways in which psql_scan_slash_option can be told to treat the
 * parameter word it scans.
 */
enum slash_option_type
{
	OT_NORMAL,					/* the ordinary case */
	OT_SQLID,					/* handle it as an SQL identifier */
	OT_SQLIDHACK,				/* SQL identifier, but leave letter case alone */
	OT_FILEPIPE,				/* a file name or a pipe */
	OT_WHOLE_LINE,				/* simply take the remainder of the line */
	OT_NO_EVAL					/* don't expand backticks or variables */
};
||||
|
||||
|
||||
/* Scan the name of a backslash command; the result is malloc'd */
extern char *psql_scan_slash_command(PsqlScanState state);

/* Scan the next backslash-command argument; malloc'd result, or NULL */
extern char *psql_scan_slash_option(PsqlScanState state,
									enum slash_option_type type,
									char *quote,
									bool semicolon);

/* Consume any unused \\ that terminates a backslash command */
extern void psql_scan_slash_command_end(PsqlScanState state);
||||
|
||||
#endif /* PSQLSCANSLASH_H */ |
||||
@ -0,0 +1,735 @@ |
||||
%top{ |
||||
/*------------------------------------------------------------------------- |
||||
* |
||||
* psqlscanslash.l |
||||
* lexical scanner for psql backslash commands |
||||
* |
||||
* XXX Avoid creating backtracking cases --- see the backend lexer for info. |
||||
* |
||||
* See psqlscan_int.h for additional commentary. |
||||
* |
||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* src/bin/psql/psqlscanslash.l |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres_fe.h" |
||||
|
||||
#include "psqlscanslash.h" |
||||
|
||||
#include "libpq-fe.h" |
||||
} |
||||
|
||||
%{
#include "psqlscan_int.h"

/*
 * yyextra is declared to be a PsqlScanState, so that it can serve as the
 * pointer back to the state structure that contains the scanner.
 */
#define YY_EXTRA_TYPE PsqlScanState

/*
 * Scratch variables that need not survive from one call to the next.
 * Keeping them static is admittedly a bit of a hack, but parking them in
 * PsqlScanStateData would be klugy as well.
 */
static enum slash_option_type option_type;
static char *option_quote;
static int	unquoted_option_chars;
static int	backtick_start_offset;


/* Values that yylex() hands back */
#define LEXRES_EOL			0	/* the input is exhausted */
#define LEXRES_OK			1	/* a backslash argument completed normally */


static void evaluate_backtick(PsqlScanState state);

#define ECHO psqlscan_emit(cur_state, yytext, yyleng)

/*
 * flex 2.5.35 has a bug: it emits a couple of functions without emitting
 * declarations for them, and with -Wmissing-prototypes that produces
 * warnings.  Declaring them here ourselves should remain harmless once the
 * bug has been fixed.
 */
extern int	slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);

%}
||||
|
||||
%option reentrant 8bit never-interactive
%option nodefault noinput nounput noyywrap
%option warn
%option prefix="slash_yy"

/*
 * A quick reminder of how lex/flex chooses among rules:
 * the pattern giving the longest match on the input always wins, and when
 * several patterns match the same length, the one listed first wins.
 * Rules without a start condition apply in INITIAL, the starting state.
 * While an exclusive state is active, only the rules written for that
 * state are considered.
 */

/* Exclusive states for lexing backslash commands */
%x xslashcmd xslashargstart xslasharg xslashquote
%x xslashbackquote xslashdquote xslashwholeline xslashend

/*
 * Character classes; these are meant to agree with the definitions of the
 * same names in psqlscan.l.
 */
space			[ \t\n\r\f]
quote			'
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xqdouble		{quote}{quote}
dquote			\"
variable_char	[A-Za-z\200-\377_0-9]

other			.
||||
|
||||
%% |
||||
|
||||
%{
		/* Convenience locals, declared inside yylex() itself */
		PsqlScanState cur_state = yyextra;
		PQExpBuffer output_buf = cur_state->output_buf;

		/*
		 * Put flex into whatever state start_state requests.  There are two
		 * reasons for doing it this way: the functions further down can pick
		 * a new starting state without reaching into flex's variables, and
		 * control can pass from one flex lexer to another, so that separate
		 * lexers can handle different parts of the same source string.
		 */
		BEGIN(cur_state->start_state);
%}

	/*
	 * This lexer is not really expected to be entered in the INITIAL
	 * state.  Should that happen anyway, just copy the input to output_buf
	 * until EOF.
	 */

{other}|\n		{ ECHO; }
||||
|
||||
/* |
||||
* Exclusive lexer states to handle backslash command lexing |
||||
*/ |
||||
|
||||
<xslashcmd>{ |
||||
/* command name ends at whitespace or backslash; eat all else */ |
||||
|
||||
{space}|"\\" { |
||||
yyless(0); |
||||
cur_state->start_state = YY_START; |
||||
return LEXRES_OK; |
||||
} |
||||
|
||||
{other} { ECHO; } |
||||
|
||||
} |
||||
|
||||
<xslashargstart>{
	/*
	 * Skip whitespace in front of the argument and then continue in the
	 * xslasharg state.  "|" is looked at here because it is special only
	 * when it is the first character of the argument.
	 */

{space}+		{ }

"|"				{
					if (option_type != OT_FILEPIPE)
					{
						/* an ordinary character: rescan it as argument text */
						yyless(0);
						BEGIN(xslasharg);
					}
					else
					{
						/* a pipe: handle it like the whole-string case */
						ECHO;
						BEGIN(xslashwholeline);
					}
				}

{other}			{
					/* first character of the argument; rescan it there */
					yyless(0);
					BEGIN(xslasharg);
				}

}
||||
|
||||
<xslasharg>{
	/*
	 * Ordinary text inside the argument of a slash command.
	 *
	 * unquoted_option_chars is the count of characters at the tail of the
	 * argument that did not come from any kind of quoting.  It is what
	 * allows psql_scan_slash_option to strip trailing semicolons safely.
	 */

{space}|"\\"	{
					/*
					 * An unquoted space ends the argument and a backslash
					 * ends the command (or begins the next one); neither is
					 * consumed here.
					 *
					 * XXX as a consequence, options containing unquoted
					 * backslashes cannot conveniently be accepted, which
					 * makes option processing that encourages the use of
					 * backslashes rather broken.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote: not copied to the output */
					*option_quote = '\'';
					unquoted_option_chars = 0;
					BEGIN(xslashquote);
				}

"`"				{
					/* note where the command text will begin in the output */
					backtick_start_offset = output_buf->len;
					*option_quote = '`';
					unquoted_option_chars = 0;
					BEGIN(xslashbackquote);
				}

{dquote}		{
					/* opening double quote: copied to the output */
					ECHO;
					*option_quote = '"';
					unquoted_option_chars = 0;
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* This may be a psql variable reference */
					if (option_type != OT_NO_EVAL &&
						cur_state->callbacks->get_variable != NULL)
					{
						char	   *varname;
						char	   *value;

						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   false,
																   false);
						free(varname);

						/*
						 * A value, when there is one, goes to the output
						 * as-is, with no further examination.  That matches
						 * what the pre-8.0 code did, though not how variables
						 * are treated outside backslash commands.  There is
						 * no need to guard against recursion here.
						 */
						if (value != NULL)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;

						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					if (option_type != OT_NO_EVAL)
					{
						psqlscan_escape_variable(cur_state, yytext, yyleng, false);
						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					if (option_type != OT_NO_EVAL)
					{
						psqlscan_escape_variable(cur_state, yytext, yyleng, true);
						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}

:'{variable_char}*	|
:\"{variable_char}*	{
					/*
					 * No closing quote follows, so keep only the colon and
					 * push everything after it back for rescanning.
					 */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

{other}			{
					unquoted_option_chars++;
					ECHO;
				}

}
||||
|
||||
<xslashquote>{
	/*
	 * Inside single quotes the text is copied literally, with two
	 * exceptions: '' and backslash sequences.
	 */

{quote}			{ BEGIN(xslasharg); }

{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal escape: the digits follow the backslash */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex escape: the digits follow the backslash and 'x' */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

"\\".			{
					/* other escaped character: emit it without the backslash */
					psqlscan_emit(cur_state, yytext + 1, 1);
				}

{other}|\n		{ ECHO; }

}
||||
|
||||
<xslashbackquote>{
	/*
	 * Inside backticks everything up to the next backquote is copied, and
	 * then the collected text is evaluated.
	 *
	 * XXX Possible future behavioral change: substitute for :VARIABLE?
	 */

"`"				{
					/* NO_EVAL mode leaves the command unevaluated */
					if (option_type != OT_NO_EVAL)
						evaluate_backtick(cur_state);
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}
||||
|
||||
<xslashdquote>{
	/*
	 * Inside double quotes the text is copied verbatim, and the double
	 * quotes themselves are copied too.
	 */

{dquote}		{
					/* closing quote: copy it and go back to plain text */
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}
||||
|
||||
<xslashwholeline>{
	/*
	 * Copy everything up to the end of the input line, except that
	 * whitespace at the very beginning is suppressed.
	 */

{space}+		{
					/* whitespace counts only once something has been output */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}
||||
|
||||
<xslashend>{
	/*
	 * At the end of a command a double backslash is consumed; nothing else
	 * is.
	 */

"\\\\"			{
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}|\n		{
					/* anything else is left unread */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

}
||||
|
||||
/* |
||||
* psql uses a single <<EOF>> rule, unlike the backend. |
||||
*/ |
||||
|
||||
<<EOF>> { |
||||
if (cur_state->buffer_stack == NULL) |
||||
{ |
||||
cur_state->start_state = YY_START; |
||||
return LEXRES_EOL; /* end of input reached */ |
||||
} |
||||
|
||||
/* |
||||
* We were expanding a variable, so pop the inclusion |
||||
* stack and keep lexing |
||||
*/ |
||||
psqlscan_pop_buffer_stack(cur_state); |
||||
psqlscan_select_top_buffer(cur_state); |
||||
} |
||||
|
||||
%% |
||||
|
||||
/* |
||||
* Scan the command name of a psql backslash command. This should be called |
||||
* after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input |
||||
* has been consumed through the leading backslash. |
||||
* |
||||
* The return value is a malloc'd copy of the command name, as parsed off |
||||
* from the input. |
||||
*/ |
||||
char * |
||||
psql_scan_slash_command(PsqlScanState state) |
||||
{ |
||||
PQExpBufferData mybuf; |
||||
|
||||
/* Must be scanning already */ |
||||
Assert(state->scanbufhandle != NULL); |
||||
|
||||
/* Build a local buffer that we'll return the data of */ |
||||
initPQExpBuffer(&mybuf); |
||||
|
||||
/* Set current output target */ |
||||
state->output_buf = &mybuf; |
||||
|
||||
/* Set input source */ |
||||
if (state->buffer_stack != NULL) |
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); |
||||
else |
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner); |
||||
|
||||
/* |
||||
* Set lexer start state. Note that this is sufficient to switch |
||||
* state->scanner over to using the tables in this lexer file. |
||||
*/ |
||||
state->start_state = xslashcmd; |
||||
|
||||
/* And lex. */ |
||||
yylex(state->scanner); |
||||
|
||||
/* There are no possible errors in this lex state... */ |
||||
|
||||
/* |
||||
* In case the caller returns to using the regular SQL lexer, reselect the |
||||
* appropriate initial state. |
||||
*/ |
||||
psql_scan_reselect_sql_lexer(state); |
||||
|
||||
return mybuf.data; |
||||
} |
||||
|
||||
/* |
||||
* Parse off the next argument for a backslash command, and return it as a |
||||
* malloc'd string. If there are no more arguments, returns NULL. |
||||
* |
||||
* type tells what processing, if any, to perform on the option string; |
||||
* for example, if it's a SQL identifier, we want to downcase any unquoted |
||||
* letters. |
||||
* |
||||
* if quote is not NULL, *quote is set to 0 if no quoting was found, else |
||||
* the last quote symbol used in the argument. |
||||
* |
||||
* if semicolon is true, unquoted trailing semicolon(s) that would otherwise |
||||
* be taken as part of the option string will be stripped. |
||||
* |
||||
* NOTE: the only possible syntax errors for backslash options are unmatched |
||||
* quotes, which are detected when we run out of input. Therefore, on a |
||||
* syntax error we just throw away the string and return NULL; there is no |
||||
* need to worry about flushing remaining input. |
||||
*/ |
||||
char * |
||||
psql_scan_slash_option(PsqlScanState state, |
||||
enum slash_option_type type, |
||||
char *quote, |
||||
bool semicolon) |
||||
{ |
||||
PQExpBufferData mybuf; |
||||
int lexresult PG_USED_FOR_ASSERTS_ONLY; |
||||
int final_state; |
||||
char local_quote; |
||||
|
||||
/* Must be scanning already */ |
||||
Assert(state->scanbufhandle != NULL); |
||||
|
||||
if (quote == NULL) |
||||
quote = &local_quote; |
||||
*quote = 0; |
||||
|
||||
/* Build a local buffer that we'll return the data of */ |
||||
initPQExpBuffer(&mybuf); |
||||
|
||||
/* Set up static variables that will be used by yylex */ |
||||
option_type = type; |
||||
option_quote = quote; |
||||
unquoted_option_chars = 0; |
||||
|
||||
/* Set current output target */ |
||||
state->output_buf = &mybuf; |
||||
|
||||
/* Set input source */ |
||||
if (state->buffer_stack != NULL) |
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); |
||||
else |
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner); |
||||
|
||||
/* Set lexer start state */ |
||||
if (type == OT_WHOLE_LINE) |
||||
state->start_state = xslashwholeline; |
||||
else |
||||
state->start_state = xslashargstart; |
||||
|
||||
/* And lex. */ |
||||
lexresult = yylex(state->scanner); |
||||
|
||||
/* Save final state for a moment... */ |
||||
final_state = state->start_state; |
||||
|
||||
/* |
||||
* In case the caller returns to using the regular SQL lexer, reselect the |
||||
* appropriate initial state. |
||||
*/ |
||||
psql_scan_reselect_sql_lexer(state); |
||||
|
||||
/* |
||||
* Check the lex result: we should have gotten back either LEXRES_OK |
||||
* or LEXRES_EOL (the latter indicating end of string). If we were inside |
||||
* a quoted string, as indicated by final_state, EOL is an error. |
||||
*/ |
||||
Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); |
||||
|
||||
switch (final_state) |
||||
{ |
||||
case xslashargstart: |
||||
/* empty arg */ |
||||
break; |
||||
case xslasharg: |
||||
/* Strip any unquoted trailing semi-colons if requested */ |
||||
if (semicolon) |
||||
{ |
||||
while (unquoted_option_chars-- > 0 && |
||||
mybuf.len > 0 && |
||||
mybuf.data[mybuf.len - 1] == ';') |
||||
{ |
||||
mybuf.data[--mybuf.len] = '\0'; |
||||
} |
||||
} |
||||
|
||||
/* |
||||
* If SQL identifier processing was requested, then we strip out |
||||
* excess double quotes and downcase unquoted letters. |
||||
* Doubled double-quotes become output double-quotes, per spec. |
||||
* |
||||
* Note that a string like FOO"BAR"BAZ will be converted to |
||||
* fooBARbaz; this is somewhat inconsistent with the SQL spec, |
||||
* which would have us parse it as several identifiers. But |
||||
* for psql's purposes, we want a string like "foo"."bar" to |
||||
* be treated as one option, so there's little choice. |
||||
*/ |
||||
if (type == OT_SQLID || type == OT_SQLIDHACK) |
||||
{ |
||||
bool inquotes = false; |
||||
char *cp = mybuf.data; |
||||
|
||||
while (*cp) |
||||
{ |
||||
if (*cp == '"') |
||||
{ |
||||
if (inquotes && cp[1] == '"') |
||||
{ |
||||
/* Keep the first quote, remove the second */ |
||||
cp++; |
||||
} |
||||
inquotes = !inquotes; |
||||
/* Collapse out quote at *cp */ |
||||
memmove(cp, cp + 1, strlen(cp)); |
||||
mybuf.len--; |
||||
/* do not advance cp */ |
||||
} |
||||
else |
||||
{ |
||||
if (!inquotes && type == OT_SQLID) |
||||
*cp = pg_tolower((unsigned char) *cp); |
||||
cp += PQmblen(cp, state->encoding); |
||||
} |
||||
} |
||||
} |
||||
break; |
||||
case xslashquote: |
||||
case xslashbackquote: |
||||
case xslashdquote: |
||||
/* must have hit EOL inside quotes */ |
||||
state->callbacks->write_error("unterminated quoted string\n"); |
||||
termPQExpBuffer(&mybuf); |
||||
return NULL; |
||||
case xslashwholeline: |
||||
/* always okay */ |
||||
break; |
||||
default: |
||||
/* can't get here */ |
||||
fprintf(stderr, "invalid YY_START\n"); |
||||
exit(1); |
||||
} |
||||
|
||||
/* |
||||
* An unquoted empty argument isn't possible unless we are at end of |
||||
* command. Return NULL instead. |
||||
*/ |
||||
if (mybuf.len == 0 && *quote == 0) |
||||
{ |
||||
termPQExpBuffer(&mybuf); |
||||
return NULL; |
||||
} |
||||
|
||||
/* Else return the completed string. */ |
||||
return mybuf.data; |
||||
} |
||||
|
||||
/* |
||||
* Eat up any unused \\ to complete a backslash command. |
||||
*/ |
||||
void |
||||
psql_scan_slash_command_end(PsqlScanState state) |
||||
{ |
||||
/* Must be scanning already */ |
||||
Assert(state->scanbufhandle != NULL); |
||||
|
||||
/* Set current output target */ |
||||
state->output_buf = NULL; /* we won't output anything */ |
||||
|
||||
/* Set input source */ |
||||
if (state->buffer_stack != NULL) |
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); |
||||
else |
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner); |
||||
|
||||
/* Set lexer start state */ |
||||
state->start_state = xslashend; |
||||
|
||||
/* And lex. */ |
||||
yylex(state->scanner); |
||||
|
||||
/* There are no possible errors in this lex state... */ |
||||
|
||||
/* |
||||
* We expect the caller to return to using the regular SQL lexer, so |
||||
* reselect the appropriate initial state. |
||||
*/ |
||||
psql_scan_reselect_sql_lexer(state); |
||||
} |
||||
|
||||
/* |
||||
* Evaluate a backticked substring of a slash command's argument. |
||||
* |
||||
* The portion of output_buf starting at backtick_start_offset is evaluated |
||||
* as a shell command and then replaced by the command's output. |
||||
*/ |
||||
static void |
||||
evaluate_backtick(PsqlScanState state) |
||||
{ |
||||
PQExpBuffer output_buf = state->output_buf; |
||||
char *cmd = output_buf->data + backtick_start_offset; |
||||
PQExpBufferData cmd_output; |
||||
FILE *fd; |
||||
bool error = false; |
||||
char buf[512]; |
||||
size_t result; |
||||
|
||||
initPQExpBuffer(&cmd_output); |
||||
|
||||
fd = popen(cmd, PG_BINARY_R); |
||||
if (!fd) |
||||
{ |
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); |
||||
error = true; |
||||
} |
||||
|
||||
if (!error) |
||||
{ |
||||
do |
||||
{ |
||||
result = fread(buf, 1, sizeof(buf), fd); |
||||
if (ferror(fd)) |
||||
{ |
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); |
||||
error = true; |
||||
break; |
||||
} |
||||
appendBinaryPQExpBuffer(&cmd_output, buf, result); |
||||
} while (!feof(fd)); |
||||
} |
||||
|
||||
if (fd && pclose(fd) == -1) |
||||
{ |
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); |
||||
error = true; |
||||
} |
||||
|
||||
if (PQExpBufferDataBroken(cmd_output)) |
||||
{ |
||||
state->callbacks->write_error("%s: out of memory\n", cmd); |
||||
error = true; |
||||
} |
||||
|
||||
/* Now done with cmd, delete it from output_buf */ |
||||
output_buf->len = backtick_start_offset; |
||||
output_buf->data[output_buf->len] = '\0'; |
||||
|
||||
/* If no error, transfer result to output_buf */ |
||||
if (!error) |
||||
{ |
||||
/* strip any trailing newline */ |
||||
if (cmd_output.len > 0 && |
||||
cmd_output.data[cmd_output.len - 1] == '\n') |
||||
cmd_output.len--; |
||||
appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); |
||||
} |
||||
|
||||
termPQExpBuffer(&cmd_output); |
||||
} |
||||
Loading…
Reference in new issue