From 928311a463d480ca566e2905a369ac6aa0c3e210 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 26 Aug 2011 13:52:23 -0400 Subject: [PATCH] Clean up weird corner cases in lexing of psql meta-command arguments. These changes allow backtick command evaluation and psql variable interpolation to happen on substrings of a single meta-command argument. Formerly, no such evaluations happened at all if the backtick or colon wasn't the first character of the argument, and we considered an argument completed as soon as we'd processed one backtick, variable reference, or quoted substring. A string like 'FOO'BAR was thus taken as two arguments not one, not exactly what one would expect. In the new coding, an argument is considered terminated only by unquoted whitespace or backslash. Also, clean up a bunch of omissions, infelicities and outright errors in the psql documentation of variables and metacommand argument syntax. --- doc/src/sgml/ref/psql-ref.sgml | 171 +++++++++------- src/bin/psql/command.c | 2 +- src/bin/psql/psqlscan.h | 2 +- src/bin/psql/psqlscan.l | 347 ++++++++++++++++----------------- 4 files changed, 275 insertions(+), 247 deletions(-) diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 7e30c57c35..b20d64cc05 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -156,8 +156,8 @@ PostgreSQL documentation Use the file filename as the source of commands instead of reading commands interactively. After the file is processed, psql - terminates. This is in many ways equivalent to the internal - command \i. + terminates. This is in many ways equivalent to the meta-command + \i. @@ -223,7 +223,7 @@ PostgreSQL documentation List all available databases, then exit. Other non-connection - options are ignored. This is similar to the internal command + options are ignored. This is similar to the meta-command \list. @@ -393,9 +393,9 @@ PostgreSQL documentation Perform a variable assignment, like the \set - internal command. Note that you must separate name and value, if + meta-command. Note that you must separate name and value, if any, by an equal sign on the command line. To unset a variable, - leave off the equal sign. To just set a variable without a value, + leave off the equal sign. To set a variable with an empty value, use the equal sign but leave off the value. These assignments are done during a very early stage of start-up, so variables reserved for internal purposes might get overwritten later. @@ -659,32 +659,32 @@ testdb=> - To include whitespace into an argument you can quote it with a - single quote. To include a single quote into such an argument, - use two single quotes. Anything contained in single quotes is + To include whitespace in an argument you can quote it with + single quotes. To include a single quote in an argument, + write two single quotes within single-quoted text. + Anything contained in single quotes is furthermore subject to C-like substitutions for \n (new line), \t (tab), + \b (backspace), \r (carriage return), + \f (form feed), \digits (octal), and \xdigits (hexadecimal). + A backslash preceding any other character within single-quoted text + quotes that single character, whatever it is. - If an unquoted argument begins with a colon (:), - it is taken as a psql variable and the value of the - variable is used as the argument instead. If the variable name is - surrounded by single quotes (e.g. :'var'), it - will be escaped as an SQL literal and the result will be used as - the argument. If the variable name is surrounded by double quotes, - it will be escaped as an SQL identifier and the result will be used - as the argument. + Within an argument, text that is enclosed in backquotes + (`) is taken as a command line that is passed to the + shell. The output of the command (with any trailing newline removed) + replaces the backquoted text. - Arguments that are enclosed in backquotes (`) - are taken as a command line that is passed to the shell. The - output of the command (with any trailing newline removed) is taken - as the argument value. The above escape sequences also apply in - backquotes. + If an unquoted colon (:) followed by a + psql variable name appears within an argument, it is + replaced by the variable's value, as described in . @@ -1803,15 +1803,16 @@ lo_import 152801 \prompt [ text ] name - Prompts the user to set variable name. An optional prompt, name. + An optional prompt string, text, can be specified. (For multiword - prompts, use single quotes.) + prompts, surround the text with single quotes.) By default, \prompt uses the terminal for input and - output. However, if the @@ -2197,14 +2198,19 @@ lo_import 152801 - Sets the internal variable psql variable name to value or, if more than one value - is given, to the concatenation of all of them. If no second - argument is given, the variable is just set with no value. To + class="parameter">value, or if more than one value + is given, to the concatenation of all of them. If only one + argument is given, the variable is set with an empty value. To unset a variable, use the \unset command. + + \set without any arguments displays the names and values + of all currently-set psql variables. + + Valid variable names can contain letters, digits, and underscores. See the section - This command is totally separate from the SQL + This command is unrelated to the SQL command . @@ -2293,6 +2299,18 @@ lo_import 152801 + + \unset name + + + + Unsets (deletes) the psql variable name. + + + + + \w filename \w |command @@ -2467,18 +2485,28 @@ lo_import 152801 To set a variable, use the psql meta-command - \set: + \set. For example, testdb=> \set foo bar sets the variable foo to the value bar. To retrieve the content of the variable, precede - the name with a colon and use it as the argument of any slash - command: + the name with a colon, for example: testdb=> \echo :foo bar - + + This works in both regular SQL commands and meta-commands; there is + more detail in , below. + + + + If you call \set without a second argument, the + variable is set, with an empty string as value. To unset (i.e., delete) + a variable, use the command \unset. To show the + values of all variables, call \set without any argument. + @@ -2495,12 +2523,6 @@ bar - - If you call \set without a second argument, the - variable is set, with an empty string as value. To unset (or delete) a - variable, use the command \unset. - - A number of these variables are treated specially by psql. They represent certain option @@ -2863,47 +2885,57 @@ bar - - <acronym>SQL</acronym> Interpolation + + <acronym>SQL</acronym> Interpolation - An additional useful feature of psql + A key feature of psql variables is that you can substitute (interpolate) - them into regular SQL statements. - psql provides special facilities for - ensuring that values used as SQL literals and identifiers are - properly escaped. The syntax for interpolating a value without - any special escaping is again to prepend the variable name with a colon - (:): + them into regular SQL statements, as well as the + arguments of meta-commands. Furthermore, + psql provides facilities for + ensuring that variable values used as SQL literals and identifiers are + properly quoted. The syntax for interpolating a value without + any quoting is to prepend the variable name with a colon + (:). For example, testdb=> \set foo 'my_table' testdb=> SELECT * FROM :foo; - would then query the table my_table. Note that this + would query the table my_table. Note that this may be unsafe: the value of the variable is copied literally, so it can - even contain unbalanced quotes or backslash commands. You must make sure + contain unbalanced quotes, or even backslash commands. You must make sure that it makes sense where you put it. When a value is to be used as an SQL literal or identifier, it is - safest to arrange for it to be escaped. To escape the value of + safest to arrange for it to be quoted. To quote the value of a variable as an SQL literal, write a colon followed by the variable - name in single quotes. To escape the value an SQL identifier, write - a colon followed by the variable name in double quotes. The previous - example would be more safely written this way: + name in single quotes. To quote the value as an SQL identifier, write + a colon followed by the variable name in double quotes. + These constructs deal correctly with quotes and other special + characters embedded within the variable value. + The previous example would be more safely written this way: testdb=> \set foo 'my_table' testdb=> SELECT * FROM :"foo"; - Variable interpolation will not be performed into quoted - SQL entities. - One possible use of this mechanism is to - copy the contents of a file into a table column. First load the file into a - variable and then proceed as above: + Variable interpolation will not be performed within quoted + SQL literals and identifiers. Therefore, a + construction such as ':foo' doesn't work to produce a quoted + literal from a variable's value (and it would be unsafe if it did work, + since it wouldn't correctly handle quotes embedded in the value). + + + + One example use of this mechanism is to + copy the contents of a file into a table column. + First load the file into a variable and then interpolate the variable's + value as a quoted string: testdb=> \set content `cat my_file.txt` testdb=> INSERT INTO my_table VALUES (:'content'); @@ -2914,17 +2946,20 @@ testdb=> INSERT INTO my_table VALUES (:'content'); Since colons can legally appear in SQL commands, an apparent attempt - at interpolation (such as :name, + at interpolation (that is, :name, :'name', or :"name") is not - changed unless the named variable is currently set. In any case, you + replaced unless the named variable is currently set. In any case, you can escape a colon with a backslash to protect it from substitution. - (The colon syntax for variables is standard SQL for + + + + The colon syntax for variables is standard SQL for embedded query languages, such as ECPG. - The colon syntax for array slices and type casts are - PostgreSQL extensions, hence the - conflict. The colon syntax for escaping a variable's value as an - SQL literal or identifier is a psql - extension.) + The colon syntaxes for array slices and type casts are + PostgreSQL extensions, which can sometimes + conflict with the standard usage. The colon-quote syntax for escaping a + variable's value as an SQL literal or identifier is a + psql extension. diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 6d9cd6492f..2c389021be 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -121,7 +121,7 @@ HandleSlashCmds(PsqlScanState scan_state, /* eat any remaining arguments after a valid command */ /* note we suppress evaluation of backticks here */ while ((arg = psql_scan_slash_option(scan_state, - OT_VERBATIM, NULL, false))) + OT_NO_EVAL, NULL, false))) { psql_error("\\%s: extra argument \"%s\" ignored\n", cmd, arg); free(arg); diff --git a/src/bin/psql/psqlscan.h b/src/bin/psql/psqlscan.h index b2545d0ebf..6264def953 100644 --- a/src/bin/psql/psqlscan.h +++ b/src/bin/psql/psqlscan.h @@ -33,7 +33,7 @@ enum slash_option_type OT_SQLIDHACK, /* SQL identifier, but don't downcase */ OT_FILEPIPE, /* it's a filename or pipe */ OT_WHOLE_LINE, /* just snarf the rest of the line */ - OT_VERBATIM /* literal (no backticks or variables) */ + OT_NO_EVAL /* no expansion of backticks or variables */ }; diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index 1df8f3aa4f..d4a9d94a43 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -103,6 +103,8 @@ static PQExpBuffer output_buf; /* current output buffer */ /* these variables do not need to be saved across calls */ static enum slash_option_type option_type; static char *option_quote; +static int unquoted_option_chars; +static int backtick_start_offset; /* Return values from yylex() */ @@ -114,6 +116,7 @@ static char *option_quote; int yylex(void); +static void evaluate_backtick(void); static void push_new_buffer(const char *newstr, const char *varname); static void pop_buffer_stack(PsqlScanState state); static bool var_is_current_source(PsqlScanState state, const char *varname); @@ -182,11 +185,11 @@ static void escape_variable(bool as_ident); %x xus /* Additional exclusive states for psql only: lex backslash commands */ %x xslashcmd +%x xslashargstart %x xslasharg %x xslashquote %x xslashbackquote -%x xslashdefaultarg -%x xslashquotedarg +%x xslashdquote %x xslashwholeline %x xslashend @@ -900,17 +903,53 @@ other . } -{ - /* eat any whitespace, then decide what to do at first nonblank */ +{ + /* + * Discard any whitespace before argument, then go to xslasharg state. + * An exception is that "|" is only special at start of argument, so we + * check for it here. + */ {space}+ { } -"\\" { +"|" { + if (option_type == OT_FILEPIPE) + { + /* treat like whole-string case */ + ECHO; + BEGIN(xslashwholeline); + } + else + { + /* vertical bar is not special otherwise */ + yyless(0); + BEGIN(xslasharg); + } + } + +{other} { + yyless(0); + BEGIN(xslasharg); + } + +} + +{ + /* + * Default processing of text in a slash command's argument. + * + * Note: unquoted_option_chars counts the number of characters at the + * end of the argument that were not subject to any form of quoting. + * psql_scan_slash_option needs this to strip trailing semicolons safely. + */ + +{space}|"\\" { /* + * Unquoted space is end of arg; do not eat. Likewise * backslash is end of command or next command, do not eat * * XXX this means we can't conveniently accept options - * that start with a backslash; therefore, option + * that include unquoted backslashes; therefore, option * processing that encourages use of backslashes is rather * broken. */ @@ -920,26 +959,27 @@ other . {quote} { *option_quote = '\''; + unquoted_option_chars = 0; BEGIN(xslashquote); } "`" { - if (option_type == OT_VERBATIM) - { - /* in verbatim mode, backquote is not special */ - ECHO; - BEGIN(xslashdefaultarg); - } - else - { - *option_quote = '`'; - BEGIN(xslashbackquote); - } + backtick_start_offset = output_buf->len; + *option_quote = '`'; + unquoted_option_chars = 0; + BEGIN(xslashbackquote); + } + +{dquote} { + ECHO; + *option_quote = '"'; + unquoted_option_chars = 0; + BEGIN(xslashdquote); } :{variable_char}+ { /* Possible psql variable substitution */ - if (option_type == OT_VERBATIM) + if (option_type == OT_NO_EVAL) ECHO; else { @@ -959,71 +999,54 @@ other . */ if (value) appendPQExpBufferStr(output_buf, value); - } - - *option_quote = ':'; + else + ECHO; - return LEXRES_OK; + *option_quote = ':'; + } + unquoted_option_chars = 0; } :'{variable_char}+' { - if (option_type == OT_VERBATIM) + if (option_type == OT_NO_EVAL) ECHO; else { escape_variable(false); - return LEXRES_OK; + *option_quote = ':'; } + unquoted_option_chars = 0; } :\"{variable_char}+\" { - if (option_type == OT_VERBATIM) + if (option_type == OT_NO_EVAL) ECHO; else { escape_variable(true); - return LEXRES_OK; + *option_quote = ':'; } + unquoted_option_chars = 0; } :'{variable_char}* { /* Throw back everything but the colon */ yyless(1); + unquoted_option_chars++; ECHO; - BEGIN(xslashdefaultarg); } :\"{variable_char}* { /* Throw back everything but the colon */ yyless(1); + unquoted_option_chars++; ECHO; - BEGIN(xslashdefaultarg); - } - -"|" { - ECHO; - if (option_type == OT_FILEPIPE) - { - /* treat like whole-string case */ - BEGIN(xslashwholeline); - } - else - { - /* treat like default case */ - BEGIN(xslashdefaultarg); - } - } - -{dquote} { - *option_quote = '"'; - ECHO; - BEGIN(xslashquotedarg); } {other} { + unquoted_option_chars++; ECHO; - BEGIN(xslashdefaultarg); } } @@ -1034,7 +1057,7 @@ other . * sequences */ -{quote} { return LEXRES_OK; } +{quote} { BEGIN(xslasharg); } {xqdouble} { appendPQExpBufferChar(output_buf, '\''); } @@ -1064,55 +1087,28 @@ other . { /* - * backticked text: copy everything until next backquote or end of line. - * Invocation of the command will happen in psql_scan_slash_option. + * backticked text: copy everything until next backquote, then evaluate. + * + * XXX Possible future behavioral change: substitute for :VARIABLE? */ -"`" { return LEXRES_OK; } - -{other}|\n { ECHO; } - -} - -{ - /* - * Copy everything until unquoted whitespace or end of line. Quotes - * do not get stripped yet. - */ - -{space} { - yyless(0); - return LEXRES_OK; - } - -"\\" { - /* - * unquoted backslash is end of command or next command, - * do not eat - * - * (this was not the behavior pre-8.0, but it seems - * consistent) - */ - yyless(0); - return LEXRES_OK; - } - -{dquote} { - *option_quote = '"'; - ECHO; - BEGIN(xslashquotedarg); +"`" { + /* In NO_EVAL mode, don't evaluate the command */ + if (option_type != OT_NO_EVAL) + evaluate_backtick(); + BEGIN(xslasharg); } -{other} { ECHO; } +{other}|\n { ECHO; } } -{ - /* double-quoted text within a default-type argument: copy */ +{ + /* double-quoted text: copy verbatim, including the double quotes */ {dquote} { ECHO; - BEGIN(xslashdefaultarg); + BEGIN(xslasharg); } {other}|\n { ECHO; } @@ -1461,7 +1457,7 @@ psql_scan_slash_command(PsqlScanState state) * letters. * * if quote is not NULL, *quote is set to 0 if no quoting was found, else - * the quote symbol. + * the last quote symbol used in the argument. * * if semicolon is true, unquoted trailing semicolon(s) that would otherwise * be taken as part of the option string will be stripped. @@ -1480,7 +1476,6 @@ psql_scan_slash_option(PsqlScanState state, PQExpBufferData mybuf; int lexresult; char local_quote; - bool badarg; /* Must be scanning already */ psql_assert(state->scanbufhandle); @@ -1497,6 +1492,7 @@ psql_scan_slash_option(PsqlScanState state, output_buf = &mybuf; option_type = type; option_quote = quote; + unquoted_option_chars = 0; if (state->buffer_stack != NULL) yy_switch_to_buffer(state->buffer_stack->buf); @@ -1506,7 +1502,7 @@ psql_scan_slash_option(PsqlScanState state, if (type == OT_WHOLE_LINE) BEGIN(xslashwholeline); else - BEGIN(xslasharg); + BEGIN(xslashargstart); /* And lex. */ lexresult = yylex(); @@ -1517,85 +1513,18 @@ psql_scan_slash_option(PsqlScanState state, * a quoted string, as indicated by YY_START, EOL is an error. */ psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); - badarg = false; + switch (YY_START) { - case xslasharg: - /* empty arg, or possibly a psql variable substitution */ - break; - case xslashquote: - if (lexresult != LEXRES_OK) - badarg = true; /* hit EOL not ending quote */ - break; - case xslashbackquote: - if (lexresult != LEXRES_OK) - badarg = true; /* hit EOL not ending quote */ - else - { - /* Perform evaluation of backticked command */ - char *cmd = mybuf.data; - FILE *fd; - bool error = false; - PQExpBufferData output; - char buf[512]; - size_t result; - - fd = popen(cmd, PG_BINARY_R); - if (!fd) - { - psql_error("%s: %s\n", cmd, strerror(errno)); - error = true; - } - - initPQExpBuffer(&output); - - if (!error) - { - do - { - result = fread(buf, 1, sizeof(buf), fd); - if (ferror(fd)) - { - psql_error("%s: %s\n", cmd, strerror(errno)); - error = true; - break; - } - appendBinaryPQExpBuffer(&output, buf, result); - } while (!feof(fd)); - } - - if (fd && pclose(fd) == -1) - { - psql_error("%s: %s\n", cmd, strerror(errno)); - error = true; - } - - if (PQExpBufferBroken(&output)) - { - psql_error("%s: out of memory\n", cmd); - error = true; - } - - /* Now done with cmd, transfer result to mybuf */ - resetPQExpBuffer(&mybuf); - - if (!error) - { - /* strip any trailing newline */ - if (output.len > 0 && - output.data[output.len - 1] == '\n') - output.len--; - appendBinaryPQExpBuffer(&mybuf, output.data, output.len); - } - - termPQExpBuffer(&output); - } + case xslashargstart: + /* empty arg */ break; - case xslashdefaultarg: - /* Strip any trailing semi-colons if requested */ + case xslasharg: + /* Strip any unquoted trailing semi-colons if requested */ if (semicolon) { - while (mybuf.len > 0 && + while (unquoted_option_chars-- > 0 && + mybuf.len > 0 && mybuf.data[mybuf.len - 1] == ';') { mybuf.data[--mybuf.len] = '\0'; @@ -1642,10 +1571,13 @@ psql_scan_slash_option(PsqlScanState state, } } break; - case xslashquotedarg: - /* must have hit EOL inside double quotes */ - badarg = true; - break; + case xslashquote: + case xslashbackquote: + case xslashdquote: + /* must have hit EOL inside quotes */ + psql_error("unterminated quoted string\n"); + termPQExpBuffer(&mybuf); + return NULL; case xslashwholeline: /* always okay */ break; @@ -1655,13 +1587,6 @@ psql_scan_slash_option(PsqlScanState state, exit(1); } - if (badarg) - { - psql_error("unterminated quoted string\n"); - termPQExpBuffer(&mybuf); - return NULL; - } - /* * An unquoted empty argument isn't possible unless we are at end of * command. Return NULL instead. @@ -1702,6 +1627,74 @@ psql_scan_slash_command_end(PsqlScanState state) /* There are no possible errors in this lex state... */ } +/* + * Evaluate a backticked substring of a slash command's argument. + * + * The portion of output_buf starting at backtick_start_offset is evaluated + * as a shell command and then replaced by the command's output. + */ +static void +evaluate_backtick(void) +{ + char *cmd = output_buf->data + backtick_start_offset; + PQExpBufferData cmd_output; + FILE *fd; + bool error = false; + char buf[512]; + size_t result; + + initPQExpBuffer(&cmd_output); + + fd = popen(cmd, PG_BINARY_R); + if (!fd) + { + psql_error("%s: %s\n", cmd, strerror(errno)); + error = true; + } + + if (!error) + { + do + { + result = fread(buf, 1, sizeof(buf), fd); + if (ferror(fd)) + { + psql_error("%s: %s\n", cmd, strerror(errno)); + error = true; + break; + } + appendBinaryPQExpBuffer(&cmd_output, buf, result); + } while (!feof(fd)); + } + + if (fd && pclose(fd) == -1) + { + psql_error("%s: %s\n", cmd, strerror(errno)); + error = true; + } + + if (PQExpBufferBroken(&cmd_output)) + { + psql_error("%s: out of memory\n", cmd); + error = true; + } + + /* Now done with cmd, delete it from output_buf */ + output_buf->len = backtick_start_offset; + output_buf->data[output_buf->len] = '\0'; + + /* If no error, transfer result to output_buf */ + if (!error) + { + /* strip any trailing newline */ + if (cmd_output.len > 0 && + cmd_output.data[cmd_output.len - 1] == '\n') + cmd_output.len--; + appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); + } + + termPQExpBuffer(&cmd_output); +} /* * Push the given string onto the stack of stuff to scan. -- GitLab