%{
/*-------------------------------------------------------------------------
 *
 * psqlscan.l
 *	  lexical scanner for psql
 *
 * This code is mainly needed to determine where the end of a SQL statement
 * is: we are looking for semicolons that are not within quotes, comments,
 * or parentheses.  The most reliable way to handle this is to borrow the
 * backend's flex lexer rules, lock, stock, and barrel.  The rules below
 * are (except for a few) the same as the backend's, but their actions are
 * just ECHO whereas the backend's actions generally do other things.
 *
 * XXX The rules in this file must be kept in sync with the backend lexer!!!
 *
 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
 *
 * The most difficult aspect of this code is that we need to work in multibyte
 * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
 * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
 * all our lexing rules treat all high-bit-set characters alike, we don't
 * really need to care whether such a byte is part of a sequence or not.
 * In an "unsafe" encoding, we still expect the first byte of a multibyte
 * sequence to be >= 0x80, but later bytes might not be.  If we scan such
 * a sequence as-is, the lexing rules could easily be fooled into matching
 * such bytes to ordinary ASCII characters.  Our solution for this is to
 * substitute 0xFF for each non-first byte within the data presented to flex.
 * The flex rules will then pass the FF's through unmolested.  The emit()
 * subroutine is responsible for looking back to the original string and
 * replacing FF's with the corresponding original bytes.
 *
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.19 2006/05/31 11:35:17 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include "psqlscan.h"

#include <ctype.h>

#include "mb/pg_wchar.h"

#include "common.h"
#include "settings.h"
#include "variables.h"


/*
 * We use a stack of flex buffers to handle substitution of psql variables.
 * Each stacked buffer contains the as-yet-unread text from one psql variable.
 * When we pop the stack all the way, we resume reading from the outer buffer
 * identified by scanbufhandle.
 */
typedef struct StackElem
{
	YY_BUFFER_STATE buf;		/* flex input control structure; released
								 * with yy_delete_buffer() when popped */
	char	   *bufstring;		/* data actually being scanned by flex;
								 * freed when the element is popped */
	char	   *origstring;		/* copy of original data, if needed; when
								 * NULL, bufstring is used as the
								 * reference text instead */
	struct StackElem *next;		/* next-outer stack entry, or NULL */
} StackElem;

/*
 * All working state of the lexer must be stored in PsqlScanStateData
 * between calls.  This allows us to have multiple open lexer operations,
 * which is needed for nested include files.  The lexer itself is not
 * recursive, but it must be re-entrant.
 */
typedef struct PsqlScanStateData
{
	StackElem  *buffer_stack;	/* stack of variable expansion buffers */

	/*
	 * These variables always refer to the outer buffer, never to any
	 * stacked variable-expansion buffer.
	 */
	YY_BUFFER_STATE scanbufhandle;	/* flex handle for the outer buffer;
									 * NULL when no scan is set up */
	char	   *scanbuf;		/* start of outer-level input buffer */
	const char *scanline;		/* current input line at outer level */

	/* safe_encoding, curline, refline are used by emit() to replace FFs */
	int			encoding;		/* encoding being used now */
	bool		safe_encoding;	/* is current encoding "safe"? */
	const char *curline;		/* actual flex input string for cur buf */
	const char *refline;		/* original data for cur buffer */

	/*
	 * All this state lives across successive input lines, until explicitly
	 * reset by psql_scan_reset.
	 */
	int			start_state;	/* saved YY_START */
	int			paren_depth;	/* depth of nesting in parentheses */
	int			xcdepth;		/* depth of nesting in slash-star comments */
	char	   *dolqstart;		/* current $foo$ quote start string */
} PsqlScanStateData;

static PsqlScanState cur_state;	/* current state while active */

static PQExpBuffer output_buf;	/* current output buffer */

/*
 * these variables do not need to be saved across calls
 *
 * option_type is consulted by the backslash-argument rules (e.g. to check
 * for OT_VERBATIM or OT_FILEPIPE); option_quote points at the place where
 * those rules record which quoting character introduced the argument.
 */
static enum slash_option_type option_type;
static char *option_quote;


/* Return values from yylex() */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_SEMI			1	/* command-terminating semicolon found */
#define LEXRES_BACKSLASH	2	/* backslash command start */
#define LEXRES_OK			3	/* OK completion of backslash argument */


/* the flex-generated scanner entry point */
int	yylex(void);

static void push_new_buffer(const char *newstr);
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
									  char **txtcopy);
static void emit(const char *txt, int len);

/*
 * Rule actions use ECHO to hand the matched text to emit(), replacing
 * flex's built-in ECHO.
 */
#define ECHO emit(yytext, yyleng)

%}

/*
 * Scanner options.  Note that "nodefault" suppresses flex's default rule,
 * so each start condition needs its own catch-all pattern, and "noyywrap"
 * means the scanner does not call yywrap() at end of input.
 */
%option 8bit
%option never-interactive
%option nodefault
%option nounput
%option noyywrap

/*
 * All of the following definitions and rules should exactly match
 * src/backend/parser/scan.l so far as the flex patterns are concerned.
 * The rule bodies are just ECHO as opposed to what the backend does,
 * however.  (But be sure to duplicate code that affects the lexing process,
 * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
 * scan.l has a separate one for each exclusive state.
 */

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 */

%x xb
%x xc
%x xd
%x xh
%x xe
%x xq
%x xdolq
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdefaultarg
%x xslashquotedarg
%x xslashwholeline
%x xslashend

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  Comments that start with -- and extend to the next
 * newline are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 */

space			[ \t\n\r\f]
horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]

comment			("--"{non_newline}*)

whitespace		({space}+|{comment})

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

special_whitespace		({space}+|{comment}{newline})
horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
quote			'
quotestop		{quote}{whitespace}*
quotecontinue	{quote}{whitespace_with_newline}{quote}
quotefail		{quote}{whitespace}*"-"

/* Bit string
 * It is tempting to scan the string for only those characters
 * which are allowed. However, this leads to silently swallowed
 * characters if illegal characters are included in the string.
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 * as a zero-length string, and the ABCD' is lost!
 * Better to pass the string forward and let the input routines
 * validate the contents.
 */
xbstart			[bB]{quote}
xbinside		[^']*

/* Hexadecimal number */
xhstart			[xX]{quote}
xhinside		[^']*

/* National character */
xnstart			[nN]{quote}

/* Quoted string that allows backslash escapes */
xestart			[eE]{quote}
xeinside		[^\\']+
xeescape		[\\][^0-7]
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}

/* Extended quote
 * xqdouble implements embedded quote, ''''
 */
xqstart			{quote}
xqdouble		{quote}{quote}
xqinside		[^']+

/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
dolqfailed		\${dolq_start}{dolq_cont}*
dolqinside		[^$]+

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * Dash-dash comments have similar interactions with the operator rule.
 */
xcstart			\/\*{op_chars}*
xcstop			\*+\/
xcinside		[^*/]+

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]

identifier		{ident_start}{ident_cont}*

typecast		"::"

/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator		{op_chars}+

/* We no longer allow unary minus in numbers.
 * Instead we pass it separately to the parser, where it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
real			({integer}|{decimal})[Ee][-+]?{digit}+
realfail1		({integer}|{decimal})[Ee]
realfail2		({integer}|{decimal})[Ee][-+]

param			\${integer}

other			.

/*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */

%%

{whitespace}	{
					/*
					 * This pattern covers true whitespace as well as
					 * single-line ("--" style) comments.  Whitespace is
					 * dropped while the query buffer is still empty, and
					 * single-line comments are always dropped; the latter
					 * is pretty dubious but is the historical behavior.
					 */
					if (output_buf->len != 0 && yytext[0] != '-')
						ECHO;
				}

{xcstart}		{
					cur_state->xcdepth = 0;
					BEGIN(xc);
					/* Put back any characters past slash-star; see above */
					yyless(2);
					ECHO;
				}

<xc>{xcstart}	{
					cur_state->xcdepth++;
					/* Put back any characters past slash-star; see above */
					yyless(2);
					ECHO;
				}

<xc>{xcstop}	{
					if (cur_state->xcdepth <= 0)
					{
						BEGIN(INITIAL);
					}
					else
						cur_state->xcdepth--;
					ECHO;
				}

<xc>{xcinside}	{
					ECHO;
				}

<xc>{op_chars}	{
					ECHO;
				}

408 409 410 411
<xc>\*+			{
					ECHO;
				}

412 413 414 415
{xbstart}		{
					BEGIN(xb);
					ECHO;
				}
416 417 418
<xb>{quotestop}	|
<xb>{quotefail} {
					yyless(1);
419 420 421 422 423 424 425
					BEGIN(INITIAL);
					ECHO;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					ECHO;
				}
426 427
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
428 429 430 431 432 433 434 435 436 437 438 439 440
					ECHO;
				}

{xhstart}		{
					/* Hexadecimal bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "x" on the string
					 * to mark it for the input routine as a hex string.
					 */
					BEGIN(xh);
					ECHO;
				}
441 442 443
<xh>{quotestop}	|
<xh>{quotefail} {
					yyless(1);
444 445 446 447 448
					BEGIN(INITIAL);
					ECHO;
				}

{xnstart}		{
449
					yyless(1);				/* eat only 'n' this time */
450 451 452 453
					ECHO;
				}

{xqstart}		{
454 455 456 457
					if (standard_strings())
						BEGIN(xq);
					else
						BEGIN(xe);
458 459
					ECHO;
				}
460
{xestart}		{
461
					BEGIN(xe);
462 463
					ECHO;
				}
464 465
<xq,xe>{quotestop}	|
<xq,xe>{quotefail} {
466
					yyless(1);
467 468 469
					BEGIN(INITIAL);
					ECHO;
				}
470
<xq,xe>{xqdouble} {
471 472 473 474 475
					ECHO;
				}
<xq>{xqinside}  {
					ECHO;
				}
476 477 478 479
<xe>{xeinside}  {
					ECHO;
				}
<xe>{xeescape}  {
480 481
					ECHO;
				}
482
<xe>{xeoctesc}  {
483 484
					ECHO;
				}
485
<xe>{xehexesc}  {
486 487
					ECHO;
				}
488
<xq,xe>{quotecontinue} {
489 490
					ECHO;
				}
491
<xe>.			{
492 493 494 495 496 497 498 499 500
					/* This is only needed for \ just before EOF */
					ECHO;
				}

{dolqdelim}		{
					cur_state->dolqstart = pg_strdup(yytext);
					BEGIN(xdolq);
					ECHO;
				}
501 502 503 504 505
{dolqfailed}	{
					/* throw back all but the initial "$" */
					yyless(1);
					ECHO;
				}
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
<xdolq>{dolqdelim} {
					if (strcmp(yytext, cur_state->dolqstart) == 0)
					{
						free(cur_state->dolqstart);
						cur_state->dolqstart = NULL;
						BEGIN(INITIAL);
					}
					else
					{
						/*
						 * When we fail to match $...$ to dolqstart, transfer
						 * the $... part to the output, but put back the final
						 * $ for rescanning.  Consider $delim$...$junk$delim$
						 */
						yyless(yyleng-1);
					}
					ECHO;
				}
<xdolq>{dolqinside} {
					ECHO;
				}
527 528 529
<xdolq>{dolqfailed} {
					ECHO;
				}
530 531 532 533
<xdolq>.		{
					/* This is only needed for $ inside the quoted text */
					ECHO;
				}
534 535 536 537 538 539 540 541 542

{xdstart}		{
					BEGIN(xd);
					ECHO;
				}
<xd>{xdstop}	{
					BEGIN(INITIAL);
					ECHO;
				}
543
<xd>{xddouble}	{
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
					ECHO;
				}
<xd>{xdinside}	{
					ECHO;
				}

{typecast}		{
					ECHO;
				}

	/*
	 * These rules are specific to psql --- they implement parenthesis
	 * counting and detection of command-ending semicolon.  These must
	 * appear before the {self} rule so that they take precedence over it.
	 */

"("				{
					cur_state->paren_depth += 1;
					ECHO;
				}

")"				{
					/* a stray ")" must not drive the depth negative */
					if (cur_state->paren_depth >= 1)
						cur_state->paren_depth -= 1;
					ECHO;
				}

";"				{
					ECHO;
					/*
					 * Only a semicolon outside all parentheses ends the
					 * command; in that case terminate lexing temporarily.
					 */
					if (cur_state->paren_depth == 0)
						return LEXRES_SEMI;
				}

	/*
	 * psql-specific rules to handle backslash commands and variable
	 * substitution.  We want these before {self}, also.
	 */

"\\"[;:]		{
					/*
					 * Backslash-escaped semicolon or colon: put just the
					 * second character into the query buffer.
					 */
					emit(&yytext[1], 1);
				}

"\\"			{
					/* Start of a backslash command: stop lexing for now */
					return LEXRES_BACKSLASH;
				}

:[A-Za-z0-9_]+	{
					/* Possible psql variable substitution */
					const char *varval = GetVariable(pset.vars, &yytext[1]);

					if (varval == NULL)
					{
						/* no such variable, so copy the string as is */
						ECHO;
					}
					else
					{
						/* It is a variable, perform substitution */
						push_new_buffer(varval);
						/* yy_scan_string already made buffer active */
					}
				}

	/*
	 * Back to backend-compatible rules.
	 */

{self}			{
					ECHO;
				}

{operator}		{
					/*
					 * An embedded slash-star or dash-dash starts a
					 * comment, so the operator has to end just before it.
					 * (When either one is at the very start of the text,
					 * an earlier rule matches instead of this one.)
					 */
					int			keep = yyleng;
					char	   *cstart = strstr(yytext, "/*");
					char	   *lstart = strstr(yytext, "--");
					char	   *stop;

					/* choose whichever comment start comes first, if any */
					if (cstart == NULL)
						stop = lstart;
					else if (lstart == NULL || cstart < lstart)
						stop = cstart;
					else
						stop = lstart;

					if (stop != NULL)
						keep = stop - yytext;

					/*
					 * For SQL compatibility, a multi-char operator may end
					 * in '+' or '-' only if it also contains a char that
					 * cannot appear in SQL operators.  So '=-' lexes as
					 * two operators, while names like '?-' stay intact.
					 */
					while (keep > 1 &&
						   (yytext[keep - 1] == '+' ||
							yytext[keep - 1] == '-'))
					{
						bool		has_nonsql = false;
						int			i;

						for (i = 0; i <= keep - 2; i++)
						{
							if (strchr("~!@#^&|`?%", yytext[i]))
							{
								has_nonsql = true;
								break;
							}
						}
						if (has_nonsql)
							break;	/* trailing +/- is acceptable */
						keep--;		/* drop the +/- and look again */
					}

					/* Give back whatever we decided not to keep */
					if (keep < yyleng)
						yyless(keep);
					ECHO;
				}

{param}			{
					ECHO;
				}

{integer}		{
					ECHO;
				}
{decimal}		{
					ECHO;
				}
{real}			{
					ECHO;
				}
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
{realfail1}		{
					/*
					 * throw back the [Ee], and treat as {decimal}.  Note
					 * that it is possible the input is actually {integer},
					 * but since this case will almost certainly lead to a
					 * syntax error anyway, we don't bother to distinguish.
					 */
					yyless(yyleng-1);
					ECHO;
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng-2);
					ECHO;
				}
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793


{identifier}	{
					ECHO;
				}

{other}			{
					ECHO;
				}


	/*
	 * Everything from here down is psql-specific.
	 */

<<EOF>>			{
					StackElem  *top = cur_state->buffer_stack;
					StackElem  *outer;

					/* nothing stacked: this is the real end of input */
					if (top == NULL)
						return LEXRES_EOL;

					/*
					 * A variable expansion has been fully read.  Unlink
					 * its stack entry, release it, and carry on lexing
					 * from whatever buffer lies beneath.
					 */
					outer = top->next;
					cur_state->buffer_stack = outer;
					yy_delete_buffer(top->buf);
					free(top->bufstring);
					if (top->origstring)
						free(top->origstring);
					free(top);

					if (outer == NULL)
					{
						/* back to the outer-level input line */
						yy_switch_to_buffer(cur_state->scanbufhandle);
						cur_state->curline = cur_state->scanbuf;
						cur_state->refline = cur_state->scanline;
					}
					else
					{
						/* resume the enclosing variable expansion */
						yy_switch_to_buffer(outer->buf);
						cur_state->curline = outer->bufstring;
						if (outer->origstring)
							cur_state->refline = outer->origstring;
						else
							cur_state->refline = outer->bufstring;
					}
				}

	/*
	 * Exclusive lexer states to handle backslash command lexing
	 */

<xslashcmd>{
	/* command name ends at whitespace or backslash; eat all else */

{space}|"\\"	{
					/* return the terminator to the input, unconsumed */
					yyless(0);
					return LEXRES_OK;
				}

	/* any other character is part of the command name */
{other}			{ ECHO; }

}

<xslasharg>{
	/* eat any whitespace, then decide what to do at first nonblank */

{space}+		{ }

"\\"			{
					/*
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that start with a backslash; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					return LEXRES_OK;
				}

{quote}			{
					*option_quote = '\'';
					BEGIN(xslashquote);
				}

"`"				{
					if (option_type == OT_VERBATIM)
					{
						/* in verbatim mode, backquote is not special */
						ECHO;
						BEGIN(xslashdefaultarg);
					}
					else
					{
						*option_quote = '`';
						BEGIN(xslashbackquote);
					}
				}

:[A-Za-z0-9_]*	{
					/* Possible psql variable substitution */
					if (option_type == OT_VERBATIM)
						ECHO;
					else
					{
						const char *value;

						value = GetVariable(pset.vars, yytext + 1);

						/*
						 * The variable value is just emitted without any
						 * further examination.  This is consistent with the
						 * pre-8.0 code behavior, if not with the way that
						 * variables are handled outside backslash commands.
						 */
						if (value)
							appendPQExpBufferStr(output_buf, value);
					}

					*option_quote = ':';

					return LEXRES_OK;
				}

"|"				{
					ECHO;
					if (option_type == OT_FILEPIPE)
					{
						/* treat like whole-string case */
						BEGIN(xslashwholeline);
					}
					else
					{
						/* treat like default case */
						BEGIN(xslashdefaultarg);
					}
				}

{dquote}		{
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{
					ECHO;
					BEGIN(xslashdefaultarg);
				}

}

<xslashquote>{
	/* single-quoted text: copy literally except for backslash sequences */

{quote}			{ return LEXRES_OK; }

	/* We don't need a state here because we are already in a string */
{xqdouble}		{ emit("'", 1); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal case */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex case */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

	/* any other backslash sequence: keep only the escaped character */
"\\".			{ emit(yytext + 1, 1); }

{other}|\n		{ ECHO; }

}

<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote or end of line.
	 * Invocation of the command will happen in psql_scan_slash_option.
	 */

"`"				{ return LEXRES_OK; }

{other}|\n		{ ECHO; }

}

<xslashdefaultarg>{
	/*
	 * Copy everything until unquoted whitespace or end of line.  Quotes
	 * do not get stripped yet.
	 */

{space}			{
					yyless(0);
					return LEXRES_OK;
				}

"\\"			{
					/*
					 * unquoted backslash is end of command or next command,
					 * do not eat
					 *
					 * (this was not the behavior pre-8.0, but it seems
					 * consistent)
					 */
					yyless(0);
					return LEXRES_OK;
				}

{dquote}		{
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{ ECHO; }

}

<xslashquotedarg>{
	/* double-quoted text within a default-type argument: copy */

{dquote}		{
					ECHO;
					BEGIN(xslashdefaultarg);
				}

{other}|\n		{ ECHO; }

}

<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}

<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{ return LEXRES_OK; }

{other}|\n		{
					yyless(0);
					return LEXRES_OK;
				}

}

%%

/*
 * Allocate and initialize a lexer working state struct.
 *
 * The struct comes back zero-filled from pg_malloc_zero() and is then
 * passed through psql_scan_reset() before being returned.
 */
PsqlScanState
psql_scan_create(void)
{
	PsqlScanState newstate =
		(PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));

	psql_scan_reset(newstate);
	return newstate;
}

/*
 * Destroy a lexer working state struct, releasing all resources.
 *
 * The state is run through psql_scan_finish() and psql_scan_reset()
 * before the struct itself is freed; "state" must not be used afterwards.
 */
void
psql_scan_destroy(PsqlScanState state)
{
	psql_scan_finish(state);

	psql_scan_reset(state);

	free(state);
}

/*
 * Begin lexing a new input line.
 *
 * Subsequent calls to the psql_scan routines will scan the line_len bytes
 * starting at *line; call psql_scan_finish once scanning is done.  The
 * lexer keeps a pointer to the caller's storage at *line, so that string
 * must be neither modified nor freed before psql_scan_finish is called.
 */
void
psql_scan_setup(PsqlScanState state,
				const char *line, int line_len)
{
	/* A previous scan must have been finished before starting another */
	psql_assert(state->scanbufhandle == NULL);
	psql_assert(state->buffer_stack == NULL);

	/* Note the encoding, and whether it needs the character-set hack */
	state->encoding = pset.encoding;
	state->safe_encoding = PG_VALID_BE_ENCODING(state->encoding);

	/* prepare_buffer needs cur_state to be set */
	cur_state = state;

	/* Build the flex input buffer, with translation and padding as needed */
	state->scanbufhandle = prepare_buffer(line, line_len, &state->scanbuf);
	state->scanline = line;

	/* Lookaside pointers, in case an unsafe encoding must be mapped back */
	state->curline = state->scanbuf;
	state->refline = line;
}

/*
 * Do lexical analysis of SQL command text.
 *
 * The text previously passed to psql_scan_setup is scanned, and appended
 * (possibly with transformation) to query_buf.
 *
 * The return value indicates the condition that stopped scanning:
 *
 * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
 * transferred to query_buf.)  The command accumulated in query_buf should
 * be executed, then clear query_buf and call again to scan the remainder
 * of the line.
 *
 * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
 * Any previous data on the line has been transferred to query_buf.
 * The caller will typically next call psql_scan_slash_command(),
 * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
 *
 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
 * incomplete SQL command.  *prompt is set to the appropriate prompt type.
 *
 * PSCAN_EOL: the end of the line was reached, and there is no lexical
 * reason to consider the command incomplete.  The caller may or may not
 * choose to send it.  *prompt is set to the appropriate prompt type if
 * the caller chooses to collect more input.
 *
 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
 * be called next, then the cycle may be repeated with a fresh input line.
 *
 * In all cases, *prompt is set to an appropriate prompt type code for the
 * next line-input operation.
 */
PsqlScanResult
psql_scan(PsqlScanState state,
		  PQExpBuffer query_buf,
		  promptStatus_t *prompt)
{
	PsqlScanResult result;
	int			lexresult;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = query_buf;

	/* Resume in the innermost pushed buffer, if any, else the main one */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	/* Restore the start condition saved at the end of the previous call */
	BEGIN(state->start_state);

	/* And lex. */
	lexresult = yylex();

	/* Save the start condition so the next call can pick up where we left */
	state->start_state = YY_START;

	/*
	 * Check termination state and return appropriate result info.
	 */
	switch (lexresult)
	{
		case LEXRES_EOL:		/* end of input */
			switch (state->start_state)
			{
				case INITIAL:
					if (state->paren_depth > 0)
					{
						/* unclosed parenthesis */
						result = PSCAN_INCOMPLETE;
						*prompt = PROMPT_PAREN;
					}
					else if (query_buf->len > 0)
					{
						/* something accumulated, nothing lexically open */
						result = PSCAN_EOL;
						*prompt = PROMPT_CONTINUE;
					}
					else
					{
						/* never bother to send an empty buffer */
						result = PSCAN_INCOMPLETE;
						*prompt = PROMPT_READY;
					}
					break;
				case xb:
				case xh:
				case xq:
				case xe:
					/* these four states all share the single-quote prompt */
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_SINGLEQUOTE;
					break;
				case xc:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_COMMENT;
					break;
				case xd:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_DOUBLEQUOTE;
					break;
				case xdolq:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_DOLLARQUOTE;
					break;
				default:
					/* can't get here */
					fprintf(stderr, "invalid YY_START\n");
					exit(1);
			}
			break;
		case LEXRES_SEMI:		/* semicolon */
			result = PSCAN_SEMICOLON;
			*prompt = PROMPT_READY;
			break;
		case LEXRES_BACKSLASH:	/* backslash */
			result = PSCAN_BACKSLASH;
			*prompt = PROMPT_READY;
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid yylex result\n");
			exit(1);
	}

	return result;
}

/*
 * Clean up after scanning a string.  This flushes any unread input and
 * releases resources (but not the PsqlScanState itself).  Note however
 * that this does not reset the lexer scan state; that can be done by
 * psql_scan_reset(), which is an orthogonal operation.
 *
 * It is legal to call this when not scanning anything (makes it easier
 * to deal with error recovery).
 */
void
psql_scan_finish(PsqlScanState state)
{
	StackElem  *elem;

	/* Pop and destroy every buffer still sitting on the expansion stack */
	while ((elem = state->buffer_stack) != NULL)
	{
		state->buffer_stack = elem->next;
		yy_delete_buffer(elem->buf);
		free(elem->bufstring);
		/* origstring may be NULL; free(NULL) is a no-op */
		free(elem->origstring);
		free(elem);
	}

	/* Release the outer flex buffer handle, if one exists */
	if (state->scanbufhandle != NULL)
		yy_delete_buffer(state->scanbufhandle);
	state->scanbufhandle = NULL;

	/* ... and the copy of the input text that backed it */
	free(state->scanbuf);
	state->scanbuf = NULL;
}

/*
 * Reset lexer scanning state to start conditions.  This is appropriate
 * for executing \r psql commands (or any other time that we discard the
 * prior contents of query_buf).  It is not, however, necessary to do this
 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
 * conditions are returned.
 *
 * Note that this is unrelated to flushing unread input; that task is
 * done by psql_scan_finish().
 */
void
psql_scan_reset(PsqlScanState state)
{
	/* Back to the top-level start condition with nothing left open */
	state->start_state = INITIAL;
	state->paren_depth = 0;
	state->xcdepth = 0;			/* not really necessary */

	/* Forget any remembered dollar-quote start; free(NULL) is harmless */
	free(state->dolqstart);
	state->dolqstart = NULL;
}

/*
 * Return true if lexer is currently in an "inside quotes" state.
 *
 * This is pretty grotty but is needed to preserve the old behavior
 * that mainloop.c drops blank lines not inside quotes without even
 * echoing them.
 */
bool
psql_scan_in_quote(PsqlScanState state)
{
	/* Every saved start condition other than INITIAL counts as "in quote" */
	if (state->start_state == INITIAL)
		return false;
	return true;
}

/*
 * Scan the command name of a psql backslash command.  This should be called
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 * has been consumed through the leading backslash.
 *
 * The return value is a malloc'd copy of the command name, as parsed off
 * from the input.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData cmdname;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* The lexer appends the command name here; we hand back its storage */
	initPQExpBuffer(&cmdname);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = &cmdname;

	/* Read from the innermost pushed buffer if any, else the main one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle);

	BEGIN(xslashcmd);

	/* There are no possible errors in this lex state, so ignore the result */
	(void) yylex();

	return cmdname.data;
}

/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
 *
 * type tells what processing, if any, to perform on the option string;
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 * letters.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the quote symbol.
 *
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 * be taken as part of the option string will be stripped.
 *
 * NOTE: the only possible syntax errors for backslash options are unmatched
 * quotes, which are detected when we run out of input.  Therefore, on a
 * syntax error we just throw away the string and return NULL; there is no
 * need to worry about flushing remaining input.
 */
char *
psql_scan_slash_option(PsqlScanState state,
					   enum slash_option_type type,
					   char *quote,
					   bool semicolon)
{
	PQExpBufferData mybuf;
	int			lexresult;
	char		local_quote;
	bool		badarg;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	if (quote == NULL)
		quote = &local_quote;
	*quote = 0;

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = &mybuf;
	option_type = type;
	option_quote = quote;

	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	if (type == OT_WHOLE_LINE)
		BEGIN(xslashwholeline);
	else
		BEGIN(xslasharg);

	/* And lex. */
	lexresult = yylex();

	/*
	 * Check the lex result: we should have gotten back either LEXRES_OK
	 * or LEXRES_EOL (the latter indicating end of string).  If we were inside
	 * a quoted string, as indicated by YY_START, EOL is an error.
	 */
	psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
	badarg = false;
	switch (YY_START)
	{
		case xslasharg:
			/* empty arg, or possibly a psql variable substitution */
			break;
		case xslashquote:
			if (lexresult != LEXRES_OK)
				badarg = true;		/* hit EOL not ending quote */
			break;
		case xslashbackquote:
			if (lexresult != LEXRES_OK)
				badarg = true;		/* hit EOL not ending quote */
			else
			{
				/* Perform evaluation of backticked command */
				char	   *cmd = mybuf.data;
				FILE	   *fd;
				bool		error = false;
				PQExpBufferData output;
				char		buf[512];
				size_t		result;

1370
				fd = popen(cmd, PG_BINARY_R);
1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459
				if (!fd)
				{
					psql_error("%s: %s\n", cmd, strerror(errno));
					error = true;
				}

				initPQExpBuffer(&output);

				if (!error)
				{
					do
					{
						result = fread(buf, 1, sizeof(buf), fd);
						if (ferror(fd))
						{
							psql_error("%s: %s\n", cmd, strerror(errno));
							error = true;
							break;
						}
						appendBinaryPQExpBuffer(&output, buf, result);
					} while (!feof(fd));
				}

				if (fd && pclose(fd) == -1)
				{
					psql_error("%s: %s\n", cmd, strerror(errno));
					error = true;
				}

				/* Now done with cmd, transfer result to mybuf */
				resetPQExpBuffer(&mybuf);

				if (!error)
				{
					/* strip any trailing newline */
					if (output.len > 0 &&
						output.data[output.len - 1] == '\n')
						output.len--;
					appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
				}

				termPQExpBuffer(&output);
			}
			break;
		case xslashdefaultarg:
			/* Strip any trailing semi-colons if requested */
			if (semicolon)
			{
				while (mybuf.len > 0 &&
					   mybuf.data[mybuf.len - 1] == ';')
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}

			/*
			 * If SQL identifier processing was requested, then we strip out
			 * excess double quotes and downcase unquoted letters.
			 * Doubled double-quotes become output double-quotes, per spec.
			 *
			 * Note that a string like FOO"BAR"BAZ will be converted to
			 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
			 * which would have us parse it as several identifiers.  But
			 * for psql's purposes, we want a string like "foo"."bar" to
			 * be treated as one option, so there's little choice.
			 */
			if (type == OT_SQLID || type == OT_SQLIDHACK)
			{
				bool		inquotes = false;
				char	   *cp = mybuf.data;

				while (*cp)
				{
					if (*cp == '"')
					{
						if (inquotes && cp[1] == '"')
						{
							/* Keep the first quote, remove the second */
							cp++;
						}
						inquotes = !inquotes;
						/* Collapse out quote at *cp */
						memmove(cp, cp + 1, strlen(cp));
						mybuf.len--;
						/* do not advance cp */
					}
					else
					{
						if (!inquotes && type == OT_SQLID)
1460
							*cp = pg_tolower((unsigned char) *cp);
1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646
						cp += PQmblen(cp, pset.encoding);
					}
				}
			}
			break;
		case xslashquotedarg:
			/* must have hit EOL inside double quotes */
			badarg = true;
			break;
		case xslashwholeline:
			/* always okay */
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid YY_START\n");
			exit(1);
	}

	if (badarg)
	{
		psql_error("unterminated quoted string\n");
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/*
	 * An unquoted empty argument isn't possible unless we are at end of
	 * command.  Return NULL instead.
	 */
	if (mybuf.len == 0 && *quote == 0)
	{
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/* Else return the completed string. */
	return mybuf.data;
}

/*
 * Eat up any unused \\ to complete a backslash command.
 */
void
psql_scan_slash_command_end(PsqlScanState state)
{
	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* Set up static variables that will be used by yylex; no output wanted */
	cur_state = state;
	output_buf = NULL;

	/* Read from the innermost pushed buffer if any, else the main one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle);

	BEGIN(xslashend);

	/* There are no possible errors in this lex state, so ignore the result */
	(void) yylex();
}

/*
 * "Push back" the passed string so that it will be rescanned by subsequent
 * psql_scan_slash_option calls.  This is presently only used in the case
 * where a single-letter command has been concatenated with its argument.
 *
 * We use the same buffer stack mechanism as for variable expansion.
 */
void
psql_scan_slash_pushback(PsqlScanState state, const char *str)
{
	/*
	 * push_new_buffer works on whatever scan state cur_state points at,
	 * so aim it at the caller's state before pushing.
	 */
	cur_state = state;
	push_new_buffer(str);
}


/*
 * Push the given string onto the stack of stuff to scan.
 *
 * cur_state must point to the active PsqlScanState.
 *
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 */
static void
push_new_buffer(const char *newstr)
{
	StackElem  *stackelem;

	stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
	stackelem->buf = prepare_buffer(newstr, strlen(newstr),
									&stackelem->bufstring);
	cur_state->curline = stackelem->bufstring;
	if (cur_state->safe_encoding)
	{
		stackelem->origstring = NULL;
		cur_state->refline = stackelem->bufstring;
	}
	else
	{
		stackelem->origstring = pg_strdup(newstr);
		cur_state->refline = stackelem->origstring;
	}
	stackelem->next = cur_state->buffer_stack;
	cur_state->buffer_stack = stackelem;
}

/*
 * Set up a flex input buffer to scan the given data.  We always make a
 * copy of the data.  If working in an unsafe encoding, the copy has
 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
 *
 * cur_state must point to the active PsqlScanState.
 *
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 */
static YY_BUFFER_STATE
prepare_buffer(const char *txt, int len, char **txtcopy)
{
	char	   *newtxt;

	/* Flex wants two \0 characters after the actual data */
	newtxt = pg_malloc(len + 2);
	*txtcopy = newtxt;
	newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;

	if (cur_state->safe_encoding)
		memcpy(newtxt, txt, len);
	else
	{
		/* Gotta do it the hard way */
		int		i = 0;

		while (i < len)
		{
			int		thislen = PQmblen(txt + i, cur_state->encoding);

			/* first byte should always be okay... */
			newtxt[i] = txt[i];
			i++;

			/*
			 * Mask the remaining bytes of this character.  Stop at len even
			 * if PQmblen claims more: a truncated multibyte sequence at the
			 * end of the input must not overwrite the two terminator bytes
			 * or run off the end of the allocation.
			 */
			while (--thislen > 0 && i < len)
				newtxt[i++] = (char) 0xFF;
		}
	}

	return yy_scan_buffer(newtxt, len + 2);
}

/*
 * emit() --- body for ECHO macro
 *
 * NB: this must be used for ALL and ONLY the text copied from the flex
 * input data.  If you pass it something that is not part of the yytext
 * string, you are making a mistake.  Internally generated text can be
 * appended directly to output_buf.
 */
static void
emit(const char *txt, int len)
{
	const char *orig;
	int			pos;

	/* Safe encoding: the scan buffer holds the real bytes, copy them as-is */
	if (cur_state->safe_encoding)
	{
		appendBinaryPQExpBuffer(output_buf, txt, len);
		return;
	}

	/*
	 * Unsafe encoding: txt points into the masked buffer starting at
	 * curline.  Locate the same offset in the reference string so that
	 * each 0xFF placeholder can be swapped for the byte found there.
	 */
	orig = cur_state->refline + (txt - cur_state->curline);

	for (pos = 0; pos < len; pos++)
	{
		if (txt[pos] == (char) 0xFF)
			appendPQExpBufferChar(output_buf, orig[pos]);
		else
			appendPQExpBufferChar(output_buf, txt[pos]);
	}
}