%{
/*-------------------------------------------------------------------------
 *
 * psqlscan.l
 *	  lexical scanner for psql
 *
 * This code is mainly needed to determine where the end of a SQL statement
 * is: we are looking for semicolons that are not within quotes, comments,
 * or parentheses.  The most reliable way to handle this is to borrow the
 * backend's flex lexer rules, lock, stock, and barrel.  The rules below
 * are (except for a few) the same as the backend's, but their actions are
 * just ECHO whereas the backend's actions generally do other things.
 *
 * XXX The rules in this file must be kept in sync with the backend lexer!!!
 *
 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
 *
 * The most difficult aspect of this code is that we need to work in multibyte
 * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
 * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
 * all our lexing rules treat all high-bit-set characters alike, we don't
 * really need to care whether such a byte is part of a sequence or not.
 * In an "unsafe" encoding, we still expect the first byte of a multibyte
 * sequence to be >= 0x80, but later bytes might not be.  If we scan such
 * a sequence as-is, the lexing rules could easily be fooled into matching
 * such bytes to ordinary ASCII characters.  Our solution for this is to
 * substitute 0xFF for each non-first byte within the data presented to flex.
 * The flex rules will then pass the FF's through unmolested.  The emit()
 * subroutine is responsible for looking back to the original string and
 * replacing FF's with the corresponding original bytes.
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.33 2010/05/05 22:18:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include "psqlscan.h"

#include <ctype.h>

#include "common.h"
#include "settings.h"
#include "variables.h"


/*
 * We use a stack of flex buffers to handle substitution of psql variables.
 * Each stacked buffer contains the as-yet-unread text from one psql variable.
 * When we pop the stack all the way, we resume reading from the outer buffer
 * identified by scanbufhandle.
 *
 * origstring may be NULL; the <<EOF>> rule then falls back to bufstring as
 * the reference text when it reactivates an entry.
 */
typedef struct StackElem
{
	YY_BUFFER_STATE buf;		/* flex input control structure */
	char	   *bufstring;		/* data actually being scanned by flex */
	char	   *origstring;		/* copy of original data, if needed */
	char	   *varname;		/* name of variable providing data, or NULL */
	struct StackElem *next;		/* next entry further down the stack */
} StackElem;

/*
 * All working state of the lexer must be stored in PsqlScanStateData
 * between calls.  This allows us to have multiple open lexer operations,
 * which is needed for nested include files.  The lexer itself is not
 * recursive, but it must be re-entrant.
 */
typedef struct PsqlScanStateData
{
	StackElem  *buffer_stack;	/* stack of variable expansion buffers */

	/*
	 * These variables always refer to the outer buffer, never to any
	 * stacked variable-expansion buffer.
	 */
	YY_BUFFER_STATE scanbufhandle;
	char	   *scanbuf;		/* start of outer-level input buffer */
	const char *scanline;		/* current input line at outer level */

	/* safe_encoding, curline, refline are used by emit() to replace FFs */
	int			encoding;		/* encoding being used now */
	bool		safe_encoding;	/* is current encoding "safe"? */
	const char *curline;		/* actual flex input string for cur buf */
	const char *refline;		/* original data for cur buffer */

	/*
	 * All this state lives across successive input lines, until explicitly
	 * reset by psql_scan_reset.
	 */
	int			start_state;	/* saved YY_START */
	int			paren_depth;	/* depth of nesting in parentheses */
	int			xcdepth;		/* depth of nesting in slash-star comments */
	char	   *dolqstart;		/* current $foo$ quote start string */
} PsqlScanStateData;

static PsqlScanState cur_state;	/* current state while active */

static PQExpBuffer output_buf;	/* current output buffer */

/* these variables do not need to be saved across calls */
static enum slash_option_type option_type;
static char *option_quote;


/* Return values from yylex() */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_SEMI			1	/* command-terminating semicolon found */
#define LEXRES_BACKSLASH	2	/* backslash command start */
#define LEXRES_OK			3	/* OK completion of backslash argument */


int	yylex(void);

117 118 119
static void push_new_buffer(const char *newstr, const char *varname);
static void pop_buffer_stack(PsqlScanState state);
static bool var_is_current_source(PsqlScanState state, const char *varname);
120 121 122
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
									  char **txtcopy);
static void emit(const char *txt, int len);
123
static bool is_utf16_surrogate_first(uint32 c);
124
static void escape_variable(bool as_ident);
125 126 127 128 129 130 131

#define ECHO emit(yytext, yyleng)

%}

/*
 * Scanner options:
 *	8bit				accept input bytes with the high bit set
 *	never-interactive	input is never treated as an interactive terminal
 *	nodefault			no default rule; the rules must match every input
 *	noinput, nounput	do not generate the unused input()/unput() routines
 *	noyywrap			end of buffer is end of input; yywrap() is not called
 */
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap

/*
 * All of the following definitions and rules should exactly match
 * src/backend/parser/scan.l so far as the flex patterns are concerned.
 * The rule bodies are just ECHO as opposed to what the backend does,
 * however.  (But be sure to duplicate code that affects the lexing process,
 * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
 * scan.l has a separate one for each exclusive state.
 */

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xeu> Unicode surrogate pair in extended quoted string
 */

%x xb
%x xc
%x xd
%x xh
%x xe
%x xq
%x xdolq
%x xui
%x xus
%x xeu
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdefaultarg
%x xslashquotedarg
%x xslashwholeline
%x xslashend

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  Comments that start with -- and extend to the next
 * newline are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
 * to agree, and see also the plpgsql lexer.
 */

space			[ \t\n\r\f]
horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]

comment			("--"{non_newline}*)

whitespace		({space}+|{comment})

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

special_whitespace		({space}+|{comment}{newline})
horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

227 228 229 230 231 232 233 234 235 236 237 238 239 240
/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
quote			'
quotestop		{quote}{whitespace}*
quotecontinue	{quote}{whitespace_with_newline}{quote}
quotefail		{quote}{whitespace}*"-"

241 242 243 244 245 246 247 248 249 250 251 252
/* Bit string
 * It is tempting to scan the string for only those characters
 * which are allowed. However, this leads to silently swallowed
 * characters if illegal characters are included in the string.
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 * as a zero-length string, and the ABCD' is lost!
 * Better to pass the string forward and let the input routines
 * validate the contents.
 */
xbstart			[bB]{quote}
xbinside		[^']*

253
/* Hexadecimal number */
254 255 256
xhstart			[xX]{quote}
xhinside		[^']*

257
/* National character */
258 259
xnstart			[nN]{quote}

260 261
/* Quoted string that allows backslash escapes */
xestart			[eE]{quote}
262 263 264 265
xeinside		[^\\']+
xeescape		[\\][^0-7]
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
266 267
xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
268

269
/* Extended quote
270
 * xqdouble implements embedded quote, ''''
271 272 273
 */
xqstart			{quote}
xqdouble		{quote}{quote}
274
xqinside		[^']+
275

276 277 278 279 280
/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
288
dolqfailed		\${dolq_start}{dolq_cont}*
289 290
dolqinside		[^$]+

291 292 293 294 295 296 297 298 299
/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/* Unicode escapes */
uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
/* error rule to avoid backup */
uescapefail		("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])

/* Quoted identifier with Unicode escapes */
xuistart		[uU]&{dquote}
xuistop1		{dquote}{whitespace}*{uescapefail}?
xuistop2		{dquote}{whitespace}*{uescape}

/* Quoted string with Unicode escapes */
xusstart		[uU]&{quote}
xusstop1		{quote}{whitespace}*{uescapefail}?
xusstop2		{quote}{whitespace}*{uescape}

/* error rule to avoid backup */
xufailed		[uU]&


319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * Dash-dash comments have similar interactions with the operator rule.
 */
xcstart			\/\*{op_chars}*
xcstop			\*+\/
xcinside		[^*/]+

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]

identifier		{ident_start}{ident_cont}*

typecast		"::"

349 350 351 352
/* these two token types are used by PL/pgsql, though not in core SQL */
dot_dot			\.\.
colon_equals	":="

353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator		{op_chars}+

/* We no longer allow unary minus in numbers.
 * Instead we pass it separately to the parser, where it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
379
decimalfail		{digit}+\.\.
380 381 382
real			({integer}|{decimal})[Ee][-+]?{digit}+
realfail1		({integer}|{decimal})[Ee]
realfail2		({integer}|{decimal})[Ee][-+]
383 384 385 386 387 388

param			\${integer}

other			.

/*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */

%%

{whitespace}	{
					/*
					 * This pattern covers true whitespace as well as
					 * single-line ("--" style) comments.  Nothing is copied
					 * while the query buffer is still empty, and a match
					 * that begins with a dash (i.e. a comment) is never
					 * copied at all --- dubious, but historical behavior.
					 * Only whitespace in mid-query gets echoed.
					 */
					if (output_buf->len != 0 && yytext[0] != '-')
						ECHO;
				}

{xcstart}		{
					cur_state->xcdepth = 0;
					BEGIN(xc);
					/* Put back any characters past slash-star; see above */
					yyless(2);
					ECHO;
				}

<xc>{xcstart}	{
					cur_state->xcdepth++;
					/* Put back any characters past slash-star; see above */
					yyless(2);
					ECHO;
				}

<xc>{xcstop}	{
					/*
					 * A star-slash closes one comment level.  With nested
					 * levels still open we just count one off; otherwise
					 * the comment is finished and normal lexing resumes.
					 * The terminator text is echoed in both cases.
					 */
					if (cur_state->xcdepth > 0)
						cur_state->xcdepth--;
					else
						BEGIN(INITIAL);
					ECHO;
				}

<xc>{xcinside}	{
					ECHO;
				}

<xc>{op_chars}	{
					ECHO;
				}

449 450 451 452
<xc>\*+			{
					ECHO;
				}

453 454 455 456
{xbstart}		{
					BEGIN(xb);
					ECHO;
				}
457 458 459
<xb>{quotestop}	|
<xb>{quotefail} {
					yyless(1);
460 461 462 463 464 465 466
					BEGIN(INITIAL);
					ECHO;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					ECHO;
				}
467 468
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
469 470 471 472 473 474 475 476 477 478 479 480 481
					ECHO;
				}

{xhstart}		{
					/*
					 * Start of a hexadecimal string literal (x'...').
					 * The matched text is copied through unchanged; the
					 * point of this rule is to enter <xh> so that the
					 * literal's contents are scanned as string data rather
					 * than as SQL syntax.
					 */
					BEGIN(xh);
					ECHO;
				}
482 483 484
<xh>{quotestop}	|
<xh>{quotefail} {
					yyless(1);
485 486 487 488 489
					BEGIN(INITIAL);
					ECHO;
				}

{xnstart}		{
490
					yyless(1);				/* eat only 'n' this time */
491 492 493 494
					ECHO;
				}

{xqstart}		{
495 496 497 498
					if (standard_strings())
						BEGIN(xq);
					else
						BEGIN(xe);
499 500
					ECHO;
				}
501
{xestart}		{
502
					BEGIN(xe);
503 504
					ECHO;
				}
505 506 507 508
{xusstart}		{
					BEGIN(xus);
					ECHO;
				}
509 510
<xq,xe>{quotestop}	|
<xq,xe>{quotefail} {
511
					yyless(1);
512 513 514
					BEGIN(INITIAL);
					ECHO;
				}
515 516 517 518 519 520 521 522 523 524
<xus>{xusstop1} {
					yyless(1);
					BEGIN(INITIAL);
					ECHO;
				}
<xus>{xusstop2} {
					BEGIN(INITIAL);
					ECHO;
				}
<xq,xe,xus>{xqdouble} {
525 526
					ECHO;
				}
527
<xq,xus>{xqinside}  {
528 529
					ECHO;
				}
530 531 532
<xe>{xeinside}  {
					ECHO;
				}
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
<xe>{xeunicode} {
					uint32 c = strtoul(yytext+2, NULL, 16);

					if (is_utf16_surrogate_first(c))
						BEGIN(xeu);
					ECHO;
				}
<xeu>{xeunicode} {
					BEGIN(xe);
					ECHO;
				}
<xeu>.			{ ECHO; }
<xeu>\n			{ ECHO; }
<xe,xeu>{xeunicodefail}	{
					ECHO;
				}
549
<xe>{xeescape}  {
550 551
					ECHO;
				}
552
<xe>{xeoctesc}  {
553 554
					ECHO;
				}
555
<xe>{xehexesc}  {
556 557
					ECHO;
				}
558
<xq,xe,xus>{quotecontinue} {
559 560
					ECHO;
				}
561
<xe>.			{
562 563 564 565 566 567 568 569 570
					/* This is only needed for \ just before EOF */
					ECHO;
				}

{dolqdelim}		{
					cur_state->dolqstart = pg_strdup(yytext);
					BEGIN(xdolq);
					ECHO;
				}
571 572 573 574 575
{dolqfailed}	{
					/* throw back all but the initial "$" */
					yyless(1);
					ECHO;
				}
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
<xdolq>{dolqdelim} {
					if (strcmp(yytext, cur_state->dolqstart) != 0)
					{
						/*
						 * Not our closing delimiter.  Keep the $... part
						 * as quoted text but hand the trailing $ back to
						 * the scanner, since it might begin the real
						 * terminator.  Consider $delim$...$junk$delim$
						 */
						yyless(yyleng-1);
					}
					else
					{
						/* matching delimiter: the dollar quote is closed */
						free(cur_state->dolqstart);
						cur_state->dolqstart = NULL;
						BEGIN(INITIAL);
					}
					ECHO;
				}
<xdolq>{dolqinside} {
					ECHO;
				}
597 598 599
<xdolq>{dolqfailed} {
					ECHO;
				}
600 601 602 603
<xdolq>.		{
					/* This is only needed for $ inside the quoted text */
					ECHO;
				}
604 605 606 607 608

{xdstart}		{
					BEGIN(xd);
					ECHO;
				}
609 610 611 612
{xuistart}		{
					BEGIN(xui);
					ECHO;
				}
613 614 615 616
<xd>{xdstop}	{
					BEGIN(INITIAL);
					ECHO;
				}
617 618 619 620 621 622 623
<xui>{xuistop1}	{
					yyless(1);
					BEGIN(INITIAL);
					ECHO;
				}
<xui>{xuistop2}	{
					BEGIN(INITIAL);
624 625
					ECHO;
				}
626 627 628 629 630 631 632 633 634 635
<xd,xui>{xddouble}	{
					ECHO;
				}
<xd,xui>{xdinside}	{
					ECHO;
				}

{xufailed}	{
					/* throw back all but the initial u/U */
					yyless(1);
636 637 638 639 640 641 642
					ECHO;
				}

{typecast}		{
					ECHO;
				}

643 644 645 646 647 648 649 650
{dot_dot}		{
					ECHO;
				}

{colon_equals}	{
					ECHO;
				}

651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
	/*
	 * These rules are specific to psql --- they implement parenthesis
	 * counting and detection of command-ending semicolon.  These must
	 * appear before the {self} rule so that they take precedence over it.
	 */

"("				{
					cur_state->paren_depth++;
					ECHO;
				}

")"				{
					if (cur_state->paren_depth > 0)
						cur_state->paren_depth--;
					ECHO;
				}

";"				{
					ECHO;
					if (cur_state->paren_depth == 0)
					{
						/* Terminate lexing temporarily */
						return LEXRES_SEMI;
					}
				}

	/*
	 * psql-specific rules to handle backslash commands and variable
	 * substitution.  We want these before {self}, also.
	 */

"\\"[;:]		{
					/* Force a semicolon or colon into the query buffer */
					emit(yytext + 1, 1);
				}

"\\"			{
					/* Terminate lexing temporarily */
					return LEXRES_BACKSLASH;
				}

:[A-Za-z0-9_]+	{
					/*
					 * Possible psql variable substitution.  The name is the
					 * matched text minus its leading colon.
					 */
					const char *varname = yytext + 1;
					const char *value;

					value = GetVariable(pset.vars, varname);

					if (value)
					{
						/* It is a variable, check for recursion */
						if (var_is_current_source(cur_state, varname))
						{
							/* Recursive expansion --- don't go there */
							psql_error("skipping recursive expansion of variable \"%s\"\n",
									   varname);
							/* Instead copy the string as is */
							ECHO;
						}
						else
						{
							/* OK, perform substitution */
							push_new_buffer(value, varname);
							/* yy_scan_string already made buffer active */
						}
					}
					else
					{
						/*
						 * if the variable doesn't exist we'll copy the
						 * string as is
						 */
						ECHO;
					}
				}

727 728 729 730 731 732 733 734
:'[A-Za-z0-9_]+'	{
					escape_variable(false);
				}

:\"[A-Za-z0-9_]+\"	{
					escape_variable(true);
				}

735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806
	/*
	 * Back to backend-compatible rules.
	 */

{self}			{
					ECHO;
				}

{operator}		{
					/*
					 * An operator must end where an embedded comment starts
					 * (slash-star or dash-dash).  A comment start at the
					 * very first character never gets here, because an
					 * earlier rule matches it.
					 */
					int		keep = yyleng;
					char   *cut = strstr(yytext, "/*");
					char   *dashes = strstr(yytext, "--");

					/* use whichever comment marker appears first, if any */
					if (dashes != NULL && (cut == NULL || dashes < cut))
						cut = dashes;
					if (cut != NULL)
						keep = cut - yytext;

					/*
					 * SQL compatibility: a multi-char operator may end in
					 * '+' or '-' only if it also contains a character that
					 * cannot occur in SQL-standard operators.  Hence '=-'
					 * lexes as two operators while a name such as '?-'
					 * stays intact.
					 */
					for (;;)
					{
						bool	has_nonsql_char = false;
						int		i;

						if (keep <= 1)
							break;
						if (yytext[keep-1] != '+' && yytext[keep-1] != '-')
							break;

						/* look through everything before the trailing sign */
						for (i = 0; i < keep - 1 && !has_nonsql_char; i++)
						{
							if (strchr("~!@#^&|`?%", yytext[i]) != NULL)
								has_nonsql_char = true;
						}
						if (has_nonsql_char)
							break;		/* trailing +/- is acceptable */

						keep--;			/* drop the +/- and test again */
					}

					/* give back whatever was trimmed off the token */
					if (keep < yyleng)
						yyless(keep);
					ECHO;
				}

{param}			{
					ECHO;
				}

{integer}		{
					ECHO;
				}
{decimal}		{
					ECHO;
				}
807 808 809 810 811
{decimalfail}	{
					/* throw back the .., and treat as integer */
					yyless(yyleng-2);
					ECHO;
				}
812 813 814
{real}			{
					ECHO;
				}
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829
{realfail1}		{
					/*
					 * throw back the [Ee], and treat as {decimal}.  Note
					 * that it is possible the input is actually {integer},
					 * but since this case will almost certainly lead to a
					 * syntax error anyway, we don't bother to distinguish.
					 */
					yyless(yyleng-1);
					ECHO;
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng-2);
					ECHO;
				}
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854


{identifier}	{
					ECHO;
				}

{other}			{
					ECHO;
				}


	/*
	 * Everything from here down is psql-specific.
	 */

<<EOF>>			{
					StackElem  *stackelem = cur_state->buffer_stack;

					if (stackelem == NULL)
						return LEXRES_EOL; /* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					pop_buffer_stack(cur_state);

					stackelem = cur_state->buffer_stack;
					if (stackelem != NULL)
					{
						/* resume the enclosing variable expansion */
						yy_switch_to_buffer(stackelem->buf);
						cur_state->curline = stackelem->bufstring;
						cur_state->refline = stackelem->origstring ?
							stackelem->origstring : stackelem->bufstring;
					}
					else
					{
						/* stack is now empty: back to the outer buffer */
						yy_switch_to_buffer(cur_state->scanbufhandle);
						cur_state->curline = cur_state->scanbuf;
						cur_state->refline = cur_state->scanline;
					}
				}

	/*
	 * Exclusive lexer states to handle backslash command lexing
	 */

<xslashcmd>{
	/* command name ends at whitespace or backslash; eat all else */

{space}|"\\"	{
					yyless(0);
					return LEXRES_OK;
				}

{other}			{ ECHO; }

}

<xslasharg>{
	/* eat any whitespace, then decide what to do at first nonblank */

{space}+		{ }

"\\"			{
					/*
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that start with a backslash; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					return LEXRES_OK;
				}

{quote}			{
					/* single-quoted argument: record the quote type */
					*option_quote = '\'';
					BEGIN(xslashquote);
				}

"`"				{
					if (option_type == OT_VERBATIM)
					{
						/* in verbatim mode, backquote is not special */
						ECHO;
						BEGIN(xslashdefaultarg);
					}
					else
					{
						/* backquoted argument: record the quote type */
						*option_quote = '`';
						BEGIN(xslashbackquote);
					}
				}

:[A-Za-z0-9_]*	{
					/* Possible psql variable substitution */
					if (option_type == OT_VERBATIM)
						ECHO;
					else
					{
						const char *value;

						value = GetVariable(pset.vars, yytext + 1);

						/*
						 * The variable value is just emitted without any
						 * further examination.  This is consistent with the
						 * pre-8.0 code behavior, if not with the way that
						 * variables are handled outside backslash commands.
						 * Note that we needn't guard against recursion here.
						 * An unset variable contributes nothing to the output.
						 */
						if (value)
							appendPQExpBufferStr(output_buf, value);
					}

					*option_quote = ':';

					return LEXRES_OK;
				}

951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971
:'[A-Za-z0-9_]+'	{
					if (option_type == OT_VERBATIM)
						ECHO;
					else
					{
						escape_variable(false);
						return LEXRES_OK;
					}
				}


:\"[A-Za-z0-9_]+\"	{
					if (option_type == OT_VERBATIM)
						ECHO;
					else
					{
						escape_variable(true);
						return LEXRES_OK;
					}
				}

972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999
"|"				{
					ECHO;
					if (option_type == OT_FILEPIPE)
					{
						/* treat like whole-string case */
						BEGIN(xslashwholeline);
					}
					else
					{
						/* treat like default case */
						BEGIN(xslashdefaultarg);
					}
				}

{dquote}		{
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{
					ECHO;
					BEGIN(xslashdefaultarg);
				}

}

<xslashquote>{
	/*
	 * single-quoted text: copy literally except for '' and backslash
	 * sequences
	 */

{quote}			{ return LEXRES_OK; }

{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal case: digits start after the backslash */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex case: digits start after the backslash and 'x' */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

	/* any other backslashed character stands for itself */
"\\".			{ emit(yytext + 1, 1); }

{other}|\n		{ ECHO; }

}

<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote or end of line.
	 * Invocation of the command will happen in psql_scan_slash_option.
	 */

"`"				{ return LEXRES_OK; }

{other}|\n		{ ECHO; }

}

<xslashdefaultarg>{
	/*
	 * Copy everything until unquoted whitespace or end of line.  Quotes
	 * do not get stripped yet.
	 */

{space}			{
					/* whitespace ends the argument; leave it unread */
					yyless(0);
					return LEXRES_OK;
				}

"\\"			{
					/*
					 * unquoted backslash is end of command or next command,
					 * do not eat
					 *
					 * (this was not the behavior pre-8.0, but it seems
					 * consistent)
					 */
					yyless(0);
					return LEXRES_OK;
				}

{dquote}		{
					/* opening double quote: copy it, then scan quoted text */
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{ ECHO; }

}

<xslashquotedarg>{
	/* double-quoted text within a default-type argument: copy */

{dquote}		{
					/* closing quote is copied too; resume unquoted scanning */
					ECHO;
					BEGIN(xslashdefaultarg);
				}

{other}|\n		{ ECHO; }

}

<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					/* whitespace counts as leading while nothing is copied yet */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}

<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{ return LEXRES_OK; }

{other}|\n		{
					/* anything else is pushed back for the next scan to see */
					yyless(0);
					return LEXRES_OK;
				}

}

%%

/*
 * Create a lexer working state struct.
 */
PsqlScanState
psql_scan_create(void)
{
	/* zero-filled allocation, so every pointer field starts out NULL */
	PsqlScanState newstate = (PsqlScanStateData *)
		pg_malloc_zero(sizeof(PsqlScanStateData));

	/* put the lexer-related fields into their start conditions */
	psql_scan_reset(newstate);

	return newstate;
}

/*
 * Destroy a lexer working state struct, releasing all resources.
 */
void
psql_scan_destroy(PsqlScanState state)
{
	/* release the flex buffers of any scan still in progress */
	psql_scan_finish(state);

	/* release storage owned by the lexer state, such as dolqstart */
	psql_scan_reset(state);

	free(state);
}

/*
 * Set up to perform lexing of the given input line.
 *
 * The text at *line, extending for line_len bytes, will be scanned by
 * subsequent calls to the psql_scan routines.  psql_scan_finish should
 * be called when scanning is complete.  Note that the lexer retains
 * a pointer to the storage at *line --- this string must not be altered
 * or freed until after psql_scan_finish is called.
 */
void
psql_scan_setup(PsqlScanState state,
				const char *line, int line_len)
{
	/* Mustn't be scanning already */
	psql_assert(state->scanbufhandle == NULL);
	psql_assert(state->buffer_stack == NULL);

	/* Do we need to hack the character set encoding? */
	state->encoding = pset.encoding;
	state->safe_encoding = pg_valid_server_encoding_id(state->encoding);

	/* needed for prepare_buffer, which reads the encoding via cur_state */
	cur_state = state;

	/* Set up flex input buffer with appropriate translation and padding */
	state->scanbufhandle = prepare_buffer(line, line_len,
										  &state->scanbuf);
	state->scanline = line;

	/* Set lookaside data in case we have to map unsafe encoding */
	state->curline = state->scanbuf;
	state->refline = state->scanline;
}

/*
 * Do lexical analysis of SQL command text.
 *
 * The text previously passed to psql_scan_setup is scanned, and appended
 * (possibly with transformation) to query_buf.
 *
 * The return value indicates the condition that stopped scanning:
 *
 * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
 * transferred to query_buf.)  The command accumulated in query_buf should
 * be executed, then clear query_buf and call again to scan the remainder
 * of the line.
 *
 * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
 * Any previous data on the line has been transferred to query_buf.
 * The caller will typically next call psql_scan_slash_command(),
 * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
 *
 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
 * incomplete SQL command.  *prompt is set to the appropriate prompt type.
 *
 * PSCAN_EOL: the end of the line was reached, and there is no lexical
 * reason to consider the command incomplete.  The caller may or may not
 * choose to send it.  *prompt is set to the appropriate prompt type if
 * the caller chooses to collect more input.
 *
 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
 * be called next, then the cycle may be repeated with a fresh input line.
 *
 * In all cases, *prompt is set to an appropriate prompt type code for the
 * next line-input operation.
 */
PsqlScanResult
psql_scan(PsqlScanState state,
		  PQExpBuffer query_buf,
		  promptStatus_t *prompt)
{
	PsqlScanResult result;
	int			lexresult;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = query_buf;

	/* Resume in the topmost pushed buffer if there is one, else the line */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	BEGIN(state->start_state);

	/* And lex. */
	lexresult = yylex();

	/* Update static vars back to the state struct */
	state->start_state = YY_START;

	/*
	 * Check termination state and return appropriate result info.
	 */
	switch (lexresult)
	{
		case LEXRES_EOL:		/* end of input */
			switch (state->start_state)
			{
				case INITIAL:
					if (state->paren_depth > 0)
					{
						result = PSCAN_INCOMPLETE;
						*prompt = PROMPT_PAREN;
					}
					else if (query_buf->len > 0)
					{
						result = PSCAN_EOL;
						*prompt = PROMPT_CONTINUE;
					}
					else
					{
						/* never bother to send an empty buffer */
						result = PSCAN_INCOMPLETE;
						*prompt = PROMPT_READY;
					}
					break;
				case xb:
				case xh:
				case xq:
				case xe:
					/* these states all prompt for a closing single quote */
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_SINGLEQUOTE;
					break;
				case xc:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_COMMENT;
					break;
				case xd:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_DOUBLEQUOTE;
					break;
				case xdolq:
					result = PSCAN_INCOMPLETE;
					*prompt = PROMPT_DOLLARQUOTE;
					break;
				default:
					/* can't get here */
					fprintf(stderr, "invalid YY_START\n");
					exit(1);
			}
			break;
		case LEXRES_SEMI:		/* semicolon */
			result = PSCAN_SEMICOLON;
			*prompt = PROMPT_READY;
			break;
		case LEXRES_BACKSLASH:	/* backslash */
			result = PSCAN_BACKSLASH;
			*prompt = PROMPT_READY;
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid yylex result\n");
			exit(1);
	}

	return result;
}

/*
 * Clean up after scanning a string.  This flushes any unread input and
 * releases resources (but not the PsqlScanState itself).  Note however
 * that this does not reset the lexer scan state; that can be done by
 * psql_scan_reset(), which is an orthogonal operation.
 *
 * It is legal to call this when not scanning anything (makes it easier
 * to deal with error recovery).
 */
void
psql_scan_finish(PsqlScanState state)
{
	/* Drop any incomplete variable expansions. */
	while (state->buffer_stack != NULL)
		pop_buffer_stack(state);

	/* Done with the outer scan buffer, too */
	if (state->scanbufhandle)
		yy_delete_buffer(state->scanbufhandle);
	state->scanbufhandle = NULL;

	/* free(NULL) is a no-op, so this is safe when not scanning anything */
	free(state->scanbuf);
	state->scanbuf = NULL;
}

/*
 * Reset lexer scanning state to start conditions.  This is appropriate
 * for executing \r psql commands (or any other time that we discard the
 * prior contents of query_buf).  It is not, however, necessary to do this
 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
 * conditions are returned.
 *
 * Note that this is unrelated to flushing unread input; that task is
 * done by psql_scan_finish().
 */
void
psql_scan_reset(PsqlScanState state)
{
	state->start_state = INITIAL;
	state->paren_depth = 0;
	state->xcdepth = 0;			/* not really necessary */

	/* discard any saved dollar-quote tag; free(NULL) is a no-op */
	free(state->dolqstart);
	state->dolqstart = NULL;
}

/*
 * Return true if lexer is currently in an "inside quotes" state.
 *
 * This is pretty grotty but is needed to preserve the old behavior
 * that mainloop.c drops blank lines not inside quotes without even
 * echoing them.
 */
bool
psql_scan_in_quote(PsqlScanState state)
{
	/* any saved start condition other than INITIAL counts as "in quotes" */
	bool		at_top_level = (state->start_state == INITIAL);

	return !at_top_level;
}

/*
 * Scan the command name of a psql backslash command.  This should be called
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 * has been consumed through the leading backslash.
 *
 * The return value is a malloc'd copy of the command name, as parsed off
 * from the input.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData mybuf;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = &mybuf;

	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	BEGIN(xslashcmd);

	/*
	 * And lex.  There are no possible errors in this lex state, so the
	 * result code is deliberately ignored.
	 */
	(void) yylex();

	/* caller takes ownership of the buffer's malloc'd data */
	return mybuf.data;
}

/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
 *
 * type tells what processing, if any, to perform on the option string;
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 * letters.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the quote symbol.
 *
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 * be taken as part of the option string will be stripped.
 *
 * NOTE: the only possible syntax errors for backslash options are unmatched
 * quotes, which are detected when we run out of input.  Therefore, on a
 * syntax error we just throw away the string and return NULL; there is no
 * need to worry about flushing remaining input.
 */
char *
psql_scan_slash_option(PsqlScanState state,
					   enum slash_option_type type,
					   char *quote,
					   bool semicolon)
{
	PQExpBufferData mybuf;
	int			lexresult;
	char		local_quote;
	bool		badarg;

	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* The lexer rules always write through option_quote; give them a target */
	if (quote == NULL)
		quote = &local_quote;
	*quote = 0;

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = &mybuf;
	option_type = type;
	option_quote = quote;

	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	if (type == OT_WHOLE_LINE)
		BEGIN(xslashwholeline);
	else
		BEGIN(xslasharg);

	/* And lex. */
	lexresult = yylex();

	/*
	 * Check the lex result: we should have gotten back either LEXRES_OK
	 * or LEXRES_EOL (the latter indicating end of string).  If we were inside
	 * a quoted string, as indicated by YY_START, EOL is an error.
	 */
	psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
	badarg = false;
	switch (YY_START)
	{
		case xslasharg:
			/* empty arg, or possibly a psql variable substitution */
			break;
		case xslashquote:
			if (lexresult != LEXRES_OK)
				badarg = true;		/* hit EOL not ending quote */
			break;
		case xslashbackquote:
			if (lexresult != LEXRES_OK)
				badarg = true;		/* hit EOL not ending quote */
			else
			{
				/* Perform evaluation of backticked command */
				char	   *cmd = mybuf.data;
				FILE	   *fd;
				bool		error = false;
				PQExpBufferData output;
				char		buf[512];
				size_t		result;

				fd = popen(cmd, PG_BINARY_R);
				if (!fd)
				{
					psql_error("%s: %s\n", cmd, strerror(errno));
					error = true;
				}

				initPQExpBuffer(&output);

				if (!error)
				{
					/* collect the command's entire output */
					do
					{
						result = fread(buf, 1, sizeof(buf), fd);
						if (ferror(fd))
						{
							psql_error("%s: %s\n", cmd, strerror(errno));
							error = true;
							break;
						}
						appendBinaryPQExpBuffer(&output, buf, result);
					} while (!feof(fd));
				}

				if (fd && pclose(fd) == -1)
				{
					psql_error("%s: %s\n", cmd, strerror(errno));
					error = true;
				}

				if (PQExpBufferBroken(&output))
				{
					psql_error("%s: out of memory\n", cmd);
					error = true;
				}

				/* Now done with cmd, transfer result to mybuf */
				resetPQExpBuffer(&mybuf);

				if (!error)
				{
					/* strip any trailing newline */
					if (output.len > 0 &&
						output.data[output.len - 1] == '\n')
						output.len--;
					appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
				}

				termPQExpBuffer(&output);
			}
			break;
		case xslashdefaultarg:
			/* Strip any trailing semi-colons if requested */
			if (semicolon)
			{
				while (mybuf.len > 0 &&
					   mybuf.data[mybuf.len - 1] == ';')
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}

			/*
			 * If SQL identifier processing was requested, then we strip out
			 * excess double quotes and downcase unquoted letters.
			 * Doubled double-quotes become output double-quotes, per spec.
			 *
			 * Note that a string like FOO"BAR"BAZ will be converted to
			 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
			 * which would have us parse it as several identifiers.  But
			 * for psql's purposes, we want a string like "foo"."bar" to
			 * be treated as one option, so there's little choice.
			 */
			if (type == OT_SQLID || type == OT_SQLIDHACK)
			{
				bool		inquotes = false;
				char	   *cp = mybuf.data;

				while (*cp)
				{
					if (*cp == '"')
					{
						if (inquotes && cp[1] == '"')
						{
							/*
							 * Keep the first quote, remove the second.  A
							 * doubled quote is literal data, so it must not
							 * change the quoting state; otherwise the text
							 * after it would be treated as unquoted and
							 * wrongly downcased.
							 */
							cp++;
						}
						else
							inquotes = !inquotes;
						/* Collapse out quote at *cp (moves the NUL too) */
						memmove(cp, cp + 1, strlen(cp));
						mybuf.len--;
						/* do not advance cp */
					}
					else
					{
						if (!inquotes && type == OT_SQLID)
							*cp = pg_tolower((unsigned char) *cp);
						cp += PQmblen(cp, pset.encoding);
					}
				}
			}
			break;
		case xslashquotedarg:
			/* must have hit EOL inside double quotes */
			badarg = true;
			break;
		case xslashwholeline:
			/* always okay */
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid YY_START\n");
			exit(1);
	}

	if (badarg)
	{
		psql_error("unterminated quoted string\n");
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/*
	 * An unquoted empty argument isn't possible unless we are at end of
	 * command.  Return NULL instead.
	 */
	if (mybuf.len == 0 && *quote == 0)
	{
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/* Else return the completed string. */
	return mybuf.data;
}

/*
 * Eat up any unused \\ to complete a backslash command.
 */
void
psql_scan_slash_command_end(PsqlScanState state)
{
	/* Must be scanning already */
	psql_assert(state->scanbufhandle);

	/* Set up static variables that will be used by yylex */
	cur_state = state;
	output_buf = NULL;			/* the xslashend rules never ECHO */

	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf);
	else
		yy_switch_to_buffer(state->scanbufhandle);

	BEGIN(xslashend);

	/*
	 * And lex.  There are no possible errors in this lex state, so the
	 * result code is deliberately ignored.
	 */
	(void) yylex();
}


/*
 * Push the given string onto the stack of stuff to scan.
 *
 * cur_state must point to the active PsqlScanState.
 *
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 */
static void
1676
push_new_buffer(const char *newstr, const char *varname)
1677 1678 1679 1680
{
	StackElem  *stackelem;

	stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1681 1682 1683 1684 1685 1686 1687 1688

	/*
	 * In current usage, the passed varname points at the current flex
	 * input buffer; we must copy it before calling prepare_buffer()
	 * because that will change the buffer state.
	 */
	stackelem->varname = varname ? pg_strdup(varname) : NULL;

1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705
	stackelem->buf = prepare_buffer(newstr, strlen(newstr),
									&stackelem->bufstring);
	cur_state->curline = stackelem->bufstring;
	if (cur_state->safe_encoding)
	{
		stackelem->origstring = NULL;
		cur_state->refline = stackelem->bufstring;
	}
	else
	{
		stackelem->origstring = pg_strdup(newstr);
		cur_state->refline = stackelem->origstring;
	}
	stackelem->next = cur_state->buffer_stack;
	cur_state->buffer_stack = stackelem;
}

/*
 * Pop the topmost buffer stack item (there must be one!)
 *
 * NB: after this, the flex input state is unspecified; caller must
 * switch to an appropriate buffer to continue lexing.
 */
static void
pop_buffer_stack(PsqlScanState state)
{
	StackElem  *top = state->buffer_stack;

	/* unlink first, then tear down everything the element owns */
	state->buffer_stack = top->next;

	yy_delete_buffer(top->buf);
	free(top->bufstring);

	/* origstring and varname may be NULL; free(NULL) does nothing */
	free(top->origstring);
	free(top->varname);

	free(top);
}

/*
 * Check if specified variable name is the source for any string
 * currently being scanned
 */
static bool
var_is_current_source(PsqlScanState state, const char *varname)
{
	StackElem  *elem = state->buffer_stack;

	/* walk the stack from the top; elements without a varname never match */
	while (elem != NULL)
	{
		if (elem->varname != NULL && strcmp(elem->varname, varname) == 0)
			return true;
		elem = elem->next;
	}

	return false;
}

/*
 * Set up a flex input buffer to scan the given data.  We always make a
 * copy of the data.  If working in an unsafe encoding, the copy has the
 * non-first bytes of multibyte sequences replaced by FFs to avoid fooling
 * the lexer rules.
 *
 * cur_state must point to the active PsqlScanState.
 *
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 */
static YY_BUFFER_STATE
prepare_buffer(const char *txt, int len, char **txtcopy)
{
	char	   *newtxt;

	/* Flex wants two \0 characters after the actual data */
	newtxt = pg_malloc(len + 2);
	*txtcopy = newtxt;
	newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;

	if (cur_state->safe_encoding)
		memcpy(newtxt, txt, len);
	else
	{
		/* Gotta do it the hard way */
		int		i = 0;

		while (i < len)
		{
			int		thislen = PQmblen(txt + i, cur_state->encoding);

			/* first byte should always be okay... */
			newtxt[i] = txt[i];
			i++;

			/*
			 * Replace the remaining bytes of the character with 0xFF, but
			 * never go past len: if the input ends with a truncated
			 * multibyte character, the reported length would otherwise
			 * make us clobber the end-of-buffer markers or write beyond
			 * the allocation.
			 */
			while (--thislen > 0 && i < len)
				newtxt[i++] = (char) 0xFF;
		}
	}

	return yy_scan_buffer(newtxt, len + 2);
}

/*
 * emit() --- body for ECHO macro
 *
 * NB: this must be used for ALL and ONLY the text copied from the flex
 * input data.  If you pass it something that is not part of the yytext
 * string, you are making a mistake.  Internally generated text can be
 * appended directly to output_buf.
 */
static void
emit(const char *txt, int len)
{
	const char *orig;
	int			pos;

	/* Safe encoding: the scanned bytes are the real bytes, copy them as-is */
	if (cur_state->safe_encoding)
	{
		appendBinaryPQExpBuffer(output_buf, txt, len);
		return;
	}

	/*
	 * Unsafe encoding: locate the same offset in the reference string, so
	 * each 0xFF placeholder can be swapped back for the original byte.
	 */
	orig = cur_state->refline + (txt - cur_state->curline);

	for (pos = 0; pos < len; pos++)
	{
		if (txt[pos] == (char) 0xFF)
			appendPQExpBufferChar(output_buf, orig[pos]);
		else
			appendPQExpBufferChar(output_buf, txt[pos]);
	}
}
1818 1819 1820 1821 1822 1823

static bool
is_utf16_surrogate_first(uint32 c)
{
	/* true exactly when c lies in the inclusive range 0xD800 .. 0xDBFF */
	return !(c < 0xD800 || c > 0xDBFF);
}
1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871

static void
escape_variable(bool as_ident)
{
	char	   *last = yytext + yyleng - 1;
	char		held = *last;
	const char *value;

	/*
	 * Temporarily overwrite the token's final character with a NUL so the
	 * text starting at yytext + 2 can be used as the variable name.
	 */
	*last = '\0';
	value = GetVariable(pset.vars, yytext + 2);

	if (value != NULL)
	{
		if (pset.db == NULL)
			psql_error("can't escape without active connection\n");
		else
		{
			size_t		vlen = strlen(value);
			char	   *quoted;

			if (as_ident)
				quoted = PQescapeIdentifier(pset.db, value, vlen);
			else
				quoted = PQescapeLiteral(pset.db, value, vlen);

			if (quoted != NULL)
			{
				/* success: output the escaped form in place of the token */
				appendPQExpBufferStr(output_buf, quoted);
				PQfreemem(quoted);
				return;
			}

			psql_error("%s", PQerrorMessage(pset.db));
		}
	}

	/*
	 * Unknown variable, no connection, or escaping failure: put the saved
	 * character back and pass the original text through unchanged.
	 */
	*last = held;
	emit(yytext, yyleng);
}