pgc.l 26.0 KB
Newer Older
1
%{
2 3 4 5 6 7 8 9
/*-------------------------------------------------------------------------
 *
 * pgc.l
 *	  lexical scanner for ecpg
 *
 * This is a modified version of src/backend/parser/scan.l
 *
 *
B
Bruce Momjian 已提交
10
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
11
 * Portions Copyright (c) 1994, Regents of the University of California
12 13 14
 *
 *
 * IDENTIFICATION
15
 *	  $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.104 2003/02/14 13:17:13 meskes Exp $
16 17 18
 *
 *-------------------------------------------------------------------------
 */
19 20
#include "postgres_fe.h"

M
Marc G. Fournier 已提交
21
#include <ctype.h>
M
Marc G. Fournier 已提交
22
#include <sys/types.h>
23
#include <limits.h>
24
#include <errno.h>
25

M
 
Marc G. Fournier 已提交
26
#include "extern.h"
27

28

M
Marc G. Fournier 已提交
29
extern YYSTYPE yylval;
30

31 32
static int		xcdepth = 0;	/* depth of nesting in slash-star comments */

33 34 35 36 37 38
/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal.  Call startlit to reset buffer
 * to empty, addlit to add text.  Note that the buffer is permanently
 * malloc'd to the largest size needed so far in the current run.
 */
39 40 41
static char    *literalbuf = NULL;		/* expandable buffer */
static int		literallen;				/* actual current length */
static int		literalalloc;			/* current allocated buffer size */
42

43
#define startlit()	(literalbuf[0] = '\0', literallen = 0)
44
static void addlit(char *ytext, int yleng);
45
static void addlitchar (unsigned char);
46

M
Michael Meskes 已提交
47
char *token_start;
M
Michael Meskes 已提交
48
int state_before;
M
Marc G. Fournier 已提交
49

50 51 52 53 54 55 56
struct _yy_buffer 
{ 
	YY_BUFFER_STATE		buffer;
	long				lineno;
	char		  		*filename;
	struct _yy_buffer 	*next;
} *yy_buffer = NULL;
57

M
 
Marc G. Fournier 已提交
58 59
static char *old;

60 61 62
#define MAX_NESTED_IF 128
static short preproc_tos;
static short ifcond;
63 64 65 66
static struct _if_value 
{
	short condition;
	short else_branch;
67 68
} stacked_if_value[MAX_NESTED_IF];

69
%}
M
Michael Meskes 已提交
70

71 72 73 74
%option 8bit
%option never-interactive
%option noyywrap

75
%option yylineno
76

M
 
Marc G. Fournier 已提交
77
%s C SQL incl def def_ident
M
Michael Meskes 已提交
78 79 80

/*
 * OK, here is a short description of lex/flex rules behavior.
M
Marc G. Fournier 已提交
81 82
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
M
Michael Meskes 已提交
83 84 85
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
M
Marc G. Fournier 已提交
86
 *
M
Michael Meskes 已提交
87 88
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
M
Marc G. Fournier 已提交
89
 * Exclusive states:
 *	<xb> bit string literal
 *	<xc> extended C-style comments - thomas 1997-07-12
 *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
 *	<xdc> double-quoted C strings
 *	<xh> hexadecimal numeric string - thomas 1997-11-16
 *	<xq> quoted strings - thomas 1997-07-30
 *	<xcond> identifier following EXEC SQL IFDEF / IFNDEF / ELIF
 *	<xskip> text skipped while an EXEC SQL IFDEF / IFNDEF condition is false
 */

M
Michael Meskes 已提交
97
%x xb
M
Marc G. Fournier 已提交
98 99
%x xc
%x xd
100
%x xdc
M
Marc G. Fournier 已提交
101 102
%x xh
%x xq
103 104 105
%x xpre
%x xcond
%x xskip
M
Marc G. Fournier 已提交
106

M
Michael Meskes 已提交
107
/* Bit string
M
Marc G. Fournier 已提交
108
 */
M
Michael Meskes 已提交
109 110 111 112
xbstart			[bB]{quote}
xbstop			{quote}
xbinside		[^']*
xbcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
113 114 115 116 117

/* Hexadecimal number
 */
xhstart			[xX]{quote}
xhstop			{quote}
118
xhinside		[^']*
M
Michael Meskes 已提交
119
xhcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
120

M
Michael Meskes 已提交
121 122 123 124
/* National character
 */
xnstart                        [nN]{quote}

125
/* C version of hex number
M
Michael Meskes 已提交
126 127 128
 */
xch			0[xX][0-9A-Fa-f]*

M
Marc G. Fournier 已提交
129 130 131 132 133 134 135 136
/* Extended quote
 * xqdouble implements SQL92 embedded quote
 * xqcat allows strings to cross input lines
 */
quote			'
xqstart			{quote}
xqstop			{quote}
xqdouble		{quote}{quote}
M
Michael Meskes 已提交
137
xqinside		[^\\']+
138 139
xqescape                [\\][^0-7]
xqoctesc                [\\][0-7]{1,3}
M
Michael Meskes 已提交
140
xqcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
141

142
/* Double quote
M
Marc G. Fournier 已提交
143 144 145 146 147
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
148
xddouble				{dquote}{dquote}
M
Michael Meskes 已提交
149
xdinside		[^"]+
M
Michael Meskes 已提交
150 151 152 153 154 155

/* special stuff for C strings */
xdcqq			\\\\
xdcqdq			\\\"
xdcother		[^"]
xdcinside		({xdcqq}|{xdcqdq}|{xdcother})
M
Marc G. Fournier 已提交
156

157 158
/* C-style comments
 *
M
Michael Meskes 已提交
159 160 161
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
162
 * a longer match --- remember lex will prefer a longer match!	Also, if we
163 164
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
M
Michael Meskes 已提交
165
 * The solution is two-fold:
166
 * 1. append {op_chars}* to xcstart so that it matches as much text as
167 168 169
 *	  {operator} would. Then the tie-breaker (first matching rule of same
 *	  length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *	  in case it contains a star-slash that should terminate the comment.
M
Michael Meskes 已提交
170
 * 2. In the operator rule, check for slash-star within the operator, and
171 172
 *	  if found throw it back with yyless().  This handles the plus-slash-star
 *	  problem.
M
Michael Meskes 已提交
173 174
 * SQL92-style comments, which start with dash-dash, have similar interactions
 * with the operator rule.
M
Marc G. Fournier 已提交
175
 */
176
xcstart			\/\*{op_chars}*
M
Michael Meskes 已提交
177
xcstop			\*+\/
178
xcinside		[^*/]+
M
Marc G. Fournier 已提交
179 180 181

digit			[0-9]
letter			[\200-\377_A-Za-z]
182
letter_or_digit [\200-\377_A-Za-z0-9]
M
Marc G. Fournier 已提交
183 184 185 186 187

identifier		{letter}{letter_or_digit}*

typecast		"::"

188 189
/*
 * "self" is the set of chars that should be returned as single-character
190
 * tokens.	"op_chars" is the set of chars that can make up "Op" tokens,
191 192 193 194 195 196 197
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
M
Michael Meskes 已提交
198
self			[,()\[\].;$\:\+\-\*\/\%\^\<\>\=]
M
Michael Meskes 已提交
199
op_chars		[\~\!\@\#\^\&\|\`\?\$\+\-\*\/\%\<\>\=]
200
operator		{op_chars}+
M
Marc G. Fournier 已提交
201

202
/* we no longer allow unary minus in numbers.
203
 * instead we pass it separately to parser. there it gets
204
 * coerced via doNegate() -- Leon aug 20 1999
M
Michael Meskes 已提交
205
 */
206

M
Michael Meskes 已提交
207 208
integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
209
real			((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
M
Marc G. Fournier 已提交
210 211 212

param			\${integer}

M
Michael Meskes 已提交
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  SQL92-style comments, which start with -- and extend to the
 * next newline, are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 */

M
Michael Meskes 已提交
228
ccomment		"//".*\n
M
Marc G. Fournier 已提交
229

230
space			[ \t\n\r\f]
M
Michael Meskes 已提交
231
horiz_space		[ \t\f]
232
newline					[\n\r]
M
Michael Meskes 已提交
233 234
non_newline		[^\n\r]

235
comment			("--"{non_newline}*)
M
Michael Meskes 已提交
236

237
whitespace		({space}+|{comment})
M
Michael Meskes 已提交
238 239 240 241 242 243 244 245 246

/*
 * SQL92 requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

horiz_whitespace	({horiz_space}|{comment})
247
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
M
Michael Meskes 已提交
248

249 250 251 252
/* special characters for other dbms */
/* we have to react differently in compat mode */
informix_special	[\$]

M
Marc G. Fournier 已提交
253 254 255
other			.

/* some stuff needed for ecpg */
256 257
exec	[eE][xX][eE][cC]
sql		[sS][qQ][lL]
M
 
Marc G. Fournier 已提交
258
define	[dD][eE][fF][iI][nN][eE]
259
include [iI][nN][cC][lL][uU][dD][eE]
M
Marc G. Fournier 已提交
260

261 262 263 264 265 266
ifdef	[iI][fF][dD][eE][fF]
ifndef	[iI][fF][nN][dD][eE][fF]
else	[eE][lL][sS][eE]
elif	[eE][lL][iI][fF]
endif	[eE][nN][dD][iI][fF]

267
exec_sql		{exec}{space}*{sql}{space}*
M
Michael Meskes 已提交
268 269
ipdigit			({digit}|{digit}{digit}|{digit}{digit}{digit})
ip			{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
270 271

/* Take care of cpp continuation lines */
272
cppline			{space}*#(.*\\{space})*.*
M
 
Marc G. Fournier 已提交
273

274
/*
M
Marc G. Fournier 已提交
275
 * Quoted strings must allow some special characters such as single-quote
276
 *	and newline.
M
Michael Meskes 已提交
277
 * Embedded single-quotes are implemented both in the SQL92-standard
278 279
 *	style of two adjacent single quotes "''" and in the Postgres/Java style
 *	of escaped-quote "\'".
M
Marc G. Fournier 已提交
280
 * Other embedded escaped characters are matched explicitly and the leading
281
 *	backslash is dropped from the string. - thomas 1997-09-24
M
Michael Meskes 已提交
282
 * Note that xcstart must appear before operator, as explained above!
283
 *	Also whitespace (comment) must appear before operator.
M
Marc G. Fournier 已提交
284 285
 */

286
%%
287 288 289 290 291 292

%{
                                       /* code to execute during start of each call of yylex() */
                                       token_start = NULL;
%}

M
Michael Meskes 已提交
293
<SQL>{whitespace}	{ /* ignore */ }
M
Marc G. Fournier 已提交
294

295
{xcstart}			{
296
						token_start = yytext;
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
						state_before = YYSTATE;
						xcdepth = 0;
						BEGIN(xc);
						/* Put back any characters past slash-star; see above */
						yyless(2);
						fputs("/*", yyout);
					}
<xc>{xcstart}		{
						xcdepth++;
				 		/* Put back any characters past slash-star; see above */
				 		yyless(2);
						fputs("/*", yyout);
					}

<xc>{xcstop}		{
						ECHO;
						if (xcdepth <= 0)
314
						{
M
Michael Meskes 已提交
315
							BEGIN(state_before);
316 317
							token_start = NULL;
						}
318 319 320 321 322 323 324 325 326
						else
							xcdepth--;
					}

<xc>{xcinside}		{ ECHO; }
<xc>{op_chars}		{ ECHO; }

<xc><<EOF>>			{ mmerror(PARSE_ERROR, ET_ERROR, "Unterminated /* comment"); }

M
Michael Meskes 已提交
327
<SQL>{xbstart}	{
328
						token_start = yytext;
M
Michael Meskes 已提交
329
						BEGIN(xb);
330
						startlit();
331
						addlitchar('b');
332
					}
M
Michael Meskes 已提交
333
<xb>{xbstop}	{
						/*
						 * End of a bit string literal.  literalbuf holds the
						 * 'b' marker added by the {xbstart} rule followed by the
						 * accumulated text, so the digit check has to start
						 * after that first character.
						 */
						BEGIN(SQL);
						if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
							mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");
						/*
						 * Hand out a private copy: literalbuf is reused (and
						 * possibly realloc'd) by the next literal.
						 */
						yylval.str = mm_strdup(literalbuf);
						return BCONST;
					}
M
Marc G. Fournier 已提交
340 341

<xh>{xhinside}	|
M
Michael Meskes 已提交
342
<xb>{xbinside}	{ addlit(yytext, yyleng); }
M
Marc G. Fournier 已提交
343
<xh>{xhcat}		|
M
Michael Meskes 已提交
344 345
<xb>{xbcat}		{ /* ignore */ }
<xb><<EOF>>		{ mmerror(PARSE_ERROR, ET_ERROR, "Unterminated bit string"); }
M
Marc G. Fournier 已提交
346 347

<SQL>{xhstart}		{
348
						token_start = yytext;
349 350
						BEGIN(xh);
						startlit();
351
						addlitchar('x');
352 353
					}
<xh>{xhstop}		{
						/*
						 * End of a hexadecimal literal.  <xh> is only entered
						 * from the <SQL>{xhstart} rule, so go back to SQL;
						 * without this the scanner would stay in the exclusive
						 * xh state after returning the token.
						 */
						BEGIN(SQL);
						/*
						 * Hand out a private copy: literalbuf is reused (and
						 * possibly realloc'd) by the next literal.
						 */
						yylval.str = mm_strdup(literalbuf);
						return XCONST;
					}

<xh><<EOF>>			{ mmerror(PARSE_ERROR, ET_ERROR, "Unterminated hexadecimal integer"); }
M
Michael Meskes 已提交
359 360 361 362 363 364 365 366 367
<SQL>{xnstart}              {
				/* National character.
				 * Need to remember type info to flow it forward into the parser.
				 * Not yet implemented. - thomas 2002-06-17
				 *
				 * The literal body is scanned by the <xq> rules, whose stop
				 * rule does BEGIN(state_before), so the state to return to
				 * must be recorded here just as the {xqstart} rule does.
				 */
				token_start = yytext;
				state_before = YYSTATE;
				BEGIN(xq);
				startlit();
			}
368
<C,SQL>{xqstart}	{
369
						token_start = yytext;
370 371 372 373 374 375 376 377 378
						state_before = YYSTATE;
						BEGIN(xq);
						startlit();
					}
<xq>{xqstop}		{
						BEGIN(state_before);
						yylval.str = mm_strdup(literalbuf);
						return SCONST;
					}
379 380
<xq>{xqdouble}		{ addlitchar('\''); }
<xq>{xqinside}		{ addlit(yytext, yyleng); }
M
Michael Meskes 已提交
381
<xq>{xqescape}  	{ addlit(yytext, yyleng); }
382
<xq>{xqoctesc}          { addlit(yytext, yyleng); }
383 384 385 386 387 388 389 390 391
<xq>{xqcat}			{ /* ignore */ }

<xq><<EOF>>			{ mmerror(PARSE_ERROR, ET_ERROR, "Unterminated quoted string"); }

<SQL>{xdstart}		{
						state_before = YYSTATE;
						BEGIN(xd);
						startlit();
					}
M
Michael Meskes 已提交
392
<xd>{xdstop}		{
393
						BEGIN(state_before);
394 395 396
						if (literallen == 0)
							mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
						if (literallen >= NAMEDATALEN)
397
						{
398
							snprintf(errortext, sizeof(errortext), "identifier \"%s\" will be truncated to \"%.*s\"",
399 400 401 402 403 404 405 406 407 408 409 410 411
									literalbuf, NAMEDATALEN-1, literalbuf);
							literalbuf[NAMEDATALEN-1] = '\0';
							mmerror(PARSE_ERROR, ET_WARNING, errortext);
						}
					
						yylval.str = mm_strdup(literalbuf);
						return CSTRING;
					}
<xdc>{xdstop}		{
						BEGIN(state_before);
						yylval.str = mm_strdup(literalbuf);
						return CSTRING;
					}
412
<xd>{xddouble}		{ addlitchar('"'); }
413 414 415 416 417 418
<xd>{xdinside}		{ addlit(yytext, yyleng); }
<xd,xdc><<EOF>>		{ mmerror(PARSE_ERROR, ET_ERROR, "Unterminated quoted identifier"); }
<C,SQL>{xdstart}	{
						state_before = YYSTATE;
						BEGIN(xdc);
						startlit();
M
Michael Meskes 已提交
419
					}
420 421
<xdc>{xdcinside}	{ addlit(yytext, yyleng); }
<SQL>{typecast}		{ return TYPECAST; }
422 423 424 425 426 427 428 429 430
<SQL>{informix_special}	{
			  /* are we simulating Informix? */
                          if (compat == ECPG_COMPAT_INFORMIX)
			  {
			  	unput(':');
			  }
			  else
				return yytext[0];
			}
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
<SQL>{self}			{ /*
					   * We may find a ';' inside a structure
					   * definition in a TYPE or VAR statement.
					   * This is not an EOL marker.
					   */
					  if (yytext[0] == ';' && struct_level == 0)
						 BEGIN C;
					  return yytext[0];
					}
<SQL>{operator}		{
						/*
						 * Check for embedded slash-star or dash-dash; those
						 * are comment starts, so operator must stop there.
						 * Note that slash-star or dash-dash at the first
						 * character will match a prior rule, not this one.
						 */
						int		nchars = yyleng;
448 449
						char   *slashstar = strstr(yytext, "/*");
						char   *dashdash = strstr(yytext, "--");
450 451

						if (slashstar && dashdash)
452
						{
453 454 455
							/* if both appear, take the first one */
							if (slashstar > dashdash)
								slashstar = dashdash;
456
						}
457 458 459
						else if (!slashstar)
							slashstar = dashdash;
						if (slashstar)
460
							nchars = slashstar - yytext;
461 462

						/*
463 464 465 466 467 468
						 * For SQL92 compatibility, '+' and '-' cannot be the
						 * last char of a multi-char operator unless the operator
						 * contains chars that are not in SQL92 operators.
						 * The idea is to lex '=-' as two operators, but not
						 * to forbid operator names like '?-' that could not be
						 * sequences of SQL92 operators.
M
Michael Meskes 已提交
469
						 */
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
						while (nchars > 1 &&
							   (yytext[nchars-1] == '+' ||
								yytext[nchars-1] == '-'))
						{
							int		ic;

							for (ic = nchars-2; ic >= 0; ic--)
							{
								if (strchr("~!@#^&|`?$%", yytext[ic]))
									break;
							}
							if (ic >= 0)
								break; /* found a char that makes it OK */
							nchars--; /* else remove the +/-, and check again */
						}

						if (nchars < yyleng)
						{
							/* Strip the unwanted chars from the token */
							yyless(nchars);
							/*
							 * If what we have left is only one char, and it's
							 * one of the characters matching "self", then
							 * return it as a character token the same way
							 * that the "self" rule would have.
							 */
							if (nchars == 1 &&
								strchr(",()[].;$:+-*/%^<>=", yytext[0]))
								return yytext[0];
						}

						/* Convert "!=" operator to "<>" for compatibility */
502
						if (strcmp(yytext, "!=") == 0)
503 504
							yylval.str = mm_strdup("<>");
						else
505
							yylval.str = mm_strdup(yytext);
506
						return Op;
M
Michael Meskes 已提交
507
					}
508
<SQL>{param}		{
509
						yylval.ival = atol(yytext+1);
510 511 512 513 514
						return PARAM;
					}
<C,SQL>{integer}	{
						long val;
						char* endptr;
M
Michael Meskes 已提交
515

M
Marc G. Fournier 已提交
516
						errno = 0;
517 518 519 520 521 522 523 524 525
						val = strtol((char *)yytext, &endptr,10);
						if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64
							/* if long > 32 bits, check for overflow of int4 */
							|| val != (long) ((int32) val)
#endif
							)
						{
							errno = 0;
526
							yylval.str = mm_strdup(yytext);
527 528 529 530 531 532
							return FCONST;
						}
						yylval.ival = val;
						return ICONST;
					}
<SQL>{ip}			{
533
						yylval.str = mm_strdup(yytext);
534 535 536
						return IP;
					}
<C,SQL>{decimal}	{
						/*
						 * Restricted to the C and SQL states like {integer} and
						 * {real}.  Without a start condition this rule is also
						 * active in the inclusive def/incl states, where it
						 * would beat the one-character <def>[^;] rule and
						 * return FCONST in the middle of an EXEC SQL DEFINE.
						 */
						yylval.str = mm_strdup(yytext);
						return FCONST;
					}
<C,SQL>{real}		{
541
						yylval.str = mm_strdup(yytext);
M
Michael Meskes 已提交
542
						return FCONST;
M
Marc G. Fournier 已提交
543
					}
544
<SQL>:{identifier}(("->"|\.){identifier})*	{
545
						yylval.str = mm_strdup(yytext+1);
546 547
						return(CVARIABLE);
					}
M
Marc G. Fournier 已提交
548
<SQL>{identifier}	{
549 550 551 552
						ScanKeyword    *keyword;
						struct _defines *ptr;

						/* Is it an SQL keyword? */
553
						keyword = ScanKeywordLookup(yytext);
554 555 556 557
						if (keyword != NULL)
							return keyword->value;

						/* Is it an ECPG keyword? */
558
						keyword = ScanECPGKeywordLookup( yytext);
559 560 561
						if (keyword != NULL)
							return keyword->value;

562 563 564 565 566 567
						/* Is it a C keyword? */
						keyword = ScanCKeywordLookup(yytext);
						if (keyword != NULL)
							return keyword->value;


568 569
						/* How about a DEFINE? */
						for (ptr = defines; ptr; ptr = ptr->next)
M
Marc G. Fournier 已提交
570
						{
571 572 573
							if (strcmp(yytext, ptr->old) == 0)
							{
								struct _yy_buffer *yb;
M
 
Marc G. Fournier 已提交
574

575 576 577 578 579 580
								yb = mm_alloc(sizeof(struct _yy_buffer));

								yb->buffer =  YY_CURRENT_BUFFER;
								yb->lineno = yylineno;
								yb->filename = mm_strdup(input_filename);
								yb->next = yy_buffer;
M
 
Marc G. Fournier 已提交
581

582
								yy_buffer = yb;
M
 
Marc G. Fournier 已提交
583

584 585 586 587
								yy_scan_string(ptr->new);
								break;
							}
						}
M
 
Marc G. Fournier 已提交
588

589 590 591 592 593 594 595 596 597 598
						/*
						 * None of the above.  Return it as an identifier.
						 *
						 * The backend would attempt to truncate and case-fold
						 * the identifier, but I see no good reason for ecpg
						 * to do so; that's just another way that ecpg could get
						 * out of step with the backend.
						 */
						if (ptr == NULL)
						{
M
Michael Meskes 已提交
599
							yylval.str = mm_strdup(yytext);
600
							return IDENT;
M
Marc G. Fournier 已提交
601 602
						}
					}
603 604
<SQL>{other}		{ return yytext[0]; }
<C>{exec_sql}		{ BEGIN SQL; return SQL_START; }
605 606 607 608 609 610 611 612 613 614 615
<C>{informix_special}	{ 
			  /* are we simulating Informix? */
			  if (compat == ECPG_COMPAT_INFORMIX)
			  {
			  	BEGIN SQL;
				return SQL_START;
			  }
			  else
			  	return S_ANYTHING;
			 }
<C>{ccomment}		 { /* ignore */ }
M
Michael Meskes 已提交
616
<C>{xch}			{
617
						char* endptr;
M
Michael Meskes 已提交
618 619

						errno = 0;
620 621 622 623
						yylval.ival = strtol((char *)yytext,&endptr,16);
						if (*endptr != '\0' || errno == ERANGE)
						{
							errno = 0;
624
							yylval.str = mm_strdup(yytext);
625 626 627 628 629
							return SCONST;
						}
						return ICONST;
					}
<C>{cppline}		{
630
						yylval.str = mm_strdup(yytext);
631
						return(CPP_LINE);
M
Michael Meskes 已提交
632
					}
633 634
<C>{identifier} 	{
						ScanKeyword		*keyword;
M
 
Marc G. Fournier 已提交
635

636
						keyword = ScanCKeywordLookup(yytext);
637 638 639 640
						if (keyword != NULL) {
							return keyword->value;
						}
						else
M
 
Marc G. Fournier 已提交
641
						{
642 643 644
							struct _defines *ptr;

							for (ptr = defines; ptr; ptr = ptr->next)
M
 
Marc G. Fournier 已提交
645
							{
646 647 648
								if (strcmp(yytext, ptr->old) == 0)
								{
									struct _yy_buffer *yb;
M
 
Marc G. Fournier 已提交
649

650
									yb = mm_alloc(sizeof(struct _yy_buffer));
M
 
Marc G. Fournier 已提交
651

652 653 654 655
												yb->buffer =  YY_CURRENT_BUFFER;
												yb->lineno = yylineno;
												yb->filename = mm_strdup(input_filename);
												yb->next = yy_buffer;
M
 
Marc G. Fournier 已提交
656

657
												yy_buffer = yb;
M
 
Marc G. Fournier 已提交
658

659 660 661 662 663 664
									yy_scan_string(ptr->new);
									break;
								}
							}
							if (ptr == NULL)
							{
665
								yylval.str = mm_strdup(yytext);
666
								return IDENT;
M
 
Marc G. Fournier 已提交
667 668
							}
						}
M
Marc G. Fournier 已提交
669
					}
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701
<C>";"				{ return(';'); }
<C>","				{ return(','); }
<C>"*"				{ return('*'); }
<C>"%"				{ return('%'); }
<C>"/"				{ return('/'); }
<C>"+"				{ return('+'); }
<C>"-"				{ return('-'); }
<C>"("				{ return('('); }
<C>")"				{ return(')'); }
<C>{space}			{ ECHO; }
<C>\{				{ return('{'); }
<C>\}				{ return('}'); }
<C>\[				{ return('['); }
<C>\]				{ return(']'); }
<C>\=				{ return('='); }
<C>"->"				{ return(S_MEMBER); }
<C>">>"				{ return(S_RSHIFT); }
<C>"<<"				{ return(S_LSHIFT); }
<C>"||"				{ return(S_OR); }
<C>"&&"				{ return(S_AND); }
<C>"++"				{ return(S_INC); }
<C>"--"				{ return(S_DEC); }
<C>"=="				{ return(S_EQUAL); }
<C>"!="				{ return(S_NEQUAL); }
<C>"+="				{ return(S_ADD); }
<C>"-="				{ return(S_SUB); }
<C>"*="				{ return(S_MUL); }
<C>"/="				{ return(S_DIV); }
<C>"%="				{ return(S_MOD); }
<C>"->*"			{ return(S_MEMPOINT); }
<C>".*"				{ return(S_DOTPOINT); }
<C>{other}			{ return S_ANYTHING; }
702

703 704
<C>{exec_sql}{define}{space}*	{ BEGIN(def_ident); }
<C>{exec_sql}{include}{space}*	{ BEGIN(incl); }
705
<C>{informix_special}{include}{space}*	{ BEGIN(incl); }
706

707 708
<C,xskip>{exec_sql}{ifdef}{space}*	{ ifcond = TRUE; BEGIN(xcond); }
<C,xskip>{exec_sql}{ifndef}{space}* { ifcond = FALSE; BEGIN(xcond); }
709

710
<C,xskip>{exec_sql}{elif}{space}*	{	/* pop stack */
711
						if ( preproc_tos == 0 ) {
712
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
713
						}
714 715 716 717
						else if ( stacked_if_value[preproc_tos].else_branch )
							mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;
718 719 720 721

						ifcond = TRUE; BEGIN(xcond);
					}

722
<C,xskip>{exec_sql}{else}{space}*";" {	/* only exec sql endif pops the stack, so take care of duplicated 'else' */
						if ( preproc_tos == 0 ) {
							/*
							 * No IFDEF/IFNDEF is open: stack slot 0 is the base
							 * level, and slot preproc_tos-1 would be out of
							 * bounds, so report the error instead of touching
							 * the stack.
							 */
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
						}
						else if ( stacked_if_value[preproc_tos].else_branch ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Duplicated 'EXEC SQL ELSE;'");
						}
						else {
							/*
							 * The else branch is active only if the enclosing
							 * level is active and the if branch was not.
							 */
							stacked_if_value[preproc_tos].else_branch = TRUE;
							stacked_if_value[preproc_tos].condition =
							(stacked_if_value[preproc_tos-1].condition &&
							 ! stacked_if_value[preproc_tos].condition);

							if ( stacked_if_value[preproc_tos].condition )
								BEGIN(C);
							else
								BEGIN(xskip);
						}
					}
738 739 740 741 742
<C,xskip>{exec_sql}{endif}{space}*";" {
						if ( preproc_tos == 0 )
							mmerror(PARSE_ERROR, ET_FATAL, "Unmatched 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;
743

744
						if ( stacked_if_value[preproc_tos].condition )
745
						   BEGIN(C);
746
						else
747 748 749
						   BEGIN(xskip);
					}

750
<xskip>{other}		{ /* ignore */ }
751

752
<xcond>{identifier}{space}*";" {
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
						if ( preproc_tos >= MAX_NESTED_IF-1 ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Too many nested 'EXEC SQL IFDEF' conditions");
						}
						else 
						{
							struct _defines *defptr;
							unsigned int i;

							/* skip the ";" and trailing whitespace. Note that yytext contains
							   at least one non-space character plus the ";" */
							for ( i = strlen(yytext)-2;
								  i > 0 && isspace((unsigned char) yytext[i]);
								  i-- )
							{}
							yytext[i+1] = '\0';

							for ( defptr = defines; defptr != NULL &&
770
								  ( strcmp(yytext, defptr->old) != 0 ); defptr = defptr->next );
771 772 773 774 775 776

							preproc_tos++;
							stacked_if_value[preproc_tos].else_branch = FALSE;
							stacked_if_value[preproc_tos].condition =
							( (defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition );
						}
777

778 779 780 781
						if ( stacked_if_value[preproc_tos].condition )
						   BEGIN C;
						else
						   BEGIN(xskip);
782 783
					}

784
<def_ident>{identifier} {
M
 
Marc G. Fournier 已提交
785
				old = mm_strdup(yytext);
M
 
Marc G. Fournier 已提交
786
				BEGIN(def);
787
				startlit();
M
 
Marc G. Fournier 已提交
788
			}
789
<def>{space}*";"	{
						/*
						 * End of EXEC SQL DEFINE: "old" holds the symbol name
						 * saved by the <def_ident> rule and literalbuf holds
						 * the replacement text collected by <def>[^;].
						 */
						struct _defines *ptr, *this;

						for (ptr = defines; ptr != NULL; ptr = ptr->next)
						{
							 if (strcmp(old, ptr->old) == 0)
							 {
								/* redefinition: replace the text in place */
								free(ptr->new);
								ptr->new = mm_strdup(literalbuf);
								/* the existing entry keeps its own name copy */
								free(old);
								old = NULL;
								/* stop here so ptr stays non-NULL below */
								break;
							 }
						}
						if (ptr == NULL)
						{
							this = (struct _defines *) mm_alloc(sizeof(struct _defines));

							/* initial definition; the list takes ownership of "old" */
							this->old = old;
							this->new = mm_strdup(literalbuf);
							this->next = defines;
							defines = this;
						}

						BEGIN(C);
					}
<def>[^;]			{ addlit(yytext, yyleng); }
815

816 817 818 819 820 821
<incl>[^;]+";"		{
						/*
						 * Got the include file name: push the current input
						 * onto the yy_buffer stack, locate and open the named
						 * file and continue scanning from it.  The <<EOF>> rule
						 * pops the stack again.
						 */
						struct _yy_buffer *yb;
						struct _include_path *ip;
						char inc_file[MAXPGPATH];
						unsigned int i;
						size_t len;

						yb = mm_alloc(sizeof(struct _yy_buffer));

						yb->buffer =	YY_CURRENT_BUFFER;
						yb->lineno = yylineno;
						yb->filename = input_filename;
						yb->next = yy_buffer;

						yy_buffer = yb;

						/*
						 * skip the ";" and trailing whitespace. Note that yytext contains
						 * at least one non-space character plus the ";"
						 */
						for ( i = strlen(yytext)-2;
							i > 0 && isspace((unsigned char) yytext[i]);
							i-- )
							{}

						yytext[i+1] = '\0';
						yyin = NULL;
						/* never leave inc_file indeterminate if no candidate is tried */
						inc_file[0] = '\0';

						/* If file name is enclosed in '"' remove these and look only in '.' */
						if (yytext[0] == '"' && yytext[i] == '"')
						{
							yytext[i] = '\0';
							memmove(yytext, yytext+1, strlen(yytext));

							/* names that do not fit are simply reported as unopenable below */
							len = strlen(yytext);
							if (len < sizeof(inc_file))
							{
								memcpy(inc_file, yytext, len + 1);
								yyin = fopen(inc_file, "r");
								/*
								 * Retry with ".h" appended, unless the name
								 * already ends in ".h" or the suffix would not
								 * fit into inc_file.
								 */
								if (!yyin &&
									(len < 2 || strcmp(inc_file + len - 2, ".h") != 0) &&
									len + 2 < sizeof(inc_file))
								{
									strcat(inc_file, ".h");
									yyin = fopen(inc_file, "r");
								}
							}
						}
						else
						{
							for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
							{
								if (strlen(ip->path) + strlen(yytext) + 3 > MAXPGPATH)
								{
									fprintf(stderr, "Error: Path %s/%s is too long in line %d, skipping.\n", ip->path, yytext, yylineno);
									continue;
								}
								snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
								yyin = fopen(inc_file, "r");
								len = strlen(inc_file);
								/* same ".h" retry as above, with the same bounds check */
								if (!yyin &&
									(len < 2 || strcmp(inc_file + len - 2, ".h") != 0) &&
									len + 2 < sizeof(inc_file))
								{
									strcat(inc_file, ".h");
									yyin = fopen( inc_file, "r" );
								}
							}
						}
						if (!yyin)
						{
							snprintf(errortext, sizeof(errortext), "Cannot open include file %s in line %d\n", yytext, yylineno);
							mmerror(NO_INCLUDE_FILE, ET_FATAL, errortext);
						}

						input_filename = mm_strdup(inc_file);
						yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE ));
						yylineno = 1;
						output_line_number();

						BEGIN C;
					}

<<EOF>>				{
898
				  		if (yy_buffer == NULL) {
899 900 901 902 903 904
				  		if ( preproc_tos > 0 ) 
						{
					  		preproc_tos = 0;
							mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
				  		}
							yyterminate();
905
							}
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
				  		else
				  		{
							struct _yy_buffer *yb = yy_buffer;
							int i;

							if (yyin != NULL)
								fclose(yyin);

							yy_delete_buffer( YY_CURRENT_BUFFER );
							yy_switch_to_buffer(yy_buffer->buffer);

							yylineno = yy_buffer->lineno;

							/* We have to output the filename only if we change files here */
							i = strcmp(input_filename, yy_buffer->filename);

							free(input_filename);
							input_filename = yy_buffer->filename;

							yy_buffer = yy_buffer->next;
							free(yb);

							if (i != 0)
								output_line_number();
				  		}
					}
932 933
%%
void
934
lex_init(void)
935
{
936 937 938
	braces_open = 0;

	preproc_tos = 0;
M
Michael Meskes 已提交
939
	yylineno = 1;
940 941 942
	ifcond = TRUE;
	stacked_if_value[preproc_tos].condition = ifcond;
	stacked_if_value[preproc_tos].else_branch = FALSE;
943 944 945 946 947 948 949 950 951

	/* initialize literal buffer to a reasonable but expansible size */
	if (literalbuf == NULL)
	{
		literalalloc = 128;
		literalbuf = (char *) malloc(literalalloc);
	}
	startlit();

952
	BEGIN C;
953 954
}

955 956 957 958 959 960
static void
addlit(char *ytext, int yleng)
{
	/* enlarge buffer if needed */
	if ((literallen+yleng) >= literalalloc)
	{
961
		do 
962
			literalalloc *= 2;
963
		while ((literallen+yleng) >= literalalloc);
964 965
		literalbuf = (char *) realloc(literalbuf, literalalloc);
	}
966 967
	/* append new data, add trailing null */
	memcpy(literalbuf+literallen, ytext, yleng);
968
	literallen += yleng;
969
	literalbuf[literallen] = '\0';
970 971
}

972 973
static void
addlitchar(unsigned char ychar)
974
{
975 976 977 978 979 980 981 982 983 984 985
	/* enlarge buffer if needed */
        if ((literallen+1) >= literalalloc)
        {
                literalalloc *= 2;
                literalbuf = (char *) realloc(literalbuf, literalalloc);
        }
	/* append new data, add trailing null */
	literalbuf[literallen] = ychar;
	literallen += 1;
	literalbuf[literallen] = '\0';
}