pgc.l 28.9 KB
Newer Older
1
%{
/*-------------------------------------------------------------------------
 *
 * pgc.l
 *	  lexical scanner for ecpg
 *
 * This is a modified version of src/backend/parser/scan.l
 *
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.125 2004/02/15 13:48:54 meskes Exp $
 *
 *-------------------------------------------------------------------------
 */
19 20
#include "postgres_fe.h"

M
Marc G. Fournier 已提交
21
#include <ctype.h>
M
Marc G. Fournier 已提交
22
#include <sys/types.h>
23
#include <limits.h>
24
#include <errno.h>
25

M
 
Marc G. Fournier 已提交
26
#include "extern.h"
27

M
Marc G. Fournier 已提交
28
extern YYSTYPE yylval;
29

30 31
static int		xcdepth = 0;	/* depth of nesting in slash-star comments */

32 33 34 35 36 37
/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal.  Call startlit to reset buffer
 * to empty, addlit to add text.  Note that the buffer is permanently
 * malloc'd to the largest size needed so far in the current run.
 */
38 39 40
static char    *literalbuf = NULL;		/* expandable buffer */
static int		literallen;				/* actual current length */
static int		literalalloc;			/* current allocated buffer size */
41

42
#define startlit()	(literalbuf[0] = '\0', literallen = 0)
43
static void addlit(char *ytext, int yleng);
44
static void addlitchar (unsigned char);
45
static void parse_include (void);
46

M
Michael Meskes 已提交
47
char *token_start;
M
Michael Meskes 已提交
48
int state_before;
M
Marc G. Fournier 已提交
49

50 51 52 53 54 55 56
struct _yy_buffer 
{ 
	YY_BUFFER_STATE		buffer;
	long				lineno;
	char		  		*filename;
	struct _yy_buffer 	*next;
} *yy_buffer = NULL;
57

M
 
Marc G. Fournier 已提交
58 59
static char *old;

60 61 62
#define MAX_NESTED_IF 128
static short preproc_tos;
static short ifcond;
63 64 65 66
static struct _if_value 
{
	short condition;
	short else_branch;
67 68
} stacked_if_value[MAX_NESTED_IF];

69
%}
M
Michael Meskes 已提交
70

71 72 73 74
%option 8bit
%option never-interactive
%option noyywrap

75
%option yylineno
76

M
 
Marc G. Fournier 已提交
77
%s C SQL incl def def_ident
M
Michael Meskes 已提交
78 79 80

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *	<xb> bit string literal
 *	<xc> extended C-style comments - thomas 1997-07-12
 *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
 *	<xh> hexadecimal numeric string - thomas 1997-11-16
 *	<xq> quoted strings - thomas 1997-07-30
 */

M
Michael Meskes 已提交
97
%x xb
M
Marc G. Fournier 已提交
98 99
%x xc
%x xd
100
%x xdc
M
Marc G. Fournier 已提交
101 102
%x xh
%x xq
103 104 105
%x xpre
%x xcond
%x xskip
M
Marc G. Fournier 已提交
106

M
Michael Meskes 已提交
107
/* Bit string
M
Marc G. Fournier 已提交
108
 */
M
Michael Meskes 已提交
109 110 111 112
xbstart			[bB]{quote}
xbstop			{quote}
xbinside		[^']*
xbcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
113 114 115 116 117

/* Hexadecimal number
 */
xhstart			[xX]{quote}
xhstop			{quote}
118
xhinside		[^']*
M
Michael Meskes 已提交
119
xhcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
120

M
Michael Meskes 已提交
121 122 123 124
/* National character
 */
xnstart                        [nN]{quote}

125
/* C version of hex number
M
Michael Meskes 已提交
126 127 128
 */
xch			0[xX][0-9A-Fa-f]*

M
Marc G. Fournier 已提交
129 130 131 132 133 134 135 136
/* Extended quote
 * xqdouble implements SQL92 embedded quote
 * xqcat allows strings to cross input lines
 */
quote			'
xqstart			{quote}
xqstop			{quote}
xqdouble		{quote}{quote}
M
Michael Meskes 已提交
137
xqinside		[^\\']+
138 139
xqescape                [\\][^0-7]
xqoctesc                [\\][0-7]{1,3}
M
Michael Meskes 已提交
140
xqcat			{quote}{whitespace_with_newline}{quote}
M
Marc G. Fournier 已提交
141

142
/* Double quote
M
Marc G. Fournier 已提交
143 144 145 146 147
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
148
xddouble				{dquote}{dquote}
M
Michael Meskes 已提交
149
xdinside		[^"]+
M
Michael Meskes 已提交
150 151 152 153 154 155

/* special stuff for C strings */
xdcqq			\\\\
xdcqdq			\\\"
xdcother		[^"]
xdcinside		({xdcqq}|{xdcqdq}|{xdcother})
M
Marc G. Fournier 已提交
156

157 158
/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!	Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *	  {operator} would. Then the tie-breaker (first matching rule of same
 *	  length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *	  in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *	  if found throw it back with yyless().  This handles the plus-slash-star
 *	  problem.
 * SQL92-style comments, which start with dash-dash, have similar interactions
 * with the operator rule.
 */
176
xcstart			\/\*{op_chars}*
M
Michael Meskes 已提交
177
xcstop			\*+\/
178
xcinside		[^*/]+
M
Marc G. Fournier 已提交
179 180

digit			[0-9]
M
Michael Meskes 已提交
181 182
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]
M
Marc G. Fournier 已提交
183

M
Michael Meskes 已提交
184
identifier		{ident_start}{ident_cont}*
M
Marc G. Fournier 已提交
185

186
array			({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)])*
M
Marc G. Fournier 已提交
187 188
typecast		"::"

189 190
/*
 * "self" is the set of chars that should be returned as single-character
191
 * tokens.	"op_chars" is the set of chars that can make up "Op" tokens,
192 193 194 195 196 197 198
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
M
Michael Meskes 已提交
199 200
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
201
operator		{op_chars}+
M
Marc G. Fournier 已提交
202

203
/* we no longer allow unary minus in numbers.
204
 * instead we pass it separately to parser. there it gets
205
 * coerced via doNegate() -- Leon aug 20 1999
M
Michael Meskes 已提交
206
 */
207

M
Michael Meskes 已提交
208 209
integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
210
real			((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
M
Marc G. Fournier 已提交
211 212 213

param			\${integer}

M
Michael Meskes 已提交
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  SQL92-style comments, which start with -- and extend to the
 * next newline, are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 */

M
Michael Meskes 已提交
229
ccomment		"//".*\n
M
Marc G. Fournier 已提交
230

231
space			[ \t\n\r\f]
M
Michael Meskes 已提交
232
horiz_space		[ \t\f]
233
newline			[\n\r]
M
Michael Meskes 已提交
234 235
non_newline		[^\n\r]

236
comment			("--"{non_newline}*)
M
Michael Meskes 已提交
237

238
whitespace		({space}+|{comment})
M
Michael Meskes 已提交
239 240 241 242 243 244 245 246 247

/*
 * SQL92 requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

horiz_whitespace	({horiz_space}|{comment})
248
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
M
Michael Meskes 已提交
249

250 251 252 253
/* special characters for other dbms */
/* we have to react differently in compat mode */
informix_special	[\$]

M
Marc G. Fournier 已提交
254 255 256
other			.

/* some stuff needed for ecpg */
257 258
exec	[eE][xX][eE][cC]
sql		[sS][qQ][lL]
M
 
Marc G. Fournier 已提交
259
define	[dD][eE][fF][iI][nN][eE]
260
include [iI][nN][cC][lL][uU][dD][eE]
M
Marc G. Fournier 已提交
261

262 263 264 265 266 267
ifdef	[iI][fF][dD][eE][fF]
ifndef	[iI][fF][nN][dD][eE][fF]
else	[eE][lL][sS][eE]
elif	[eE][lL][iI][fF]
endif	[eE][nN][dD][iI][fF]

268 269
struct	[sS][tT][rR][uU][cC][tT]

270
exec_sql		{exec}{space}*{sql}{space}*
M
Michael Meskes 已提交
271 272
ipdigit			({digit}|{digit}{digit}|{digit}{digit}{digit})
ip			{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
273

274 275 276
/* we might want to parse all cpp include files */
cppinclude 		{space}*#{include}{space}*

277
/* Take care of cpp continuation lines */
278
cppline			{space}*#(.*\\{space})+.*
M
 
Marc G. Fournier 已提交
279

280
/*
M
Marc G. Fournier 已提交
281
 * Quoted strings must allow some special characters such as single-quote
282
 *	and newline.
M
Michael Meskes 已提交
283
 * Embedded single-quotes are implemented both in the SQL92-standard
284 285
 *	style of two adjacent single quotes "''" and in the Postgres/Java style
 *	of escaped-quote "\'".
M
Marc G. Fournier 已提交
286
 * Other embedded escaped characters are matched explicitly and the leading
287
 *	backslash is dropped from the string. - thomas 1997-09-24
M
Michael Meskes 已提交
288
 * Note that xcstart must appear before operator, as explained above!
289
 *	Also whitespace (comment) must appear before operator.
M
Marc G. Fournier 已提交
290 291
 */

292
%%
293 294 295 296 297 298

%{
                                       /* code to execute during start of each call of yylex() */
                                       token_start = NULL;
%}

M
Michael Meskes 已提交
299
<SQL>{whitespace}	{ /* ignore */ }
M
Marc G. Fournier 已提交
300

301
{xcstart}			{
302
						token_start = yytext;
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
						state_before = YYSTATE;
						xcdepth = 0;
						BEGIN(xc);
						/* Put back any characters past slash-star; see above */
						yyless(2);
						fputs("/*", yyout);
					}
<xc>{xcstart}		{
						xcdepth++;
				 		/* Put back any characters past slash-star; see above */
				 		yyless(2);
						fputs("/*", yyout);
					}

<xc>{xcstop}		{
						ECHO;
						if (xcdepth <= 0)
320
						{
M
Michael Meskes 已提交
321
							BEGIN(state_before);
322 323
							token_start = NULL;
						}
324 325 326 327 328 329 330
						else
							xcdepth--;
					}

<xc>{xcinside}		{ ECHO; }
<xc>{op_chars}		{ ECHO; }

331
<xc><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated /* comment"); }
332

M
Michael Meskes 已提交
333
<SQL>{xbstart}	{
334
						token_start = yytext;
M
Michael Meskes 已提交
335
						BEGIN(xb);
336
						startlit();
337
						addlitchar('b');
338
					}
M
Michael Meskes 已提交
339
<xb>{xbstop}	{
						/*
						 * Closing quote of a bit string literal.
						 *
						 * literalbuf holds the 'b' marker that the {xbstart} rule
						 * stored with addlitchar(), followed by the text collected
						 * from the literal.  The validity check therefore has to
						 * skip that first character before measuring the run of
						 * '0'/'1' digits; measuring from literalbuf[0] always
						 * yields a run of length zero and rejects every non-empty
						 * bit string.
						 */
						BEGIN(SQL);
						if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
							mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");
						/*
						 * Hand out a private copy, as the quoted-string rules do:
						 * literalbuf is reset and reused by the next literal.
						 */
						yylval.str = mm_strdup(literalbuf);
						return BCONST;
					}
M
Marc G. Fournier 已提交
346 347

<xh>{xhinside}	|
M
Michael Meskes 已提交
348
<xb>{xbinside}	{ addlit(yytext, yyleng); }
M
Marc G. Fournier 已提交
349
<xh>{xhcat}		|
M
Michael Meskes 已提交
350
<xb>{xbcat}		{ /* ignore */ }
351
<xb><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated bit string"); }
M
Marc G. Fournier 已提交
352 353

<SQL>{xhstart}		{
354
						token_start = yytext;
355 356
						BEGIN(xh);
						startlit();
357
						addlitchar('x');
358 359
					}
<xh>{xhstop}		{
						/*
						 * Closing quote of a hexadecimal string literal.
						 *
						 * xh is an exclusive state that is only entered from the
						 * <SQL>{xhstart} rule, so switch back to SQL here (as the
						 * matching <xb>{xbstop} rule does); otherwise the scanner
						 * would keep treating the following input as the inside
						 * of a hex string.
						 */
						BEGIN(SQL);
						/*
						 * Hand out a private copy, as the quoted-string rules do:
						 * literalbuf is reset and reused by the next literal.
						 */
						yylval.str = mm_strdup(literalbuf);
						return XCONST;
					}

364
<xh><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated hexadecimal integer"); }
M
Michael Meskes 已提交
365 366 367 368 369 370 371 372 373
<SQL>{xnstart}		{
						/*
						 * National character.
						 * Need to remember type info to flow it forward into the
						 * parser.  Not yet implemented. - thomas 2002-06-17
						 *
						 * The literal body is scanned in the xq state, whose
						 * closing-quote rule does BEGIN(state_before).  Record the
						 * current state here, just like the plain {xqstart} rule,
						 * so that we do not return to a stale state saved by some
						 * earlier comment or string.
						 */
						token_start = yytext;
						state_before = YYSTATE;
						BEGIN(xq);
						startlit();
					}
374
<C,SQL>{xqstart}	{
375
						token_start = yytext;
376 377 378 379 380 381 382 383 384
						state_before = YYSTATE;
						BEGIN(xq);
						startlit();
					}
<xq>{xqstop}		{
						BEGIN(state_before);
						yylval.str = mm_strdup(literalbuf);
						return SCONST;
					}
385 386
<xq>{xqdouble}		{ addlitchar('\''); }
<xq>{xqinside}		{ addlit(yytext, yyleng); }
M
Michael Meskes 已提交
387
<xq>{xqescape}  	{ addlit(yytext, yyleng); }
388
<xq>{xqoctesc}          { addlit(yytext, yyleng); }
389
<xq>{xqcat}		{ /* ignore */ }
390

391
<xq><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted string"); }
392 393 394 395 396 397

<SQL>{xdstart}		{
						state_before = YYSTATE;
						BEGIN(xd);
						startlit();
					}
M
Michael Meskes 已提交
398
<xd>{xdstop}		{
399
						BEGIN(state_before);
400 401
						if (literallen == 0)
							mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
402 403 404 405 406 407 408 409
						yylval.str = mm_strdup(literalbuf);
						return CSTRING;
					}
<xdc>{xdstop}		{
						BEGIN(state_before);
						yylval.str = mm_strdup(literalbuf);
						return CSTRING;
					}
410
<xd>{xddouble}		{ addlitchar('"'); }
411
<xd>{xdinside}		{ addlit(yytext, yyleng); }
412
<xd,xdc><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted identifier"); }
413 414 415 416
<C,SQL>{xdstart}	{
						state_before = YYSTATE;
						BEGIN(xdc);
						startlit();
M
Michael Meskes 已提交
417
					}
418 419
<xdc>{xdcinside}	{ addlit(yytext, yyleng); }
<SQL>{typecast}		{ return TYPECAST; }
420 421
<SQL>{informix_special}	{
			  /* are we simulating Informix? */
422
                          if (INFORMIX_MODE)
423 424 425 426 427 428
			  {
			  	unput(':');
			  }
			  else
				return yytext[0];
			}
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
<SQL>{self}			{ /*
					   * We may find a ';' inside a structure
					   * definition in a TYPE or VAR statement.
					   * This is not an EOL marker.
					   */
					  if (yytext[0] == ';' && struct_level == 0)
						 BEGIN C;
					  return yytext[0];
					}
<SQL>{operator}		{
						/*
						 * Check for embedded slash-star or dash-dash; those
						 * are comment starts, so operator must stop there.
						 * Note that slash-star or dash-dash at the first
						 * character will match a prior rule, not this one.
						 */
						int		nchars = yyleng;
446 447
						char   *slashstar = strstr(yytext, "/*");
						char   *dashdash = strstr(yytext, "--");
448 449

						if (slashstar && dashdash)
450
						{
451 452 453
							/* if both appear, take the first one */
							if (slashstar > dashdash)
								slashstar = dashdash;
454
						}
455 456 457
						else if (!slashstar)
							slashstar = dashdash;
						if (slashstar)
458
							nchars = slashstar - yytext;
459 460

						/*
461 462 463 464 465 466
						 * For SQL92 compatibility, '+' and '-' cannot be the
						 * last char of a multi-char operator unless the operator
						 * contains chars that are not in SQL92 operators.
						 * The idea is to lex '=-' as two operators, but not
						 * to forbid operator names like '?-' that could not be
						 * sequences of SQL92 operators.
M
Michael Meskes 已提交
467
						 */
468 469 470 471 472 473 474 475
						while (nchars > 1 &&
							   (yytext[nchars-1] == '+' ||
								yytext[nchars-1] == '-'))
						{
							int		ic;

							for (ic = nchars-2; ic >= 0; ic--)
							{
M
Michael Meskes 已提交
476
								if (strchr("~!@#^&|`?%", yytext[ic]))
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
									break;
							}
							if (ic >= 0)
								break; /* found a char that makes it OK */
							nchars--; /* else remove the +/-, and check again */
						}

						if (nchars < yyleng)
						{
							/* Strip the unwanted chars from the token */
							yyless(nchars);
							/*
							 * If what we have left is only one char, and it's
							 * one of the characters matching "self", then
							 * return it as a character token the same way
							 * that the "self" rule would have.
							 */
							if (nchars == 1 &&
M
Michael Meskes 已提交
495
								strchr(",()[].;:+-*/%^<>=", yytext[0]))
496 497 498 499
								return yytext[0];
						}

						/* Convert "!=" operator to "<>" for compatibility */
500
						if (strcmp(yytext, "!=") == 0)
501 502
							yylval.str = mm_strdup("<>");
						else
503
							yylval.str = mm_strdup(yytext);
504
						return Op;
M
Michael Meskes 已提交
505
					}
506
<SQL>{param}		{
507
						yylval.ival = atol(yytext+1);
508 509 510 511 512
						return PARAM;
					}
<C,SQL>{integer}	{
						long val;
						char* endptr;
M
Michael Meskes 已提交
513

M
Marc G. Fournier 已提交
514
						errno = 0;
515 516 517 518 519 520 521 522 523
						val = strtol((char *)yytext, &endptr,10);
						if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64
							/* if long > 32 bits, check for overflow of int4 */
							|| val != (long) ((int32) val)
#endif
							)
						{
							errno = 0;
524
							yylval.str = mm_strdup(yytext);
525 526 527 528 529 530
							return FCONST;
						}
						yylval.ival = val;
						return ICONST;
					}
<SQL>{ip}			{
531
						yylval.str = mm_strdup(yytext);
532 533 534
						return IP;
					}
{decimal}			{
535
						yylval.str = mm_strdup(yytext);
536 537 538
						return FCONST;
					}
<C,SQL>{real}		{
539
						yylval.str = mm_strdup(yytext);
M
Michael Meskes 已提交
540
						return FCONST;
M
Marc G. Fournier 已提交
541
					}
542
<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
543
						yylval.str = mm_strdup(yytext+1);
544 545
						return(CVARIABLE);
					}
M
Marc G. Fournier 已提交
546
<SQL>{identifier}	{
547 548 549 550 551
						ScanKeyword    *keyword;
						struct _defines *ptr;

						/* How about a DEFINE? */
						for (ptr = defines; ptr; ptr = ptr->next)
M
Marc G. Fournier 已提交
552
						{
553 554 555
							if (strcmp(yytext, ptr->old) == 0)
							{
								struct _yy_buffer *yb;
M
 
Marc G. Fournier 已提交
556

557 558 559 560 561 562
								yb = mm_alloc(sizeof(struct _yy_buffer));

								yb->buffer =  YY_CURRENT_BUFFER;
								yb->lineno = yylineno;
								yb->filename = mm_strdup(input_filename);
								yb->next = yy_buffer;
M
 
Marc G. Fournier 已提交
563

564
								yy_buffer = yb;
M
 
Marc G. Fournier 已提交
565

566 567 568 569
								yy_scan_string(ptr->new);
								break;
							}
						}
M
 
Marc G. Fournier 已提交
570

571 572 573 574 575 576
						if (ptr == NULL)
						{
							/* Is it an SQL keyword? */
							keyword = ScanKeywordLookup(yytext);
							if (keyword != NULL)
								return keyword->value;
577

578 579 580 581
							/* Is it an ECPG keyword? */
							keyword = ScanECPGKeywordLookup( yytext);
							if (keyword != NULL)
								return keyword->value;
582

583 584 585 586
							/* Is it a C keyword? */
							keyword = ScanCKeywordLookup(yytext);
							if (keyword != NULL)
								return keyword->value;
587

588 589 590 591 592 593 594 595 596 597 598 599 600
							/*
							 * None of the above.  Return it as an identifier.
							 *
							 * The backend would attempt to truncate and case-fold
							 * the identifier, but I see no good reason for ecpg
							 * to do so; that's just another way that ecpg could get
							 * out of step with the backend.
							 */
							if (ptr == NULL)
							{
								yylval.str = mm_strdup(yytext);
								return IDENT;
							}
M
Marc G. Fournier 已提交
601 602
						}
					}
603 604
<SQL>{other}		{ return yytext[0]; }
<C>{exec_sql}		{ BEGIN SQL; return SQL_START; }
605 606
<C>{informix_special}	{ 
			  /* are we simulating Informix? */
607
			  if (INFORMIX_MODE)
608 609 610 611 612 613 614
			  {
			  	BEGIN SQL;
				return SQL_START;
			  }
			  else
			  	return S_ANYTHING;
			 }
615
<C>{ccomment}		 { ECHO; }
M
Michael Meskes 已提交
616
<C>{xch}			{
617
						char* endptr;
M
Michael Meskes 已提交
618 619

						errno = 0;
M
Michael Meskes 已提交
620
						yylval.ival = strtoul((char *)yytext,&endptr,16);
621 622 623
						if (*endptr != '\0' || errno == ERANGE)
						{
							errno = 0;
624
							yylval.str = mm_strdup(yytext);
625 626 627 628
							return SCONST;
						}
						return ICONST;
					}
629 630 631 632 633 634 635 636 637 638 639
<C>{cppinclude}		{
					  if (system_includes)
					  {
					  	BEGIN(incl);
					  }
					  else
					  {
						yylval.str = mm_strdup(yytext);
						return(CPP_LINE);
					  }
			}
640
<C>{cppline}		{
641
						yylval.str = mm_strdup(yytext);
642
						return(CPP_LINE);
M
Michael Meskes 已提交
643
					}
644 645
<C>{identifier} 	{
						ScanKeyword		*keyword;
646
						struct _defines *ptr;
M
 
Marc G. Fournier 已提交
647

648 649
						/* is it a define? */
						for (ptr = defines; ptr; ptr = ptr->next)
M
 
Marc G. Fournier 已提交
650
						{
651
							if (strcmp(yytext, ptr->old) == 0)
M
 
Marc G. Fournier 已提交
652
							{
653
								struct _yy_buffer *yb;
M
 
Marc G. Fournier 已提交
654

655
								yb = mm_alloc(sizeof(struct _yy_buffer));
M
 
Marc G. Fournier 已提交
656

657 658 659 660
								yb->buffer =  YY_CURRENT_BUFFER;
								yb->lineno = yylineno;
								yb->filename = mm_strdup(input_filename);
								yb->next = yy_buffer;
M
 
Marc G. Fournier 已提交
661

662
								yy_buffer = yb;
M
 
Marc G. Fournier 已提交
663

664 665
								yy_scan_string(ptr->new);
								break;
666
							}
667 668 669 670 671 672 673 674 675
						}

						if (ptr == NULL)
						{
							keyword = ScanCKeywordLookup(yytext);
							if (keyword != NULL)
								return keyword->value;

							else {
676
								yylval.str = mm_strdup(yytext);
677
								return IDENT;
M
 
Marc G. Fournier 已提交
678 679
							}
						}
M
Marc G. Fournier 已提交
680
					}
681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
<C>";"				{ return(';'); }
<C>","				{ return(','); }
<C>"*"				{ return('*'); }
<C>"%"				{ return('%'); }
<C>"/"				{ return('/'); }
<C>"+"				{ return('+'); }
<C>"-"				{ return('-'); }
<C>"("				{ return('('); }
<C>")"				{ return(')'); }
<C>{space}			{ ECHO; }
<C>\{				{ return('{'); }
<C>\}				{ return('}'); }
<C>\[				{ return('['); }
<C>\]				{ return(']'); }
<C>\=				{ return('='); }
<C>"->"				{ return(S_MEMBER); }
<C>">>"				{ return(S_RSHIFT); }
<C>"<<"				{ return(S_LSHIFT); }
<C>"||"				{ return(S_OR); }
<C>"&&"				{ return(S_AND); }
<C>"++"				{ return(S_INC); }
<C>"--"				{ return(S_DEC); }
<C>"=="				{ return(S_EQUAL); }
<C>"!="				{ return(S_NEQUAL); }
<C>"+="				{ return(S_ADD); }
<C>"-="				{ return(S_SUB); }
<C>"*="				{ return(S_MUL); }
<C>"/="				{ return(S_DIV); }
<C>"%="				{ return(S_MOD); }
<C>"->*"			{ return(S_MEMPOINT); }
<C>".*"				{ return(S_DOTPOINT); }
<C>{other}			{ return S_ANYTHING; }
713

714
<C>{exec_sql}{define}{space}*	{ BEGIN(def_ident); }
715 716
<C>{informix_special}{define}{space}*	{
						/* are we simulating Informix? */
717
						if (INFORMIX_MODE)
718 719 720 721 722
						{
							BEGIN(def_ident);
						}
						else
						{
723
							yyless(1);
724 725 726
							return (S_ANYTHING);
						}
					}
727
<C>{exec_sql}{include}{space}*	{ BEGIN(incl); }
728 729
<C>{informix_special}{include}{space}* { 
					  /* are we simulating Informix? */
730
					  if (INFORMIX_MODE)
731
					  {
732
						  BEGIN(incl);
733 734 735
					  }
					  else
					  {
736 737
						  yyless(1);
						  return (S_ANYTHING);
738 739
					  }
					}
740
<C,xskip>{exec_sql}{ifdef}{space}*	{ ifcond = TRUE; BEGIN(xcond); }
741 742
<C,xskip>{informix_special}{ifdef}{space}* { 
					  /* are we simulating Informix? */
743
					  if (INFORMIX_MODE)
744
					  {
745 746
						  ifcond = TRUE;
						  BEGIN(xcond);
747 748 749
					  }
					  else
					  {
750 751
						  yyless(1);
						  return (S_ANYTHING);
752 753
					  }
					}
754
<C,xskip>{exec_sql}{ifndef}{space}* { ifcond = FALSE; BEGIN(xcond); }
755 756
<C,xskip>{informix_special}{ifndef}{space}* { 
					  /* are we simulating Informix? */
757
					  if (INFORMIX_MODE)
758
					  {
759 760
						  ifcond = FALSE;
						  BEGIN(xcond);
761 762 763
					  }
					  else
					  {
764 765
						  yyless(1);
						  return (S_ANYTHING);
766 767
					  }
					}
768
<C,xskip>{exec_sql}{elif}{space}*	{	/* pop stack */
769
						if ( preproc_tos == 0 ) {
770
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
771
						}
772 773 774 775
						else if ( stacked_if_value[preproc_tos].else_branch )
							mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;
776 777 778

						ifcond = TRUE; BEGIN(xcond);
					}
779 780
<C,xskip>{informix_special}{elif}{space}* { 
					  /* are we simulating Informix? */
781
					  if (INFORMIX_MODE)
782 783 784 785 786 787 788 789 790 791 792 793 794
					  {
						if ( preproc_tos == 0 ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
						}
						else if ( stacked_if_value[preproc_tos].else_branch )
							mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;

						ifcond = TRUE; BEGIN(xcond);
					  }
					  else
					  {
795 796
						  yyless(1);
						  return (S_ANYTHING);
797 798
					  }
					}
799

800
<C,xskip>{exec_sql}{else}{space}*";" {
						/*
						 * EXEC SQL ELSE;
						 *
						 * Only EXEC SQL ENDIF pops the stack, so a duplicated ELSE
						 * for the same level is detected through else_branch.
						 */
						if ( preproc_tos == 0 ) {
							/*
							 * No IFDEF/IFNDEF is open: level 0 has no enclosing
							 * level, so stacked_if_value[preproc_tos-1] below
							 * would index before the start of the array.
							 */
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
						}
						else if ( stacked_if_value[preproc_tos].else_branch ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Duplicated 'EXEC SQL ELSE;'");
						}
						else {
							/*
							 * The ELSE branch is active when the enclosing level
							 * is active and the IF branch of this level was not.
							 */
							stacked_if_value[preproc_tos].else_branch = TRUE;
							stacked_if_value[preproc_tos].condition =
							(stacked_if_value[preproc_tos-1].condition &&
							 ! stacked_if_value[preproc_tos].condition);

							if ( stacked_if_value[preproc_tos].condition )
								BEGIN(C);
							else
								BEGIN(xskip);
						}
					}
816 817
<C,xskip>{informix_special}{else}{space}*	{
					  /*
					   * "$else": treated as the ELSE directive only when
					   * simulating Informix; otherwise only the first character
					   * is consumed and returned as ordinary C text.
					   */
					  if (INFORMIX_MODE)
					  {
						if ( preproc_tos == 0 ) {
							/*
							 * No IFDEF/IFNDEF is open: level 0 has no enclosing
							 * level, so stacked_if_value[preproc_tos-1] below
							 * would index before the start of the array.
							 */
							mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
						}
						else if ( stacked_if_value[preproc_tos].else_branch ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Duplicated 'EXEC SQL ELSE;'");
						}
						else {
							/*
							 * The ELSE branch is active when the enclosing level
							 * is active and the IF branch of this level was not.
							 */
							stacked_if_value[preproc_tos].else_branch = TRUE;
							stacked_if_value[preproc_tos].condition =
							(stacked_if_value[preproc_tos-1].condition &&
							 ! stacked_if_value[preproc_tos].condition);

							if ( stacked_if_value[preproc_tos].condition )
								BEGIN(C);
							else
								BEGIN(xskip);
						}
					  }
					  else
					  {
						  /* put back everything after the first character */
						  yyless(1);
						  return (S_ANYTHING);
					  }
					}
841 842 843 844 845
<C,xskip>{exec_sql}{endif}{space}*";" {
						if ( preproc_tos == 0 )
							mmerror(PARSE_ERROR, ET_FATAL, "Unmatched 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;
846

847
						if ( stacked_if_value[preproc_tos].condition )
848
						   BEGIN(C);
849
						else
850 851
						   BEGIN(xskip);
					}
M
Michael Meskes 已提交
852
<C,xskip>{informix_special}{endif}{space}*";"	{
853
					  /* are we simulating Informix? */
854
					  if (INFORMIX_MODE)
855 856 857 858 859 860 861 862 863 864 865 866 867
					  {
						if ( preproc_tos == 0 )
							mmerror(PARSE_ERROR, ET_FATAL, "Unmatched 'EXEC SQL ENDIF;'");
						else
							preproc_tos--;

						if ( stacked_if_value[preproc_tos].condition )
						   BEGIN(C);
						else
						   BEGIN(xskip);
					  }
					  else
					  {
868 869
						  yyless(1);
						  return (S_ANYTHING);
870 871
					  }
					}
872

873
<xskip>{other}		{ /* ignore */ }
874

875
<xcond>{identifier}{space}*";" {
876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
						if ( preproc_tos >= MAX_NESTED_IF-1 ) {
							mmerror(PARSE_ERROR, ET_FATAL, "Too many nested 'EXEC SQL IFDEF' conditions");
						}
						else 
						{
							struct _defines *defptr;
							unsigned int i;

							/* skip the ";" and trailing whitespace. Note that yytext contains
							   at least one non-space character plus the ";" */
							for ( i = strlen(yytext)-2;
								  i > 0 && isspace((unsigned char) yytext[i]);
								  i-- )
							{}
							yytext[i+1] = '\0';

							for ( defptr = defines; defptr != NULL &&
893
								  ( strcmp(yytext, defptr->old) != 0 ); defptr = defptr->next );
894 895 896 897 898 899

							preproc_tos++;
							stacked_if_value[preproc_tos].else_branch = FALSE;
							stacked_if_value[preproc_tos].condition =
							( (defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition );
						}
900

901 902 903 904
						if ( stacked_if_value[preproc_tos].condition )
						   BEGIN C;
						else
						   BEGIN(xskip);
905 906
					}

907
<def_ident>{identifier} {
M
 
Marc G. Fournier 已提交
908
				old = mm_strdup(yytext);
M
 
Marc G. Fournier 已提交
909
				BEGIN(def);
910
				startlit();
M
 
Marc G. Fournier 已提交
911
			}
912
<def>{space}*";"	{
						/*
						 * End of a DEFINE directive.  "old" is the symbol name
						 * saved by the <def_ident> rule; literalbuf holds the
						 * replacement text accumulated by the <def> rule below.
						 */
						struct _defines *ptr, *this;

						/* Redefinition: replace the text of the existing entry. */
						for (ptr = defines; ptr != NULL; ptr = ptr->next)
						{
							if (strcmp(old, ptr->old) == 0)
							{
								free(ptr->new);
								ptr->new = mm_strdup(literalbuf);
								/*
								 * The entry keeps its own copy of the name, so
								 * the one made in <def_ident> is no longer needed.
								 */
								free(old);
								old = NULL;
								/*
								 * Stop here so that ptr stays non-NULL; without
								 * the break the test below is always true and a
								 * duplicate entry is pushed as well.
								 */
								break;
							}
						}

						/* Initial definition: push a new entry onto the list. */
						if (ptr == NULL)
						{
							this = (struct _defines *) mm_alloc(sizeof(struct _defines));

							this->old = old;
							this->new = mm_strdup(literalbuf);
							this->next = defines;
							defines = this;
						}

						BEGIN(C);
					}
<def>[^;]			{ addlit(yytext, yyleng); }
938

939 940
<incl>\<[^\>]+\>{space}*";"?		{	parse_include(); }
<incl>{dquote}{xdinside}{dquote}{space}*";"?	{	parse_include(); }
941
<incl>[^;\<\>\"]+";"		{ parse_include(); }
942 943

<<EOF>>				{
						if (yy_buffer != NULL)
						{
							/*
							 * A nested buffer is exhausted: drop it and resume
							 * the one saved on top of the yy_buffer stack.
							 */
							struct _yy_buffer *finished = yy_buffer;
							int			name_differs;

							if (yyin != NULL)
								fclose(yyin);

							yy_delete_buffer( YY_CURRENT_BUFFER );
							yy_switch_to_buffer(finished->buffer);

							yylineno = finished->lineno;

							/* We have to output the filename only if we change files here */
							name_differs = strcmp(input_filename, finished->filename);

							free(input_filename);
							input_filename = finished->filename;

							/* pop the stack entry */
							yy_buffer = finished->next;
							free(finished);

							if (name_differs != 0)
								output_line_number();
						}
						else
						{
							/* End of the outermost input: all conditionals must be closed. */
							if ( preproc_tos > 0 )
							{
								preproc_tos = 0;
								mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
							}
							yyterminate();
						}
					}
978 979
%%
void
980
lex_init(void)
981
{
982 983 984
	braces_open = 0;

	preproc_tos = 0;
M
Michael Meskes 已提交
985
	yylineno = 1;
986 987 988
	ifcond = TRUE;
	stacked_if_value[preproc_tos].condition = ifcond;
	stacked_if_value[preproc_tos].else_branch = FALSE;
989 990 991 992 993 994 995 996 997

	/* initialize literal buffer to a reasonable but expansible size */
	if (literalbuf == NULL)
	{
		literalalloc = 128;
		literalbuf = (char *) malloc(literalalloc);
	}
	startlit();

998
	BEGIN C;
999 1000
}

1001 1002 1003 1004 1005 1006
static void
addlit(char *ytext, int yleng)
{
	/* enlarge buffer if needed */
	if ((literallen+yleng) >= literalalloc)
	{
1007
		do 
1008
			literalalloc *= 2;
1009
		while ((literallen+yleng) >= literalalloc);
1010 1011
		literalbuf = (char *) realloc(literalbuf, literalalloc);
	}
1012 1013
	/* append new data, add trailing null */
	memcpy(literalbuf+literallen, ytext, yleng);
1014
	literallen += yleng;
1015
	literalbuf[literallen] = '\0';
1016 1017
}

1018 1019
static void
addlitchar(unsigned char ychar)
1020
{
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031
	/* enlarge buffer if needed */
        if ((literallen+1) >= literalalloc)
        {
                literalalloc *= 2;
                literalbuf = (char *) realloc(literalbuf, literalalloc);
        }
	/* append new data, add trailing null */
	literalbuf[literallen] = ychar;
	literallen += 1;
	literalbuf[literallen] = '\0';
}
1032

1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068
static void
parse_include(void)
{
	/* got the include file name */
	struct _yy_buffer *yb;
  	struct _include_path *ip;
  	char inc_file[MAXPGPATH];
  	unsigned int i;

  	yb = mm_alloc(sizeof(struct _yy_buffer));

  	yb->buffer =	YY_CURRENT_BUFFER;
  	yb->lineno = yylineno;
  	yb->filename = input_filename;
  	yb->next = yy_buffer;

  	yy_buffer = yb;

  	/*
	 * skip the ";" if there is one and trailing whitespace. Note that
	 * yytext contains at least one non-space character plus the ";" 
	 */
  	for ( i = strlen(yytext)-2;
		i > 0 && isspace((unsigned char) yytext[i]);
		i-- )
  		{}

	if (yytext[i] == ';')
		i--;

	yytext[i+1] = '\0';
	
	yyin = NULL;

	/* If file name is enclosed in '"' remove these and look only in '.' */
	/* Informix does look into all include paths though, except filename starts with '/' */
1069 1070
	if ((yytext[0] == '"' && yytext[i] == '"') &&
	    ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126
	{
		yytext[i] = '\0';
		memmove(yytext, yytext+1, strlen(yytext));
	
		strncpy(inc_file, yytext, sizeof(inc_file));
		yyin = fopen(inc_file, "r");
		if (!yyin)
		{
			if (strcmp(inc_file + strlen(inc_file) - 2, ".h"))
			{
				strcat(inc_file, ".h");
				yyin = fopen(inc_file, "r");
			}
		}
		
	}
	else
	{
		if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
		{
			yytext[i] = '\0';
			memmove(yytext, yytext+1, strlen(yytext));
		}
		
	  	for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
	  	{
			if (strlen(ip->path) + strlen(yytext) + 3 > MAXPGPATH)
			{
				fprintf(stderr, "Error: Path %s/%s is too long in line %d, skipping.\n", ip->path, yytext, yylineno);
				continue;
			}
			snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
			yyin = fopen(inc_file, "r");
			if (!yyin)
			{
				if (strcmp(inc_file + strlen(inc_file) - 2, ".h"))
				{
					strcat(inc_file, ".h");
					yyin = fopen( inc_file, "r" );
				}
			}
		}
	}
	if (!yyin)
	{
		snprintf(errortext, sizeof(errortext), "Cannot open include file %s in line %d\n", yytext, yylineno);
		mmerror(NO_INCLUDE_FILE, ET_FATAL, errortext);
  	}

	input_filename = mm_strdup(inc_file);
	yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE ));
	yylineno = 1;
	output_line_number();

  	BEGIN C;
}