diff --git a/src/interfaces/ecpg/preproc/Makefile b/src/interfaces/ecpg/preproc/Makefile index 8e85fe9e5afb3874fb2163042a3a006386e49c8b..36b3c26adab6d764ba9ab1fe47bb28987cd95356 100644 --- a/src/interfaces/ecpg/preproc/Makefile +++ b/src/interfaces/ecpg/preproc/Makefile @@ -57,8 +57,8 @@ else @$(missing) flex $< $@ endif -preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.tokens ecpg.trailer ecpg.type - $(PERL) $(srcdir)/parse.pl $(srcdir) < $< > $@ +preproc.y: ../../../backend/parser/gram.y parse2.pl ecpg.addons ecpg.header ecpg.tokens ecpg.trailer ecpg.type + $(PERL) $(srcdir)/parse2.pl $(srcdir) < $< > $@ $(PERL) $(srcdir)/check_rules.pl $(srcdir) $< ecpg_keywords.o c_keywords.o keywords.o preproc.o parser.o: preproc.h diff --git a/src/interfaces/ecpg/preproc/README.parser b/src/interfaces/ecpg/preproc/README.parser index ddc3061d486b1afa90322ba2964e71368b91bdc1..6d6daac4ba3a6441a85f2a6e974b0278d102ccb4 100644 --- a/src/interfaces/ecpg/preproc/README.parser +++ b/src/interfaces/ecpg/preproc/README.parser @@ -3,7 +3,7 @@ ECPG modifies and extends the core grammar in a way that defined in ecpg.tokens, types are defined in ecpg.type 2) most tokens from the core grammar are simply converted to literals concatenated together to form the SQL string - passed to the server, this is done by parse.pl. + passed to the server, this is done by parse2.pl. 3) some rules need side-effects, actions are either added or completely overridden (compared to the basic token concatenation) for them, these are defined in ecpg.addons, @@ -20,7 +20,7 @@ rules concatenated together. e.g. if gram.y has this: ruleA: tokenA tokenB tokenC {...} then "dumpedtokens" is "ruleAtokenAtokenBtokenC". "postfix" above can be: -a) "block" - the automatic rule created by parse.pl is completely +a) "block" - the automatic rule created by parse2.pl is completely overridden, the code block has to be written completely as it were in a plain bison grammar b) "rule" - the automatic rule is extended on, so new syntaxes diff --git a/src/interfaces/ecpg/preproc/ecpg.trailer b/src/interfaces/ecpg/preproc/ecpg.trailer index 4e0c52caae21f5e87c3e6c46187f30cacdae929d..3a941ce1abe240406fae2a7b609c6916eebbc8d0 100644 --- a/src/interfaces/ecpg/preproc/ecpg.trailer +++ b/src/interfaces/ecpg/preproc/ecpg.trailer @@ -1591,7 +1591,7 @@ ECPGCKeywords: S_AUTO { $$ = mm_strdup("auto"); } * CONNECTION can be added back in all_unreserved_keyword, but CURRENT and * INPUT are reserved for ecpg purposes. * - * The mentioned exclusions are done by $replace_line settings in parse.pl. + * The mentioned exclusions are done by $replace_line settings in parse2.pl. */ all_unreserved_keyword: unreserved_keyword { $$ = $1; } | ECPGunreserved_interval { $$ = $1; } diff --git a/src/interfaces/ecpg/preproc/parse2.pl b/src/interfaces/ecpg/preproc/parse2.pl new file mode 100644 index 0000000000000000000000000000000000000000..dd509373a5afd5a4cc205b00e5c17e7198e4f252 --- /dev/null +++ b/src/interfaces/ecpg/preproc/parse2.pl @@ -0,0 +1,648 @@ +#!/usr/bin/perl +# src/interfaces/ecpg/preproc/parse2.pl +# parser generater for ecpg version 2 +# call with backend parser as stdin +# +# Copyright (c) 2007-2011, PostgreSQL Global Development Group +# +# Written by Mike Aubury +# Michael Meskes +# Andy Colson +# +# Placed under the same license as PostgreSQL. +# + +use strict; +use warnings; +no warnings 'uninitialized'; + +my $path = shift @ARGV; +$path = "." unless $path; + +my $copymode = 0; +my $brace_indent = 0; +my $yaccmode = 0; +my $header_included = 0; +my $feature_not_supported = 0; +my $tokenmode = 0; + +my(%buff, $infield, $comment, %tokens, %addons ); +my($stmt_mode, @fields); +my($line, $non_term_id); + + +# some token have to be replaced by other symbols +# either in the rule +my %replace_token = ( + 'BCONST' => 'ecpg_bconst', + 'FCONST' => 'ecpg_fconst', + 'Sconst' => 'ecpg_sconst', + 'IDENT' => 'ecpg_ident', + 'PARAM' => 'ecpg_param', +); + +# or in the block +my %replace_string = ( + 'WITH_TIME' => 'with time', + 'NULLS_FIRST' => 'nulls first', + 'NULLS_LAST' => 'nulls last', + 'TYPECAST' => '::', + 'DOT_DOT' => '..', + 'COLON_EQUALS' => ':=', +); + +# specific replace_types for specific non-terminals - never include the ':' +# ECPG-only replace_types are defined in ecpg-replace_types +my %replace_types = ( + 'PrepareStmt' => '', + 'opt_array_bounds' => '', + + # "ignore" means: do not create type and rules for this non-term-id + 'stmtblock' => 'ignore', + 'stmtmulti' => 'ignore', + 'CreateAsStmt' => 'ignore', + 'DeallocateStmt' => 'ignore', + 'ColId' => 'ignore', + 'type_function_name' => 'ignore', + 'ColLabel' => 'ignore', + 'Sconst' => 'ignore', +); + +# these replace_line commands excise certain keywords from the core keyword +# lists. Be sure to account for these in ColLabel and related productions. +my %replace_line = ( + 'unreserved_keywordCONNECTION' => 'ignore', + 'unreserved_keywordCURRENT_P' => 'ignore', + 'unreserved_keywordDAY_P' => 'ignore', + 'unreserved_keywordHOUR_P' => 'ignore', + 'unreserved_keywordINPUT_P' => 'ignore', + 'unreserved_keywordMINUTE_P' => 'ignore', + 'unreserved_keywordMONTH_P' => 'ignore', + 'unreserved_keywordSECOND_P' => 'ignore', + 'unreserved_keywordYEAR_P' => 'ignore', + 'col_name_keywordCHAR_P' => 'ignore', + 'col_name_keywordINT_P' => 'ignore', + 'col_name_keywordVALUES' => 'ignore', + 'reserved_keywordTO' => 'ignore', + 'reserved_keywordUNION' => 'ignore', + + # some other production rules have to be ignored or replaced + 'fetch_argsFORWARDopt_from_incursor_name' => 'ignore', + 'fetch_argsBACKWARDopt_from_incursor_name' => 'ignore', + "opt_array_boundsopt_array_bounds'['Iconst']'" => 'ignore', + 'VariableShowStmtSHOWvar_name' => 'SHOW var_name ecpg_into', + 'VariableShowStmtSHOWTIMEZONE' => 'SHOW TIME ZONE ecpg_into', + 'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL' => 'SHOW TRANSACTION ISOLATION LEVEL ecpg_into', + 'VariableShowStmtSHOWSESSIONAUTHORIZATION' => 'SHOW SESSION AUTHORIZATION ecpg_into', + 'returning_clauseRETURNINGtarget_list' => 'RETURNING target_list ecpg_into', + 'ExecuteStmtEXECUTEnameexecute_param_clause' => 'EXECUTE prepared_name execute_param_clause execute_rest', + 'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clause' => + 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause', + 'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt' => + 'PREPARE prepared_name prep_type_clause AS PreparableStmt', + 'var_nameColId' => 'ECPGColId', +); + +preload_addons(); + +main(); + +dump_buffer('header'); +dump_buffer('tokens'); +dump_buffer('types'); +dump_buffer('ecpgtype'); +dump_buffer('orig_tokens'); +print '%%', "\n"; +print 'prog: statements;', "\n"; +dump_buffer('rules'); +include_file( 'trailer', 'ecpg.trailer' ); +dump_buffer('trailer'); + +sub main +{ + line: while (<>) + { + if (/ERRCODE_FEATURE_NOT_SUPPORTED/) + { + $feature_not_supported = 1; + next line; + } + + chomp; + + # comment out the line below to make the result file match (blank line wise) + # the prior version. + #next if ($_ eq ''); + + # Dump the action for a rule - + # stmt_mode indicates if we are processing the 'stmt:' + # rule (mode==0 means normal, mode==1 means stmt:) + # flds are the fields to use. These may start with a '$' - in + # which case they are the result of a previous non-terminal + # + # if they dont start with a '$' then they are token name + # + # len is the number of fields in flds... + # leadin is the padding to apply at the beginning (just use for formatting) + + if (/^%%/) { + $tokenmode = 2; + $copymode = 1; + $yaccmode++; + $infield = 0; + } + + my $prec = 0; + + # Make sure any braces are split + s/{/ { /g; + s/}/ } /g; + + # Any comments are split + s|\/\*| /* |g; + s|\*\/| */ |g; + + # Now split the line into individual fields + my @arr = split(' '); + + if ( $arr[0] eq '%token' && $tokenmode == 0 ) + { + $tokenmode = 1; + include_file( 'tokens', 'ecpg.tokens' ); + } + elsif ( $arr[0] eq '%type' && $header_included == 0 ) + { + include_file( 'header', 'ecpg.header' ); + include_file( 'ecpgtype', 'ecpg.type' ); + $header_included = 1; + } + + if ( $tokenmode == 1 ) + { + my $str = ''; + my $prior = ''; + for my $a (@arr) + { + if ( $a eq '/*' ) + { + $comment++; + next; + } + if ( $a eq '*/' ) + { + $comment--; + next; + } + if ($comment) + { + next; + } + if ( substr( $a, 0, 1 ) eq '<' ) { + next; + + # its a type + } + $tokens{ $a } = 1; + + $str = $str . ' ' . $a; + if ( $a eq 'IDENT' && $prior eq '%nonassoc' ) + { + # add two more tokens to the list + $str = $str . "\n%nonassoc CSTRING\n%nonassoc UIDENT"; + } + $prior = $a; + } + add_to_buffer( 'orig_tokens', $str ); + next line; + } + + # Dont worry about anything if we're not in the right section of gram.y + if ( $yaccmode != 1 ) + { + next line; + } + + + # Go through each field in turn + for (my $fieldIndexer = 0 ; $fieldIndexer < scalar(@arr) ; $fieldIndexer++ ) + { + if ( $arr[$fieldIndexer] eq '*/' && $comment ) + { + $comment = 0; + next; + } + elsif ($comment) + { + next; + } + elsif ( $arr[$fieldIndexer] eq '/*' ) + { + # start of a multiline comment + $comment = 1; + next; + } + elsif ( $arr[$fieldIndexer] eq '//' ) + { + next line; + } + elsif ( $arr[$fieldIndexer] eq '}' ) + { + $brace_indent--; + next; + } + elsif ( $arr[$fieldIndexer] eq '{' ) + { + $brace_indent++; + next; + } + + if ( $brace_indent > 0 ) + { + next; + } + if ( $arr[$fieldIndexer] eq ';' ) + { + if ($copymode) + { + if ( $infield ) + { + dump_line( $stmt_mode, \@fields ); + } + add_to_buffer( 'rules', ";\n\n" ); + } + else + { + $copymode = 1; + } + @fields = (); + $infield = 0; + $line = ''; + next; + } + + if ( $arr[$fieldIndexer] eq '|' ) + { + if ($copymode) + { + if ( $infield ) + { + $infield = $infield + dump_line( $stmt_mode, \@fields ); + } + if ( $infield > 1 ) + { + $line = '| '; + } + } + @fields = (); + next; + } + + if ( exists $replace_token{ $arr[$fieldIndexer] } ) + { + $arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] }; + } + + # Are we looking at a declaration of a non-terminal ? + if ( ( $arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/ ) + || $arr[ $fieldIndexer + 1 ] eq ':' ) + { + $non_term_id = $arr[$fieldIndexer]; + $non_term_id =~ tr/://d; + + if ( not defined $replace_types{$non_term_id} ) + { + $replace_types{$non_term_id} = ''; + $copymode = 1; + } + elsif ( $replace_types{$non_term_id} eq 'ignore' ) + { + $copymode = 0; + $line = ''; + next line; + } + $line = $line . ' ' . $arr[$fieldIndexer]; + + # Do we have the : attached already ? + # If yes, we'll have already printed the ':' + if ( !( $arr[$fieldIndexer] =~ '[A-Za-z0-9]+:' ) ) + { + # Consume the ':' which is next... + $line = $line . ':'; + $fieldIndexer++; + } + + # Special mode? + if ( $non_term_id eq 'stmt' ) + { + $stmt_mode = 1; + } + else + { + $stmt_mode = 0; + } + my $tstr = '%type ' . $replace_types{$non_term_id} . ' ' . $non_term_id; + add_to_buffer( 'types', $tstr ); + + if ($copymode) + { + add_to_buffer( 'rules', $line ); + } + $line = ''; + @fields = (); + $infield = 1; + next; + } + elsif ($copymode) { + $line = $line . ' ' . $arr[$fieldIndexer]; + } + if ( $arr[$fieldIndexer] eq '%prec' ) + { + $prec = 1; + next; + } + + if ( $copymode + && !$prec + && !$comment + && length( $arr[$fieldIndexer] ) + && $infield ) + { + if ( + $arr[$fieldIndexer] ne 'Op' + && ( $tokens{ $arr[$fieldIndexer] } > 0 || $arr[$fieldIndexer] =~ /'.+'/ ) + || $stmt_mode == 1 + ) + { + my $S; + if ( exists $replace_string{ $arr[$fieldIndexer] } ) + { + $S = $replace_string{ $arr[$fieldIndexer] }; + } + else + { + $S = $arr[$fieldIndexer]; + } + $S =~ s/_P//g; + $S =~ tr/'//d; + if ( $stmt_mode == 1 ) + { + push(@fields, $S); + } + else + { + push(@fields, lc($S)); + } + } + else + { + push(@fields, '$' . (scalar(@fields)+1)); + } + } + } + } +} + + +# append a file onto a buffer. +# Arguments: buffer_name, filename (without path) +sub include_file +{ + my ($buffer, $filename) = @_; + my $full = "$path/$filename"; + open(my $fh, '<', $full) or die; + while ( <$fh> ) + { + chomp; + add_to_buffer( $buffer, $_ ); + } + close($fh); +} + +sub include_addon +{ + my($buffer, $block, $fields, $stmt_mode) = @_; + my $rec = $addons{$block}; + return 0 unless $rec; + + if ( $rec->{type} eq 'rule' ) + { + dump_fields( $stmt_mode, $fields, ' { ' ); + } + elsif ( $rec->{type} eq 'addon' ) + { + add_to_buffer( 'rules', ' { ' ); + } + + #add_to_buffer( $stream, $_ ); + #We have an array to add to the buffer, we'll add it ourself instead of + #calling add_to_buffer, which does not know about arrays + + push( @{ $buff{$buffer} }, @{ $rec->{lines} } ); + + if ( $rec->{type} eq 'addon' ) + { + dump_fields( $stmt_mode, $fields, '' ); + } + + + # if we added something (ie there are lines in our array), return 1 + return 1 if (scalar(@{ $rec->{lines} }) > 0); + return 0; +} + + +# include_addon does this same thing, but does not call this +# sub... so if you change this, you need to fix include_addon too +# Pass: buffer_name, string_to_append +sub add_to_buffer +{ + push( @{ $buff{$_[0]} }, "$_[1]\n" ); +} + +sub dump_buffer +{ + my($buffer) = @_; + print '/* ', $buffer, ' */',"\n"; + my $ref = $buff{$buffer}; + print @$ref; +} + +sub dump_fields +{ + my ( $mode, $flds, $ln ) = @_; + my $len = scalar(@$flds); + + if ( $mode == 0 ) + { + #Normal + add_to_buffer( 'rules', $ln ); + if ( $feature_not_supported == 1 ) + { + # we found an unsupported feature, but we have to + # filter out ExecuteStmt: CREATE OptTemp TABLE ... + # because the warning there is only valid in some situations + if ( $flds->[0] ne 'create' || $flds->[2] ne 'table' ) + { + add_to_buffer( 'rules', + 'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");' + ); + } + $feature_not_supported = 0; + } + + if ( $len == 0 ) + { + # We have no fields ? + add_to_buffer( 'rules', ' $$=EMPTY; }' ); + } + else + { + # Go through each field and try to 'aggregate' the tokens + # into a single 'mm_strdup' where possible + my @flds_new; + my $str; + for ( my $z = 0 ; $z < $len ; $z++ ) + { + if ( substr( $flds->[$z], 0, 1 ) eq '$' ) + { + push(@flds_new, $flds->[$z]); + next; + } + + $str = $flds->[$z]; + + while (1) + { + if ( $z >= $len - 1 || substr( $flds->[ $z + 1 ], 0, 1 ) eq '$' ) + { + # We're at the end... + push(@flds_new, "mm_strdup(\"$str\")"); + last; + } + $z++; + $str = $str . ' ' . $flds->[$z]; + } + } + + # So - how many fields did we end up with ? + $len = scalar(@flds_new); + if ( $len == 1 ) + { + # Straight assignement + $str = ' $$ = ' . $flds_new[0] . ';'; + add_to_buffer( 'rules', $str ); + } + else + { + # Need to concatenate the results to form + # our final string + $str = ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');'; + add_to_buffer( 'rules', $str ); + } + add_to_buffer( 'rules', '}' ); + } + } + else + { + # we're in the stmt: rule + if ($len) + { + # or just the statement ... + add_to_buffer( 'rules', ' { output_statement($1, 0, ECPGst_normal); }' ); + } + else + { + add_to_buffer( 'rules', ' { $$ = NULL; }' ); + } + } +} + + +sub dump_line +{ + my($stmt_mode, $fields) = @_; + my $block = $non_term_id . $line; + $block =~ tr/ |//d; + my $rep = $replace_line{$block}; + if ($rep) + { + if ($rep eq 'ignore' ) + { + return 0; + } + + if ( index( $line, '|' ) != -1 ) + { + $line = '| ' . $rep; + } + else + { + $line = $rep; + } + $block = $non_term_id . $line; + $block =~ tr/ |//d; + } + add_to_buffer( 'rules', $line ); + my $i = include_addon( 'rules', $block, $fields, $stmt_mode); + if ( $i == 0 ) + { + dump_fields( $stmt_mode, $fields, ' { ' ); + } + return 1; +} + +=top + load addons into cache + %addons = { + stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] }, + stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] } + } + +=cut +sub preload_addons +{ + my $filename = $path . "/ecpg.addons"; + open(my $fh, '<', $filename) or die; + # there may be multple lines starting ECPG: and then multiple lines of code. + # the code need to be add to all prior ECPG records. + my (@needsRules, @code, $record); + # there may be comments before the first ECPG line, skip them + my $skip = 1; + while ( <$fh> ) + { + if (/^ECPG:\s(\S+)\s?(\w+)?/) + { + $skip = 0; + if (@code) + { + for my $x (@needsRules) + { + push(@{ $x->{lines} }, @code); + } + @code = (); + @needsRules = (); + } + $record = {}; + $record->{type} = $2; + $record->{lines} = []; + if (exists $addons{$1}) { die "Ga! there are dups!\n"; } + $addons{$1} = $record; + push(@needsRules, $record); + } + else + { + next if $skip; + push(@code, $_); + } + } + close($fh); + if (@code) + { + for my $x (@needsRules) + { + push(@{ $x->{lines} }, @code); + } + } +} + +