#!/usr/bin/perl # # $Header$ # # copyright (c) 2009, 2010, 2011 # Author: Jeffrey I Cohen # # SLZY_HDR_END use POSIX; use Pod::Usage; use Getopt::Long; use Data::Dumper; use strict; use warnings; # SLZY_POD_HDR_BEGIN # WARNING: DO NOT MODIFY THE FOLLOWING POD DOCUMENT: # Generated by sleazy.pl version 6 (release Mon Aug 20 12:30:03 2012) # Make any changes under SLZY_TOP_BEGIN/SLZY_LONG_BEGIN =head1 NAME B - generate catalog entries =head1 VERSION This document describes version 34 of tidycat.pl, released Thu Oct 18 15:24:46 2012. =head1 SYNOPSIS B Options: -help brief help message -man full documentation -dumpdef output file for dump of serialized catalog data structures -dumpformat format options for dump file [perl, jason] -sqldef output file for dump of catalog DDL statements -syscache build syscache entries =head1 OPTIONS =over 8 =item B<-help> Print a brief help message and exits. =item B<-man> Prints the manual page and exits. =item B<-dumpdef> Specify an optional filename to hold a dump of the serialized catalog data structures. The format of the dump file is determined by dumpformat =item B<-dumpformat> Specify a format for the dumpfile. The only valid options are jason or perl. =item B<-sqldef> Specify an optional filename to hold the CREATE TABLE statements from the tidycat definitions. Note that these statements will contain the tidycat WITH clause, which is not valid SQL. =item B<-syscache> If specified, rebuild syscache.h and syscache.c. Note that this option, like dumpdef, must read all catalog headers, ie in src/include/catalog, the command: perl tidycat.pl -syscache *.h constructs new versions of syscache.c and syscache.h. NOTE: Modification and extension of syscache entries is extremely rare. Usage of this option is discouraged. =back =head1 DESCRIPTION tidycat.pl handles all of your stinky catalog problems, leaving a fresh, clean scent. 
Catalog tables require several sets of co-ordinated modifications to multiple source files to define the table and indexes, and (under some circumstances) the toast tables and indexes (in toasting.h and toasting.c), as well as some special code in catalog.c to aid in bootstrap and upgrade. tidycat also updates a generated list of headers in pg_tidycat.h and the catalog Makefile. The original files are copied to a special tidycat_backup directory in /tmp, and all generated files are written to /tmp. tidycat.pl uses a single definition statement to generate the code associated with the table in multiple source files. A sample definition for the fictional pg_foobar.h follows: /* TIDYCAT_BEGINDEF CREATE TABLE pg_foobar with (camelcase=FooBar, shared=true, oid=true, relid=9991) ( fooname name, -- name of foo bar foolimit real, -- max active count limit fooignore boolean, -- ignore foo in baz context ); create unique index on pg_foobar(oid) with (indexid=9993); create index on pg_foobar(fooname) with (indexid=9994); TIDYCAT_ENDDEF */ The definition must begin and end with the TIDYCAT_BEGINDEF/TIDYCAT_ENDDEF exactly as shown. The CREATE TABLE statement is almost identical to standard SQL, with the addition of a special WITH clause for implementation-specific features of the catalog entry. Currently, the relid must be specified using unassigned oids from the unused_oids script. The options are: =over 8 =item CamelCase: (optional) If your tablename is a compound name, the index definitions look a little nicer if you define an appropriate camelcase name. Otherwise, the default version of the name is the tablename, minus the "pg_" prefix, initial letter capitalized. =item shared: (false by default) Whether the table is local to each database or shared by all. =item oid: (true by default) Whether the table has an auto-generated oid column. =item relid: (required) The relid of the table in pg_class. Use unused_oids to find one. 
=item toast_oid: (required for all tables with text or array columns) The oid of the toast table (see toasting.h). Use unused_oids to find one. tidycat will automatically detect if the table definition requires a toast table and return an error if it is not specified. =item toast_index: (required for all tables with toast_oid) The oid of the index of the toast table (see toasting.h). Use unused_oids to find one. =item content: (optional) The "content" is only for catalog tables with non-standard content management. "Normal" catalog tables are replicated from the master to all the segments. Non-standard tables fall into three categories: MASTER_ONLY, SEGMENT_LOCAL, and PERSISTENT. Don't add any new non-standard tables. Please. Note that this flag controls the generation of validation logic for checkcat; it does not control the catalog table tuple replication mechanisms. =back Similarly, index definitions are unique or non-unique, and require an indexid (and an optional indexname). Running tidycat.pl against pg_foobar.h adds the following section after the definition: /* TIDYCAT_BEGIN_CODEGEN WARNING: DO NOT MODIFY THE FOLLOWING SECTION: Generated by tidycat.pl version 3. on Tue Dec 8 12:50:21 2009 */ /* TidyCat Comments for pg_foobar: Table is shared, so catalog.c:IsSharedRelation is updated. Table has an Oid column. Table has static type (see pg_types.h). */ /* ---------------- * pg_foobar definition. cpp turns this into * typedef struct FormData_pg_foobar * ---------------- */ #define FooBarRelationId 9991 CATALOG(pg_foobar,9991) BKI_SHARED_RELATION { NameData fooname; /* name of foo bar */ float4 foolimit; /* max active count limit */ bool fooignore; /* ignore foo in baz context */ } FormData_pg_foobar; /* ---------------- * Form_pg_foobar corresponds to a pointer to a tuple with * the format of pg_foobar relation. 
* ---------------- */ typedef FormData_pg_foobar *Form_pg_foobar; /* ---------------- * compiler constants for pg_foobar * ---------------- */ #define Natts_pg_foobar 3 #define Anum_pg_foobar_fooname 1 #define Anum_pg_foobar_foolimit 2 #define Anum_pg_foobar_fooignore 3 /* TIDYCAT_END_CODEGEN */ The generated code contains a CATALOG macro/struct definition for the table, where the SQL datatypes are converted to C types. The naming and comments follow established conventions. Additional modifications are made to indexing.h: /* relation id: 9991 - pg_foobar 20091208 */ DECLARE_UNIQUE_INDEX(pg_foobar_oid_index, 9993, on pg_foobar using btree(oid oid_ops)); #define FooBarOidIndexId 9993 /* relation id: 9991 - pg_foobar 20091208 */ DECLARE_INDEX(pg_foobar_fooname_index, 9994, on pg_foobar using btree(fooname name_ops)); #define FooBarFoonameIndexId 9994 And the function IsSharedRelation() in catalog.c: bool IsSharedRelation(Oid relationId) { /* These are the shared catalogs (look for BKI_SHARED_RELATION) */ if (relationId == AuthIdRelationId || (...much code...) /* relation id: 9991 - pg_foobar 20100105 */ relationId == FooBarRelationId || Note that IsSharedRelation is only updated for shared tables. =head2 JSON document In src/include/catalog, the command: perl tidycat.pl -dd foo.json -df json *.h will generate a JSON document describing all of the catalog tables. This file is installed under gpMgmt/bin/gppylib/data, and gpcheckcat uses this data to generate check queries for foreign key constraint. =head1 CAVEATS tidycat does not modify the original files -- it writes modified versions of the files to /tmp. You need to copy over the originals with the generated files manually. If you need to restore the originals you can use the copies from the tidycat_backup directory. Multiple cycles of tidycat with changing definitions can leave junk in /tmp, and you might copy that junk into you source tree. 
Do not copy over any generated files that are older than your latest
backup directory.

=head1 AUTHORS

Jeffrey I Cohen

Copyright (c) 2009-2012 Greenplum. All rights reserved.

Address bug reports and comments to: jcohen@greenplum.com

=cut

# SLZY_POD_HDR_END

my $glob_id = "";
my $glob_platform;
my $glob_tabwidth = 4;
my $glob_faketab = 0;
my $glob_tabstr = "\t";
#my $glob_tabstr = " " x $glob_tabwidth;
my $glob_tmpdir = "/tmp";

# SLZY_GLOB_BEGIN
my $glob_glob;
# SLZY_GLOB_END

# Validate the parsed command-line options held in $glob_glob.
#
# When a dump file was requested (dumpdef) together with a non-empty
# dumpformat, the format must be exactly one of "jason", "json" or
# "perl" (case-insensitive); anything else dies with
# "bad dump format: ...".  In every other case the format is forced
# to "perl".
sub glob_validate {
    my $format = $glob_glob->{dumpformat};

    if ($glob_glob->{dumpdef} && defined($format) && length($format)) {
        # Anchored so that values which merely contain a valid name
        # (eg "perlish") are rejected instead of silently accepted.
        die ("bad dump format: $format")
            unless ($format =~ m/\A(?:jason|json|perl)\z/i);
    }
    else {
        $glob_glob->{dumpformat} = "perl";
    }

#   print "loading...\n" ;
}

# SLZY_CMDLINE_BEGIN
# WARNING: DO NOT MODIFY THE FOLLOWING SECTION:
# Generated by sleazy.pl version 6 (release Mon Aug 20 12:30:03 2012)
# Make any changes under SLZY_TOP_BEGIN/SLZY_LONG_BEGIN
# Any additional validation logic belongs in glob_validate()

BEGIN {
    my $s_help = 0;     # brief help message
    my $s_man = 0;      # full documentation
    my $s_dumpdef;      # output file for dump of serialized catalog data structures
    my $s_dumpformat;   # format options for dump file [perl, jason]
    my $s_sqldef;       # output file for dump of catalog DDL statements
    my $s_syscache = 0; # build syscache entries

    my $slzy_argv_str;
    $slzy_argv_str = quotemeta(join(" ", @ARGV))
        if (scalar(@ARGV));

    GetOptions(
        'help|?' => \$s_help,
        'man' => \$s_man,
        'dumpdef|dd:s' => \$s_dumpdef,
        'dumpformat|df|dumpfmt:s' => \$s_dumpformat,
        'sqldef:s' => \$s_sqldef,
        'syscache' => \$s_syscache,
    )
        or pod2usage(2);

    pod2usage(-msg => $glob_id, -exitstatus => 1) if $s_help;
    pod2usage(-msg => $glob_id, -exitstatus => 0, -verbose => 2) if $s_man;

    $glob_glob = {};

    # version and properties from json definition
    $glob_glob->{_sleazy_properties} = {};
    $glob_glob->{_sleazy_properties}->{version} = '34';
    $glob_glob->{_sleazy_properties}->{COPYDATES} = '2009-2012';
    $glob_glob->{_sleazy_properties}->{slzy_date} = '1350599086';
    $glob_glob->{_sleazy_properties}->{slzy_argv_str} = $slzy_argv_str;

    $glob_glob->{dumpdef} = $s_dumpdef if (defined($s_dumpdef));
    $glob_glob->{dumpformat} = $s_dumpformat if (defined($s_dumpformat));
    $glob_glob->{sqldef} = $s_sqldef if (defined($s_sqldef));
    $glob_glob->{syscache} = $s_syscache if (defined($s_syscache));

    glob_validate();
}
# SLZY_CMDLINE_END

# DO NOT extend this list! To ensure smooth upgrade, all new tables
# with text or array columns must have toast tables and indexes
my %toast_tab_exception_h = (
    "gp_configuration_history" => 1,
    "gp_configuration" => 1,
    "gp_distribution_policy" => 1,
    "gp_master_mirroring" => 1,
    "gp_persistent_filespace_node" => 1,
    "gp_san_configuration" => 1,
    "gp_version_at_initdb" => 1,
    "pg_aggregate" => 1,
    "pg_appendonly" => 1,
    "pg_class" => 1,
    "pg_exttable" => 1,
    "pg_index" => 1,
    "pg_partition_rule" => 1,
    "pg_pltemplate" => 1,
    "pg_resqueuecapability" => 1,
    "pg_resourcetype" => 1,
    "pg_stat_last_operation" => 1,
    "pg_stat_last_shoperation" => 1,
    "pg_tablespace" => 1,
    "pg_type" => 1,
);

# DO NOT extend this list!
# All new tidycat files should get
# registered in pg_tidycat.h
my %allfiles_exception_h = (
    "pg_aggregate.h" => 1,
    "pg_am.h" => 1,
    "pg_amop.h" => 1,
    "pg_amproc.h" => 1,
    "pg_aoseg.h" => 1,
    "pg_appendonly.h" => 1,
    "pg_attrdef.h" => 1,
    "pg_attribute.h" => 1,
    "pg_auth_members.h" => 1,
    "pg_authid.h" => 1,
    "pg_autovacuum.h" => 1,
    "pg_cast.h" => 1,
    "pg_class.h" => 1,
    "pg_constraint.h" => 1,
    "pg_conversion.h" => 1,
    "pg_database.h" => 1,
    "pg_depend.h" => 1,
    "pg_description.h" => 1,
    "pg_extprotocol.h" => 1,
    "pg_exttable.h" => 1,
    "pg_filespace.h" => 1,
    "pg_filespace_entry.h" => 1,
    "pg_index.h" => 1,
    "pg_inherits.h" => 1,
    "pg_language.h" => 1,
    "pg_largeobject.h" => 1,
    "pg_listener.h" => 1,
    "pg_namespace.h" => 1,
    "pg_opclass.h" => 1,
    "pg_operator.h" => 1,
    "pg_partition.h" => 1,
    "pg_partition_rule.h" => 1,
    "pg_pltemplate.h" => 1,
    "pg_proc.h" => 1,
    "pg_resqueue.h" => 1,
    "pg_rewrite.h" => 1,
    "pg_shdepend.h" => 1,
    "pg_shdescription.h" => 1,
    "pg_statistic.h" => 1,
    "pg_tablespace.h" => 1,
    "pg_trigger.h" => 1,
    "pg_type.h" => 1,
    "pg_user_mapping.h" => 1,
    "pg_window.h" => 1
);

# Return the banner comment placed above the CATALOG struct for
# table $tname.  TWOTABS is replaced by two copies of $glob_tabstr.
sub getcomment1 {
    my $tname = shift;

    my $bigstr = <<"EOF_bigstr";
/* ----------------
 *TWOTABSTABLENAME definition. cpp turns this into
 *TWOTABStypedef struct FormData_TABLENAME
 * ----------------
 */
EOF_bigstr

    my $twotabs = $glob_tabstr x 2;

    $bigstr =~ s/TWOTABS/$twotabs/gm;
    $bigstr =~ s/TABLENAME/$tname/gm;

    return $bigstr;
}

# Return the banner comment placed above the Form_xxx pointer typedef.
# $tname is the table name, $tform the Form_ type name.
sub getcomment2 {
    my ($tname, $tform) = @_;

    my $bigstr = <<"EOF_bigstr";
/* ----------------
 *TWOTABSTFORM corresponds to a pointer to a tuple with
 *TWOTABSthe format of TABLENAME relation.
 * ----------------
 */
EOF_bigstr

    my $twotabs = $glob_tabstr x 2;

    $bigstr =~ s/TWOTABS/$twotabs/gm;
    $bigstr =~ s/TABLENAME/$tname/gm;
    $bigstr =~ s/TFORM/$tform/gm;

    return $bigstr;
}

# Return the banner comment placed above the Natts_/Anum_ constants
# for table $tname.
sub getcomment3 {
    my $tname = shift;

    my $bigstr = <<"EOF_bigstr";
/* ----------------
 *TWOTABScompiler constants for TABLENAME
 * ----------------
 */
EOF_bigstr

    my $twotabs = $glob_tabstr x 2;

    $bigstr =~ s/TWOTABS/$twotabs/gm;
    $bigstr =~ s/TABLENAME/$tname/gm;

    return $bigstr;
}

# Map a (lower-cased) SQL column type to the C type used in the
# CATALOG struct.  A trailing "[]" marks an array and yields
# "<ctype>[1]".  Dies when the SQL type has no C equivalent.
sub sqltype_to_ctype {
    my $coltype = shift;

    # list of valid C types for SQL types.
    #
    # NOTE: not all sql types are valid for catalog tables,
    # so do *NOT* extend this list unless you know what you are doing!!
    my %sql2ch = (
        aclitem => "aclitem",
        bigint => "bigint",
        bool => "bool",             # boolean is the real sqltype
        boolean => "bool",
        bytea => "bytea",
        # Note: a quoted_char (or "char") is a single C char
        quoted_char => "char",
        gpxlogloc => "gpxlogloc",
        int2vector => "int2vector",
        integer => "int4",
        name => "NameData",
        oid => "Oid",
        oidvector => "oidvector",
        real => "float4",
        regproc => "regproc",
        smallint => "int2",
        text => "text",
        tid => "tid",
        # NOTE: both time and timestamp use a sleazy hack due to
        # bootstrap nonsense
        time => "time",
        timestamp_with_time_zone => "timestamptz",
        xid => "xid",
    );

    # "[]" for optional "array of" suffix
    my $isarray = ($coltype =~ m/\[\]$/);
    $coltype =~ s/\[\]$//;

    die "no C conversion for type: $coltype"
        unless (exists($sql2ch{$coltype}));

    my $ctype = $sql2ch{$coltype};

    # make an array of 1
    $ctype .= "[1]" if ($isarray);

    return $ctype;
}

# Map a C type to the prefix of its btree operator class name
# (the caller appends "_ops").  Oid and regproc use "oid", NameData
# uses "name"; every other type is returned unchanged.
sub ctype_to_btree_op_prefix {
    my $ctype = shift;

    return $ctype
        unless ($ctype =~ m/Oid|NameData|regproc/);

    $ctype = "name" if ($ctype =~ m/NameData/);
    $ctype = "oid" if ($ctype =~ m/Oid/);
    # NOTE: regproc is an oid typedef
    $ctype = "oid" if ($ctype =~ m/regproc/);

    return $ctype;
}

# Parse one "create [unique] index on table(col, ...) with (k=v, ...)"
# statement and append the resulting index definition to
# $alltabs->{table}->{indexes}.  Dies on an unknown table or column,
# or when the WITH clause lacks an indexid.
sub parseindex {
    my ($alltabs, $bigstr) = @_;

    my $bUnique = ($bigstr =~ m/create\s+unique\s+index/i);

    my $idef = {unique => $bUnique, with => {} };

    $bigstr =~ s/^\s*create\s+(unique\s+)?index\s+on//i;

    # NOTE(review): the statement that populated @foo was not readable
    # in the copy of the file this was edited from (the array was used
    # without being declared).  This match is reconstructed from the
    # code below, which needs three captures: table name, column list,
    # and the optional WITH clause.  Confirm against the upstream file.
    my @foo =
        ($bigstr =~ m/^\s*(\w+)\s*\(([^)]*)\)\s*(with\s*\(.*\))?/is);

    die "bad index def: $bigstr" unless (2 < scalar(@foo));

    my $tname = shift @foo;
    my $colnamlist = shift @foo;
    my @cols = split(",", $colnamlist);
    my $icols = [];

    $tname = lc($tname);
    $tname =~ s/^\s+//;
    $tname =~ s/\s+$//;

    die "bad index def - no such table: $tname"
        unless (exists($alltabs->{$tname}));

    for my $c1 (@cols) {
        $c1 = lc($c1);
        $c1 =~ s/^\s+//;
        $c1 =~ s/\s+$//;

        die "bad index for $tname - no such col: $c1"
            unless (exists($alltabs->{$tname}->{colh}->{$c1}));

        # the operator class is derived from the column's C type
        my $colop = $alltabs->{$tname}->{colh}->{$c1};
        $colop = ctype_to_btree_op_prefix($colop);
        $colop = $colop . "_ops";

        push @{$icols}, [$c1, $colop];
    }

    # everything after the first ")" is the WITH clause
    @foo = split(/\)/, $bigstr, 2);
    my $with = pop @foo;

    my @baz = ($with =~ m/^\s*(with\s*\(.*\))/is);

    die "bad index for $tname - no index oid: $bigstr"
        unless (scalar(@baz));

    my $indwithclause = shift @baz;

    $indwithclause =~ s/^\s*with\s*\(//is;
    $indwithclause =~ s/\)\s*$//s;

    @baz = split(",", $indwithclause);

    for my $withdef (@baz) {
        my @bzz = split("=", $withdef, 2);

        die "bad index with def for $tname: $withdef"
            unless (2 == scalar(@bzz));

        my $kk = shift @bzz;
        my $vv = shift @bzz;

        $kk =~ s/^\s+//;
        $kk =~ s/\s+$//;
        $kk = lc($kk);

        $vv =~ s/^\s+//;
        $vv =~ s/\s+$//;

        $idef->{with}->{$kk} = $vv;
    }

    $alltabs->{$tname}->{indexes} = []
        unless (exists($alltabs->{$tname}->{indexes}));

    $idef->{cols} = $icols;

    die "bad index def for $tname - no index oid: $bigstr"
        unless (exists($idef->{with}->{indexid}));

    $idef->{indexid} = $idef->{with}->{indexid};

    if (exists($idef->{with}->{indexname})) {
        $idef->{indexname} = $idef->{with}->{indexname};
    }

    push @{$alltabs->{$tname}->{indexes}}, $idef;
} # end parseindex

# parse foreign key definitions
#
# Accepts "alter table T add fk k1 on P(c1)", the composite form
# "add fk (k1, k2) on P(c1, c2)", and "add vector_fk ..." for a single
# Oid-array or oidvector column.  Each key is appended to
# $alltabs->{T}->{fk_list}; scalar keys are also appended to the
# older {foreign_keys} array.  $filnam is only used in error messages.
sub parsefk {
    my ($alltabs, $bigstr, $filnam) = @_;

    my @baz =
        ($bigstr =~ m/^\s*alter\s+table\s+(\w+)\s+add\s+(vector\_)?fk/i);

    # parsetabdef routes every ALTER TABLE here; fail with a clear
    # message instead of continuing with an undefined table name
    die "$filnam: bad foreign key definition: $bigstr"
        unless (scalar(@baz));

    # table names are stored lower-cased (see parsetab)
    my $tname = lc(shift @baz);

    die "$filnam: bad foreign key - no such table: $tname"
        unless (exists($alltabs->{$tname}));

    # special case of vector fk for oidvector, oid array
    # (case-insensitive, like the statement match above)
    my $isvector = ($bigstr =~ m/add\s+vector\_fk/i);

    $bigstr =~ s/^\s*alter\s+table\s+\w+\s+add\s+(vector\_)?fk//i;

    # allow composite key, eg "add fk (k1[,k2...]) on ..."
    my @foo =
        ($bigstr =~ m/\s*\(\s*(\w+(?:\s*,\s*\w+)*)\s*\)\s*on\s+(\w+)\s*\(\s*(\w+(?:\s*\,\s*\w+)*)\s*\)/i);

    # single key, no parens, ie "add fk k1 on ..."
    @foo =
        ($bigstr =~ m/\s*(\w+)\s+on\s+(\w+)\s*\(\s*(\w+(?:\s*\,\s*\w+)*)\s*\)/i)
        unless (scalar(@foo));

    die "$filnam: bad foreign key for table $tname: $bigstr"
        unless (2 < scalar(@foo));

    $alltabs->{$tname}->{foreign_keys} = []
        unless (exists($alltabs->{$tname}->{foreign_keys}));

    # NOTE: fk_list format supersedes original "foreign_keys" array...
    $alltabs->{$tname}->{fk_list} = []
        unless (exists($alltabs->{$tname}->{fk_list}));

    my $fknamlist = shift @foo;     # the foreign key column list
    my $pktname = shift @foo;       # the primary key table name
    my $colnamlist = shift @foo;    # the primary key cols

    # remove leading/trailing spaces around comma as well
    my @cols = split(/\s*,\s*/, $colnamlist);
    my @fkcols = split(/\s*,\s*/, $fknamlist);

    for my $fkname (@fkcols) {
        die "$filnam: bad foreign key for table $tname - no such column: $fkname"
            unless (exists($alltabs->{$tname}->{colh}->{$fkname}));
    }

    # XXX XXX: allow a "vector" fk for oidvector or oid array
    if ($isvector) {
        die "$filnam: bad vector foreign key for table $tname - too many columns"
            unless (1 == scalar(@fkcols));

        my $fkname = $fkcols[0];

        die "$filnam: bad vector foreign key $fkname for table $tname - must be an Oid vector or array: "
            unless ($alltabs->{$tname}->{colh}->{$fkname} =~
                    m/^(Oid\[1\]|oidvector)$/);
    }

    # old-style foreign key def only for regular, "scalar" keys
    push @{$alltabs->{$tname}->{foreign_keys}}, [\@fkcols, $pktname, \@cols]
        unless ($isvector);

    my $fkh = {type => "scalar",
               fkcols => \@fkcols,
               pktable => $pktname,
               pkcols => \@cols
    };
    $fkh->{type} = "vector" if ($isvector);

    push @{$alltabs->{$tname}->{fk_list}}, $fkh;
} # end parsefk

# Parse the parenthesized column list of a CREATE TABLE statement.
# Stores an ordered list of column definitions in
# $alltabs->{$tname}->{cols} and a colname => ctype hash in {colh}
# (plus an implicit "oid" entry when the table has oids).  Comment and
# blank lines preceding a column are kept as its "precomment"; a
# trailing "--" comment is kept as its "postcomment".
sub parsecols {
    my ($alltabs, $tname, $bigstr) = @_;

    # strip the enclosing parentheses
    $bigstr =~ s/^\s*\(//s;
    $bigstr =~ s/\)\s*$//s;

    my @foo = split(/\n/, $bigstr);
    my $collist = [];
    my $tzcolname;
    my $tmcolname;
    my $precomment;
    my $colh = {}; # column data hash

    for my $lin (@foo) {
        unless (length($lin)) {
            $precomment = "" unless (defined($precomment));
            # mark the blank lines (remove this later)
            $precomment .= "\n**TK_BLANK_LINE**";
            next;
        }

        if (($lin =~ m/^\s*\-\-/) || ($lin =~ m/^\s+$/)) {
            $precomment = "" unless (defined($precomment));
            chomp($lin);
            $precomment .= "\n" . $lin;
            next;
        }

        # (optional quoted) word space word (or word space "char")
        die "bad col: $lin"
            unless ($lin =~
                    m/(\s*(\")*\w+(\")*\s+\w+)|\s*\w+\s+\"char\"/);

        # Note: timestamp fix - make into a single token
        $lin =~ s/timestamp\s+with\s+time\s+zone/timestamp_with_time_zone/igm;

        # Note: quoted char (ie "char") substitution --
        # distinguish unquoted sql char, which is "character(1)",
        # or sql type bpchar, from postgresql-specific
        # quoted char ("char"), which is a single C char
        $lin =~ s/\"char\"/quoted_char/igm;

        # in sql, have a
        # colname (with optional quotes) space coltype (followed
        # by optional array brackets), optionally followed by a
        # comma and/or a sql comment
        my @baz =
            ($lin =~
             m/\s*((?:\")*\w+(?:\")*)\s+(\w+(?:\[\])?)\s*(\,)?\s*(\-\-.*)?/);

        my $colname = shift @baz;
        my $coltype = lc(shift @baz);

        # downcase colname unless it is quoted
        if ($colname !~ m/^\".*\"$/) {
            $colname = lc($colname);
        }
        else {
            # remove quotes
            $colname =~ s/^\"(.*)\"$/$1/;
        }

        die ("duplicate colname: $colname for $tname")
            if (exists($colh->{$colname}));

        shift @baz; # comma or undef

        my $postcomment; # the comment trailing the definition
        $postcomment = $baz[0]
            if (scalar(@baz) && defined($baz[0]));

        my $coldef = {colname => $colname, sqltype => $coltype};

        my $ctype = sqltype_to_ctype($coltype);

        $coldef->{ctype} = $ctype;
        $colh->{$colname} = $ctype;

        # track any timestamp columns specially
        if ($ctype =~ m/timestamp/) {
            if (defined($tzcolname)) {
                # print '"colname" et al' if more than one
                # timestamp col
                $tzcolname .= " et al"
                    unless ($tzcolname =~ m/et al$/);
            }
            else {
                $tzcolname = '"' . $colname . '"';
            }
        }

        # track any TimeADT columns specially
        if ($ctype =~ m/time$/) {
            if (defined($tmcolname)) {
                # print '"colname" et al' if more than one
                # time col
                $tmcolname .= " et al"
                    unless ($tmcolname =~ m/et al$/);
            }
            else {
                $tmcolname = '"' . $colname . '"';
            }
        }

        $coldef->{postcomment} = $postcomment
            if (defined($postcomment));
        $coldef->{precomment} = $precomment
            if (defined($precomment));

        push @{$collist}, $coldef;

        undef $precomment;
    } # end for my lin

    # add the Oid column
    if ($alltabs->{$tname}->{with}->{oid}) {
        die "name conflict: cannot have named oid column"
            if (exists($colh->{oid}));
        $colh->{oid} = "Oid";
    }

    $alltabs->{$tname}->{cols} = $collist;
    $alltabs->{$tname}->{colh} = $colh;

    # hack to track timestamp
    $alltabs->{$tname}->{tzhack} = $tzcolname
        if (defined($tzcolname));
    $alltabs->{$tname}->{tmhack} = $tmcolname
        if (defined($tmcolname));
} # end parsecols

# Interpret a boolean option of the table WITH clause.  Returns
# $default when $key is absent; otherwise true only when the value is
# exactly one of t, true, y, yes or 1 (case-insensitive).  The match
# is anchored so that values which merely contain one of those
# letters are not treated as true.
sub _with_flag {
    my ($with, $key, $default) = @_;

    return $default unless (exists($with->{$key}));

    my $istrue = ($with->{$key} =~ m/\A(?:t|true|y|yes|1)\z/i);

    return $istrue;
}

# Parse one CREATE TABLE statement (with the tidycat WITH clause) and
# register the table in $alltabs under its lower-cased name.  Returns
# that name.  Dies on a missing table name or relid, malformed WITH
# entries, an unknown content type, or inconsistent or missing toast
# options.
sub parsetab {
    my ($alltabs, $bigstr, $filnam) = @_;

    my $tabdef = $bigstr;

    $bigstr =~ s/create\s+table//is;
    $bigstr =~ s/^\s+//s;
    $bigstr =~ s/\s+$//s;

    my @foo = ($bigstr =~ m/^\s*(\w+)/s);

    die "no tablename: $tabdef" unless (scalar(@foo));

    my $tname = shift @foo;

    # the name is at the very start of the remaining text
    $bigstr =~ s/^\Q$tname\E//s;
    $bigstr =~ s/^\s+//s;

    $tname =~ s/^\s+//;
    $tname =~ s/\s+$//;
    $tname = lc($tname);

    $alltabs->{$tname} = {tabdef_text => $tabdef,
                          filename => $filnam };

    if ($bigstr =~ m/\s*with\s*\(.*\)/is) {
        # match a "WITH (...)" followed by "("
        @foo = ($bigstr =~ m/^\s*(with\s*\(.*\)(?=(\s*\()))/is);

        die "bad with: $bigstr" unless (scalar(@foo));

        my $withclause = shift @foo;

        $alltabs->{$tname}->{with} = {text => $withclause};

        $bigstr =~ s/\s*(with\s*\(.*\)(?=(\s*\()))//is;
        $bigstr =~ s/^\s+//s;

        $withclause =~ s/^\s*with\s*\(//is;
        $withclause =~ s/\)\s*$//s;

        for my $withdef (split(",", $withclause)) {
            my @baz = split("=", $withdef, 2);

            die "bad with def: $withdef"
                unless (2 == scalar(@baz));

            my $kk = shift @baz;
            my $vv = shift @baz;

            $kk =~ s/^\s+//;
            $kk =~ s/\s+$//;
            $kk = lc($kk);

            $vv =~ s/^\s+//;
            $vv =~ s/\s+$//;

            $alltabs->{$tname}->{with}->{$kk} = $vv;
        }

        my $with = $alltabs->{$tname}->{with};

        die "no relid for table $tname"
            unless (exists($with->{relid}));

        # build the common comment tag
        # (of the form /* relation id: nnn - tablename date */ )
        # This comment is affixed to generated code (and is used as a
        # marker to find these entries when we need to cleanout
        # duplicates)
        $alltabs->{$tname}->{relid_comment_tag} =
            "/* relation id: " .
            $with->{relid} .
            " - $tname " .
            yyyy_mm_dd() . " */\n";

        # defaults: not shared, not bootstrap, has an oid column
        $with->{shared} = _with_flag($with, "shared", 0);
        $with->{bootstrap} = _with_flag($with, "bootstrap", 0);
        $with->{oid} = _with_flag($with, "oid", 1);

        # build a camel-case string for the tablename
        unless (exists($with->{camelcase})) {
            my $cc1 = $tname;
            my $isgp = 0;

            if ($cc1 =~ m/^(gp)\_/i) {
                # treat camel-case name special for Gp tables...
                $cc1 =~ s/^...//;
                $isgp = 1;
            }
            if ($cc1 =~ m/^(pg)\_/i) {
                # remove prefix
                $cc1 =~ s/^...//;
            }

            $cc1 = ucfirst($cc1);
            $cc1 = "Gp" . $cc1 if ($isgp);

            $with->{camelcase} = $cc1;
        }
    } # end if with

    parsecols($alltabs, $tname, $bigstr);

    # CONTENT checks:
    # NOTE: no code generation - only for validation
    if (exists($alltabs->{$tname}->{with}->{content})) {
        die "unknown content type for $tname: $alltabs->{$tname}->{with}->{content}"
            unless ($alltabs->{$tname}->{with}->{content} =~
                    m/MASTER\_ONLY|PERSISTENT|SEGMENT\_LOCAL/);
    }

    # TOAST table checks
    if (exists($alltabs->{$tname}->{with}->{toast_oid})) {
        die "toast table for $tname must have index (toast_index)"
            unless (exists($alltabs->{$tname}->{with}->{toast_index}));
    }
    if (exists($alltabs->{$tname}->{with}->{toast_index})) {
        die "toast index for $tname must have table (toast_oid)"
            unless (exists($alltabs->{$tname}->{with}->{toast_oid}));
    }

    if (exists($alltabs->{$tname}->{cols}) &&
        defined($alltabs->{$tname}->{cols}) &&
        # ignore pre-existing exception tables
        (!exists($toast_tab_exception_h{$tname}))) {
        my $badmsg = "";

        for my $col1 (@{$alltabs->{$tname}->{cols}}) {
            my $cname = $col1->{colname};
            my $ctype = $col1->{sqltype};

            # aclitem arrays are ok
            next if ($ctype =~ m/aclitem/);

            if (($ctype =~ m/\[.*\]/) ||
                ($ctype =~ /text/)) {
                last # table has a toast table - ok!
                    if (exists($alltabs->{$tname}->{with}->{toast_oid}));

                $badmsg .= "table $tname needs toast table (toast_oid) for column $cname of type $ctype\n";
            }
        } # end for

        die $badmsg if (length($badmsg));
    }

    return $tname;
} # end parsetab

# Split a TIDYCAT definition into ";"-separated statements and
# dispatch each one to parsetab, parseindex or parsefk.  Returns the
# list of table names created by the CREATE TABLE statements.
sub parsetabdef {
    my ($alltabs, $bigstr, $filnam) = @_;
    my @keys;

    # fix for case of semicolons in comments
    {
        my @ll;

        for my $lin (split(/\n/, $bigstr)) {
            # replace semicolon in comment with "dummy semi"
            if ($lin =~ m/\-\-.*\;/) {
                $lin =~ s/\;/TIDY_DUMMY_SEMI/g;
            }
            push @ll, $lin;
        }
        $bigstr = join("\n", @ll);
    }

    my @statements = split(';', $bigstr);

    for my $stat (@statements) {
        # replace dummy semicolons with the real thing
        $stat =~ s/TIDY\_DUMMY\_SEMI/;/gm;

        if ($stat =~ m/^\s*create\s+table/is) {
            push @keys, parsetab($alltabs, $stat, $filnam);
        }
        elsif ($stat =~ m/^\s*create\s+(unique\s+)?index/is) {
            parseindex($alltabs, $stat);
        }
#       elsif ($stat =~ m/^\s*alter\s+table\s+\w+add\s+fk/is)
        elsif ($stat =~ m/^\s*alter\s+table/is) {
            parsefk($alltabs, $stat, $filnam);
        }
    }

    return @keys;
} # end parsetabdef

# expects val1, len1, val2, len2
# where length values are based on printed offset, not length(val), ie
# embedded tabs are counted.
#
# Each row of $collist is [val1, len1, val2, len2, ...].  The first
# two columns of every row are merged in place into
# ["val1<tabs>val2", newlen, ...] with val2 starting at a common tab
# stop.  Returns $collist.  Dies if a row has fewer than four entries.
sub tabalign {
    my ($tabwidth, $collist) = @_;

    my $tabstr = $glob_tabstr;
    my $maxlen = 0;

    for my $coldef (@{$collist}) {
        die "bad coldef: " . Data::Dumper->Dump([$coldef])
            unless (scalar(@{$coldef}) > 3);

        $maxlen = $coldef->[1]
            if ($coldef->[1] > $maxlen);
    }

    # find the tab position for the second column
    my $col2tab = (POSIX::ceil($maxlen / $tabwidth)) * $tabwidth;

    # always leave at least one tab between the columns
    $col2tab++ if ($col2tab == $maxlen);

    for my $row (@{$collist}) {
        my $val1 = shift @{$row};
        my $len1 = shift @{$row};
        my $val2 = shift @{$row};
        my $len2 = shift @{$row};

        if ($len1 < $col2tab) {
            # pad to the next tab stop first
            my $mod1 = $len1 % $tabwidth;

            if ($mod1) {
                $len1 += ($tabwidth - $mod1);
                if ($glob_faketab) {
                    $val1 .= " " x ($tabwidth - $mod1);
                }
                else {
                    $val1 .= $tabstr;
                }
            }
        }

        while ($len1 < $col2tab) {
            $len1 += $tabwidth;
            $val1 .= $tabstr;
        }

        unshift @{$row}, $len1 + $len2;
        unshift @{$row}, $val1 . $val2;
    } # end for row

    return $collist;
}

# Join two strings with the tab padding tabalign would insert and
# return the combined string.
sub simpletabalign {
    my ($col1, $col2) = @_;

    my $colitem = [$col1, length($col1), $col2, length($col2)];

    my $collist = tabalign($glob_tabwidth, [$colitem]);

    return $collist->[0]->[0];
}

# take a table-format string (columns separated by "|", rows separated
# by newline) and return a reference to an array holding one aligned
# string per row
sub tabalignstr {
    my $str = shift;

    my $flist = [];

    my @lines = split(/\n/, $str);

    return $flist unless scalar(@lines);

    for my $lin (@lines) {
        my @foo = split(/\|/, $lin);

        last unless (scalar(@foo));

        my $flitem = [];

        for my $itm (@foo) {
            $itm = "" unless (defined($itm));
            push @{$flitem}, $itm, length($itm);
        }
        push @{$flist}, $flitem;
    }

  L_bigloop:
    while (1) {
        # nothing to align (eg the first line was empty): without this
        # check the loop below never reaches its exit and spins forever
        last L_bigloop unless (scalar(@{$flist}));

        # stop as soon as any row is down to its last column
        for my $coldef (@{$flist}) {
            last L_bigloop unless (scalar(@{$coldef}) > 3);
        }
        $flist = tabalign($glob_tabwidth, $flist);
    }

    my @itmlst;
    for my $itm (@{$flist}) {
        push @itmlst, shift(@{$itm});
    }
    return \@itmlst;
} # end tabalignstr

# convert a column name to an "Anum_" column id
sub anum_key {
    my ($tname, $wkey) = @_;

    my $atname = $tname;

    # XXX XXX: stupid last_operation/shoperation fixup
    if ($tname =~ m/pg\_stat\_last_(sh)?operation/) {
        $atname =~ s/eration$//;
        $atname =~ s/stat\_last\_/statlast/;
    }
    if ($tname =~ m/gp_distribution_policy/) {
        $atname =~ s/distribution\_//;
    }

    return "Anum_" . $atname . "_" . $wkey;
}

# convert a table name to its "Form_" struct pointer type name
sub struct_form_tname {
    my $tname = shift;

    my $atname = $tname;

    # XXX XXX: stupid last_operation/shoperation fixup
    if ($tname =~ m/pg\_stat\_last_(sh)?operation/) {
        $atname =~ s/eration$//;
        $atname =~ s/stat\_last\_/statlast/;
    }

    return "Form_" . $atname;
}

# Build the "#define Natts_xxx N" line followed by one
# "#define Anum_xxx_col i" line per column, tab-aligned.
# $allcol is the ordered list of column definitions.
sub formatcolconst {
    my ($tnamepref, $allcol) = @_;

    my $collist = [];

    {
        my $cdef = "#define Natts_" . $tnamepref;
        my $ncols = scalar(@{$allcol});

        push @{$collist}, [$cdef, length($cdef), $ncols, length($ncols)];
    }

    my $ii = 1;
    for my $coldef (@{$allcol}) {
        my $cdef = "#define " . anum_key($tnamepref, $coldef->{colname});

        push @{$collist}, [$cdef, length($cdef), $ii, length($ii)];
        $ii++;
    }

    $collist = tabalign($glob_tabwidth, $collist);

    my $bigstr = "";

    for my $coldef (@{$collist}) {
        my $st1 = shift @{$coldef};
        $bigstr .= $st1 . "\n";
    }
    return $bigstr;
} # end formatcolconst

# Build the member lines of the CATALOG struct: one
# "<tab>ctype<tabs>colname;<tabs>/* comment */" line per column, each
# preceded by any comment lines that preceded the column in the SQL.
sub formatcols {
    my $allcol = shift;

    my $collist = [];
    my $bigstr = "";

    # formatting trick: col1 is prefixed by a tab, so increase its
    # length by tabwidth. col2 is the column name followed by a
    # semicolon. And col3 is the optional comment. Build an array of
    # items of [val1, len1, val2, len2, val3, len3]
    #
    # tabalign will shift off the first 4 entries and append the values
    # with embedded tabs to align them, nicely, then unshift the
    # append values and combined length back into original array,
    # resulting in:
    # ["val1val2", len1++len2, val3, len3]
    #
    # So now we just feed this array back into tabalign to get:
    # ["val1val2val3", len1++len2++len3]
    for my $coldef (@{$allcol}) {
        my $colitem = [];
        my $coltype = $coldef->{ctype};
        my $colname = $coldef->{colname};

        # fix arrays of types -- the array suffix moves from the
        # typename to the column name
        if ($coltype =~ m/\[1\]/) {
            $coltype =~ s/\[1\]//;
            $colname .= "[1]";
        }

        push @{$colitem}, $glob_tabstr . $coltype;
        push @{$colitem}, length($coltype) + $glob_tabwidth;
        push @{$colitem}, $colname . ";";
        push @{$colitem}, length($colname) + 1;

        if (exists($coldef->{postcomment})) {
            # turn the trailing "-- text" into "/* text */"
            my $pc = $coldef->{postcomment};
            $pc =~ s/^\-\-/\/\*/;
            $pc .= " \*\/";
            push @{$colitem}, $pc;
            push @{$colitem}, length($pc);
        }
        else {
            push @{$colitem}, "";
            push @{$colitem}, 0;
        }

        push @{$collist}, $colitem;
    }

    $collist = tabalign($glob_tabwidth, $collist);
    $collist = tabalign($glob_tabwidth, $collist);

    for my $ii (0..(scalar(@{$collist})-1)) {
        my $coldef1 = $allcol->[$ii];
        my $coldef2 = $collist->[$ii];

        my $st1 = shift @{$coldef2};

        if (exists($coldef1->{precomment})) {
            my $pc = $coldef1->{precomment};

            my $bComment = ($pc =~ m/^\s*\-\-/m);

            if ($bComment) {
                # make everything into a single-line comment
                $pc =~ s/^\s*\-\-(.*)/$glob_tabstr\/\*$1 \*\//gm;
                # find adjacent single-line comments by looking for
                # "*/\n/*" and merge them together
                $pc =~ s/\*\/\s*(\n\s*)\/\*/$1 \*/gm;

                $pc .= "\n";
            }

            # remove the blank lines
            $pc =~ s/\*\*TK\_BLANK\_LINE\*\*//gm;

            $bigstr .= $pc;
        }

        $bigstr .= $st1 . "\n";
    }
    return $bigstr;
} # end formatcols

# Return the explanatory comment and "#define timestamptz Datum" line
# emitted for tables with timestamptz columns; $colname is the
# (already quoted) column description substituted for MYTYPENAME.
sub formatTZcomment {
    my $colname = shift;

    my $bigstr = <<"EOF_bigstr";
/*
 * The CATALOG definition has to refer to the type of MYTYPENAME as
 * "timestamptz" (lower case) so that bootstrap mode recognizes it. But
 * the C header files define this type as TimestampTz. Since the field is
 * potentially-null and therefore cannot be accessed directly from C code,
 * there is no particular need for the C struct definition to show the
 * field type as TimestampTz --- instead we just make it Datum.
 */
#define timestamptz Datum
EOF_bigstr

    $bigstr =~ s/MYTYPENAME/$colname/;

    return $bigstr;
}

# Return the explanatory comment and "#define time TimeADT" line
# emitted for tables with time columns; $colname is substituted for
# MYTYPENAME.
sub formatTMcomment {
    my $colname = shift;

    my $bigstr = <<"EOF_bigstr";
/*
 * The CATALOG definition has to refer to the type of MYTYPENAME as
 * "time" (lower case) so that bootstrap mode recognizes it. But
 * the C header files define this type as TimeADT. So we use a sleazy trick.
 *
 */
#define time TimeADT
EOF_bigstr

    $bigstr =~ s/MYTYPENAME/$colname/;

    return $bigstr;
}

# print YEARMONTHDAY as 8 digit string
sub yyyy_mm_dd {
    # local time, zero-padded month and day
    return POSIX::strftime("%Y%m%d", localtime());
}

# Build the code fragments for IsSharedRelation().  Returns a
# reference to [table entries, index entries, toast entries], each a
# string of "relationId == X || " lines for the shared tables, in
# relid order.
sub fix_issharedrelation_function {
    my $alltabs = shift;

    my $bigstr = "";
    my $toaststr = "";
    my $indstr = "";

    my %relidh;

    # nicer to sort by relid (vs tablename)
    while (my ($jj, $ww) = each(%{$alltabs})) {
        $relidh{$ww->{with}->{relid}} = $jj;
    }

    my @relids = sort {$a <=> $b} (keys (%relidh));

    for my $jj (@relids) {
        my $vv = $alltabs->{$relidh{$jj}};
        my $rct = $vv->{relid_comment_tag}; # standard comment

        next unless ($vv->{with}->{shared});

        $bigstr .= "\n\n";
        $bigstr .= $rct;
        $bigstr .= "relationId == " . $vv->{CamelCaseRelationId} . " || ";
        $bigstr .= "\n";

        # build entries for Toast table and toast index
        if (exists($vv->{CamelCaseToastTab})) {
            $toaststr .= "\n\n";
            $toaststr .= $rct;
            $toaststr .= "relationId == " . $vv->{CamelCaseToastTab} . " || ";
            $toaststr .= "\n";
            $toaststr .= "relationId == " . $vv->{CamelCaseToastInd} . " || ";
            $toaststr .= "\n";
        }
    }

    for my $jj (@relids) {
        my $vv = $alltabs->{$relidh{$jj}};
        my $rct = $vv->{relid_comment_tag}; # standard comment

        next unless ($vv->{with}->{shared});
        next unless (exists($vv->{indexes}));

        $indstr .= "\n\n";

        for my $ind (@{$vv->{indexes}}) {
            $indstr .= $rct;
            $indstr .= "relationId == " . $ind->{CamelCaseIndexId} . " || " ;
            $indstr .= "\n";
        }

        $indstr .= "\n\n";
    }

    return [$bigstr, $indstr, $toaststr];
} # end fix_issharedrelation_function

# Record, in %$h_allfiles, the basename of every header file that
# defines a table in $alltabs (basename => table name), skipping the
# files listed in %allfiles_exception_h.
sub get_all_filenames {
    my ($alltabs, $h_allfiles) = @_;

    # File::Spec is not loaded explicitly at the top of the file
    require File::Spec;

    # store the basename of the file in a hash
    for my $kk (sort (keys (%{$alltabs}))) {
        my ($vol, $dirs, $basename) =
            File::Spec->splitpath($alltabs->{$kk}->{filename});

        # skip exceptions
        next if (exists($allfiles_exception_h{$basename}));

        $h_allfiles->{$basename} = $kk;
    }
} # end get_all_filenames

# Return the pg_type DATA() line for the row type of table $tname
# with relid $oid and row type oid $reltype_oid.
sub formattypedata {
    my ($oid, $tname, $reltype_oid) = @_;

    my $bigstr = <<'EOF_bigstr';
DATA(insert OID = RELTYPE_OID ( TABLENAME PGNSP PGUID -1 f c t \054 THEE_OID 0 record_in record_out record_recv record_send - d x f 0 -1 0 _null_ _null_ ));
EOF_bigstr

    $bigstr =~ s/TABLENAME/$tname/gm;
    $bigstr =~ s/RELTYPE\_OID/$reltype_oid/gm;
    $bigstr =~ s/THEE\_OID/$oid/gm;

    return $bigstr;
}

# Return the pg_type DATA() line for the row type of the toast table
# belonging to the table with relid $oid.
sub formattoasttypedata {
    my ($oid, $tname, $reltype_oid, $toast_oid, $toast_reltype) = @_;

    my $bigstr = <<'EOF_bigstr';
DATA(insert OID = TOAST_RELTYPE (pg_toast_THEE_OID TOASTNSP PGUID -1 f c t \054 TOAST_OID 0 record_in record_out record_recv record_send - d x f 0 -1 0 _null_ _null_));
EOF_bigstr

    $bigstr =~ s/TABLENAME/$tname/gm;
    $bigstr =~ s/RELTYPE\_OID/$reltype_oid/gm;
    $bigstr =~ s/THEE\_OID/$oid/gm;
    $bigstr =~ s/TOAST\_OID/$toast_oid/gm;
    $bigstr =~ s/TOAST\_RELTYPE/$toast_reltype/gm;

    return $bigstr;
}

# Currently returns an empty string: the relid index is built but no
# type entries are generated from it.
sub formattypes {
    my $alltabs = shift;

    my $bigstr = "";

    my %relidh;

    # nicer to sort by relid (vs tablename)
    while (my ($jj, $ww) = each(%{$alltabs})) {
        $relidh{$ww->{with}->{relid}} = $jj;
    }

    return $bigstr;
}

sub formatindexes { my $alltabs = shift; my $bigstr = ""; my %relidh; # nicer to sort by relid (vs tablename)
    while (my ($jj, $ww) = each(%{$alltabs})) { $relidh{$ww->{with}->{relid}} = $jj; } # for my $kk (sort (keys (%{$alltabs})))
    for my $jj (sort {$a <=> $b} (keys (%relidh))) { my $kk = $relidh{$jj}; my $vv = $alltabs->{$kk}; my $relid = $vv->{with}->{relid}; my $rct = $vv->{relid_comment_tag}; # standard
# comment
        next unless (exists($vv->{indexes}));

        for my $ind (@{$vv->{indexes}}) {
            my $iname;
            my $icolist;
            my $camelname;

            $iname = $kk;
            $icolist = "";
            $camelname = $vv->{with}->{camelcase};

            for my $cols (@{$ind->{cols}}) {
                $iname .= "_" . $cols->[0];
                $camelname .= ucfirst($cols->[0]);

                if (length($icolist)) {
                    $icolist .= ", ";
                }
                $icolist .= $cols->[0] . " " . $cols->[1];
            }
            $iname .= "_index";

            # if indexname was supplied, use it instead
            if (exists($ind->{indexname})) {
                $iname = $ind->{indexname};
            }

            if ($ind->{unique}) {
                $bigstr .= $rct;
                $bigstr .= "DECLARE_UNIQUE_INDEX(";
            }
            else {
                $bigstr .= $rct;
                $bigstr .= "DECLARE_INDEX(";
            }

            my $indexid = $ind->{indexid};

            $bigstr .= $iname . ", " . "$indexid, on " . $kk . " using btree(" . $icolist . "));\n";

            # use user-supplied CamelCase name if it exists
            if (exists($ind->{with}->{camelcase})) {
                $camelname = $ind->{with}->{camelcase};
            }

            $ind->{CamelCaseIndexId} = $camelname . "IndexId";

            my $ccdef = "#define " . $camelname . "IndexId";

            $bigstr .= simpletabalign($ccdef, $indexid) . "\n\n\n";
        }
    }
    return $bigstr;
} # end formatindexes

# format_syscache_cacheinfo: format one entry of the cacheinfo[] array.
#
#   $cinfo - hashref; reads {firstline}, {indid}, {nbuckets}, {cols}
#            (arrayref of column names), {with_oid} and {relname}.
#
# Returns the entry text (without a trailing comma), for example:
#
#   {AggregateRelationId,	/* AGGFNOID */
#       AggregateAggfnoidIndexId,
#       1,
#       {
#           Anum_pg_aggregate_aggfnoid,
#           0,
#           0,
#           0
#       },
#       32
#   }
sub format_syscache_cacheinfo {
    my $cinfo = shift;

    my $entry = <<"EOF_bigstr";
FIRSTLINE
TWOTABSINDID,
TWOTABSCOLCNT,
TWOTABS{
ALLCOLS
TWOTABS},
TWOTABSNBUCKETS
EOF_bigstr

    my $indent2 = $glob_tabstr x 2;
    $entry =~ s/TWOTABS/$indent2/gm;

    # slight goofiness with end curly brace to fix emacs formatting...
    $entry .= "\t" . '}';

    my $ncols = scalar(@{$cinfo->{cols}});

    # each placeholder occurs once; only the first match is replaced
    $entry =~ s/FIRSTLINE/$cinfo->{firstline}/;
    $entry =~ s/INDID/$cinfo->{indid}/;
    $entry =~ s/NBUCKETS/$cinfo->{nbuckets}/;
    $entry =~ s/COLCNT/$ncols/;

    # build array of length 4, with zeroes in "null" positions
    my @keycols;
    for my $ii (0..3) {
        if ($ii >= $ncols) {
            push @keycols, '0';
        }
        elsif (($cinfo->{cols}->[$ii] eq 'oid') && $cinfo->{with_oid}) {
            push @keycols, 'ObjectIdAttributeNumber';
        }
        else {
            push @keycols, anum_key($cinfo->{relname}, $cinfo->{cols}->[$ii]);
        }
    }

    my $allcols = ($glob_tabstr x 3) . join(",\n\t\t\t", @keycols);
    $entry =~ s/ALLCOLS/$allcols/;

    return $entry;
} # end format_syscache_cacheinfo

sub fixup_syscache_cfile {
    my ($alltabs, $cacheh, $fullcname, $tmpcname, $gen_hdr_str) = @_;

    my $bigstr = "";

    $bigstr = $gen_hdr_str;

    my $whole_file;

    {
        # $$$ $$$ undefine input record separator (\n)
        # and slurp entire file into variable
        local $/;
        undef $/;

        my $fh;

        open $fh, "< $fullcname" or die "cannot open $fullcname: $!";

        $whole_file = <$fh>;

        close $fh;
    }

    # build array of strings of "first lines", eg
    # "{AggregateRelationId, /* AGGFNOID */"
    # using tabalignstr
    my $tab1 = "";
    for my $kk (sort(keys(%{$cacheh}))) {
        my $vv = $cacheh->{$kk};

        $tab1 .= "|{" . $vv->{relid} . ",|/* " . $kk . " */\n";
    }

    my $firstlin = tabalignstr($tab1);

    # format every cacheinfo entry in alphabetical order
    my @out1;
    for my $kk (sort(keys(%{$cacheh}))) {
        my $vv = $cacheh->{$kk};

        $vv->{firstline} = shift @{$firstlin};

        push @out1, format_syscache_cacheinfo($vv);
    }

    # join the formatted entries
    $bigstr .= "\n" . "static const struct cachedesc cacheinfo[] = {\n" . join(",\n", @out1) . "\n};\n";

    my $prefx = quotemeta('TIDYCAT_BEGIN_CODEGEN');
    my $suffx = quotemeta('TIDYCAT_END_CODEGEN');

    my @zzz = ($whole_file =~ m/^\s*\/\*\s*$prefx\s*\s*$(.*)^\s*\/\*\s*$suffx\s*\*\/\s*$/ms);

    die "bad target: $whole_file" unless (scalar(@zzz));

    my $rex = $zzz[0];

    # replace carriage returns first, then quotemeta, then fix CR again...
$rex =~ s/\n/SLASHNNN/gm;
    $rex = quotemeta($rex);
    $rex =~ s/SLASHNNN/\\n/gm;

    # substitute the new generated definitions for the prior
    # generated definitions in the target file
    $whole_file =~ s/$rex/$bigstr/ms;

    my $outi;

    open $outi, "> $tmpcname" or die "cannot open $tmpcname for write: $!";

    # rewrite the target file
    print $outi $whole_file;

    close $outi;
} # end fixup_syscache_cfile

# fixup_syscache: regenerate the SysCacheIdentifier enum in syscache.h
# and then hand off to fixup_syscache_cfile for syscache.c.
#
#   $alltabs   - hashref of table definitions; every index carrying a
#                {with}{syscacheid} contributes one cache entry
#   $fullhname - existing syscache.h to read
#   $tmphname  - where the rewritten syscache.h is written
#   $fullcname - existing syscache.c (passed through)
#   $tmpcname  - where the rewritten syscache.c goes (passed through)
#   $verzion   - generator version string for the header comment
#   $nnow      - timestamp string for the header comment
#
# Dies if a syscacheid is attached to a non-unique index, if either
# file cannot be opened, or if the TIDYCAT_BEGIN_CODEGEN /
# TIDYCAT_END_CODEGEN markers are not found.  Returns without writing
# anything when two or fewer cache entries were collected.
sub fixup_syscache {
    my ($alltabs, $fullhname, $tmphname, $fullcname, $tmpcname,
        $verzion, $nnow) = @_;

    my %cacheh;

    my $gen_hdr_str =
        "\n * WARNING: DO NOT MODIFY THE FOLLOWING SECTION: \n" .
        " * Generated by " . $verzion . "\n" .
        " * on " . $nnow . "\n*/\n";

    my $bigstr = $gen_hdr_str;

    my $whole_file;
    {
        # "local $/" already leaves the input record separator
        # undefined, so the read below slurps the entire file
        local $/;

        # three-argument open: the file name is never parsed for mode
        # characters or stripped of surrounding whitespace
        open my $fh, '<', $fullhname
            or die "cannot open $fullhname: $!";

        $whole_file = <$fh>;

        close $fh;
    }

    for my $kk (keys(%{$alltabs})) {
        my $vv = $alltabs->{$kk};

        next unless (exists($vv->{indexes}));

        for my $ind (@{$vv->{indexes}}) {
            next unless (exists($ind->{with}->{syscacheid}));

            die "$kk: invalid syscacheid for non-unique index"
                unless ($ind->{unique});

            # get colnames
            my $cols = [ map { $_->[0] } @{$ind->{cols}} ];

            my $with_oid = (
                (exists($vv->{with}->{oid})) &&
                ($vv->{with}->{oid}));

            $cacheh{$ind->{with}->{syscacheid}} = {
                relname  => $kk,
                relid    => $vv->{CamelCaseRelationId},
                indid    => $ind->{CamelCaseIndexId},
                nbuckets => $ind->{with}->{syscache_nbuckets},
                with_oid => $with_oid,
                cols     => $cols
            };
        }
    }

    # XXX XXX: not enough syscacheid ?
    return if (scalar(keys(%cacheh)) <= 2);

    {
        my @sid = sort(keys(%cacheh));

        # label the first and last elements with their integer value
        my $num = scalar(@sid) - 1;

        $sid[0]  .= " = 0";
        $sid[-1] .= " = $num";

        $bigstr .= "enum SysCacheIdentifier\n{\n\t" .
            join(",\n\t", @sid) . "\n};\n";
    }

    my $prefx = quotemeta('TIDYCAT_BEGIN_CODEGEN');
    my $suffx = quotemeta('TIDYCAT_END_CODEGEN');

    my @zzz = ($whole_file =~ m/^\s*\/\*\s*$prefx\s*\s*$(.*)^\s*\/\*\s*$suffx\s*\*\/\s*$/ms);

    die "bad target: $whole_file" unless (scalar(@zzz));

    my $rex = $zzz[0];

    # replace carriage returns first, then quotemeta, then fix CR again...
    $rex =~ s/\n/SLASHNNN/gm;
    $rex = quotemeta($rex);
    $rex =~ s/SLASHNNN/\\n/gm;

    # substitute the new generated definitions for the prior
    # generated definitions in the target file
    $whole_file =~ s/$rex/$bigstr/ms;

    open my $outi, '>', $tmphname
        or die "cannot open $tmphname for write: $!";

    # rewrite the target file
    print $outi $whole_file;

    # buffered write errors only surface at close, so check it
    close $outi
        or die "cannot close $tmphname: $!";

    # now do syscache.c
    fixup_syscache_cfile($alltabs, \%cacheh, $fullcname, $tmpcname,
                         $gen_hdr_str);
} # end fixup_syscache

sub formattoastheaders {
    my $alltabs = shift;
    my $bigstr = "";
    my %relidh;

    # nicer to sort by relid (vs tablename)
    while (my ($jj, $ww) = each(%{$alltabs})) {
        $relidh{$ww->{with}->{relid}} = $jj;
    }

    # for my $kk (sort (keys (%{$alltabs})))
    for my $jj (sort {$a <=> $b} (keys (%relidh))) {
        my $kk = $relidh{$jj};
        my $vv = $alltabs->{$kk};
        my $relid = $vv->{with}->{relid};
        my $rct = $vv->{relid_comment_tag}; # standard comment

        next unless (exists($vv->{with}->{toast_oid}));

        {
            my $camelname;

            $camelname = $vv->{with}->{camelcase};

            # maintain PG/GP prefix for the camelname for TOAST.
            # weird, but true...
            if ($kk =~ m/pg/i) {
                $camelname = "Pg" . $camelname unless ($camelname =~ m/pg/i);
            }
            elsif ($kk =~ m/gp/i) {
                $camelname = "Gp" . $camelname unless ($camelname =~ m/gp/i);
            }

            $bigstr .= $rct;
            $bigstr .= "DECLARE_TOAST(";

            $bigstr .= $kk . ", " . $vv->{with}->{toast_oid} . ", " . $vv->{with}->{toast_index} . ");\n";

            my $toasttabname = $camelname . "ToastTable";
            my $ccdef = "#define " . $toasttabname;

            $bigstr .= simpletabalign($ccdef, $vv->{with}->{toast_oid}) . "\n";

            my $toastindname = $camelname . "ToastIndex";
            $ccdef = "#define " .
$toastindname;

            $bigstr .= simpletabalign($ccdef, $vv->{with}->{toast_index}) . "\n\n\n";

            $vv->{CamelCaseToastTab} = $toasttabname;
            $vv->{CamelCaseToastInd} = $toastindname;
        }
    }
    return $bigstr;
} # end formattoastheaders

# formattab: generate the header text for the named tables: the
# TidyCat comment block, the RelationId #define, the CATALOG() struct,
# the Form typedef and the column constants.
#
#   $alltabs - hashref of all table definitions, keyed by table name
#   $keys    - arrayref of the table names to format, in output order
#
# Side effect: stores {CamelCaseRelationId} in each formatted table
# definition.  Returns the generated text as one string.
sub formattab {
    my ($alltabs, $keys) = @_;
    my $bigstr = "";

    for my $tname (@{$keys}) {
        my $tab   = $alltabs->{$tname};
        my $relid = $tab->{with}->{relid};

        # ---- descriptive comment block ----
        $bigstr .= "\n/*\n TidyCat Comments for $tname:\n";

        if ($tab->{with}->{shared}) {
            $bigstr .= " Table is shared, so catalog.c:IsSharedRelation is updated.\n";
        }
        if ($tab->{with}->{bootstrap}) {
            $bigstr .= " Table is a **bootstrap** table.\n";
        }
        $bigstr .= ($tab->{with}->{oid})
            ? " Table has an Oid column.\n"
            : " Table does not have an Oid column.\n";

        # TOAST comments
        # NOTE(review): the first message has no trailing newline, so
        # the next comment line is appended to it -- confirm intended
        if (exists($tab->{with}->{toast_oid})) {
            $bigstr .= " Table has TOASTable columns,";
        }
        elsif (exists($toast_tab_exception_h{$tname})) {
            $bigstr .= " Table has TOASTable columns, but NO TOAST table.\n";
        }

        if (exists($tab->{with}->{content})) {
            my $content = $tab->{with}->{content};

            if ($content =~ m/MASTER/) {
                $bigstr .= " Table contents are only maintained on MASTER.\n";
            }
            elsif ($content =~ m/PERSISTENT/) {
                $bigstr .= " Table contents maintain PERSISTENT objects in special way.\n";
            }
            elsif ($content =~ m/SEGMENT/) {
                $bigstr .= " Table contents are local to each SEGMENT.\n";
            }
        }

        my $has_tz = exists($tab->{tzhack});
        my $has_tm = exists($tab->{tmhack});

        $bigstr .= " Table has weird hack for timestamp column.\n " if ($has_tz);
        $bigstr .= " Table has weird hack for time column.\n " if ($has_tm);
        $bigstr .= "\n*/\n\n";

        # ---- type-name hacks and leading comment ----
        $bigstr .= formatTZcomment($tab->{tzhack}) if ($has_tz);
        $bigstr .= formatTMcomment($tab->{tmhack}) if ($has_tm);
        $bigstr .= getcomment1($tname);

        # ---- relation id and struct definition ----
        $tab->{CamelCaseRelationId} = $tab->{with}->{camelcase} . "RelationId" ;

        $bigstr .= simpletabalign('#define ' . $tab->{CamelCaseRelationId},
                                  $relid) . "\n\n";

        $bigstr .= 'CATALOG('. $tname . ',' . $relid . ')';
        $bigstr .= " BKI_BOOTSTRAP" if ($tab->{with}->{bootstrap});
        $bigstr .= " BKI_SHARED_RELATION" if ($tab->{with}->{shared});
        $bigstr .= " BKI_WITHOUT_OIDS" unless ($tab->{with}->{oid});
        $bigstr .= "\n{";
        $bigstr .= formatcols($tab->{cols});

        # build the Form/FormData strings
        my $tform = struct_form_tname($tname);
        my $tformdata = struct_form_tname($tname);
        $tformdata =~ s/Form/FormData/;

        $bigstr .= "} " . $tformdata . ";\n";

        $bigstr .= "\n#undef timestamptz\n" if ($has_tz);
        $bigstr .= "\n#undef time\n" if ($has_tm);

        $bigstr .= "\n\n";
        $bigstr .= getcomment2($tname, $tform);
        $bigstr .= "typedef " . $tformdata . " *" . $tform . ";\n";

        $bigstr .= "\n\n";
        $bigstr .= getcomment3($tname);
        $bigstr .= formatcolconst($tname, $tab->{cols});
    }

    return $bigstr;
} # end formattab

# formattabs: format every table in $alltabs, in table name order, and
# return the concatenated text.
sub formattabs {
    my $alltabs = shift;

    return join("",
                map { formattab($alltabs, [$_]) }
                sort(keys(%{$alltabs})));
} # end formattabs

# parsefile: scan a file for tidycat sections and record their line
# numbers.
#
#   $file_name - file to scan
#   $finddefs  - if defined, look for TIDYCAT_BEGINDEF/TIDYCAT_ENDDEF
#                definitions (and the generated code following each
#                one); otherwise just find the TIDYCAT_BEGIN_CODEGEN /
#                TIDYCAT_END_CODEGEN sections
#
# Returns an arrayref of entry hashes with the keys begindef, enddef
# and def (the text between the markers), plus begin_codegen and
# end_codegen when a generated section follows a definition.  Line
# numbers are 1-based.  Dies if the file cannot be opened or a
# definition is never terminated.
sub parsefile {
    my ($file_name, $finddefs) = @_;

    # if not looking for tidycat definitions, just find the code gen
    # sections
    my $begin_def_str = "TIDYCAT_BEGIN_CODEGEN";
    my $end_def_str   = "TIDYCAT_END_CODEGEN";

    if (defined($finddefs)) {
        $begin_def_str = "TIDYCAT_BEGINDEF";
        $end_def_str   = "TIDYCAT_ENDDEF";

        # XXX XXX: if only dumping sql definitions, allow "fake" ones...
        if ($glob_glob->{sqldef} ||
            $glob_glob->{dumpdef} ||
            $glob_glob->{syscache}) {
            $begin_def_str .= "|TIDYCAT_BEGINFAKEDEF";
            $end_def_str   .= "|TIDYCAT_ENDFAKEDEF";
        }
    }

    # compile the marker patterns once instead of once per line
    my $begin_re = qr/$begin_def_str/;
    my $end_re   = qr/$end_def_str/;

    my $entrylist = [];
    my $bigstr;

    # three-argument open: the file name is never parsed for mode
    # characters or stripped of surrounding whitespace
    open my $file_in, '<', $file_name
        or die "cannot open $file_name: $!";

    my $phase = "begindef"; # begindef, enddef, begingen, endgen
    my $ii = 0;
    my $entry;

    # read line by line rather than loading the whole file into a list
    while (my $ini = <$file_in>) {
        $ii++;

        if ($phase eq "enddef") {
            if ($ini =~ $end_re) {
                if ($ini =~ m/\*\//) { # look for trailing "*/"
                    $entry->{enddef} = $ii;
                }
                else { # probably the next line
                    $entry->{enddef} = $ii + 1;
                }
                $entry->{def} = $bigstr;

                if (defined($finddefs)) {
                    $phase = "begingen";
                }
                else {
                    # save the current entry
                    push @{$entrylist}, $entry;
                    $phase = "begindef";
                }
                next;
            }

            # append to the string!!
            $bigstr .= $ini;
        }
        elsif ($phase eq "begindef") {
            next unless ($ini =~ $begin_re);

            $entry = {begindef => $ii};
            $bigstr = "";
            $phase = "enddef";
            next;
        }
        elsif ($phase eq "begingen") {
            # might have new definition and no codegen
            if ($ini =~ $begin_re) {
                push @{$entrylist}, $entry;

                $entry = {begindef => $ii};
                $bigstr = "";
                $phase = "enddef";
                next;
            }
            elsif ($ini =~ m/TIDYCAT\_BEGIN\_CODEGEN/) {
                $entry->{begin_codegen} = $ii;
                $phase = "endgen";
            }
            else {
                # stop looking if no generated code within 3 lines...
                if (($ii - $entry->{enddef}) > 3) {
                    # save the current entry
                    push @{$entrylist}, $entry;
                    $phase = "begindef";
                }
                next;
            }
        }
        elsif ($phase eq "endgen") {
            if ($ini =~ m/TIDYCAT\_END\_CODEGEN/) {
                $entry->{end_codegen} = $ii;

                # save the current entry
                push @{$entrylist}, $entry;
                $bigstr = "";
                $phase = "begindef";
            }
            next;
        }
    } # end while ini

    die "unterminated tidycat definition in $file_name at " .
        $entry->{begindef}
        if (defined($entry) && ($phase eq "enddef"));

    # a definition at the end of the file with no generated code
    if (defined($entry) && ($phase eq "begingen")) {
        push @{$entrylist}, $entry;
    }

    close $file_in;

    return $entrylist;
} # end parsefile

# bufferfile: read $file_name and return an arrayref of its lines,
# line terminators included.  Dies if the file cannot be opened.
sub bufferfile {
    my $file_name = shift;

    open my $file_in, '<', $file_name
        or die "cannot open $file_name: $!";

    # list-context readline returns every line of the file
    my @buflist = <$file_in>;

    close $file_in;

    return \@buflist;
}

# We may already have previously generated code for a relation. Need
# to filter it out to eliminate duplicate definitions. Note that this
# only works as long as the relid does not change.
sub clean_duplicate_entries {
    my ($alltabs, $basename, $gen_code) = @_;

    my $db1 = 1;

    print $basename, ":\n\n" if ($db1);

    # NOTE: list the names of special files with generated code
    return $gen_code
        unless ($basename =~ m/^(indexing\.h|toasting\.h|toasting\.c|catalog\.c toast|catalog\.c ind|catalog\.c tab|pg\_type\.h)$/);

    my $skiplines = 0; # skip N lines of generated code for duplicate entries

    $skiplines = 2 if ($basename =~ m/indexing/);
    $skiplines = 3 if ($basename =~ m/toasting\.h/);
    $skiplines = 3 if ($basename =~ m/toasting\.c/);
    $skiplines = 2 if ($basename =~ m/pg\_type/);
    $skiplines = 1 if ($basename =~ m/catalog.*ind/);
    $skiplines = 1 if ($basename =~ m/catalog.*tab/);
    $skiplines = 2 if ($basename =~ m/catalog.*toast/);

    for my $kk (sort (keys (%{$alltabs}))) {
        my $vv = $alltabs->{$kk};
        my $relid = $vv->{with}->{relid};
        my $rct = $vv->{relid_comment_tag}; # standard comment

        $rct = '/* relation id: ' . $relid;

        next # no match for current relation
            unless ($gen_code =~ m/$rct\s+/);

        next # don't cleanup unless have lines to skip...
unless ($skiplines);

        my @foo = split (/\n/, $gen_code);
        my @baz;

        while (scalar(@foo)) {
            my $lin = shift @foo;

            # iter by line, but skip the line which matches the
            # "relation id" comment
            if ($lin !~ m/$rct\s+/) {
                push @baz, $lin;
            }
            else {
                print "skip: $lin\n" if ($db1);

                # and the subsequent N generated lines after the comment
                for my $ii (1..$skiplines) {
                    # print "skip: ",
                    shift @foo if (scalar(@foo));
                    # print "\n";
                } # end for
            }
        } # end while foo

        $gen_code = join ("\n", @baz);
    } # end for kk

    return $gen_code;
} # end clean_duplicate_entries

# fixup_generated_code: append newly generated text to one section of
# a file and write the result to $tmpname.
#
#   $alltabs      - hashref of table definitions (used to strip stale
#                   entries for the same relations)
#   $basename     - logical file name; "catalog.c toast" selects the
#                   third section, "catalog.c ind" the second, and
#                   anything else the first
#   $fullname     - file to read when $fixup_ref is not usable
#   $tmpname      - output file
#   $new_gen_list - arrayref of generated strings to append
#   $verzion      - generator version string for the header
#   $nnow         - timestamp string for the header
#   $fixup_ref    - optional arrayref [$elist, $buffil] returned by a
#                   previous call; its two elements are shifted off
#                   and reused instead of re-reading $fullname
#
# Returns [$elist, $buffil], or nothing when $new_gen_list is empty or
# the file has fewer sections than required.  Dies if $tmpname cannot
# be written.
sub fixup_generated_code {
    my ($alltabs, $basename, $fullname, $tmpname, $new_gen_list,
        $verzion, $nnow, $fixup_ref) = @_;

    my $elist_index = 1;

    # catalog.c toast case is third, and index case is second set of
    # definitions
    $elist_index = 3 if ($basename =~ m/catalog\.c toast/);
    $elist_index = 2 if ($basename =~ m/catalog\.c ind/);

    return unless (scalar(@{$new_gen_list}));

    my ($elist, $buffil);

    # handle toast, then index
    if (defined($fixup_ref) && (2 <= scalar(@{$fixup_ref}))) {
        $elist  = shift @{$fixup_ref};
        $buffil = shift @{$fixup_ref};
    }
    else {
        $elist  = parsefile($fullname);
        $buffil = bufferfile($fullname);
    }

    # XXX XXX XXX XXX: dump of regen
    # print $basename,":\n", Data::Dumper->Dump($elist), "\n";

    return unless (scalar(@{$elist}) >= $elist_index);

    my $entry = $elist->[$elist_index - 1];

    my $bigstr = $entry->{def};

    $bigstr = clean_duplicate_entries($alltabs, $basename, $bigstr);

    # print "bigstr: ", $bigstr, "XXX\n";

    my $genstr = "Generated by " . $verzion;

    $bigstr =~ s/Generated by.*tidycat\.pl\s+version\s+\d+\.(\d)*/$genstr/m;

    # $genstr contains the script path, so quote it: otherwise "." and
    # any other metacharacters in the path are treated as regex syntax
    $bigstr =~ s/(\Q$genstr\E)\n(\s+on\s+)(\w+\s+\w+\s+\d+\s+\d+\:\d+\:\d+\s+\d+)/$1\n$2$nnow/m;

    $bigstr .= "\n" . join("", @{$new_gen_list});

    # remove double-newlines
    $bigstr =~ s/\n\n/\n/gm;

    if (exists($entry->{begindef})) {
        splice(@{$buffil},
               ($entry->{begindef}),
               ($entry->{enddef} - $entry->{begindef}) - 1,
               $bigstr);
    }

    # write the modified file to the temp directory
    open my $file_out, '>', $tmpname
        or die "Cannot open $tmpname for write: $!";

    print $file_out join("", @{$buffil});

    # buffered write errors only surface at close, so check it
    close $file_out
        or die "Cannot close $tmpname: $!";

    return [$elist, $buffil];
} # end fixup_generated_code

sub fixup_pg_tidy {
    my ($h_allfiles, $fullname, $tmpname, $verzion, $nnow) = @_;

    my $bigstr = <<'EOF_bigstr';
/*-------------------------------------------------------------------------
 *
 * pg_tidycat.h
 *
 * Copyright (c) 2011, 2012 Greenplum inc.
 *
GENCOM
 *-------------------------------------------------------------------------
 */

EOF_bigstr

    my $gen_hdr_str = " * WARNING: DO NOT MODIFY THIS FILE: \n" . " * Generated by " . $verzion . "\n" . " * on " . $nnow ;

    $bigstr =~ s/GENCOM/$gen_hdr_str/gm;

    my $prevhdr = `grep include $fullname`;

    my @lines = split(/\n/, $prevhdr);

    for my $lin (@lines) {
        next unless ($lin =~ m/^\s*\#\s*include\s*\"catalog\//);

        my @foo = ($lin =~ m/^\s*\#\s*include\s*\"catalog\/(.*)\"/);

        my $fname = $foo[0];

        next if (exists($allfiles_exception_h{$fname}));

        $h_allfiles->{$fname} = 1;
    }

    # build sorted list of all names in
    # include "catalog/pg_..." format
    $bigstr .= '#include "catalog/' . join("\"\n#include \"catalog/", sort(keys(%{$h_allfiles}))) . "\"\n";

    # write the modified file to the temp directory
    open my $file_out, "> " .
$tmpname or die "Cannot open $tmpname for write";

    # write the modified file to the tmpdir
    print $file_out $bigstr;

    close $file_out;
} # end fixup_pg_tidy

# fixup_makefile: rewrite the TIDYCAT_BKI_SRCS list in the generated
# section of the catalog Makefile.
#
#   $h_allfiles - hashref whose keys are header file names; header
#                 names already listed in the Makefile section are
#                 added to it (except those in %allfiles_exception_h)
#   $fullname   - existing Makefile to read
#   $tmpname    - where the rewritten Makefile is written
#   $verzion    - generator version string for the header comment
#   $nnow       - timestamp string for the header comment
#
# Returns without writing anything if the Makefile has no
# "# TIDYCAT_BEGIN_CODEGEN" / "# TIDYCAT_END_CODEGEN" section.  Dies if
# either file cannot be opened.
sub fixup_makefile {
    my ($h_allfiles, $fullname, $tmpname, $verzion, $nnow) = @_;

    # here doc to replace tidycat_bki_src defs in Makefile
    my $bigstr = <<'EOF_bigstr';
# TIDYCAT_BEGIN_CODEGEN
#GENHDRSTR
TIDYCAT_BKI_SRCS := \
	GENSTUFF

# TIDYCAT_END_CODEGEN
EOF_bigstr

    my $whole_file;
    {
        # "local $/" already leaves the input record separator
        # undefined, so the read below slurps the entire file
        local $/;

        # three-argument open: the file name is never parsed for mode
        # characters or stripped of surrounding whitespace
        open my $fh, '<', $fullname
            or die "cannot open $fullname: $!";

        $whole_file = <$fh>;

        close $fh;
    }

    my $prefx = quotemeta('TIDYCAT_BEGIN_CODEGEN');
    my $suffx = quotemeta('TIDYCAT_END_CODEGEN');

    my @zzz = ($whole_file =~ m/^\s*\#\s*$prefx\s*$(.*)^\s*\#\s*$suffx\s*$/ms);

    return unless (scalar(@zzz));

    # merge in the headers already listed in the generated section
    for my $lin (split(/\n/, $zzz[0])) {
        next unless ($lin =~ m/\.h/);

        # capture the filename
        my @foo = ($lin =~ m/^\s*(.*)\s*(?:\\)?\s*$/);

        my $fname = $foo[0];

        # trim the continuation backslash, then leading and trailing
        # spaces
        $fname =~ s/\\$//;
        $fname =~ s/^\s*//;
        $fname =~ s/\s*$//;

        next if (exists($allfiles_exception_h{$fname}));

        $h_allfiles->{$fname} = 1;
    }

    # build sorted list of all names prefixed with tab and ending with
    # slash, eg:
    #   pg_foo.h \ ...
    my $genstuff = join(" \\\n\t", sort(keys(%{$h_allfiles})));

    $bigstr =~ s/GENSTUFF/$genstuff/;

    my $gen_hdr_str = " WARNING: DO NOT MODIFY THE FOLLOWING SECTION: \n" .
        "# Generated by " . $verzion . "\n" .
        "# on " . $nnow . "\n\n";

    $bigstr =~ s/GENHDRSTR/$gen_hdr_str/;

    # replace with modified expression
    $whole_file =~ s/^\s*\#\s*$prefx\s*$(.*)^\s*\#\s*$suffx\s*$/$bigstr/ms;

    # write the modified file to the temp directory; opened only once
    # the new content is ready, so nothing is truncated early
    open my $file_out, '>', $tmpname
        or die "Cannot open $tmpname for write: $!";

    print $file_out $whole_file;

    # buffered write errors only surface at close, so check it
    close $file_out
        or die "Cannot close $tmpname: $!";
} # end fixup_makefile

if (1) {
    # 5000 table
    # 6000 indexes

    my $verzion = "unknown";
    $verzion = $glob_glob->{_sleazy_properties}->{version}
        if (exists($glob_glob->{_sleazy_properties}) &&
            exists($glob_glob->{_sleazy_properties}->{version}));

    $verzion = $0 . " version " . $verzion;
    my $nnow = localtime;
    my $gen_hdr_str = "/* TIDYCAT_BEGIN_CODEGEN \n\n";
    $gen_hdr_str .= " WARNING: DO NOT MODIFY THE FOLLOWING SECTION: \n" . " Generated by " . $verzion . "\n" . " on " . $nnow . "\n*/\n\n";

    my ($vol0, $dirs0, $base0) = File::Spec->splitpath($0);

    # make sure have a directory for tidypath location
    $dirs0 = File::Spec->rel2abs(".") unless (length($dirs0));

    # print "dirs0: $dirs0\n";

    my @backup_files; # list of files to backup

    my $tmp_indexing = File::Spec->catfile($glob_tmpdir, "indexing.h");
    my $fil_indexing = File::Spec->catfile($dirs0, "indexing.h");
    my $buf_indexing;
    my @arr_indexing;
    push @backup_files, $fil_indexing;

    my $tmp_toastingh = File::Spec->catfile($glob_tmpdir, "toasting.h");
    my $fil_toastingh = File::Spec->catfile($dirs0, "toasting.h");
    my $buf_toastingh;
    my @arr_toastingh;
    push @backup_files, $fil_toastingh;

    my $tmp_pg_type = File::Spec->catfile($glob_tmpdir, "pg_type.h");
    my $fil_pg_type = File::Spec->catfile($dirs0, "pg_type.h");
    my $buf_pg_type;
    my @arr_pg_type;
    push @backup_files, $fil_pg_type;

    # dirs0 should be src/include/catalog
    my @srcdirs = File::Spec->splitdir($dirs0);
    push @srcdirs, "..";
    push @srcdirs, "..";

    # src/backend/catalog
    my $tmp_catalogc = File::Spec->catfile($glob_tmpdir, "catalog.c");
    my $fil_catalogc = File::Spec->catfile( File::Spec->catdir(@srcdirs, "backend", "catalog"), "catalog.c");
    my $buf_catalogc;
    my @arr_catalogc;
    my @arr_catalogc_tab;
    my @arr_catalogc_ind;
    my @arr_catalogc_toast;
    push
@backup_files, $fil_catalogc; my $tmp_toastingc = File::Spec->catfile($glob_tmpdir, "toasting.c"); my $fil_toastingc = File::Spec->catfile( File::Spec->catdir(@srcdirs, "backend", "catalog"), "toasting.c"); my $buf_toastingc; my @arr_toastingc; push @backup_files, $fil_toastingc; # src/backend/catalog/Makefile my $tmp_catmakem = File::Spec->catfile($glob_tmpdir, "Makefile"); my $fil_catmakem = File::Spec->catfile( File::Spec->catdir(@srcdirs, "backend", "catalog"), "Makefile"); my $buf_catmakem; my @arr_catmakem; push @backup_files, $fil_catmakem; # src/include/catalog/pg_tidycat.h my $tmp_pg_tidy = File::Spec->catfile($glob_tmpdir, "pg_tidycat.h"); my $fil_pg_tidy = File::Spec->catfile($dirs0, "pg_tidycat.h"); my $buf_pg_tidy; my %h_pg_tidy; push @backup_files, $fil_pg_tidy; # src/include/utils/syscache.h my $tmp_syscacheh = File::Spec->catfile($glob_tmpdir, "syscache.h"); my $fil_syscacheh = File::Spec->catfile( File::Spec->catdir(@srcdirs, "include", "utils"), "syscache.h"); push @backup_files, $fil_syscacheh; # src/backend/utils/cache/syscache.c my $tmp_syscachec = File::Spec->catfile($glob_tmpdir, "syscache.c"); my $fil_syscachec = File::Spec->catfile( File::Spec->catdir(@srcdirs, "backend", "utils", "cache"), "syscache.c"); push @backup_files, $fil_syscachec; my $backup_dir; # backup files that may get changed unless ($glob_glob->{sqldef} || ($glob_glob->{dumpdef})) { $backup_dir = File::Spec->catfile($glob_tmpdir, "tidycat_backup." . $$); die "Cannot open backup directory $backup_dir : $! 
" unless (mkdir($backup_dir)); my $cpargs = join(" ", @backup_files); my $cpstat = `cp $cpargs $backup_dir`; } my %alltabs; my @arr_sql; # array of sql statement output my %dumptabs_h; # hash of table definitions for my $filnam (@ARGV) { %alltabs = (); my ($vol, $dirs, $basename) = File::Spec->splitpath($filnam); my $elist = parsefile($filnam, 1); # file has def and codegen sections my $buffil = bufferfile($filnam); # do it in reverse order to strip out line numbering correctly for my $entry (reverse (@{$elist})) { my $bigstr = $entry->{def}; # print $bigstr, "\n"; # tricky bit here: might have multiple CREATE table # statements in a tidycat definition entry, and multiple # tidycat defs in a single file. So build the list of all # table definitions in alltabs, but extract the set of # keys (tablenames) for the current entry in parsetabdef, # so we can feed that to formattab and generate the code # for just the current entry. my @keys = parsetabdef(\%alltabs, $bigstr, $filnam); # print Data::Dumper->Dump([%alltabs]); # print "fff:", formattabs(\%alltabs); my $gentext = []; push @{$gentext}, split(/(\n)/, $gen_hdr_str); push @{$gentext}, split(/(\n)/, formattab(\%alltabs, \@keys)); push @{$gentext}, "\n\n/* TIDYCAT_END_CODEGEN */\n"; if (exists($entry->{begin_codegen})) { splice (@{$buffil}, ($entry->{begin_codegen}-1), ($entry->{end_codegen} - $entry->{begin_codegen}) + 1, @{$gentext}); } else { splice (@{$buffil}, ($entry->{enddef}), 0, @{$gentext}); } } # end for entry push @arr_indexing, formatindexes(\%alltabs); push @arr_toastingh, formattoastheaders(\%alltabs); push @arr_pg_type, formattypes(\%alltabs); my $tab_ind = fix_issharedrelation_function(\%alltabs); # print Data::Dumper->Dump($tab_ind), "\n"; push @arr_catalogc, $tab_ind; if (defined($tab_ind) && scalar(@{$tab_ind})) { push @arr_catalogc_tab, $tab_ind->[0] if (length($tab_ind->[0])); push @arr_catalogc_ind, $tab_ind->[1] if ((1 < scalar(@{$tab_ind})) && (length($tab_ind->[1]))); push 
@arr_catalogc_toast, $tab_ind->[2] if ((2 < scalar(@{$tab_ind})) && (length($tab_ind->[2]))); } # store the filenames for pg_tidycat and Makefile fixup get_all_filenames(\%alltabs, \%h_pg_tidy); if ($glob_glob->{sqldef}) { while (my ($jj, $ww) = each(%alltabs)) { if (exists($ww->{tabdef_text}) && length($ww->{tabdef_text}) && ($ww->{tabdef_text} =~ m/CREATE\s+TABLE/i)) { push @arr_sql, $ww->{tabdef_text}; } } next; # don't update file only want sql } if (1) { while (my ($jj, $ww) = each(%alltabs)) { $dumptabs_h{$jj} = $ww; } next # don't update file if dumping definitions if ($glob_glob->{dumpdef}); } # backup the original version my $cpstat = `cp $filnam $backup_dir`; # write the modified file to the temp directory open my $file_out, "> " . File::Spec->catfile($glob_tmpdir, $basename) or die "Cannot open $basename for write in $glob_tmpdir"; # write the modified file to the tmpdir print $file_out join("", @{$buffil}); close $file_out; } # end for filnam # print "tmp: $tmp_indexing\tfil: $fil_indexing\n"; # get catalog version if possible my $fil_catversion = File::Spec->catfile($dirs0, "catversion.h"); my $catvstr = ""; if (-e $fil_catversion) { $catvstr = `grep 'CATALOG_VERSION_NO' $fil_catversion`; if (length($catvstr)) { my @cv = ($catvstr =~ m/(\d+)/im); $catvstr = ""; $catvstr = $cv[0] if (scalar(@cv)); } } if ($glob_glob->{syscache}) { fixup_syscache(\%dumptabs_h, $fil_syscacheh, $tmp_syscacheh, $fil_syscachec, $tmp_syscachec, $verzion, $nnow); } if ($glob_glob->{sqldef}) { open my $file_out, "> " . $glob_glob->{sqldef} or die "Cannot open $glob_glob->{sqldef} for write : $!"; print $file_out "-- Generated by " . $verzion . "\n-- on " . $nnow . "\n"; if (length($catvstr)) { print $file_out "-- CATALOG_VERSION_NO = " . $catvstr . "\n"; } print $file_out "\n"; print $file_out join(";\n", @arr_sql), ";\n"; close $file_out; exit(); } if ($glob_glob->{dumpdef}) { open my $file_out, "> " . 
$glob_glob->{dumpdef} or die "Cannot open $glob_glob->{dumpdef} for write : $!"; local $Data::Dumper::Sortkeys = 1; # JSON might not be installed, so test for it. if ($glob_glob->{dumpformat} =~ m/j/) { if (eval "require JSON") { # because JSON is REQUIREd, not USEd, the symbols are not # imported into the environment. # add a comment and sort the keys my $cmt = "Generated by $verzion on $nnow"; $cmt .= " CATALOG_VERSION_NO=$catvstr" if (length($catvstr)); $dumptabs_h{"__comment"} = $cmt; $dumptabs_h{"__info"} = {CATALOG_VERSION_NO => $catvstr} if (length($catvstr)); my $whole_jfil = JSON::to_json(\%dumptabs_h, {pretty => 1, indent => 2, canonical => 1}); # remove date strings from relid_comment_tags to # prevent diffs # # s|20yymmdd */| */| # $whole_jfil =~ s|\s+20(\d){6}\s+\*\/| \*/|gm; print $file_out $whole_jfil; } else { die("Fatal Error: The required package JSON is not installed -- please download it from www.cpan.org\n"); } } else { print $file_out Data::Dumper->Dump([\%dumptabs_h]),"\n"; } close $file_out; exit(); } fixup_generated_code (\%alltabs, "indexing.h", $fil_indexing, $tmp_indexing, \@arr_indexing, $verzion, $nnow) if (scalar(@arr_indexing)); fixup_generated_code (\%alltabs, "toasting.h", $fil_toastingh, $tmp_toastingh, \@arr_toastingh, $verzion, $nnow) if (scalar(@arr_toastingh)); fixup_generated_code (\%alltabs, "toasting.c", $fil_toastingc, $tmp_toastingc, \@arr_toastingc, $verzion, $nnow) if (scalar(@arr_toastingc)); fixup_generated_code (\%alltabs, "pg_type.h", $fil_pg_type, $tmp_pg_type, \@arr_pg_type, $verzion, $nnow) if (scalar(@arr_pg_type)); # print Data::Dumper->Dump(\@arr_catalogc_tab), "\n"; # do toast entries first because they are last (keep line # numbering consistent for prior items), then index, then table # # catalog.c IsSharedRelation index entries my ($catc_ref, $catc_ref2); $catc_ref2 = fixup_generated_code (\%alltabs, "catalog.c toast", $fil_catalogc, $tmp_catalogc, \@arr_catalogc_toast, $verzion, $nnow) if 
(scalar(@arr_catalogc_toast)); if (scalar(@arr_catalogc_ind)) { $catc_ref = fixup_generated_code (\%alltabs, "catalog.c ind", $fil_catalogc, $tmp_catalogc, \@arr_catalogc_ind, $verzion, $nnow, $catc_ref2); } else { $catc_ref = $catc_ref2; } # catalog.c IsSharedRelation table entries fixup_generated_code (\%alltabs, "catalog.c tab", $fil_catalogc, $tmp_catalogc, \@arr_catalogc_tab, $verzion, $nnow, $catc_ref) if (scalar(@arr_catalogc_tab)); if (scalar(keys(%h_pg_tidy))) { fixup_pg_tidy (\%h_pg_tidy, $fil_pg_tidy, $tmp_pg_tidy, $verzion, $nnow); fixup_makefile (\%h_pg_tidy, $fil_catmakem, $tmp_catmakem, $verzion, $nnow); } } exit(); # SLZY_TOP_BEGIN if (0) { my $bigstr = <<'EOF_bigstr'; { "args" : [ { "alias" : "?", "long" : "Print a brief help message and exits.", "name" : "help", "required" : "0", "short" : "brief help message", "type" : "untyped" }, { "long" : "Prints the manual page and exits.", "name" : "man", "required" : "0", "short" : "full documentation", "type" : "untyped" }, { "alias" : "dd", "long" : "$ddlong", "name" : "dumpdef", "short" : "output file for dump of serialized catalog data structures", "type" : "outfile" }, { "alias" : "df|dumpfmt", "long" : "Specify a format for the dumpfile. The only valid options are jason or perl.", "name" : "dumpformat", "short" : "format options for dump file [perl, jason]", "type" : "outfile" }, { "long" : "$sqldeflong", "name" : "sqldef", "short" : "output file for dump of catalog DDL statements", "type" : "outfile" }, { "long" : "$syscachelong", "name" : "syscache", "short" : "build syscache entries", "type" : "untyped" } ], "long" : "$toplong", "properties" : { "COPYDATES" : "2009-2012", "slzy_date" : 1350599086 }, "short" : "generate catalog entries", "version" : "34" } EOF_bigstr } # SLZY_TOP_END # SLZY_LONG_BEGIN if (0) { my $ddlong = <<'EOF_ddlong'; Specify an optional filename to hold a dump of the serialized catalog data structures. 
The format of the dump file is determined by dumpformat EOF_ddlong my $syscachelong = <<'EOF_syscachelong'; If specified, rebuild syscache.h and syscache.c. Note that this option, like dumpdef, must read all catalog headers, ie in src/include/catalog, the command: perl tidycat.pl -syscache *.h constructs new versions of syscache.c and syscache.h. NOTE: Modification and extension of syscache entries is extremely rare. Usage of this option is discouraged. EOF_syscachelong my $sqldeflong = <<'EOF_sqldeflong'; Specify an optional filename to hold the CREATE TABLE statements from the tidycat definitions. Note that these statements will contain the tidycat WITH clause, which is not valid SQL. EOF_sqldeflong my $toplong = <<'EOF_longstr'; tidycat.pl handles all of your stinky catalog problems, leaving a fresh, clean scent. Catalog tables require several sets of co-ordinated modifications to multiple source files to define the table and indexes, and (under some circumstances) the toast tables and indexes (in toasting.h and toasting.c), as well as some special code in catalog.c to aid in bootstrap and upgrade. tidycat also updates a generated list of headers in pg_tidycat.h and the catalog Makefile. The original files are copied to a special tidycat_backup directory in /tmp, and all generated files are written to /tmp. tidycat.pl uses a single definition statement to generate the code associated with the table in multiple source files. A sample definition for the fictional pg_foobar.h follows: /* TIDYCAT_BEGINDEF CREATE TABLE pg_foobar with (camelcase=FooBar, shared=true, oid=true, relid=9991) ( fooname name, -- name of foo bar foolimit real, -- max active count limit fooignore boolean, -- ignore foo in baz context ); create unique index on pg_foobar(oid) with (indexid=9993); create index on pg_foobar(fooname) with (indexid=9994); TIDYCAT_ENDDEF */ The definition must begin and end with the TIDYCAT_BEGINDEF/TIDYCAT_ENDDEF exactly as shown. 
The CREATE TABLE statement is almost identical to standard SQL, with the addition of a special WITH clause for implementation-specific features of the catalog entry. Currently, the relid must be specified using unassigned oids from the unused_oids script. The options are: {PODOVER8} {ITEM} CamelCase: (optional) If your tablename is a compound name, the index definitions look a little nicer if you define an appropriate camelcase name. Otherwise, the default version of the name is the tablename, minus the "pg_" prefix, initial letter capitalized. {ITEM} shared: (false by default) Whether the table is local to each database or shared by all. {ITEM} oid: (true by default) Whether the table has an auto-generated oid column. {ITEM} relid: (required) The relid of the table in pg_class. Use unused_oids to find one. {ITEM} toast_oid: (required for all tables with text or array columns) The oid of the toast table (see toasting.h). Use unused_oids to find one. tidycat will automatically detect if the table definition requires a toast table and return an error if it is not specified. {ITEM} toast_index: (required for all tables with toast_oid) The oid of the index of the toast table (see toasting.h). Use unused_oids to find one. {ITEM} content: (optional) The "content" is only for catalog tables with non-standard content management. "Normal" catalog tables are replicated from the master to all the segments. Non-standard tables fall into three categories: MASTER_ONLY, SEGMENT_LOCAL, and PERSISTENT. Don't add any new non-standard tables. Please. Note that this flag controls the generation of validation logic for checkcat; it does not control the catalog table tuple replication mechanisms. {PODBACK} Similarly, index definitions are unique or non-unique, and require an indexid (and an optional indexname). 
Running tidycat.pl against pg_foobar.h adds the following section after the definition: /* TIDYCAT_BEGIN_CODEGEN WARNING: DO NOT MODIFY THE FOLLOWING SECTION: Generated by tidycat.pl version 3. on Tue Dec 8 12:50:21 2009 */ /* TidyCat Comments for pg_foobar: Table is shared, so catalog.c:IsSharedRelation is updated. Table has an Oid column. Table has static type (see pg_types.h). */ /* ---------------- * pg_foobar definition. cpp turns this into * typedef struct FormData_pg_foobar * ---------------- */ #define FooBarRelationId 9991 CATALOG(pg_foobar,9991) BKI_SHARED_RELATION { NameData fooname; /* name of foo bar */ float4 foolimit; /* max active count limit */ bool fooignore; /* ignore foo in baz context */ } FormData_pg_foobar; /* ---------------- * Form_pg_foobar corresponds to a pointer to a tuple with * the format of pg_foobar relation. * ---------------- */ typedef FormData_pg_foobar *Form_pg_foobar; /* ---------------- * compiler constants for pg_foobar * ---------------- */ #define Natts_pg_foobar 3 #define Anum_pg_foobar_fooname 1 #define Anum_pg_foobar_foolimit 2 #define Anum_pg_foobar_fooignore 3 /* TIDYCAT_END_CODEGEN */ The generated code contains a CATALOG macro/struct definition for the table, where the SQL datatypes are converted to C types. The naming and comments follow established conventions. Additional modifications are made to indexing.h: /* relation id: 9991 - pg_foobar 20091208 */ DECLARE_UNIQUE_INDEX(pg_foobar_oid_index, 9993, on pg_foobar using btree(oid oid_ops)); #define FooBarOidIndexId 9993 /* relation id: 9991 - pg_foobar 20091208 */ DECLARE_INDEX(pg_foobar_fooname_index, 9994, on pg_foobar using btree(fooname name_ops)); #define FooBarFoonameIndexId 9994 And the function IsSharedRelation() in catalog.c: bool IsSharedRelation(Oid relationId) { /* These are the shared catalogs (look for BKI_SHARED_RELATION) */ if (relationId == AuthIdRelationId || (...much code...) 
/* relation id: 9991 - pg_foobar 20100105 */ relationId == FooBarRelationId || Note that IsSharedRelation is only updated for shared tables. {HEAD2} JSON document In src/include/catalog, the command: perl tidycat.pl -dd foo.json -df json *.h will generate a JSON document describing all of the catalog tables. This file is installed under gpMgmt/bin/gppylib/data, and gpcheckcat uses this data to generate check queries for foreign key constraints. {HEAD1} CAVEATS tidycat does not modify the original files -- it writes modified versions of the files to /tmp. You need to copy over the originals with the generated files manually. If you need to restore the originals you can use the copies from the tidycat_backup directory. Multiple cycles of tidycat with changing definitions can leave junk in /tmp, and you might copy that junk into your source tree. Do not copy over any generated files that are older than your latest backup directory. EOF_longstr } # SLZY_LONG_END