diff --git a/configure b/configure index c7623adb894df8a4ca12ada745b942018ba1d4e2..1ad862be1d302e7743e231ea1784aed46b6c0071 100755 --- a/configure +++ b/configure @@ -6989,7 +6989,8 @@ done -for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h + +for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if eval "test \"\${$as_ac_Header+set}\" = set"; then diff --git a/configure.in b/configure.in index bfb6dd0635ab1a720a7bb914de6df2941b3af0ed..b0588b411eb67a4f4b9af1c6b37662176a17c7f6 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. 
-dnl $PostgreSQL: pgsql/configure.in,v 1.366 2004/07/10 01:24:29 momjian Exp $ +dnl $PostgreSQL: pgsql/configure.in,v 1.367 2004/07/14 17:55:09 petere Exp $ dnl dnl Developers, please strive to achieve this order: dnl @@ -675,7 +675,7 @@ fi ## dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES -AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h]) +AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h]) # At least on IRIX, cpp test for netinet/tcp.h will fail unless # netinet/in.h is included first. diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 7cd0e699a987f2d332ebaf9d49f655fcfa9f98aa..bc92cedd18b3bec7118502b9af188d6a5290aa35 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -1,5 +1,5 @@ @@ -121,8 +121,8 @@ PostgreSQL documentation Selects the encoding of the template database. This will also be the default encoding of any database you create later, - unless you override it there. The default is - SQL_ASCII. The character sets supported by + unless you override it there. The default is derived from the locale, or + SQL_ASCII if that does not work. The character sets supported by the PostgreSQL server are described in . diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 52e3e1ac752c477f2c1e9de1aca0ac1dec96f4ac..f9479555577b35423228e90e5810d66633f02e19 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -39,7 +39,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * Portions taken from FreeBSD. 
* - * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.42 2004/07/12 01:54:10 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.43 2004/07/14 17:55:10 petere Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,9 @@ #include #include #include +#ifdef HAVE_LANGINFO_H +# include /* NOTE(review): the header name is missing from this line; given the HAVE_LANGINFO_H guard it is presumably <langinfo.h> -- confirm against the original patch */ +#endif #include "libpq/pqsignal.h" #include "mb/pg_wchar.h" @@ -600,6 +603,15 @@ get_id(void) return xstrdup(pw->pw_name); } +static char * /* Renders the integer encoding id enc as a decimal string and returns an xstrdup copy of it. */ +encodingid_to_string(int enc) +{ + char result[20]; + + sprintf(result, "%d", enc); + return xstrdup(result); +} + /* * get the encoding id for a given encoding name */ @@ -607,15 +619,13 @@ static char * get_encoding_id(char *encoding_name) { int enc; - char result[20]; if (encoding_name && *encoding_name) { if ((enc = pg_char_to_encoding(encoding_name)) >= 0 && pg_valid_server_encoding(encoding_name) >= 0) { - sprintf(result, "%d", enc); - return xstrdup(result); + return encodingid_to_string(enc); } } fprintf(stderr, _("%s: \"%s\" is not a valid server encoding name\n"), @@ -623,6 +633,191 @@ get_encoding_id(char *encoding_name) exit(1); } +#ifdef HAVE_LANGINFO_H +/* + * Checks whether the encoding selected for PostgreSQL and the + * encoding used by the system locale match. + */ + +struct encoding_match /* one table row: a pg_enc code paired with one spelling of a system codeset name */ +{ + enum pg_enc pg_enc_code; + char *system_enc_name; +}; + +struct encoding_match encoding_match_list[] = { /* scanned front to back by the lookup loops; the entry whose system_enc_name is NULL ends the table */ + { PG_EUC_JP, "EUC-JP" }, + { PG_EUC_JP, "eucJP" }, + { PG_EUC_JP, "IBM-eucJP" }, + { PG_EUC_JP, "sdeckanji" }, + + { PG_EUC_CN, "EUC-CN" }, + { PG_EUC_CN, "eucCN" }, + { PG_EUC_CN, "IBM-eucCN" }, + { PG_EUC_CN, "GB2312" }, + { PG_EUC_CN, "dechanzi" }, + + { PG_EUC_KR, "EUC-KR" }, + { PG_EUC_KR, "eucKR" }, + { PG_EUC_KR, "IBM-eucKR" }, + { PG_EUC_KR, "deckorean" }, + { PG_EUC_KR, "5601" }, + + { PG_EUC_TW, "EUC-TW" }, + { PG_EUC_TW, "eucTW" }, + { PG_EUC_TW, "IBM-eucTW" }, + { PG_EUC_TW, "cns11643" }, + +#ifdef NOT_VERIFIED + { PG_JOHAB, "???"
}, +#endif + + { PG_UTF8, "UTF-8" }, + { PG_UTF8, "utf8" }, + + { PG_LATIN1, "ISO-8859-1" }, + { PG_LATIN1, "ISO8859-1" }, + { PG_LATIN1, "iso88591" }, + + { PG_LATIN2, "ISO-8859-2" }, + { PG_LATIN2, "ISO8859-2" }, + { PG_LATIN2, "iso88592" }, + + { PG_LATIN3, "ISO-8859-3" }, + { PG_LATIN3, "ISO8859-3" }, + { PG_LATIN3, "iso88593" }, + + { PG_LATIN4, "ISO-8859-4" }, + { PG_LATIN4, "ISO8859-4" }, + { PG_LATIN4, "iso88594" }, + + { PG_LATIN5, "ISO-8859-9" }, + { PG_LATIN5, "ISO8859-9" }, + { PG_LATIN5, "iso88599" }, + + { PG_LATIN6, "ISO-8859-10" }, + { PG_LATIN6, "ISO8859-10" }, + { PG_LATIN6, "iso885910" }, + + { PG_LATIN7, "ISO-8859-13" }, + { PG_LATIN7, "ISO8859-13" }, + { PG_LATIN7, "iso885913" }, + + { PG_LATIN8, "ISO-8859-14" }, + { PG_LATIN8, "ISO8859-14" }, + { PG_LATIN8, "iso885914" }, + + { PG_LATIN9, "ISO-8859-15" }, + { PG_LATIN9, "ISO8859-15" }, + { PG_LATIN9, "iso885915" }, + + { PG_LATIN10, "ISO-8859-16" }, + { PG_LATIN10, "ISO8859-16" }, + { PG_LATIN10, "iso885916" }, + + { PG_WIN1256, "CP1256" }, + { PG_TCVN, "CP1258" }, +#ifdef NOT_VERIFIED + { PG_WIN874, "???"
}, +#endif + { PG_KOI8R, "KOI8-R" }, + { PG_WIN1251, "CP1251" }, + { PG_ALT, "CP866" }, + + { PG_ISO_8859_5, "ISO-8859-5" }, + { PG_ISO_8859_5, "ISO8859-5" }, + { PG_ISO_8859_5, "iso88595" }, + + { PG_ISO_8859_6, "ISO-8859-6" }, + { PG_ISO_8859_6, "ISO8859-6" }, + { PG_ISO_8859_6, "iso88596" }, + + { PG_ISO_8859_7, "ISO-8859-7" }, + { PG_ISO_8859_7, "ISO8859-7" }, + { PG_ISO_8859_7, "iso88597" }, + + { PG_ISO_8859_8, "ISO-8859-8" }, + { PG_ISO_8859_8, "ISO8859-8" }, + { PG_ISO_8859_8, "iso88598" }, + + { PG_SQL_ASCII, NULL } /* end marker */ +}; + +static char * /* Returns an xstrdup copy of nl_langinfo(CODESET) as read while LC_CTYPE is switched to ctype; the previous LC_CTYPE setting is saved first and restored afterwards. Returns NULL when the current LC_CTYPE setting cannot be read. The caller frees the result. */ +get_encoding_from_locale(const char *ctype) +{ + char *save; + char *sys; + + save = setlocale(LC_CTYPE, NULL); + if (!save) + return NULL; /* NOTE(review): both callers visible in this patch pass the result straight to strcasecmp without a NULL test -- confirm this path is unreachable or add a check */ + save = xstrdup(save); + + setlocale(LC_CTYPE, ctype); /* NOTE(review): return value ignored, so a rejected ctype goes unnoticed here -- confirm that is acceptable */ + sys = nl_langinfo(CODESET); + sys = xstrdup(sys); + + setlocale(LC_CTYPE, save); + free(save); + + return sys; +} + +static void /* Looks for a table entry that pairs pg_enc with the codeset name of locale ctype (names compared with strcasecmp). Returns quietly when one is found; otherwise writes an encoding mismatch warning to stderr and returns without exiting. */ +check_encodings_match(int pg_enc, const char *ctype) +{ + char *sys; + int i; + + sys = get_encoding_from_locale(ctype); + + for (i = 0; encoding_match_list[i].system_enc_name; i++) + { + if (pg_enc == encoding_match_list[i].pg_enc_code + && strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0) + { + free(sys); + return; + } + } + + fprintf(stderr, + _("%s: warning: encoding mismatch\n"), progname); + fprintf(stderr, + _("The encoding you selected (%s) and the encoding that the selected\n" + "locale uses (%s) are not known to match. This may lead to\n" + "misbehavior in various character string processing functions.
To fix\n" + "this situation, rerun %s and either do not specify an encoding\n" + "explicitly, or choose a matching combination.\n"), + pg_encoding_to_char(pg_enc), sys, progname); + + free(sys); + return; +} + +static int +find_matching_encoding(const char *ctype) +{ + char *sys; + int i; + + sys = get_encoding_from_locale(ctype); + + for (i = 0; encoding_match_list[i].system_enc_name; i++) + { + if (strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0) + { + free(sys); + return encoding_match_list[i].pg_enc_code; + } + } + + free(sys); + return -1; +} +#endif /* HAVE_LANGINFO_H */ + /* * get short version of VERSION */ @@ -2027,13 +2222,11 @@ main(int argc, char *argv[]) fprintf(stderr, "VERSION=%s\n" "PGDATA=%s\nshare_path=%s\nPGPATH=%s\n" - "ENCODING=%s\nENCODINGID=%s\n" "POSTGRES_SUPERUSERNAME=%s\nPOSTGRES_BKI=%s\n" "POSTGRES_DESCR=%s\nPOSTGRESQL_CONF_SAMPLE=%s\n" "PG_HBA_SAMPLE=%s\nPG_IDENT_SAMPLE=%s\n", PG_VERSION, pg_data, share_path, bin_path, - encoding, encodingid, effective_user, bki_file, desc_file, conf_file, hba_file, ident_file); @@ -2051,21 +2244,20 @@ main(int argc, char *argv[]) check_input(features_file); check_input(system_views_file); + setlocales(); + printf(_("The files belonging to this database system will be owned " "by user \"%s\".\n" "This user must also own the server process.\n\n"), effective_user); - setlocales(); - if (strcmp(lc_ctype, lc_collate) == 0 && strcmp(lc_ctype, lc_time) == 0 && strcmp(lc_ctype, lc_numeric) == 0 && strcmp(lc_ctype, lc_monetary) == 0 && strcmp(lc_ctype, lc_messages) == 0) { - printf(_("The database cluster will be initialized with locale %s.\n\n"), - lc_ctype); + printf(_("The database cluster will be initialized with locale %s.\n"), lc_ctype); } else { @@ -2075,7 +2267,7 @@ main(int argc, char *argv[]) " MESSAGES: %s\n" " MONETARY: %s\n" " NUMERIC: %s\n" - " TIME: %s\n\n"), + " TIME: %s\n"), lc_collate, lc_ctype, lc_messages, @@ -2084,6 +2276,34 @@ main(int argc, char *argv[]) lc_time); } +#ifdef
HAVE_LANGINFO_H + if (strcmp(lc_ctype, "C") != 0 && strcmp(lc_ctype, "POSIX") != 0) /* the C and POSIX locales skip both the lookup and the cross-check below */ + { + if (strlen(encoding) == 0) /* empty encoding string: derive the encoding from lc_ctype */ + { + int tmp; + tmp = find_matching_encoding(lc_ctype); /* pg_enc_code of the first table entry whose name equals the codeset of lc_ctype ignoring case, or -1 when no entry matches */ + if (tmp == -1) + { + fprintf(stderr, _("%s: could not find suitable encoding for locale \"%s\"\n"), progname, lc_ctype); + fprintf(stderr, _("Rerun %s with the -E option.\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + else + { + encodingid = encodingid_to_string(tmp); + printf(_("The default database encoding has accordingly been set to %s.\n"), + pg_encoding_to_char(tmp)); + } + } + else + check_encodings_match(atoi(encodingid), lc_ctype); /* an encoding string was supplied: only warn when it is not known to match the locale; NOTE(review): encodingid is presumably the decimal string built by get_encoding_id -- confirm where it is assigned */ + } +#endif /* HAVE_LANGINFO_H */ + + printf("\n"); + umask(077); /* diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 73af75eaab490c9275218b236b427721bb1a265c..2fb4adf8fb28158fb7eb18063b2834fc2456a564 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -200,6 +200,9 @@ /* Define to 1 if `enc_part2' is member of `krb5_ticket'. */ #undef HAVE_KRB5_TICKET_ENC_PART2 +/* Define to 1 if you have the <langinfo.h> header file. */ +#undef HAVE_LANGINFO_H + /* Define to 1 if you have the `bind' library (-lbind). */ #undef HAVE_LIBBIND