Add small hack to support use of Unicode-based locales on WIN32. This
is not adequately tested yet, but let's get it into beta1 so it can be tested. Magnus Hagander and Tom Lane.

Add small hack to support use of Unicode-based locales on WIN32. This
is not adequately tested yet, but let's get it into beta1 so it can be tested. Magnus Hagander and Tom Lane.
767a9021 · Tom Lane · a0a0bc02 · 767a9021 · 767a9021
隐藏空白更改
内联并排

Showing 2 changed files with 197 additions and 7 deletions

src/backend/utils/adt/oracle_compat.c src/backend/utils/adt/oracle_compat.c +112 -1

src/backend/utils/adt/varlena.c src/backend/utils/adt/varlena.c +85 -6

未找到文件。
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.60 2005/05/07 15:18:17 momjian Exp $
+ *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.61 2005/08/24 17:50:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -149,6 +149,117 @@ wcstotext(const wchar_t *str, int ncodes)
 #endif   /* USE_WIDE_UPPER_LOWER */


+/*
+ * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
+ * To make use of the upper/lower functionality, we need to map UTF8 to
+ * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
+ * This conversion layer takes care of it.
+ */
+
+#ifdef WIN32
+
+/* texttowcs for the case of UTF8 to UTF16: converts the payload of txt with MultiByteToWideChar(CP_UTF8) and returns a palloc'd, zero-terminated wchar_t array; reports an error through ereport(ERROR) if the length is out of range or the conversion fails */
+static wchar_t *
+win32_utf8_texttowcs(const text *txt)
+{
+	int			nbytes = VARSIZE(txt) - VARHDRSZ;	/* payload length, header excluded */
+	wchar_t    *result;
+	int         r;				/* MultiByteToWideChar result; used below as the terminator index */
+
+	/* Overflow paranoia: make sure (nbytes + 1) * sizeof(wchar_t) below still fits in an int */
+	if (nbytes < 0 ||
+		nbytes > (int) (INT_MAX / sizeof(wchar_t)) -1)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes; one extra slot holds the terminator */
+	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+	/* The Windows API call below does not work for zero-length input, so skip it in that case */
+	if (nbytes == 0)
+		r = 0;
+	else
+	{
+		/* Do the conversion; the output limit is nbytes codes, leaving the last slot free */
+		r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes,
+								result, nbytes);
+
+		if (!r)					/* assume it's NO_UNICODE_TRANSLATION */
+		{
+			/* see notes above about error reporting; presumably pg_verifymbstr raises its own error for badly encoded input and we only reach the generic ereport otherwise -- TODO confirm */
+			pg_verifymbstr(VARDATA(txt), nbytes, false);
+			ereport(ERROR,
+					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+					 errmsg("invalid multibyte character for locale"),
+					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+		}
+	}
+
+	Assert(r <= nbytes);		/* result has nbytes + 1 slots, so result[r] is in bounds */
+	result[r] = 0;
+
+	return result;
+}
+
+/* wcstotext for the case of UTF16 to UTF8: str must be zero-terminated (its length is passed as -1); returns a palloc'd text whose recorded size excludes the converted terminating null; reports an error through ereport(ERROR) if either WideCharToMultiByte call returns 0 */
+static text *
+win32_utf8_wcstotext(const wchar_t *str)
+{
+	text		*result;
+	int			 nbytes;	/* UTF8 byte count, including the terminating null */
+	int			 r;
+
+	nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);	/* sizing pass: no output buffer is supplied */
+	if (nbytes == 0)			/* shouldn't happen */
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("UTF16 to UTF8 translation failed: %lu",
+						GetLastError())));
+
+	result = palloc(nbytes+VARHDRSZ);	/* room for the header, the data and the null */
+
+	r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,	/* conversion pass into the data area */
+							NULL, NULL);
+	if (r == 0)					/* shouldn't happen */
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("UTF16 to UTF8 translation failed: %lu",
+						GetLastError())));
+
+	VARATT_SIZEP(result) = nbytes + VARHDRSZ - 1; /* -1 to ignore null */
+
+	return result;
+}
+
+/* interface layer to check which encoding is in use: a UTF8 database goes through the UTF8/UTF16 converters, any other encoding through the ordinary routines (the names texttowcs and wcstotext still mean the real functions here, because the redirecting macros are only defined further down) */
+
+static wchar_t *
+win32_texttowcs(const text *txt)
+{
+	if (GetDatabaseEncoding() == PG_UTF8)
+		return win32_utf8_texttowcs(txt);
+	else
+		return texttowcs(txt);
+}
+
+static text *
+win32_wcstotext(const wchar_t *str, int ncodes)
+{
+	if (GetDatabaseEncoding() == PG_UTF8)	/* UTF8 database: convert from UTF16 ourselves */
+		return win32_utf8_wcstotext(str);	/* ncodes is not passed: str is read up to its zero terminator */
+	else
+		return wcstotext(str, ncodes);		/* still the real wcstotext: the redirecting macro is defined further down */
+}
+
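+/* use macros to cause routines below to call interface layer; they are defined only after the wrappers above so that the wrappers themselves still reach the real functions */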
+
+#define texttowcs	win32_texttowcs
+#define wcstotext	win32_wcstotext
+
+#endif /* WIN32 */
+
+
 /********************************************************************
 *
 * lower

--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.131 2005/08/02 16:11:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.132 2005/08/24 17:50:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -849,6 +849,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 		char	   *a1p,
 				   *a2p;

+#ifndef WIN32
+
 		if (len1 >= STACKBUFLEN)
 			a1p = (char *) palloc(len1 + 1);
 		else
@@ -865,10 +867,87 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)

 		result = strcoll(a1p, a2p);

-		if (len1 >= STACKBUFLEN)
+		if (a1p != a1buf)
 			pfree(a1p);
-		if (len2 >= STACKBUFLEN)
+		if (a2p != a2buf)
 			pfree(a2p);
+
+#else /* WIN32 */
+
+		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
+		if (GetDatabaseEncoding() == PG_UTF8)
+		{
+			int a1len;
+			int a2len;
+			int r;
+
+			if (len1 >= STACKBUFLEN/2)
+			{
+				a1len = len1 * 2 + 2;
+				a1p = palloc(a1len);
+			}
+			else
+			{
+				a1len = STACKBUFLEN;
+				a1p = a1buf;
+			}
+			if (len2 >= STACKBUFLEN/2)
+			{
+				a2len = len2 * 2 + 2;
+				a2p = palloc(a2len);
+			}
+			else
+			{
+				a2len = STACKBUFLEN;
+				a2p = a2buf;
+			}
+
+			/* stupid Microsloth API does not work for zero-length input */
+			if (len1 == 0)
+				r = 0;
+			else
+			{
+				r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
+										(LPWSTR) a1p, a1len/2);
+				if (!r)
+					ereport(ERROR,
+							(errmsg("could not convert string to UTF16: %lu",
+									GetLastError())));
+			}
+			((LPWSTR) a1p)[r] = 0;
+
+			if (len2 == 0)
+				r = 0;
+			else
+			{
+				r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
+										(LPWSTR) a2p, a2len/2);
+				if (!r)
+					ereport(ERROR,
+							(errmsg("could not convert string to UTF16: %lu",
+									GetLastError())));
+			}
+			((LPWSTR) a2p)[r] = 0;
+
+			errno = 0;
+			result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+			if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
+				ereport(ERROR,
+						(errmsg("could not compare unicode strings: %d",
+								errno)));
+
+			if (a1p != a1buf)
+				pfree(a1p);
+			if (a2p != a2buf)
+				pfree(a2p);
+
+			return result;
+		}
+
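+		/* Win32 has strncoll(), so use it to avoid copying. NOTE(review): only the first Min(len1, len2) bytes are compared and the result is returned directly, so a string and its proper prefix appear to compare as equal here -- confirm */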
+		return _strncoll(arg1, arg2, Min(len1, len2));
+
+#endif /* WIN32 */
 	}
 	else
 	{
@@ -2000,7 +2079,7 @@ replace_text(PG_FUNCTION_ARGS)

 /*
 * check_replace_text_has_escape_char
- * check whether replace_text has escape char. 
+ * check whether replace_text has escape char.
 */
 static bool
 check_replace_text_has_escape_char(const text *replace_text)
@@ -2175,7 +2254,7 @@ replace_text_regexp(PG_FUNCTION_ARGS)

 		/*
 		 * Copy the replace_text. Process back references when the
-		 * replace_text has escape characters. 
+		 * replace_text has escape characters.
 		 */
 		if (have_escape)
 			appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
@@ -2573,7 +2652,7 @@ md5_bytea(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(result_text);
 }

-/* 
+/*
 * Return the size of a datum, possibly compressed
 *
 * Works on any data type