diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 80e113329cef6dd7f99c27ccfa97a2cd8053b340..c746c71d6c9fdb2438c127bffcc88214e54a96fd 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp, static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, - bool preserve_whitespace, xmlChar *encoding); + bool preserve_whitespace, int encoding); static text *xml_xmlnodetoxmltype(xmlNodePtr cur); #endif /* USE_LIBXML */ @@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS) * Parse the data to check if it is well-formed XML data. Assume that * ERROR occurred if parsing failed. */ - doc = xml_parse(vardata, xmloption, true, NULL); + doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding()); xmlFreeDoc(doc); PG_RETURN_XML_P(vardata); @@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS) char *newstr; int nbytes; xmlDocPtr doc; - xmlChar *encoding = NULL; + xmlChar *encodingStr = NULL; + int encoding; /* * Read the data in raw format. We don't know yet what the encoding is, as @@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS) str = VARDATA(result); str[nbytes] = '\0'; - parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL); + parse_xml_decl((xmlChar *) str, NULL, NULL, &encodingStr, NULL); + + /* + * If encoding wasn't explicitly specified in the XML header, treat it as + * UTF-8, as that's the default in XML. This is different from xml_in(), + * where the input has to go through the normal client to server encoding + * conversion. + */ + encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8; /* * Parse the data to check if it is well-formed XML data. Assume that @@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS) /* Now that we know what we're dealing with, convert to server encoding */ newstr = (char *) pg_do_encoding_conversion((unsigned char *) str, nbytes, - encoding ? - xmlChar_to_encoding(encoding) : - PG_UTF8, + encoding, GetDatabaseEncoding()); if (newstr != str) @@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) #ifdef USE_LIBXML xmlDocPtr doc; - doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL); + doc = xml_parse(data, xmloption_arg, preserve_whitespace, + GetDatabaseEncoding()); xmlFreeDoc(doc); return (xmltype *) data; @@ -799,7 +807,8 @@ xml_is_document(xmltype *arg) /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ PG_TRY(); { - doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL); + doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding()); result = true; } PG_CATCH(); @@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version, */ static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, - xmlChar *encoding) + int encoding) { int32 len; xmlChar *string; @@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, utf8string = pg_do_encoding_conversion(string, len, - encoding ? - xmlChar_to_encoding(encoding) : - GetDatabaseEncoding(), + encoding, PG_UTF8); /* Start up libxml and its parser (no-ops if already done) */