Thanks to the generous support of Torchbox (http://www.torchbox.com), I

have been able to significantly improve the contrib/xml XPath integration code. New features: * XPath set-returning function allows multiple results from several XPath queries to be used as a virtual table. * Using libxslt, XSLT transformations (with and without parameters) are supported. (Caution: This support allows generic URL fetching from within the backend as well). I've removed the old code so that it is all libxml based. Rather than attach as a patch, I've put the tar.gz (10k!) at http://www.azuli.co.uk/pgxml-1.0.tar.gz (all files in archive are xml/....). I think this is worth replacing the contrib version with, even though the function names have changed (though the same functionality is there), because it includes a SRF and some SPI usage, in addition to linking to an external library. And it isn't a big module! Obviously, I understand that people might prefer to move it elsewhere, or might have reservations about replacing an existing contrib module with an incompatible one. I'm open to suggestions. John Gray

Thanks to the generous support of Torchbox (http://www.torchbox.com), I
have been able to significantly improve the contrib/xml XPath integration code. New features: * XPath set-returning function allows multiple results from several XPath queries to be used as a virtual table. * Using libxslt, XSLT transformations (with and without parameters) are supported. (Caution: This support allows generic URL fetching from within the backend as well). I've removed the old code so that it is all libxml based. Rather than attach as a patch, I've put the tar.gz (10k!) at http://www.azuli.co.uk/pgxml-1.0.tar.gz (all files in archive are xml/....). I think this is worth replacing the contrib version with, even though the function names have changed (though the same functionality is there), because it includes a SRF and some SPI usage, in addition to linking to an external library. And it isn't a big module! Obviously, I understand that people might prefer to move it elsewhere, or might have reservations about replacing an existing contrib module with an incompatible one. I'm open to suggestions. John Gray
adca025c · Bruce Momjian · 19739718 · adca025c · adca025c · 19739718
10 changed files
--- a/contrib/xml/Makefile
+++ b/contrib/xml/Makefile
-# $PostgreSQL: pgsql/contrib/xml/Makefile,v 1.4 2003/11/29 19:51:36 pgsql Exp $
-
+# This makefile will build the new XML and XSLT routines.
 subdir = contrib/xml
-top_builddir = ../..
+top_builddir = ../../
 include $(top_builddir)/src/Makefile.global

-MODULE_big = pgxml_dom
-OBJS = pgxml_dom.o
-SHLIB_LINK = -lxml2
-DATA_built = pgxml_dom.sql
+MODULE_big = pgxml
+
+# Remove xslt_proc.o from the following line if you don't have libxslt
+OBJS = xpath.o xslt_proc.o
+
+# Remove -lxslt from the following line if you don't have libxslt.
+SHLIB_LINK = -lxml2 -lxslt
+
+DATA_built = pgxml.sql
 DOCS = README.pgxml

-include $(top_srcdir)/contrib/contrib-global.mk
+include $(top_builddir)contrib/contrib-global.mk
+
--- a/contrib/xml/README.pgxml
+++ b/contrib/xml/README.pgxml
-This package contains some simple routines for manipulating XML
-documents stored in PostgreSQL. This is a work-in-progress and
-somewhat basic at the moment (see the file TODO for some outline of
-what remains to be done).
+XML-handling functions for PostgreSQL
+=====================================

-At present, two modules (based on different XML handling libraries)
-are provided.
+Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com)

-Prerequisite:
+This version of the XML functions provides both XPath querying and
+XSLT functionality. There is also a new table function which allows
+the straightforward return of multiple XML results. Note that the current code
+doesn't take any particular care over character sets - this is
+something that should be fixed at some point!

-pgxml.c:
-expat parser 1.95.0 or newer (http://expat.sourceforge.net)
+Installation
+------------

-or
+The current build process will only work if the files are in
+contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been
+configured and built (If you alter the subdir value in the Makefile
+you can place it in a different directory in a PostgreSQL tree).

-pgxml_dom.c:
-libxml2 (http://xmlsoft.org)
+Before you begin, just check the Makefile, and then just 'make' and
+'make install'.

-The libxml2 version provides more complete XPath functionality, and
-seems like a good way to go. I've left the old versions in there for
-comparison.
+This code requires libxml to be previously installed.

-Compiling and loading:
----------------------
+Description of functions
+------------------------

-The Makefile only builds the libxml2 version.
+The first set of functions are straightforward XML parsing and XPath queries:

-To compile, just type make.
+pgxml_parse(document) RETURNS bool

-Then you can use psql to load the two function definitions: 
-\i pgxml_dom.sql
+This parses the document text in its parameter and returns true if the
+document is well-formed XML.

+xpath_string(document,query) RETURNS text
+xpath_number(document,query) RETURNS float4
+xpath_bool(document,query) RETURNS bool

-Function documentation and usage:
---------------------------------
+These functions evaluate the XPath query on the supplied document, and
+cast the result to the specified type.

-pgxml_parse(text) returns bool
-  parses the provided text and returns true or false if it is 
-well-formed or not. It returns NULL if the parser couldn't be
-created for any reason.

-pgxml_xpath (XQuery functions) - differs between the versions:
+xpath_nodeset(document,query,toptag,itemtag) RETURNS text

-pgxml.c (expat version) has:
+This evaluates query on document and wraps the result in XML tags. If
+the result is multivalued, the output will look like:

-pgxml_xpath(text doc, text xpath, int n) returns text
-  parses doc and returns the cdata of the nth occurence of
-the "simple path" entry. 
+<toptag>
+<itemtag>Value 1 which could be an XML fragment</itemtag>
+<itemtag>Value 2....</itemtag>
+</toptag>

-However, the remainder of this document will cover the pgxml_dom.c version.
+If either toptag or itemtag is an empty string, the relevant tag is omitted.
+There are also wrapper functions for this operation:

-pgxml_xpath(text doc, text xpath, text toptag, text septag) returns text
-  evaluates xpath on doc, and returns the result wrapped in
-<toptag>...</toptag> and each result node wrapped in
-<septag></septag>. toptag and septag may be empty strings, in which
-case the respective tag will be omitted.
+xpath_nodeset(document,query) RETURNS text omits both tags.
+xpath_nodeset(document,query,itemtag) RETURNS text omits toptag.

-Example:

-Given a  table docstore:
+xpath_list(document,query,separator) RETURNS text

- Attribute |  Type   | Modifier 
-----------+---------+----------
- docid     | integer | 
- document  | text    | 
+This function returns multiple values separated by the specified
+separator, e.g. Value 1,Value 2,Value 3 if the separator is ','.

-containing documents such as (these are archaeological site
-descriptions, in case anyone is wondering):
+xpath_list(document,query) RETURNS text

-<?XML version="1.0"?>
-<site provider="Foundations" sitecode="ak97" version="1">
-   <name>Church Farm, Ashton Keynes</name>
-   <invtype>watching brief</invtype>
-   <location scheme="osgb">SU04209424</location>
-</site>
+This is a wrapper for the above function that uses ',' as the separator.

-one can type:

-select docid, 
-pgxml_xpath(document,'//site/name/text()','','') as sitename,
-pgxml_xpath(document,'//site/location/text()','','') as location
- from docstore;
- 
-and get as output:
+xpath_table
+-----------

- docid |               sitename               |  location  
-------+--------------------------------------+------------
-     1 | Church Farm, Ashton Keynes           | SU04209424
-     2 | Glebe Farm, Long Itchington          | SP41506500
-     3 | The Bungalow, Thames Lane, Cricklade | SU10229362
-(3 rows)
+This is a table function which evaluates a set of XPath queries on
+each of a set of documents and returns the results as a table. The
+primary key field from the original document table is returned as the
+first column of the result so that the resultset from xpath_table can
+be readily used in joins.

-or, to illustrate the use of the extra tags:
+The function itself takes 5 arguments, all text.

-select docid as id,
-pgxml_xpath(document,'//find/type/text()','set','findtype') 
-from docstore;
+xpath_table(key,document,relation,xpaths,criteria)

- id |                               pgxml_xpath                               
----+-------------------------------------------------------------------------
-  1 | <set></set>
-  2 | <set><findtype>Urn</findtype></set>
-  3 | <set><findtype>Pottery</findtype><findtype>Animal bone</findtype></set>
-(3 rows)
+key - the name of the "key" field - this is just a field to be used as
+the first column of the output table i.e. it identifies the record from
+which each output row came.

-Which produces a new, well-formed document. Note that document 1 had
-no matching instances, so the set returned contains no
-elements. document 2 has 1 matching element and document 3 has 2.
+document - the name of the field containing the XML document

-This is just scratching the surface because XPath allows all sorts of
-operations.
+relation - the name of the table or view containing the documents

-Note: I've only implemented the return of nodeset and string values so
-far. This covers (I think) many types of queries, however.
+xpaths - multiple xpath expressions separated by |

-John Gray <jgray@azuli.co.uk>  16 August 2001
+criteria - The contents of the where clause. This needs to be specified,
+so use "true" or "1=1" here if you want to process all the rows in the
+relation.

+NB These parameters (except the XPath strings) are just substituted
+into a plain SQL SELECT statement, so you have some flexibility - the
+statement is

+SELECT <key>,<document> FROM <relation> WHERE <criteria>
+
+so those parameters can be *anything* valid in those particular
+locations. The result from this SELECT needs to return exactly two
+columns (which it will unless you try to list multiple fields for key
+or document). Beware that this simplistic approach requires that you
+validate any user-supplied values to avoid SQL injection attacks.
+
+Using the function
+
+The function has to be used in a FROM expression. This gives the following
+form:
+
+SELECT * FROM
+xpath_table('article_id', 
+	'article_xml',
+	'articles', 
+	'/article/author|/article/pages|/article/title',
+	'date_entered > ''2003-01-01'' ') 
+AS t(article_id integer, author text, page_count integer, title text);
+
+The AS clause defines the names and types of the columns in the
+virtual table. If there are more XPath queries than result columns,
+the extra queries will be ignored. If there are more result columns
+than XPath queries, the extra columns will be NULL.
+
+Note that I've said in this example that pages is an integer.  The
+function deals internally with string representations, so when you say
+you want an integer in the output, it will take the string
+representation of the XPath result and use PostgreSQL input functions
+to transform it into an integer (or whatever type the AS clause
+requests). An error will result if it can't do this - for example if
+the result is empty - so you may wish to just stick to 'text' as the
+column type if you think your data has any problems.
+
+The select statement doesn't need to use * alone - it can reference the
+columns by name or join them to other tables. The function produces a
+virtual table with which you can perform any operation you wish (e.g.
+aggregation, joining, sorting etc). So we could also have:
+
+SELECT t.title, p.fullname, p.email 
+FROM xpath_table('article_id','article_xml','articles',
+            '/article/title|/article/author/@id',
+            'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
+            AS t(article_id integer, title text, author_id integer), 
+     tblPeopleInfo AS p 
+WHERE t.author_id = p.person_id;
+
+as a more complicated example. Of course, you could wrap all
+of this in a view for convenience.
+
+XSLT functions
+--------------
+
+The following functions are available if libxslt is installed (this is
+not currently detected automatically, so you will have to amend the
+Makefile)
+
+xslt_process(document,stylesheet,paramlist) RETURNS text
+
+This function applies the XSL stylesheet to the document and returns
+the transformed result. The paramlist is a list of parameter
+assignments to be used in the transformation, specified in the form
+'a=1,b=2'. Note that this is also proof-of-concept code and the
+parameter parsing is very simple-minded (e.g. parameter values cannot
+contain commas!)
+
+Also note that if either the document or stylesheet values do not
+begin with a < then they will be treated as URLs and libxslt will
+fetch them. It thus follows that you can use xslt_process as a means
+to fetch the contents of URLs - you should be aware of the security
+implications of this.
+
+There is also a two-parameter version of xslt_process which does not
+pass any parameters to the transformation.
+
+If you have any comments or suggestions, please do contact me at
+jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't
+guarantee a rapid response to your query!
--- a/contrib/xml/TODO
+++ b/contrib/xml/TODO
-PGXML TODO List
-===============
-
-Some of these items still require much more thought! Since the first
-release, the XPath support has improved (because I'm no longer using a
-homemade algorithm!).
-
-1. Performance considerations
-
-At present each document is parsed to produce the DOM tree on every query.
-
-Pros: 
-	Easy
-	No persistent memory or storage allocation for parsed trees
-		(libxml docs suggest representation of a document might
-		 be 4 times the size of the text)
-
-Cons:
-	Slow/ CPU intensive to parse.
-	Makes it difficult for PLs to apply libxml manipulations to create
-		new documents or amend existing ones.
-
-
-2. XQuery 
-
-I'm not sure if the addition of XQuery would be best as a function or
-as a new front-end parser. This is one to think about, but with a
-decent implementation of XPath, one of the prerequisites is covered.
-
-3. DOM Interfaces
-
-Expose more aspects of the DOM to user functions/ PLs. This would
-allow a procedure in a PL to run some queries and then use exposed
-interfaces to libxml to create an XML document out of the query
-results. I accept the argument that this might be more properly
-performed on the client side.
-
-4. Returning sets of documents from XPath queries.
-
-Although the current implementation allows you to amalgamate the
-returned results into a single document, it's quite possible that
-you'd like to use the returned set of nodes as a source for FROM.
- 
-Is there a good way to optimise/index the results of certain XPath
-operations to make them faster?:
-
-select docid, pgxml_xpath(document,'//site/location/text()','','') as location 
-where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
-
-and with multiple element occurences in a document?
-
-select d.docid, pgxml_xpath(d.document,'//site/location/text()','','') 
-from docstore d, 
-pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft 
-where ft.key = d.docid and ft.value ='Limekiln';
-
-pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
-return a set of two-element tuples (key,value) consisting of the value of
-returnkey, and the cdata value of the xpath. The XML document would be
-defined by relname and attrname.
-
-The pgxml_xpaths function could be the basis of a functional index,
-which could speed up the above query very substantially, working
-through the normal query planner mechanism.
-
-5. Return type support.
-
-Better support for returning e.g. numeric or boolean values. I need to
-get to grips with the returned data from libxml first.
-
- 
-John Gray <jgray@azuli.co.uk> 16 August 2001
-
-
-
-
-
-
--- a/contrib/xml/pgxml.c
+++ b/contrib/xml/pgxml.c
-/********************************************************
- * Interface code to parse an XML document using expat
- ********************************************************/
-
-#include "postgres.h"
-#include "fmgr.h"
-
-#include "expat.h"
-#include "pgxml.h"
-
-/* Memory management - we make expat use standard pg MM */
-
-XML_Memory_Handling_Suite mhs;
-
-/* passthrough functions (palloc is a macro) */
-
-static void *
-pgxml_palloc(size_t size)
-{
-	return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-	return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
-	return pfree(ptr);
-}
-
-static void
-pgxml_mhs_init()
-{
-	mhs.malloc_fcn = pgxml_palloc;
-	mhs.realloc_fcn = pgxml_repalloc;
-	mhs.free_fcn = pgxml_pfree;
-}
-
-static void
-pgxml_handler_init()
-{
-	/*
-	 * This code should set up the relevant handlers from  user-supplied
-	 * settings. Quite how these settings are made is another matter :)
-	 */
-}
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-	/* called as pgxml_parse(document) */
-	XML_Parser	p;
-	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
-	int32		docsize = VARSIZE(t) - VARHDRSZ;
-
-	pgxml_mhs_init();
-
-	pgxml_handler_init();
-
-	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
-	if (!p)
-	{
-		ereport(ERROR,
-				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-				 errmsg("could not create expat parser")));
-		PG_RETURN_NULL();		/* seems appropriate if we couldn't parse */
-	}
-
-	if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
-	{
-		/*
-		 * elog(WARNING, "Parse error at line %d:%s",
-		 * XML_GetCurrentLineNumber(p),
-		 * XML_ErrorString(XML_GetErrorCode(p)));
-		 */
-		XML_ParserFree(p);
-		PG_RETURN_BOOL(false);
-	}
-
-	XML_ParserFree(p);
-	PG_RETURN_BOOL(true);
-}
-
-/* XPath handling functions */
-
-/* XPath support here is for a very skeletal kind of XPath!
-   It was easy to program though... */
-
-/* This first is the core function that builds a result set. The
-   actual functions called by the user manipulate that result set
-   in various ways.
-*/
-
-static XPath_Results *
-build_xpath_results(text *doc, text *pathstr)
-{
-	XPath_Results *xpr;
-	char	   *res;
-	pgxml_udata *udata;
-	XML_Parser	p;
-	int32		docsize;
-
-	xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
-	memset((void *) xpr, 0, sizeof(XPath_Results));
-	xpr->rescount = 0;
-
-	docsize = VARSIZE(doc) - VARHDRSZ;
-
-	/* res isn't going to be the real return type, it is just a buffer */
-
-	res = (char *) palloc(docsize);
-	memset((void *) res, 0, docsize);
-
-	xpr->resbuf = res;
-
-	udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
-	memset((void *) udata, 0, sizeof(pgxml_udata));
-
-	udata->currentpath[0] = '\0';
-	udata->textgrab = 0;
-
-	udata->path = (char *) palloc(VARSIZE(pathstr));
-	memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
-
-	udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
-
-	udata->resptr = res;
-	udata->reslen = 0;
-
-	udata->xpres = xpr;
-
-	/* Now fire up the parser */
-	pgxml_mhs_init();
-
-	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
-	if (!p)
-	{
-		ereport(ERROR,
-				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-				 errmsg("could not create expat parser")));
-		pfree(xpr);
-		pfree(udata->path);
-		pfree(udata);
-		pfree(res);
-		return NULL;
-	}
-	XML_SetUserData(p, (void *) udata);
-
-	/* Set the handlers */
-
-	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
-	XML_SetCharacterDataHandler(p, pgxml_charhandler);
-
-	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
-	{
-		/*
-		 * elog(WARNING, "Parse error at line %d:%s",
-		 * XML_GetCurrentLineNumber(p),
-		 * XML_ErrorString(XML_GetErrorCode(p)));
-		 */
-		XML_ParserFree(p);
-		pfree(xpr);
-		pfree(udata->path);
-		pfree(udata);
-
-		return NULL;
-	}
-
-	pfree(udata->path);
-	pfree(udata);
-	XML_ParserFree(p);
-	return xpr;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
-	/* called as pgxml_xpath(document,pathstr, index) for the moment */
-
-	XPath_Results *xpresults;
-	text	   *restext;
-
-	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
-	text	   *t2 = PG_GETARG_TEXT_P(1);
-	int32		ind = PG_GETARG_INT32(2) - 1;
-
-	xpresults = build_xpath_results(t, t2);
-
-	/*
-	 * This needs to be changed depending on the mechanism for returning
-	 * our set of results.
-	 */
-
-	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
-		PG_RETURN_NULL();
-
-	if (ind >= (xpresults->rescount))
-		PG_RETURN_NULL();
-
-	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
-	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
-
-	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
-
-	pfree(xpresults->resbuf);
-	pfree(xpresults);
-
-	PG_RETURN_TEXT_P(restext);
-}
-
-
-static void
-pgxml_pathcompare(void *userData)
-{
-	char	   *matchpos;
-
-	matchpos = strstr(UD->currentpath, UD->path);
-
-	if (matchpos == NULL)
-	{							/* Should we have more logic here ? */
-		if (UD->textgrab)
-		{
-			UD->textgrab = 0;
-			pgxml_finalisegrabbedtext(userData);
-		}
-		return;
-	}
-
-	/*
-	 * OK, we have a match of some sort. Now we need to check that our
-	 * match is anchored to the *end* of the string AND that it is
-	 * immediately preceded by a '/'
-	 */
-
-	/*
-	 * This test wouldn't work if strlen (UD->path) overran the length of
-	 * the currentpath, but that's not possible because we got a match!
-	 */
-
-	if ((matchpos + strlen(UD->path))[0] == '\0')
-	{
-		if ((UD->path)[0] == '/')
-		{
-			if (matchpos == UD->currentpath)
-				UD->textgrab = 1;
-		}
-		else
-		{
-			if ((matchpos - 1)[0] == '/')
-				UD->textgrab = 1;
-		}
-	}
-}
-
-static void
-pgxml_starthandler(void *userData, const XML_Char * name,
-				   const XML_Char ** atts)
-{
-
-	char		sepstr[] = "/";
-
-	if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
-		elog(WARNING, "path too long");
-	else
-	{
-		strncat(UD->currentpath, sepstr, 1);
-		strcat(UD->currentpath, name);
-	}
-	if (UD->textgrab)
-	{
-		/*
-		 * Depending on user preference, should we "reconstitute" the
-		 * element into the result text?
-		 */
-	}
-	else
-		pgxml_pathcompare(userData);
-}
-
-static void
-pgxml_endhandler(void *userData, const XML_Char * name)
-{
-	/*
-	 * Start by removing the current element off the end of the
-	 * currentpath
-	 */
-
-	char	   *sepptr;
-
-	sepptr = strrchr(UD->currentpath, '/');
-	if (sepptr == NULL)
-	{
-		/* internal error */
-		elog(ERROR, "did not find '/'");
-		sepptr = UD->currentpath;
-	}
-	if (strcmp(name, sepptr + 1) != 0)
-	{
-		elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
-		/* unmatched entry, so do nothing */
-	}
-	else
-	{
-		sepptr[0] = '\0';		/* Chop that element off the end */
-	}
-
-	if (UD->textgrab)
-		pgxml_pathcompare(userData);
-
-}
-
-static void
-pgxml_charhandler(void *userData, const XML_Char * s, int len)
-{
-	if (UD->textgrab)
-	{
-		if (len > 0)
-		{
-			memcpy(UD->resptr, s, len);
-			UD->resptr += len;
-			UD->reslen += len;
-		}
-	}
-}
-
-/* Should I be using PG list types here? */
-
-static void
-pgxml_finalisegrabbedtext(void *userData)
-{
-	/* In res/reslen, we have a single result. */
-	UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
-	UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
-	UD->reslen = 0;
-	UD->xpres->rescount++;
-
-	/*
-	 * This effectively concatenates all the results together but we do
-	 * know where one ends and the next begins
-	 */
-}
--- a/contrib/xml/pgxml.h
+++ b/contrib/xml/pgxml.h
-/* Header for pg xml parser interface */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static void pgxml_mhs_init();
-static void pgxml_handler_init();
-Datum		pgxml_parse(PG_FUNCTION_ARGS);
-Datum		pgxml_xpath(PG_FUNCTION_ARGS);
-static void pgxml_starthandler(void *userData, const XML_Char * name,
-				   const XML_Char ** atts);
-static void pgxml_endhandler(void *userData, const XML_Char * name);
-static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
-static void pgxml_pathcompare(void *userData);
-static void pgxml_finalisegrabbedtext(void *userData);
-
-#define MAXPATHLENGTH 512
-#define MAXRESULTS 100
-
-
-typedef struct
-{
-	int			rescount;
-	char	   *results[MAXRESULTS];
-	int32		reslens[MAXRESULTS];
-	char	   *resbuf;			/* pointer to the result buffer for pfree */
-}	XPath_Results;
-
-
-
-typedef struct
-{
-	char		currentpath[MAXPATHLENGTH];
-	char	   *path;
-	int			textgrab;
-	char	   *resptr;
-	int32		reslen;
-	XPath_Results *xpres;
-}	pgxml_udata;
-
-
-#define UD ((pgxml_udata *) userData)
--- a/contrib/xml/pgxml.sql.in
+++ b/contrib/xml/pgxml.sql.in
-- SQL for XML parser
+--SQL for XML parser

-- Adjust this setting to control where the objects get created.
-SET search_path TO public;
+CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);

-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);

-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- List function
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
+	AS 'MODULE_PATHNAME'
+	LANGUAGE 'c' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text 
+AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
+
+
+
+-- Wrapper functions for nodeset where no tags needed.
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
+
+-- Table function
+
+CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
+	AS 'MODULE_PATHNAME'
+	LANGUAGE 'c' WITH (isStrict);
+
+-- XSLT functions
+-- Delete from here to the end of the file if you are not compiling with
+-- XSLT support.
+
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- the function checks for the correct argument count
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
--- a/contrib/xml/pgxml_dom.c
+++ b/contrib/xml/pgxml_dom.c
-/* Parser interface for DOM-based parser (libxml) rather than
-   stream-based SAX-type parser */
-
-#include "postgres.h"
-#include "fmgr.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-
-/* declarations */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
-
-static void pgxml_parser_init();
-
-static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
-				   xmlChar * toptagname, xmlChar * septagname,
-				   int format);
-
-static xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-
-Datum		pgxml_parse(PG_FUNCTION_ARGS);
-Datum		pgxml_xpath(PG_FUNCTION_ARGS);
-
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
-   currently macros, and the others might become so...) */
-
-static void *
-pgxml_palloc(size_t size)
-{
-	return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-	return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
-	return pfree(ptr);
-}
-
-static char *
-pgxml_pstrdup(const char *string)
-{
-	return pstrdup(string);
-}
-
-static void
-pgxml_parser_init()
-{
-	/*
-	 * This code should also set parser settings from  user-supplied info.
-	 * Quite how these settings are made is another matter :)
-	 */
-
-	xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
-	xmlInitParser();
-
-}
-
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-	/* called as pgxml_parse(document) */
-	xmlDocPtr	doctree;
-	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
-	int32		docsize = VARSIZE(t) - VARHDRSZ;
-
-	pgxml_parser_init();
-
-	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-	if (doctree == NULL)
-	{
-		xmlCleanupParser();
-		PG_RETURN_BOOL(false);	/* i.e. not well-formed */
-	}
-	xmlCleanupParser();
-	xmlFreeDoc(doctree);
-	PG_RETURN_BOOL(true);
-}
-
-static xmlChar
-*
-pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
-				   xmlDocPtr doc,
-				   xmlChar * toptagname,
-				   xmlChar * septagname,
-				   int format)
-{
-	/* Function translates a nodeset into a text representation */
-
-	/*
-	 * iterates over each node in the set and calls xmlNodeDump to write
-	 * it to an xmlBuffer -from which an xmlChar * string is returned.
-	 */
-	/* each representation is surrounded by <tagname> ... </tagname> */
-	/* if format==0, add a newline between nodes?? */
-
-	xmlBufferPtr buf;
-	xmlChar    *result;
-	int			i;
-
-	buf = xmlBufferCreate();
-
-	if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
-	{
-		xmlBufferWriteChar(buf, "<");
-		xmlBufferWriteCHAR(buf, toptagname);
-		xmlBufferWriteChar(buf, ">");
-	}
-	if (nodeset != NULL)
-	{
-		for (i = 0; i < nodeset->nodeNr; i++)
-		{
-			if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
-			{
-				xmlBufferWriteChar(buf, "<");
-				xmlBufferWriteCHAR(buf, septagname);
-				xmlBufferWriteChar(buf, ">");
-			}
-			xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2));
-
-			if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
-			{
-				xmlBufferWriteChar(buf, "</");
-				xmlBufferWriteCHAR(buf, septagname);
-				xmlBufferWriteChar(buf, ">");
-			}
-			if (format)
-				xmlBufferWriteChar(buf, "\n");
-		}
-	}
-
-	if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
-	{
-		xmlBufferWriteChar(buf, "</");
-		xmlBufferWriteCHAR(buf, toptagname);
-		xmlBufferWriteChar(buf, ">");
-	}
-	result = xmlStrdup(buf->content);
-	xmlBufferFree(buf);
-	return result;
-}
-
-static xmlChar *
-pgxml_texttoxmlchar(text *textstring)
-{
-	xmlChar    *res;
-	int32		txsize;
-
-	txsize = VARSIZE(textstring) - VARHDRSZ;
-	res = (xmlChar *) palloc(txsize + 1);
-	memcpy((char *) res, VARDATA(textstring), txsize);
-	res[txsize] = '\0';
-	return res;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
-	xmlDocPtr	doctree;
-	xmlXPathContextPtr ctxt;
-	xmlXPathObjectPtr res;
-	xmlChar    *xpath,
-			   *xpresstr,
-			   *toptag,
-			   *septag;
-	xmlXPathCompExprPtr comppath;
-
-	int32		docsize,
-				ressize;
-	text	   *t,
-			   *xpres;
-
-	t = PG_GETARG_TEXT_P(0);	/* document buffer */
-	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
-	toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-	septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
-
-	docsize = VARSIZE(t) - VARHDRSZ;
-
-	pgxml_parser_init();
-
-	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-	if (doctree == NULL)
-	{							/* not well-formed */
-		xmlCleanupParser();
-		PG_RETURN_NULL();
-	}
-
-	ctxt = xmlXPathNewContext(doctree);
-	ctxt->node = xmlDocGetRootElement(doctree);
-
-	/* compile the path */
-	comppath = xmlXPathCompile(xpath);
-	if (comppath == NULL)
-	{
-		elog(WARNING, "XPath syntax error");
-		xmlFreeDoc(doctree);
-		pfree((void *) xpath);
-		xmlCleanupParser();
-		PG_RETURN_NULL();
-	}
-
-	/* Now evaluate the path expression. */
-	res = xmlXPathCompiledEval(comppath, ctxt);
-	xmlXPathFreeCompExpr(comppath);
-
-	if (res == NULL)
-	{
-		xmlFreeDoc(doctree);
-		pfree((void *) xpath);
-		xmlCleanupParser();
-		PG_RETURN_NULL();		/* seems appropriate */
-	}
-	/* now we dump this node, ?surrounding by tags? */
-	/* To do this, we look first at the type */
-	switch (res->type)
-	{
-		case XPATH_NODESET:
-			xpresstr = pgxmlNodeSetToText(res->nodesetval,
-										  doctree,
-										  toptag, septag, 0);
-			break;
-		case XPATH_STRING:
-			xpresstr = xmlStrdup(res->stringval);
-			break;
-		default:
-			elog(WARNING, "Unsupported XQuery result: %d", res->type);
-			xpresstr = xmlStrdup("<unsupported/>");
-	}
-
-
-	/* Now convert this result back to text */
-	ressize = strlen(xpresstr);
-	xpres = (text *) palloc(ressize + VARHDRSZ);
-	memcpy(VARDATA(xpres), xpresstr, ressize);
-	VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
-
-	/* Free various storage */
-	xmlFreeDoc(doctree);
-	pfree((void *) xpath);
-	xmlFree(xpresstr);
-	xmlCleanupParser();
-	PG_RETURN_TEXT_P(xpres);
-}
--- a/contrib/xml/pgxml_dom.sql.in
+++ b/contrib/xml/pgxml_dom.sql.in
-- SQL for XML parser
-
-- Adjust this setting to control where the objects get created.
-SET search_path TO public;
-
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
-
-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
--- a/contrib/xml/xpath.c
+++ b/contrib/xml/xpath.c
--- a/contrib/xml/xslt_proc.c
+++ b/contrib/xml/xslt_proc.c
+/* XSLT processing functions (requiring libxslt) */
+/* John Gray, for Torchbox 2003-04-01 */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* libxslt includes */
+
+#include <libxslt/xslt.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/transform.h>
+#include <libxslt/xsltutils.h>
+
+
+/* declarations to come from xpath.c */
+
+extern void elog_error(int level, char *explain, int force);
+extern void pgxml_parser_init();
+extern xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
+
+/* local defs */
+static void parse_params(const char **params, text *paramstr);
+
+Datum xslt_process(PG_FUNCTION_ARGS);
+
+
+#define MAXPARAMS 20
+
+PG_FUNCTION_INFO_V1(xslt_process);
+
+/*
+ * xslt_process(document, stylesheet [, params]) returns text
+ *
+ * Applies an XSLT stylesheet to an XML document and returns the serialised
+ * result as text.
+ *
+ * Arguments:
+ *   0: the document   - treated as literal XML if its first byte is '<',
+ *                       otherwise handed to xmlParseFile() as a file name/URL.
+ *   1: the stylesheet - same literal-or-location rule as the document.
+ *   2: (optional) parameter string of the form "name=value,name=value",
+ *      split by parse_params().
+ *
+ * Returns SQL NULL if the result cannot be serialised, or if elog_error()
+ * returns after a parse/transform failure.
+ *
+ * NOTE(review): because non-literal arguments are passed straight to
+ * xmlParseFile()/xsltParseStylesheetFile(), any caller of this function can
+ * make the backend read arbitrary files or fetch URLs.  Restrict access to
+ * the function accordingly.
+ */
+Datum xslt_process(PG_FUNCTION_ARGS) {
+
+
+  const char *params[MAXPARAMS + 1]; /* +1 for the terminator */
+  xsltStylesheetPtr stylesheet = NULL;
+  xmlDocPtr doctree;
+  xmlDocPtr restree;
+  xmlDocPtr ssdoc = NULL;
+  xmlChar *resstr = NULL;
+  int resstat;
+  int reslen = 0;
+
+  text *doct  = PG_GETARG_TEXT_P(0);
+  text *ssheet  = PG_GETARG_TEXT_P(1);
+  text *paramstr;
+  text *tres;
+
+
+  if (fcinfo->nargs == 3)
+    {
+      paramstr = PG_GETARG_TEXT_P(2);
+      parse_params(params,paramstr);
+    }
+  else /* No parameters */
+    {
+      params[0] = NULL;
+    }
+
+  /* Setup parser */
+  pgxml_parser_init();
+
+  /*
+   * Check to see if document is a file or a literal.  The length test
+   * keeps us from inspecting a byte that is not there when the argument
+   * is an empty string.
+   */
+
+  if (VARSIZE(doct) > VARHDRSZ && VARDATA(doct)[0] == '<')
+    {
+      doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct)-VARHDRSZ);
+    }
+  else
+    {
+      doctree = xmlParseFile(GET_STR(doct));
+    }
+
+  if (doctree == NULL)
+    {
+      xmlCleanupParser();
+      elog_error(ERROR,"Error parsing XML document",0);
+
+      PG_RETURN_NULL();
+    }
+
+  /* Same for stylesheet */
+  if (VARSIZE(ssheet) > VARHDRSZ && VARDATA(ssheet)[0] == '<')
+    {
+      ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
+                             VARSIZE(ssheet)-VARHDRSZ);
+      if (ssdoc == NULL)
+        {
+          xmlFreeDoc(doctree);
+          xmlCleanupParser();
+          elog_error(ERROR,"Error parsing stylesheet as XML document",0);
+          PG_RETURN_NULL();
+        }
+
+      /* On success the stylesheet takes ownership of ssdoc */
+      stylesheet = xsltParseStylesheetDoc(ssdoc);
+    }
+  else
+    {
+      stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
+    }
+
+
+  if (stylesheet == NULL)
+    {
+      /*
+       * When xsltParseStylesheetDoc() fails it does not free the document
+       * it was given, so we still own ssdoc here and must release it.
+       */
+      if (ssdoc != NULL)
+        xmlFreeDoc(ssdoc);
+      xmlFreeDoc(doctree);
+      xsltCleanupGlobals();
+      xmlCleanupParser();
+      elog_error(ERROR,"Failed to parse stylesheet",0);
+      PG_RETURN_NULL();
+    }
+
+  restree = xsltApplyStylesheet(stylesheet, doctree, params);
+
+  if (restree == NULL)
+    {
+      /* The transformation failed; there is no result tree to serialise */
+      xsltFreeStylesheet(stylesheet);
+      xmlFreeDoc(doctree);
+      xsltCleanupGlobals();
+      xmlCleanupParser();
+      elog_error(ERROR,"Failed to apply stylesheet",0);
+      PG_RETURN_NULL();
+    }
+
+  resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
+
+  /* Freeing the stylesheet also frees ssdoc, if there was one */
+  xsltFreeStylesheet(stylesheet);
+  xmlFreeDoc(restree);
+  xmlFreeDoc(doctree);
+
+  xsltCleanupGlobals();
+  xmlCleanupParser();
+
+  if (resstat < 0) {
+    if (resstr != NULL)
+      xmlFree(resstr);
+    PG_RETURN_NULL();
+  }
+
+  /* Copy the libxml-allocated string into a palloc'd text value */
+  tres = palloc(reslen + VARHDRSZ);
+  if (reslen > 0)
+    memcpy(VARDATA(tres),resstr,reslen);
+  VARATT_SIZEP(tres) = reslen + VARHDRSZ;
+
+  /* resstr may legitimately be NULL when the result document is empty */
+  if (resstr != NULL)
+    xmlFree(resstr);
+
+  PG_RETURN_TEXT_P(tres);
+}
+
+
+/*
+ * parse_params - split a "name=value,name=value" string into the
+ * NULL-terminated name/value pointer array that is passed to
+ * xsltApplyStylesheet().
+ *
+ * params   - output array; the caller must provide room for MAXPARAMS + 1
+ *            entries.  Even slots receive names, odd slots receive values,
+ *            and the slot after the last value is set to NULL.
+ * paramstr - the parameter string as a text datum.
+ *
+ * The text is converted to a C string with GET_STR and that copy is split in
+ * place: each '=' and ',' separator is overwritten with '\0' and the array
+ * entries point into the copy.  Parsing stops at the first name that has no
+ * '=' after it; that incomplete item is dropped.  At most MAXPARAMS / 2
+ * pairs are stored and anything beyond that is ignored.
+ */
+static void
+parse_params(const char **params, text *paramstr)
+{
+  char *pos = GET_STR(paramstr);
+  int   nstored = 0;
+
+  /* Only start a pair while there is room for both its name and value */
+  while (nstored + 1 < MAXPARAMS)
+    {
+      char *eq = strchr(pos, '=');
+
+      if (eq == NULL)
+        break;                  /* no further name=value item */
+
+      *eq = '\0';
+      params[nstored++] = pos;          /* name */
+      params[nstored++] = eq + 1;       /* value */
+
+      pos = strchr(eq + 1, ',');
+      if (pos == NULL)
+        break;                  /* that was the last item */
+      *pos++ = '\0';
+    }
+
+  /*
+   * Always terminate the array.  nstored never exceeds MAXPARAMS, so this
+   * write stays inside the MAXPARAMS + 1 slots supplied by the caller,
+   * including the case where every slot was filled.
+   */
+  params[nstored] = NULL;
+}