diff --git a/contrib/xml/Makefile b/contrib/xml/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d5526e1c8149028f3c120273b134721459879ed8 --- /dev/null +++ b/contrib/xml/Makefile @@ -0,0 +1,13 @@ +# $Header: /cvsroot/pgsql/contrib/xml/Attic/Makefile,v 1.7 2004/03/05 04:10:11 momjian Exp $ + +subdir = contrib/xml +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + +MODULE_big = pgxml_dom +OBJS = pgxml_dom.o +SHLIB_LINK = -lxml2 +DATA_built = pgxml_dom.sql +DOCS = README.pgxml + +include $(top_srcdir)/contrib/contrib-global.mk diff --git a/contrib/xml/README.pgxml b/contrib/xml/README.pgxml new file mode 100644 index 0000000000000000000000000000000000000000..6c714f74e120f467f9d07842084da2b02707afac --- /dev/null +++ b/contrib/xml/README.pgxml @@ -0,0 +1,118 @@ +This package contains some simple routines for manipulating XML +documents stored in PostgreSQL. This is a work-in-progress and +somewhat basic at the moment (see the file TODO for some outline of +what remains to be done). + +At present, two modules (based on different XML handling libraries) +are provided. + +Prerequisite: + +pgxml.c: +expat parser 1.95.0 or newer (http://expat.sourceforge.net) + +or + +pgxml_dom.c: +libxml2 (http://xmlsoft.org) + +The libxml2 version provides more complete XPath functionality, and +seems like a good way to go. I've left the old versions in there for +comparison. + +Compiling and loading: +---------------------- + +The Makefile only builds the libxml2 version. + +To compile, just type make. + +Then you can use psql to load the two function definitions: +\i pgxml_dom.sql + + +Function documentation and usage: +--------------------------------- + +pgxml_parse(text) returns bool + parses the provided text and returns true or false if it is +well-formed or not. It returns NULL if the parser couldn't be +created for any reason. 
+ +pgxml_xpath (XQuery functions) - differs between the versions: + +pgxml.c (expat version) has: + +pgxml_xpath(text doc, text xpath, int n) returns text + parses doc and returns the cdata of the nth occurrence of +the "simple path" entry. + +However, the remainder of this document will cover the pgxml_dom.c version. + +pgxml_xpath(text doc, text xpath, text toptag, text septag) returns text + evaluates xpath on doc, and returns the result wrapped in +<toptag>...</toptag> and each result node wrapped in +<septag>...</septag>. toptag and septag may be empty strings, in which +case the respective tag will be omitted. + +Example: + +Given a table docstore: + + Attribute | Type | Modifier +-----------+---------+---------- + docid | integer | + document | text | + +containing documents such as (these are archaeological site +descriptions, in case anyone is wondering): + + + Church Farm, Ashton Keynes + watching brief + SU04209424 + + +one can type: + +select docid, +pgxml_xpath(document,'//site/name/text()','','') as sitename, +pgxml_xpath(document,'//site/location/text()','','') as location + from docstore; + +and get as output: + + docid | sitename | location +-------+--------------------------------------+------------ + 1 | Church Farm, Ashton Keynes | SU04209424 + 2 | Glebe Farm, Long Itchington | SP41506500 + 3 | The Bungalow, Thames Lane, Cricklade | SU10229362 +(3 rows) + +or, to illustrate the use of the extra tags: + +select docid as id, +pgxml_xpath(document,'//find/type/text()','set','findtype') +from docstore; + + id | pgxml_xpath +----+------------------------------------------------------------------------- + 1 | <set></set> + 2 | <set><findtype>Urn</findtype></set> + 3 | <set><findtype>Pottery</findtype><findtype>Animal bone</findtype></set> +(3 rows) + +Which produces a new, well-formed document. Note that document 1 had +no matching instances, so the set returned contains no +elements. Document 2 has 1 matching element and document 3 has 2. + +This is just scratching the surface because XPath allows all sorts of +operations. 
+ +Note: I've only implemented the return of nodeset and string values so +far. This covers (I think) many types of queries, however. + +John Gray 16 August 2001 + + diff --git a/contrib/xml/pgxml.sql.in b/contrib/xml/pgxml.sql.in new file mode 100644 index 0000000000000000000000000000000000000000..514643b936e69c7cb46b309c28a8f7d12bb22403 --- /dev/null +++ b/contrib/xml/pgxml.sql.in @@ -0,0 +1,10 @@ +-- SQL for XML parser + +-- Adjust this setting to control where the objects get created. +SET search_path TO public; + +CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean + AS 'MODULE_PATHNAME' LANGUAGE c STRICT; + +CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE c STRICT;