mirror of https://github.com/postgres/postgres
parent
adca025c9e
commit
31f4b59a46
@ -0,0 +1,78 @@ |
|||||||
|
PGXML TODO List |
||||||
|
=============== |
||||||
|
|
||||||
|
Some of these items still require much more thought! Since the first |
||||||
|
release, the XPath support has improved (because I'm no longer using a |
||||||
|
homemade algorithm!). |
||||||
|
|
||||||
|
1. Performance considerations |
||||||
|
|
||||||
|
At present each document is parsed to produce the DOM tree on every query. |
||||||
|
|
||||||
|
Pros: |
||||||
|
Easy |
||||||
|
No persistent memory or storage allocation for parsed trees |
||||||
|
(libxml docs suggest representation of a document might |
||||||
|
be 4 times the size of the text) |
||||||
|
|
||||||
|
Cons: |
||||||
|
Slow/ CPU intensive to parse. |
||||||
|
Makes it difficult for PLs to apply libxml manipulations to create |
||||||
|
new documents or amend existing ones. |
||||||
|
|
||||||
|
|
||||||
|
2. XQuery |
||||||
|
|
||||||
|
I'm not sure if the addition of XQuery would be best as a function or |
||||||
|
as a new front-end parser. This is one to think about, but with a |
||||||
|
decent implementation of XPath, one of the prerequisites is covered. |
||||||
|
|
||||||
|
3. DOM Interfaces |
||||||
|
|
||||||
|
Expose more aspects of the DOM to user functions/ PLs. This would |
||||||
|
allow a procedure in a PL to run some queries and then use exposed |
||||||
|
interfaces to libxml to create an XML document out of the query |
||||||
|
results. I accept the argument that this might be more properly |
||||||
|
performed on the client side. |
||||||
|
|
||||||
|
4. Returning sets of documents from XPath queries. |
||||||
|
|
||||||
|
Although the current implementation allows you to amalgamate the |
||||||
|
returned results into a single document, it's quite possible that |
||||||
|
you'd like to use the returned set of nodes as a source for FROM. |
||||||
|
|
||||||
|
Is there a good way to optimise/index the results of certain XPath |
||||||
|
operations to make them faster?: |
||||||
|
|
||||||
|
select docid, pgxml_xpath(document,'//site/location/text()','','') as location
from docstore
||||||
|
where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm'; |
||||||
|
|
||||||
|
and with multiple element occurrences in a document?
||||||
|
|
||||||
|
select d.docid, pgxml_xpath(d.document,'//site/location/text()','','') |
||||||
|
from docstore d, |
||||||
|
pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft |
||||||
|
where ft.key = d.docid and ft.value ='Limekiln'; |
||||||
|
|
||||||
|
pgxml_xpaths params are relname, attrname, xpath, returnkey. It would |
||||||
|
return a set of two-element tuples (key,value) consisting of the value of |
||||||
|
returnkey, and the cdata value of the xpath. The XML document would be |
||||||
|
defined by relname and attrname. |
||||||
|
|
||||||
|
The pgxml_xpaths function could be the basis of a functional index, |
||||||
|
which could speed up the above query very substantially, working |
||||||
|
through the normal query planner mechanism. |
||||||
|
|
||||||
|
5. Return type support. |
||||||
|
|
||||||
|
Better support for returning e.g. numeric or boolean values. I need to |
||||||
|
get to grips with the returned data from libxml first. |
||||||
|
|
||||||
|
|
||||||
|
John Gray <jgray@azuli.co.uk> 16 August 2001 |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,352 @@ |
|||||||
|
/********************************************************
|
||||||
|
* Interface code to parse an XML document using expat |
||||||
|
********************************************************/ |
||||||
|
|
||||||
|
#include "postgres.h" |
||||||
|
#include "fmgr.h" |
||||||
|
|
||||||
|
#include "expat.h" |
||||||
|
#include "pgxml.h" |
||||||
|
|
||||||
|
/* Memory management - we make expat use standard pg MM */ |
||||||
|
|
||||||
|
/*
 * Allocator callbacks handed to expat through XML_ParserCreate_MM().  The
 * members are (re)assigned by pgxml_mhs_init() before every parser creation.
 * Kept file-local so the loadable module does not export such a generic
 * symbol name; no user outside this file is visible here.
 */
static XML_Memory_Handling_Suite mhs;
||||||
|
|
||||||
|
/* passthrough functions (palloc is a macro) */ |
||||||
|
|
||||||
|
/*
 * malloc-shaped entry point for expat's memory suite.  palloc is a macro, so
 * it cannot be stored in a function pointer itself; this wrapper can.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk;

	chunk = palloc(size);
	return chunk;
}
||||||
|
|
||||||
|
/*
 * realloc-shaped entry point for expat's memory suite.
 *
 * A realloc-style callback may legitimately be called with a NULL pointer,
 * in which case it must behave like an allocation.  repalloc() does not
 * accept NULL, so that case is routed to palloc() instead.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
||||||
|
|
||||||
|
/*
 * free-shaped entry point for expat's memory suite.
 *
 * free()-style callers are allowed to pass NULL, but pfree() is not; NULL is
 * therefore ignored here.  (The previous body also used "return expr;" in a
 * void function, which ISO C does not permit.)
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_mhs_init() |
||||||
|
{ |
||||||
|
mhs.malloc_fcn = pgxml_palloc; |
||||||
|
mhs.realloc_fcn = pgxml_repalloc; |
||||||
|
mhs.free_fcn = pgxml_pfree; |
||||||
|
} |
||||||
|
|
||||||
|
/*
 * Placeholder: intended to install expat handlers according to user-supplied
 * settings.  How those settings would be provided is still undecided, so the
 * function currently does nothing.
 */
static void
pgxml_handler_init(void)
{
}
||||||
|
|
||||||
|
/* Returns true if document is well-formed */ |
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_parse); |
||||||
|
|
||||||
|
Datum |
||||||
|
pgxml_parse(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
/* called as pgxml_parse(document) */ |
||||||
|
XML_Parser p; |
||||||
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */ |
||||||
|
int32 docsize = VARSIZE(t) - VARHDRSZ; |
||||||
|
|
||||||
|
pgxml_mhs_init(); |
||||||
|
|
||||||
|
pgxml_handler_init(); |
||||||
|
|
||||||
|
p = XML_ParserCreate_MM(NULL, &mhs, NULL); |
||||||
|
if (!p) |
||||||
|
{ |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), |
||||||
|
errmsg("could not create expat parser"))); |
||||||
|
PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */ |
||||||
|
} |
||||||
|
|
||||||
|
if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1)) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* elog(WARNING, "Parse error at line %d:%s", |
||||||
|
* XML_GetCurrentLineNumber(p), |
||||||
|
* XML_ErrorString(XML_GetErrorCode(p))); |
||||||
|
*/ |
||||||
|
XML_ParserFree(p); |
||||||
|
PG_RETURN_BOOL(false); |
||||||
|
} |
||||||
|
|
||||||
|
XML_ParserFree(p); |
||||||
|
PG_RETURN_BOOL(true); |
||||||
|
} |
||||||
|
|
||||||
|
/* XPath handling functions */ |
||||||
|
|
||||||
|
/* XPath support here is for a very skeletal kind of XPath!
|
||||||
|
It was easy to program though... */ |
||||||
|
|
||||||
|
/* This first is the core function that builds a result set. The
|
||||||
|
actual functions called by the user manipulate that result set |
||||||
|
in various ways. |
||||||
|
*/ |
||||||
|
|
||||||
|
static XPath_Results * |
||||||
|
build_xpath_results(text *doc, text *pathstr) |
||||||
|
{ |
||||||
|
XPath_Results *xpr; |
||||||
|
char *res; |
||||||
|
pgxml_udata *udata; |
||||||
|
XML_Parser p; |
||||||
|
int32 docsize; |
||||||
|
|
||||||
|
xpr = (XPath_Results *) palloc((sizeof(XPath_Results))); |
||||||
|
memset((void *) xpr, 0, sizeof(XPath_Results)); |
||||||
|
xpr->rescount = 0; |
||||||
|
|
||||||
|
docsize = VARSIZE(doc) - VARHDRSZ; |
||||||
|
|
||||||
|
/* res isn't going to be the real return type, it is just a buffer */ |
||||||
|
|
||||||
|
res = (char *) palloc(docsize); |
||||||
|
memset((void *) res, 0, docsize); |
||||||
|
|
||||||
|
xpr->resbuf = res; |
||||||
|
|
||||||
|
udata = (pgxml_udata *) palloc((sizeof(pgxml_udata))); |
||||||
|
memset((void *) udata, 0, sizeof(pgxml_udata)); |
||||||
|
|
||||||
|
udata->currentpath[0] = '\0'; |
||||||
|
udata->textgrab = 0; |
||||||
|
|
||||||
|
udata->path = (char *) palloc(VARSIZE(pathstr)); |
||||||
|
memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ); |
||||||
|
|
||||||
|
udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0'; |
||||||
|
|
||||||
|
udata->resptr = res; |
||||||
|
udata->reslen = 0; |
||||||
|
|
||||||
|
udata->xpres = xpr; |
||||||
|
|
||||||
|
/* Now fire up the parser */ |
||||||
|
pgxml_mhs_init(); |
||||||
|
|
||||||
|
p = XML_ParserCreate_MM(NULL, &mhs, NULL); |
||||||
|
if (!p) |
||||||
|
{ |
||||||
|
ereport(ERROR, |
||||||
|
(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), |
||||||
|
errmsg("could not create expat parser"))); |
||||||
|
pfree(xpr); |
||||||
|
pfree(udata->path); |
||||||
|
pfree(udata); |
||||||
|
pfree(res); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
XML_SetUserData(p, (void *) udata); |
||||||
|
|
||||||
|
/* Set the handlers */ |
||||||
|
|
||||||
|
XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler); |
||||||
|
XML_SetCharacterDataHandler(p, pgxml_charhandler); |
||||||
|
|
||||||
|
if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1)) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* elog(WARNING, "Parse error at line %d:%s", |
||||||
|
* XML_GetCurrentLineNumber(p), |
||||||
|
* XML_ErrorString(XML_GetErrorCode(p))); |
||||||
|
*/ |
||||||
|
XML_ParserFree(p); |
||||||
|
pfree(xpr); |
||||||
|
pfree(udata->path); |
||||||
|
pfree(udata); |
||||||
|
|
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
pfree(udata->path); |
||||||
|
pfree(udata); |
||||||
|
XML_ParserFree(p); |
||||||
|
return xpr; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_xpath); |
||||||
|
|
||||||
|
Datum |
||||||
|
pgxml_xpath(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
/* called as pgxml_xpath(document,pathstr, index) for the moment */ |
||||||
|
|
||||||
|
XPath_Results *xpresults; |
||||||
|
text *restext; |
||||||
|
|
||||||
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */ |
||||||
|
text *t2 = PG_GETARG_TEXT_P(1); |
||||||
|
int32 ind = PG_GETARG_INT32(2) - 1; |
||||||
|
|
||||||
|
xpresults = build_xpath_results(t, t2); |
||||||
|
|
||||||
|
/*
|
||||||
|
* This needs to be changed depending on the mechanism for returning |
||||||
|
* our set of results. |
||||||
|
*/ |
||||||
|
|
||||||
|
if (xpresults == NULL) /* parse error (not WF or parser failure) */ |
||||||
|
PG_RETURN_NULL(); |
||||||
|
|
||||||
|
if (ind >= (xpresults->rescount)) |
||||||
|
PG_RETURN_NULL(); |
||||||
|
|
||||||
|
restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ); |
||||||
|
memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]); |
||||||
|
|
||||||
|
VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ; |
||||||
|
|
||||||
|
pfree(xpresults->resbuf); |
||||||
|
pfree(xpresults); |
||||||
|
|
||||||
|
PG_RETURN_TEXT_P(restext); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_pathcompare(void *userData) |
||||||
|
{ |
||||||
|
char *matchpos; |
||||||
|
|
||||||
|
matchpos = strstr(UD->currentpath, UD->path); |
||||||
|
|
||||||
|
if (matchpos == NULL) |
||||||
|
{ /* Should we have more logic here ? */ |
||||||
|
if (UD->textgrab) |
||||||
|
{ |
||||||
|
UD->textgrab = 0; |
||||||
|
pgxml_finalisegrabbedtext(userData); |
||||||
|
} |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* OK, we have a match of some sort. Now we need to check that our |
||||||
|
* match is anchored to the *end* of the string AND that it is |
||||||
|
* immediately preceded by a '/' |
||||||
|
*/ |
||||||
|
|
||||||
|
/*
|
||||||
|
* This test wouldn't work if strlen (UD->path) overran the length of |
||||||
|
* the currentpath, but that's not possible because we got a match! |
||||||
|
*/ |
||||||
|
|
||||||
|
if ((matchpos + strlen(UD->path))[0] == '\0') |
||||||
|
{ |
||||||
|
if ((UD->path)[0] == '/') |
||||||
|
{ |
||||||
|
if (matchpos == UD->currentpath) |
||||||
|
UD->textgrab = 1; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
if ((matchpos - 1)[0] == '/') |
||||||
|
UD->textgrab = 1; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_starthandler(void *userData, const XML_Char * name, |
||||||
|
const XML_Char ** atts) |
||||||
|
{ |
||||||
|
|
||||||
|
char sepstr[] = "/"; |
||||||
|
|
||||||
|
if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2) |
||||||
|
elog(WARNING, "path too long"); |
||||||
|
else |
||||||
|
{ |
||||||
|
strncat(UD->currentpath, sepstr, 1); |
||||||
|
strcat(UD->currentpath, name); |
||||||
|
} |
||||||
|
if (UD->textgrab) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* Depending on user preference, should we "reconstitute" the |
||||||
|
* element into the result text? |
||||||
|
*/ |
||||||
|
} |
||||||
|
else |
||||||
|
pgxml_pathcompare(userData); |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_endhandler(void *userData, const XML_Char * name) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* Start by removing the current element off the end of the |
||||||
|
* currentpath |
||||||
|
*/ |
||||||
|
|
||||||
|
char *sepptr; |
||||||
|
|
||||||
|
sepptr = strrchr(UD->currentpath, '/'); |
||||||
|
if (sepptr == NULL) |
||||||
|
{ |
||||||
|
/* internal error */ |
||||||
|
elog(ERROR, "did not find '/'"); |
||||||
|
sepptr = UD->currentpath; |
||||||
|
} |
||||||
|
if (strcmp(name, sepptr + 1) != 0) |
||||||
|
{ |
||||||
|
elog(WARNING, "wanted [%s], got [%s]", sepptr, name); |
||||||
|
/* unmatched entry, so do nothing */ |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
sepptr[0] = '\0'; /* Chop that element off the end */ |
||||||
|
} |
||||||
|
|
||||||
|
if (UD->textgrab) |
||||||
|
pgxml_pathcompare(userData); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_charhandler(void *userData, const XML_Char * s, int len) |
||||||
|
{ |
||||||
|
if (UD->textgrab) |
||||||
|
{ |
||||||
|
if (len > 0) |
||||||
|
{ |
||||||
|
memcpy(UD->resptr, s, len); |
||||||
|
UD->resptr += len; |
||||||
|
UD->reslen += len; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Should I be using PG list types here? */ |
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_finalisegrabbedtext(void *userData) |
||||||
|
{ |
||||||
|
/* In res/reslen, we have a single result. */ |
||||||
|
UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen; |
||||||
|
UD->xpres->reslens[UD->xpres->rescount] = UD->reslen; |
||||||
|
UD->reslen = 0; |
||||||
|
UD->xpres->rescount++; |
||||||
|
|
||||||
|
/*
|
||||||
|
* This effectively concatenates all the results together but we do |
||||||
|
* know where one ends and the next begins |
||||||
|
*/ |
||||||
|
} |
@ -0,0 +1,42 @@ |
|||||||
|
/* Header for pg xml parser interface */ |
||||||
|
|
||||||
|
static void *pgxml_palloc(size_t size); |
||||||
|
static void *pgxml_repalloc(void *ptr, size_t size); |
||||||
|
static void pgxml_pfree(void *ptr); |
||||||
|
static void pgxml_mhs_init(); |
||||||
|
static void pgxml_handler_init(); |
||||||
|
Datum pgxml_parse(PG_FUNCTION_ARGS); |
||||||
|
Datum pgxml_xpath(PG_FUNCTION_ARGS); |
||||||
|
static void pgxml_starthandler(void *userData, const XML_Char * name, |
||||||
|
const XML_Char ** atts); |
||||||
|
static void pgxml_endhandler(void *userData, const XML_Char * name); |
||||||
|
static void pgxml_charhandler(void *userData, const XML_Char * s, int len); |
||||||
|
static void pgxml_pathcompare(void *userData); |
||||||
|
static void pgxml_finalisegrabbedtext(void *userData); |
||||||
|
|
||||||
|
#define MAXPATHLENGTH 512 |
||||||
|
#define MAXRESULTS 100 |
||||||
|
|
||||||
|
|
||||||
|
typedef struct |
||||||
|
{ |
||||||
|
int rescount; |
||||||
|
char *results[MAXRESULTS]; |
||||||
|
int32 reslens[MAXRESULTS]; |
||||||
|
char *resbuf; /* pointer to the result buffer for pfree */ |
||||||
|
} XPath_Results; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct |
||||||
|
{ |
||||||
|
char currentpath[MAXPATHLENGTH]; |
||||||
|
char *path; |
||||||
|
int textgrab; |
||||||
|
char *resptr; |
||||||
|
int32 reslen; |
||||||
|
XPath_Results *xpres; |
||||||
|
} pgxml_udata; |
||||||
|
|
||||||
|
|
||||||
|
#define UD ((pgxml_udata *) userData) |
@ -0,0 +1,265 @@ |
|||||||
|
/* Parser interface for DOM-based parser (libxml) rather than
|
||||||
|
stream-based SAX-type parser */ |
||||||
|
|
||||||
|
#include "postgres.h" |
||||||
|
#include "fmgr.h" |
||||||
|
|
||||||
|
/* libxml includes */ |
||||||
|
|
||||||
|
#include <libxml/xpath.h> |
||||||
|
#include <libxml/tree.h> |
||||||
|
#include <libxml/xmlmemory.h> |
||||||
|
|
||||||
|
/* declarations */ |
||||||
|
|
||||||
|
static void *pgxml_palloc(size_t size); |
||||||
|
static void *pgxml_repalloc(void *ptr, size_t size); |
||||||
|
static void pgxml_pfree(void *ptr); |
||||||
|
static char *pgxml_pstrdup(const char *string); |
||||||
|
|
||||||
|
static void pgxml_parser_init(); |
||||||
|
|
||||||
|
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc, |
||||||
|
xmlChar * toptagname, xmlChar * septagname, |
||||||
|
int format); |
||||||
|
|
||||||
|
static xmlChar *pgxml_texttoxmlchar(text *textstring); |
||||||
|
|
||||||
|
|
||||||
|
Datum pgxml_parse(PG_FUNCTION_ARGS); |
||||||
|
Datum pgxml_xpath(PG_FUNCTION_ARGS); |
||||||
|
|
||||||
|
/* memory handling passthrough functions (e.g. palloc, pstrdup are
|
||||||
|
currently macros, and the others might become so...) */ |
||||||
|
|
||||||
|
/*
 * malloc-shaped wrapper around palloc, suitable for xmlMemSetup(): palloc
 * is a macro and so cannot be passed as a function pointer.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk;

	chunk = palloc(size);
	return chunk;
}
||||||
|
|
||||||
|
/*
 * realloc-shaped wrapper for xmlMemSetup().
 *
 * realloc-style callers may pass NULL and expect a fresh allocation;
 * repalloc() does not accept NULL, so that case goes to palloc().
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
||||||
|
|
||||||
|
/*
 * free-shaped wrapper for xmlMemSetup().
 *
 * free()-style callers may pass NULL, which pfree() does not accept, so
 * NULL is ignored.  (The previous body also returned an expression from a
 * void function, which ISO C does not permit.)
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
||||||
|
|
||||||
|
/*
 * strdup-shaped wrapper around pstrdup, suitable for xmlMemSetup().
 */
static char *
pgxml_pstrdup(const char *string)
{
	char	   *copy;

	copy = pstrdup(string);
	return copy;
}
||||||
|
|
||||||
|
static void |
||||||
|
pgxml_parser_init() |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* This code should also set parser settings from user-supplied info. |
||||||
|
* Quite how these settings are made is another matter :) |
||||||
|
*/ |
||||||
|
|
||||||
|
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup); |
||||||
|
xmlInitParser(); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/* Returns true if document is well-formed */ |
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_parse); |
||||||
|
|
||||||
|
Datum |
||||||
|
pgxml_parse(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
/* called as pgxml_parse(document) */ |
||||||
|
xmlDocPtr doctree; |
||||||
|
text *t = PG_GETARG_TEXT_P(0); /* document buffer */ |
||||||
|
int32 docsize = VARSIZE(t) - VARHDRSZ; |
||||||
|
|
||||||
|
pgxml_parser_init(); |
||||||
|
|
||||||
|
doctree = xmlParseMemory((char *) VARDATA(t), docsize); |
||||||
|
if (doctree == NULL) |
||||||
|
{ |
||||||
|
xmlCleanupParser(); |
||||||
|
PG_RETURN_BOOL(false); /* i.e. not well-formed */ |
||||||
|
} |
||||||
|
xmlCleanupParser(); |
||||||
|
xmlFreeDoc(doctree); |
||||||
|
PG_RETURN_BOOL(true); |
||||||
|
} |
||||||
|
|
||||||
|
static xmlChar |
||||||
|
* |
||||||
|
pgxmlNodeSetToText(xmlNodeSetPtr nodeset, |
||||||
|
xmlDocPtr doc, |
||||||
|
xmlChar * toptagname, |
||||||
|
xmlChar * septagname, |
||||||
|
int format) |
||||||
|
{ |
||||||
|
/* Function translates a nodeset into a text representation */ |
||||||
|
|
||||||
|
/*
|
||||||
|
* iterates over each node in the set and calls xmlNodeDump to write |
||||||
|
* it to an xmlBuffer -from which an xmlChar * string is returned. |
||||||
|
*/ |
||||||
|
/* each representation is surrounded by <tagname> ... </tagname> */ |
||||||
|
/* if format==0, add a newline between nodes?? */ |
||||||
|
|
||||||
|
xmlBufferPtr buf; |
||||||
|
xmlChar *result; |
||||||
|
int i; |
||||||
|
|
||||||
|
buf = xmlBufferCreate(); |
||||||
|
|
||||||
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) |
||||||
|
{ |
||||||
|
xmlBufferWriteChar(buf, "<"); |
||||||
|
xmlBufferWriteCHAR(buf, toptagname); |
||||||
|
xmlBufferWriteChar(buf, ">"); |
||||||
|
} |
||||||
|
if (nodeset != NULL) |
||||||
|
{ |
||||||
|
for (i = 0; i < nodeset->nodeNr; i++) |
||||||
|
{ |
||||||
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) |
||||||
|
{ |
||||||
|
xmlBufferWriteChar(buf, "<"); |
||||||
|
xmlBufferWriteCHAR(buf, septagname); |
||||||
|
xmlBufferWriteChar(buf, ">"); |
||||||
|
} |
||||||
|
xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2)); |
||||||
|
|
||||||
|
if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) |
||||||
|
{ |
||||||
|
xmlBufferWriteChar(buf, "</"); |
||||||
|
xmlBufferWriteCHAR(buf, septagname); |
||||||
|
xmlBufferWriteChar(buf, ">"); |
||||||
|
} |
||||||
|
if (format) |
||||||
|
xmlBufferWriteChar(buf, "\n"); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) |
||||||
|
{ |
||||||
|
xmlBufferWriteChar(buf, "</"); |
||||||
|
xmlBufferWriteCHAR(buf, toptagname); |
||||||
|
xmlBufferWriteChar(buf, ">"); |
||||||
|
} |
||||||
|
result = xmlStrdup(buf->content); |
||||||
|
xmlBufferFree(buf); |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
static xmlChar * |
||||||
|
pgxml_texttoxmlchar(text *textstring) |
||||||
|
{ |
||||||
|
xmlChar *res; |
||||||
|
int32 txsize; |
||||||
|
|
||||||
|
txsize = VARSIZE(textstring) - VARHDRSZ; |
||||||
|
res = (xmlChar *) palloc(txsize + 1); |
||||||
|
memcpy((char *) res, VARDATA(textstring), txsize); |
||||||
|
res[txsize] = '\0'; |
||||||
|
return res; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_xpath); |
||||||
|
|
||||||
|
Datum |
||||||
|
pgxml_xpath(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
xmlDocPtr doctree; |
||||||
|
xmlXPathContextPtr ctxt; |
||||||
|
xmlXPathObjectPtr res; |
||||||
|
xmlChar *xpath, |
||||||
|
*xpresstr, |
||||||
|
*toptag, |
||||||
|
*septag; |
||||||
|
xmlXPathCompExprPtr comppath; |
||||||
|
|
||||||
|
int32 docsize, |
||||||
|
ressize; |
||||||
|
text *t, |
||||||
|
*xpres; |
||||||
|
|
||||||
|
t = PG_GETARG_TEXT_P(0); /* document buffer */ |
||||||
|
xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1)); /* XPath expression */ |
||||||
|
toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); |
||||||
|
septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3)); |
||||||
|
|
||||||
|
docsize = VARSIZE(t) - VARHDRSZ; |
||||||
|
|
||||||
|
pgxml_parser_init(); |
||||||
|
|
||||||
|
doctree = xmlParseMemory((char *) VARDATA(t), docsize); |
||||||
|
if (doctree == NULL) |
||||||
|
{ /* not well-formed */ |
||||||
|
xmlCleanupParser(); |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
||||||
|
|
||||||
|
ctxt = xmlXPathNewContext(doctree); |
||||||
|
ctxt->node = xmlDocGetRootElement(doctree); |
||||||
|
|
||||||
|
/* compile the path */ |
||||||
|
comppath = xmlXPathCompile(xpath); |
||||||
|
if (comppath == NULL) |
||||||
|
{ |
||||||
|
elog(WARNING, "XPath syntax error"); |
||||||
|
xmlFreeDoc(doctree); |
||||||
|
pfree((void *) xpath); |
||||||
|
xmlCleanupParser(); |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
||||||
|
|
||||||
|
/* Now evaluate the path expression. */ |
||||||
|
res = xmlXPathCompiledEval(comppath, ctxt); |
||||||
|
xmlXPathFreeCompExpr(comppath); |
||||||
|
|
||||||
|
if (res == NULL) |
||||||
|
{ |
||||||
|
xmlFreeDoc(doctree); |
||||||
|
pfree((void *) xpath); |
||||||
|
xmlCleanupParser(); |
||||||
|
PG_RETURN_NULL(); /* seems appropriate */ |
||||||
|
} |
||||||
|
/* now we dump this node, ?surrounding by tags? */ |
||||||
|
/* To do this, we look first at the type */ |
||||||
|
switch (res->type) |
||||||
|
{ |
||||||
|
case XPATH_NODESET: |
||||||
|
xpresstr = pgxmlNodeSetToText(res->nodesetval, |
||||||
|
doctree, |
||||||
|
toptag, septag, 0); |
||||||
|
break; |
||||||
|
case XPATH_STRING: |
||||||
|
xpresstr = xmlStrdup(res->stringval); |
||||||
|
break; |
||||||
|
default: |
||||||
|
elog(WARNING, "Unsupported XQuery result: %d", res->type); |
||||||
|
xpresstr = xmlStrdup("<unsupported/>"); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/* Now convert this result back to text */ |
||||||
|
ressize = strlen(xpresstr); |
||||||
|
xpres = (text *) palloc(ressize + VARHDRSZ); |
||||||
|
memcpy(VARDATA(xpres), xpresstr, ressize); |
||||||
|
VARATT_SIZEP(xpres) = ressize + VARHDRSZ; |
||||||
|
|
||||||
|
/* Free various storage */ |
||||||
|
xmlFreeDoc(doctree); |
||||||
|
pfree((void *) xpath); |
||||||
|
xmlFree(xpresstr); |
||||||
|
xmlCleanupParser(); |
||||||
|
PG_RETURN_TEXT_P(xpres); |
||||||
|
} |
@ -0,0 +1,10 @@ |
|||||||
|
-- SQL for XML parser |
||||||
|
|
||||||
|
-- Adjust this setting to control where the objects get created. |
||||||
|
SET search_path = public;

-- pgxml_parse(document): true if the document is well-formed XML.
CREATE OR REPLACE FUNCTION pgxml_parse(text)
RETURNS boolean
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;

-- pgxml_xpath(document, xpath, toptag, septag): result of evaluating the
-- XPath expression against the document, as text.
CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text)
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
Loading…
Reference in new issue