1
/* Parser interface for DOM-based parser (libxml) rather than
2
stream-based SAX-type parser */
6
#include "executor/spi.h"
9
#include "lib/stringinfo.h"
13
#include <libxml/xpath.h>
14
#include <libxml/tree.h>
15
#include <libxml/xmlmemory.h>
16
#include <libxml/xmlerror.h>
17
#include <libxml/parserInternals.h>
21
static void *pgxml_palloc(size_t size);
22
static void *pgxml_repalloc(void *ptr, size_t size);
23
static void pgxml_pfree(void *ptr);
24
static char *pgxml_pstrdup(const char *string);
25
static void pgxml_errorHandler(void *ctxt, const char *msg,...);
27
void elog_error(int level, char *explain, int force);
28
void pgxml_parser_init(void);
30
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
31
xmlChar * toptagname, xmlChar * septagname,
34
text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar * toptag,
35
xmlChar * septag, xmlChar * plainsep);
37
xmlChar *pgxml_texttoxmlchar(text *textstring);
39
static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar * xpath);
42
Datum xml_valid(PG_FUNCTION_ARGS);
43
Datum xml_encode_special_chars(PG_FUNCTION_ARGS);
44
Datum xpath_nodeset(PG_FUNCTION_ARGS);
45
Datum xpath_string(PG_FUNCTION_ARGS);
46
Datum xpath_number(PG_FUNCTION_ARGS);
47
Datum xpath_bool(PG_FUNCTION_ARGS);
48
Datum xpath_list(PG_FUNCTION_ARGS);
49
Datum xpath_table(PG_FUNCTION_ARGS);
51
/* Global variables */
52
char *errbuf; /* per line error buffer */
53
char *pgxml_errorMsg = NULL; /* overall error message */
55
/* Convenience macros */
57
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
58
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
60
#define ERRBUF_SIZE 200
62
/* memory handling passthrough functions (e.g. palloc, pstrdup are
63
currently macros, and the others might become so...) */
66
pgxml_palloc(size_t size)
68
/* elog(DEBUG1,"Alloc %d in CMC %x",size,CurrentMemoryContext); */
73
pgxml_repalloc(void *ptr, size_t size)
75
/* elog(DEBUG1,"ReAlloc in CMC %x",CurrentMemoryContext);*/
76
return repalloc(ptr, size);
80
pgxml_pfree(void *ptr)
82
/* elog(DEBUG1,"Free in CMC %x",CurrentMemoryContext); */
87
pgxml_pstrdup(const char *string)
89
return pstrdup(string);
92
/* The error handling function. This formats an error message and sets
93
* a flag - an ereport will be issued prior to return
97
pgxml_errorHandler(void *ctxt, const char *msg,...)
102
vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
104
/* Now copy the argument across */
105
if (pgxml_errorMsg == NULL)
106
pgxml_errorMsg = pstrdup(errbuf);
109
int32 xsize = strlen(pgxml_errorMsg);
111
pgxml_errorMsg = repalloc(pgxml_errorMsg,
112
(size_t) (xsize + strlen(errbuf) + 1));
113
strncpy(&pgxml_errorMsg[xsize - 1], errbuf, strlen(errbuf));
114
pgxml_errorMsg[xsize + strlen(errbuf) - 1] = '\0';
117
memset(errbuf, 0, ERRBUF_SIZE);
120
/* This function reports the current message at the level specified */
122
elog_error(int level, char *explain, int force)
124
if (force || (pgxml_errorMsg != NULL))
126
if (pgxml_errorMsg == NULL)
128
ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
133
ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
134
errmsg("%s:%s", explain, pgxml_errorMsg)));
135
pfree(pgxml_errorMsg);
144
* This code could also set parser settings from user-supplied info.
145
* Quite how these settings are made is another matter :)
148
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
151
xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
153
xmlSubstituteEntitiesDefault(1);
154
xmlLoadExtDtdDefaultValue = 1;
156
pgxml_errorMsg = NULL;
158
errbuf = palloc(200);
159
memset(errbuf, 0, 200);
164
/* Returns true if document is well-formed */
166
PG_FUNCTION_INFO_V1(xml_valid);
169
xml_valid(PG_FUNCTION_ARGS)
171
/* called as xml_valid(document) */
173
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
174
int32 docsize = VARSIZE(t) - VARHDRSZ;
178
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
182
PG_RETURN_BOOL(false); /* i.e. not well-formed */
186
PG_RETURN_BOOL(true);
190
/* Encodes special characters (<, >, &, " and \r) as XML entities */
192
PG_FUNCTION_INFO_V1(xml_encode_special_chars);
195
xml_encode_special_chars(PG_FUNCTION_ARGS)
197
text *tin = PG_GETARG_TEXT_P(0);
202
ts = pgxml_texttoxmlchar(tin);
204
tt = xmlEncodeSpecialChars(NULL, ts);
208
ressize = strlen(tt);
209
tout = (text *) palloc(ressize + VARHDRSZ);
210
memcpy(VARDATA(tout), tt, ressize);
211
VARATT_SIZEP(tout) = ressize + VARHDRSZ;
215
PG_RETURN_TEXT_P(tout);
220
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
221
xmlChar * toptagname,
222
xmlChar * septagname,
225
/* Function translates a nodeset into a text representation */
228
* iterates over each node in the set and calls xmlNodeDump to write
229
* it to an xmlBuffer, from which an xmlChar * string is returned.
232
/* each representation is surrounded by <tagname> ... </tagname> */
235
* plainsep is an ordinary (not tag) separator - if used, then nodes
236
* are cast to string as output method
244
buf = xmlBufferCreate();
246
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
248
xmlBufferWriteChar(buf, "<");
249
xmlBufferWriteCHAR(buf, toptagname);
250
xmlBufferWriteChar(buf, ">");
254
for (i = 0; i < nodeset->nodeNr; i++)
257
if (plainsep != NULL)
259
xmlBufferWriteCHAR(buf,
260
xmlXPathCastNodeToString(nodeset->nodeTab[i]));
262
/* If this isn't the last entry, write the plain sep. */
263
if (i < (nodeset->nodeNr) - 1)
264
xmlBufferWriteChar(buf, plainsep);
270
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
272
xmlBufferWriteChar(buf, "<");
273
xmlBufferWriteCHAR(buf, septagname);
274
xmlBufferWriteChar(buf, ">");
277
nodeset->nodeTab[i]->doc,
281
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
283
xmlBufferWriteChar(buf, "</");
284
xmlBufferWriteCHAR(buf, septagname);
285
xmlBufferWriteChar(buf, ">");
291
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
293
xmlBufferWriteChar(buf, "</");
294
xmlBufferWriteCHAR(buf, toptagname);
295
xmlBufferWriteChar(buf, ">");
297
result = xmlStrdup(buf->content);
303
/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
304
* into the libxml2 representation
308
pgxml_texttoxmlchar(text *textstring)
313
txsize = VARSIZE(textstring) - VARHDRSZ;
314
res = (xmlChar *) palloc(txsize + 1);
315
memcpy((char *) res, VARDATA(textstring), txsize);
320
/* Public visible XPath functions */
322
/* This is a "raw" xpath function. Check that it returns child elements
326
PG_FUNCTION_INFO_V1(xpath_nodeset);
329
xpath_nodeset(PG_FUNCTION_ARGS)
339
/* PG_GETARG_TEXT_P(0) is document buffer */
340
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
342
toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
343
septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
345
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
347
xpath = pgxml_texttoxmlchar(xpathsupp);
349
xpres = pgxml_result_to_text(
350
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
351
toptag, septag, NULL);
353
/* xmlCleanupParser(); done by result_to_text routine */
358
PG_RETURN_TEXT_P(xpres);
361
/* The following function is almost identical, but returns the elements in */
364
PG_FUNCTION_INFO_V1(xpath_list);
367
xpath_list(PG_FUNCTION_ARGS)
376
/* PG_GETARG_TEXT_P(0) is document buffer */
377
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
379
plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
381
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
383
xpath = pgxml_texttoxmlchar(xpathsupp);
385
xpres = pgxml_result_to_text(
386
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
387
NULL, NULL, plainsep);
389
/* xmlCleanupParser(); done by result_to_text routine */
394
PG_RETURN_TEXT_P(xpres);
398
PG_FUNCTION_INFO_V1(xpath_string);
401
xpath_string(PG_FUNCTION_ARGS)
409
/* PG_GETARG_TEXT_P(0) is document buffer */
410
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
412
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
415
* We encapsulate the supplied path with "string()" = 8 chars + 1 for
418
/* We could try casting to string using the libxml function? */
420
xpath = (xmlChar *) palloc(pathsize + 9);
421
memcpy((char *) (xpath + 7), VARDATA(xpathsupp), pathsize);
422
strncpy((char *) xpath, "string(", 7);
423
xpath[pathsize + 7] = ')';
424
xpath[pathsize + 8] = '\0';
426
xpres = pgxml_result_to_text(
427
pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
435
PG_RETURN_TEXT_P(xpres);
439
PG_FUNCTION_INFO_V1(xpath_number);
442
xpath_number(PG_FUNCTION_ARGS)
451
xmlXPathObjectPtr res;
453
/* PG_GETARG_TEXT_P(0) is document buffer */
454
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
456
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
458
xpath = pgxml_texttoxmlchar(xpathsupp);
460
res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
469
fRes = xmlXPathCastToNumber(res);
471
if (xmlXPathIsNaN(fRes))
474
PG_RETURN_FLOAT4(fRes);
479
PG_FUNCTION_INFO_V1(xpath_bool);
482
xpath_bool(PG_FUNCTION_ARGS)
491
xmlXPathObjectPtr res;
493
/* PG_GETARG_TEXT_P(0) is document buffer */
494
xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */
496
pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
498
xpath = pgxml_texttoxmlchar(xpathsupp);
500
res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
506
PG_RETURN_BOOL(false);
509
bRes = xmlXPathCastToBoolean(res);
511
PG_RETURN_BOOL(bRes);
517
/* Core function to evaluate XPath query */
520
pgxml_xpath(text *document, xmlChar * xpath)
524
xmlXPathContextPtr ctxt;
525
xmlXPathObjectPtr res;
527
xmlXPathCompExprPtr comppath;
532
docsize = VARSIZE(document) - VARHDRSZ;
536
doctree = xmlParseMemory((char *) VARDATA(document), docsize);
538
{ /* not well-formed */
542
ctxt = xmlXPathNewContext(doctree);
543
ctxt->node = xmlDocGetRootElement(doctree);
546
/* compile the path */
547
comppath = xmlXPathCompile(xpath);
548
if (comppath == NULL)
552
elog_error(ERROR, "XPath Syntax Error", 1);
557
/* Now evaluate the path expression. */
558
res = xmlXPathCompiledEval(comppath, ctxt);
559
xmlXPathFreeCompExpr(comppath);
563
xmlXPathFreeContext(ctxt);
564
/* xmlCleanupParser(); */
569
/* xmlFreeDoc(doctree); */
575
pgxml_result_to_text(xmlXPathObjectPtr res,
592
xpresstr = pgxmlNodeSetToText(res->nodesetval,
598
xpresstr = xmlStrdup(res->stringval);
602
elog(NOTICE, "Unsupported XQuery result: %d", res->type);
603
xpresstr = xmlStrdup("<unsupported/>");
607
/* Now convert this result back to text */
608
ressize = strlen(xpresstr);
609
xpres = (text *) palloc(ressize + VARHDRSZ);
610
memcpy(VARDATA(xpres), xpresstr, ressize);
611
VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
613
/* Free various storage */
615
/* xmlFreeDoc(doctree); -- will die at end of tuple anyway */
619
elog_error(ERROR, "XPath error", 0);
625
/* xpath_table is a table function. It needs some tidying (as do the
626
* other functions here!)
629
PG_FUNCTION_INFO_V1(xpath_table);
632
xpath_table(PG_FUNCTION_ARGS)
634
/* SPI (input tuple) support */
635
SPITupleTable *tuptable;
637
TupleDesc spi_tupdesc;
639
/* Output tuple (tuplestore) support */
640
Tuplestorestate *tupstore = NULL;
641
TupleDesc ret_tupdesc;
644
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
645
AttInMetadata *attinmeta;
646
MemoryContext per_query_ctx;
647
MemoryContext oldcontext;
649
/* Function parameters */
650
char *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
651
char *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
652
char *relname = GET_STR(PG_GETARG_TEXT_P(2));
653
char *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
654
char *condition = GET_STR(PG_GETARG_TEXT_P(4));
659
xmlChar *pathsep = "|";
666
int rownr; /* For issuing multiple rows from one
667
* original document */
668
int had_values; /* To determine end of nodeset results */
672
/* We only have a valid tuple description in table function mode */
673
if (rsinfo->expectedDesc == NULL)
675
ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
676
errmsg("xpath_table must be called as a table function")));
679
/* The tuplestore must exist in a higher context than
680
* this function call (per_query_ctx is used) */
682
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
683
oldcontext = MemoryContextSwitchTo(per_query_ctx);
685
/* Create the tuplestore - work_mem is the max in-memory size before a
686
* file is created on disk to hold it.
689
tupstore = tuplestore_begin_heap(true, false, work_mem);
691
MemoryContextSwitchTo(oldcontext);
693
/* get the requested return tuple description */
694
ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
697
* At the moment we assume that the returned attributes make sense for
698
* the XPath specified (i.e. we trust the caller). It's not fatal if
699
* they get it wrong - the input function for the column type will
700
* raise an error if the path result can't be converted into the
701
* correct binary representation.
704
attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
707
* We want to materialise because it means that we don't have to carry
708
* libxml2 parser state between invocations of this function
711
/* check to see if caller supports us returning a tuplestore */
712
if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
713
ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
714
errmsg("xpath_table requires Materialize mode, but it is not "
715
"allowed in this context")));
717
/* Set return mode and allocate value space. */
718
rsinfo->returnMode = SFRM_Materialize;
719
rsinfo->setDesc = ret_tupdesc;
721
values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
723
xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
725
/* Split XPaths. xpathset is a writable CString. */
727
/* Note that we stop splitting once we've done all needed for tupdesc */
733
xpaths[numpaths] = pos;
734
pos = strstr(pos, pathsep);
741
} while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)));
743
/* Now build query */
745
querysql = makeStringInfo();
747
/* Build initial sql statement */
748
appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s",
756
if ((ret = SPI_connect()) < 0)
757
elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
759
if ((ret = SPI_exec(querysql->data, 0)) != SPI_OK_SELECT)
760
elog(ERROR, "xpath_table: SPI execution failed for query %s", querysql->data);
762
proc = SPI_processed;
763
/* elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */
764
tuptable = SPI_tuptable;
765
spi_tupdesc = tuptable->tupdesc;
767
/* Switch out of SPI context */
768
MemoryContextSwitchTo(oldcontext);
771
/* Check that SPI returned correct result. If you put a comma into one of
772
* the function parameters, this will catch it when the SPI query returns
776
if (spi_tupdesc->natts != 2)
778
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
779
errmsg("Expression returning multiple columns is not valid in parameter list"),
780
errdetail("Expected two columns in SPI result, got %d", spi_tupdesc->natts)));
783
/* Setup the parser. Beware that this must happen in the same context as the
784
* cleanup - which means that any error from here on must do cleanup to
785
* ensure that the entity table doesn't get freed by being out of context.
789
/* For each row i.e. document returned from SPI */
790
for (i = 0; i < proc; i++)
796
xmlXPathContextPtr ctxt;
797
xmlXPathObjectPtr res;
801
xmlXPathCompExprPtr comppath;
803
/* Extract the row data as C Strings */
805
spi_tuple = tuptable->vals[i];
806
pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
807
xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
811
* Clear the values array, so that not-well-formed documents
812
* return NULL in all columns.
815
/* Note that this also means that spare columns will be NULL. */
816
for (j = 0; j < ret_tupdesc->natts; j++)
819
/* Insert primary key */
822
/* Parse the document */
823
doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
826
{ /* not well-formed, so output all-NULL
829
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
830
oldcontext = MemoryContextSwitchTo(per_query_ctx);
831
tuplestore_puttuple(tupstore, ret_tuple);
832
MemoryContextSwitchTo(oldcontext);
833
heap_freetuple(ret_tuple);
837
/* New loop here - we have to deal with nodeset results */
842
/* Now evaluate the set of xpaths. */
844
for (j = 0; j < numpaths; j++)
847
ctxt = xmlXPathNewContext(doctree);
848
ctxt->node = xmlDocGetRootElement(doctree);
849
xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
851
/* compile the path */
852
comppath = xmlXPathCompile(xpaths[j]);
853
if (comppath == NULL)
858
elog_error(ERROR, "XPath Syntax Error", 1);
860
PG_RETURN_NULL(); /* Keep compiler happy */
863
/* Now evaluate the path expression. */
864
res = xmlXPathCompiledEval(comppath, ctxt);
865
xmlXPathFreeCompExpr(comppath);
872
/* We see if this nodeset has enough nodes */
873
if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr))
876
xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
885
resstr = xmlStrdup(res->stringval);
889
elog(NOTICE, "Unsupported XQuery result: %d", res->type);
890
resstr = xmlStrdup("<unsupported/>");
895
* Insert this into the appropriate column in the
898
values[j + 1] = resstr;
900
xmlXPathFreeContext(ctxt);
902
/* Now add the tuple to the output, if there is one. */
905
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
906
oldcontext = MemoryContextSwitchTo(per_query_ctx);
907
tuplestore_puttuple(tupstore, ret_tuple);
908
MemoryContextSwitchTo(oldcontext);
909
heap_freetuple(ret_tuple);
914
} while (had_values);
925
/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
926
tuplestore_donestoring(tupstore);
930
rsinfo->setResult = tupstore;
933
* SFRM_Materialize mode expects us to return a NULL Datum. The actual
934
* tuples are in our tuplestore and passed back through
935
* rsinfo->setResult. rsinfo->setDesc is set to the tuple description
936
* that we actually used to build our tuples with, so the caller can
937
* verify we did what it was expecting.