Avoid regression in the size of XML input that we will accept.

This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext
not xmlParseBalancedChunkMemory".  It turns out that
xmlParseInNodeContext will reject text chunks exceeding 10MB, while
(in most libxml2 versions) xmlParseBalancedChunkMemory will not.
The bleeding-edge libxml2 bug that we needed to work around a year
ago is presumably no longer a factor, and the argument that
xmlParseBalancedChunkMemory is semi-deprecated is not enough to
justify a functionality regression.  Hence, go back to doing it
the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13
REL_13_STABLE
Tom Lane 2 months ago
parent f32a471612
commit 589d6e6408
  1. 57
      src/backend/utils/adt/xml.c

@@ -1529,6 +1529,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
PgXmlErrorContext *xmlerrcxt; PgXmlErrorContext *xmlerrcxt;
volatile xmlParserCtxtPtr ctxt = NULL; volatile xmlParserCtxtPtr ctxt = NULL;
volatile xmlDocPtr doc = NULL; volatile xmlDocPtr doc = NULL;
volatile int save_keep_blanks = -1;
len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
string = xml_text2xmlChar(data); string = xml_text2xmlChar(data);
@@ -1545,7 +1546,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
PG_TRY(); PG_TRY();
{ {
bool parse_as_document = false; bool parse_as_document = false;
int options;
int res_code; int res_code;
size_t count = 0; size_t count = 0;
xmlChar *version = NULL; xmlChar *version = NULL;
@@ -1571,25 +1571,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
parse_as_document = true; parse_as_document = true;
} }
/*
* Select parse options.
*
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
* internal DTD are applied'. As for external DTDs, we try to support
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
* happen because xmlPgEntityLoader prevents it.
*/
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
if (parse_as_document) if (parse_as_document)
{ {
int options;
/* set up parser context used by xmlCtxtReadDoc */
ctxt = xmlNewParserCtxt(); ctxt = xmlNewParserCtxt();
if (ctxt == NULL || xmlerrcxt->err_occurred) if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context"); "could not allocate parser context");
/*
* Select parse options.
*
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
* by internal DTD are applied'. As for external DTDs, we try to
* support them too (see SQL/XML:2008 GR 10.16.7.e), but that
* doesn't really happen because xmlPgEntityLoader prevents it.
*/
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
doc = xmlCtxtReadDoc(ctxt, utf8string, doc = xmlCtxtReadDoc(ctxt, utf8string,
NULL, /* no URL */ NULL, /* no URL */
"UTF-8", "UTF-8",
@@ -1608,36 +1611,27 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
} }
else else
{ {
xmlNodePtr root; /* set up document that xmlParseBalancedChunkMemory will add to */
/* set up document with empty root node to be the context node */
doc = xmlNewDoc(version); doc = xmlNewDoc(version);
Assert(doc->encoding == NULL); Assert(doc->encoding == NULL);
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
doc->standalone = standalone; doc->standalone = standalone;
root = xmlNewNode(NULL, (const xmlChar *) "content-root"); /* set parse options --- have to do this the ugly way */
if (root == NULL || xmlerrcxt->err_occurred) save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
/* This attaches root to doc, so we need not free it separately. */
xmlDocSetRootElement(doc, root);
/* allow empty content */ /* allow empty content */
if (*(utf8string + count)) if (*(utf8string + count))
{ {
xmlNodePtr node_list = NULL; xmlNodePtr node_list = NULL;
xmlParserErrors res;
res = xmlParseInNodeContext(root, res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
(char *) utf8string + count, utf8string + count,
strlen((char *) utf8string + count), &node_list);
options,
&node_list);
xmlFreeNodeList(node_list); xmlFreeNodeList(node_list);
if (res != XML_ERR_OK || xmlerrcxt->err_occurred) if (res_code != 0 || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
"invalid XML content"); "invalid XML content");
} }
@@ -1645,6 +1639,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
} }
PG_CATCH(); PG_CATCH();
{ {
if (save_keep_blanks != -1)
xmlKeepBlanksDefault(save_keep_blanks);
if (doc != NULL) if (doc != NULL)
xmlFreeDoc(doc); xmlFreeDoc(doc);
if (ctxt != NULL) if (ctxt != NULL)
@@ -1656,6 +1652,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
} }
PG_END_TRY(); PG_END_TRY();
if (save_keep_blanks != -1)
xmlKeepBlanksDefault(save_keep_blanks);
if (ctxt != NULL) if (ctxt != NULL)
xmlFreeParserCtxt(ctxt); xmlFreeParserCtxt(ctxt);

Loading…
Cancel
Save