From 3b5eeb92df6970611693e6428aee1bedbcd248a0 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 4 Jun 2026 06:09:18 +0100 Subject: [PATCH] ext/dom: resolve in-scope prefixed QName values during document validation. Fix #22219 Modern DOM keeps namespace declarations off the tree (node->nsDef is NULL), so libxml's native validators cannot resolve a prefixed QName appearing in element or attribute content. Temporarily materialize them as nsDef entries around schema, RelaxNG and DTD validation, reusing the C14N relink machinery, then restore the tree. close GH-22224 --- NEWS | 4 ++ ext/dom/document.c | 42 ++++++++++- ext/dom/namespace_compat.c | 138 ++++++++++++++++++++++++++++++++++++ ext/dom/node.c | 140 ------------------------------------- ext/dom/php_dom.h | 7 ++ ext/dom/tests/gh22219.phpt | 54 ++++++++++++++ 6 files changed, 243 insertions(+), 142 deletions(-) create mode 100644 ext/dom/tests/gh22219.phpt diff --git a/NEWS b/NEWS index 800423779d11..ad5d3753b22c 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ PHP NEWS . Fix incorrect recurrence check of DatePeriod::createFromISO8601String(). (ndossche) +- DOM: + . Fix GH-22219 (Dom\XMLDocument::schemaValidate fails to resolve + xs:QName with prefix from imported schema). (David Carlier) + - GD: . Fixed bug GH-22121 (Double free in gdImageSetStyle() after overflow-triggered early return). (iliaal) diff --git a/ext/dom/document.c b/ext/dom/document.c index 371c9f846273..e4d285c990fe 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1757,6 +1757,35 @@ static int dom_perform_xinclude(xmlDocPtr docp, dom_object *intern, zend_long fl return err; } +/* For modern DOM, namespace declarations are stored as attributes (node->nsDef + * is NULL), so libxml's native validators can't resolve prefixed QNames found in + * content (e.g. an xs:QName attribute value). Temporarily relink them, mirroring + * what C14N does in dom_canonicalization(). 
*/ +typedef struct { + HashTable links; + bool active; +} dom_validate_ns_guard; + +static void dom_validate_ns_guard_begin(dom_validate_ns_guard *guard, xmlDocPtr docp) +{ + guard->active = php_dom_follow_spec_node((const xmlNode *) docp); + if (guard->active) { + zend_hash_init(&guard->links, 0, NULL, NULL, false); + xmlNodePtr root_element = xmlDocGetRootElement(docp); + if (root_element) { + dom_relink_ns_decls(&guard->links, root_element); + } + } +} + +static void dom_validate_ns_guard_end(dom_validate_ns_guard *guard) +{ + if (guard->active) { + dom_unlink_ns_decls(&guard->links); + zend_hash_destroy(&guard->links); + } +} + /* {{{ Substitutues xincludes in a DomDocument */ PHP_METHOD(DOMDocument, xinclude) { @@ -1832,8 +1861,11 @@ PHP_METHOD(DOMDocument, validate) cvp->userData = NULL; cvp->error = (xmlValidityErrorFunc) php_libxml_error_handler; cvp->warning = (xmlValidityErrorFunc) php_libxml_error_handler; - - if (xmlValidateDocument(cvp, docp)) { + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); + int dtd_valid = xmlValidateDocument(cvp, docp); + dom_validate_ns_guard_end(&guard); + if (dtd_valid) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -1930,7 +1962,10 @@ static void dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type) PHP_LIBXML_SANITIZE_GLOBALS(validate); xmlSchemaSetValidOptions(vptr, valid_opts); xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = xmlSchemaValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlSchemaFree(sptr); xmlSchemaFreeValidCtxt(vptr); PHP_LIBXML_RESTORE_GLOBALS(validate); @@ -2028,7 +2063,10 @@ static void dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int type } xmlRelaxNGSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = 
xmlRelaxNGValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlRelaxNGFree(sptr); xmlRelaxNGFreeValidCtxt(vptr); diff --git a/ext/dom/namespace_compat.c b/ext/dom/namespace_compat.c index 7a3bd68b0111..e1dcd73503ea 100644 --- a/ext/dom/namespace_compat.c +++ b/ext/dom/namespace_compat.c @@ -501,4 +501,142 @@ PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns } } +static xmlNsPtr dom_alloc_ns_decl(HashTable *links, xmlNodePtr node) +{ + xmlNsPtr ns = xmlMalloc(sizeof(*ns)); + if (!ns) { + return NULL; + } + + zval *zv = zend_hash_index_lookup(links, (zend_ulong) node); + if (Z_ISNULL_P(zv)) { + ZVAL_LONG(zv, 1); + } else { + Z_LVAL_P(zv)++; + } + + memset(ns, 0, sizeof(*ns)); + ns->type = XML_LOCAL_NAMESPACE; + ns->next = node->nsDef; + node->nsDef = ns; + + return ns; +} + +/* Mint a temporary nsDef entry so C14N and the validators find namespaces that live on node->ns + * but have no matching xmlns attribute (typical for createElementNS). */ +static void dom_add_synthetic_ns_decl(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) +{ + xmlNsPtr ns = dom_alloc_ns_decl(links, node); + if (!ns) { + return; + } + + ns->href = xmlStrdup(src_ns->href); + ns->prefix = src_ns->prefix ? xmlStrdup(src_ns->prefix) : NULL; +} + +/* Same, but for attribute namespaces, which may collide by prefix with the + * element's own ns or with a sibling attribute's ns. 
*/ +static void dom_add_synthetic_ns_decl_for_attr(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) +{ + for (xmlNsPtr existing = node->nsDef; existing; existing = existing->next) { + if (xmlStrEqual(existing->prefix, src_ns->prefix)) { + return; + } + } + + dom_add_synthetic_ns_decl(links, node, src_ns); +} + +static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node) +{ + if (node->type == XML_ELEMENT_NODE) { + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { + xmlNsPtr ns = dom_alloc_ns_decl(links, node); + if (!ns) { + return; + } + + bool should_free; + xmlChar *attr_value = php_libxml_attr_value(attr, &should_free); + + ns->href = should_free ? attr_value : xmlStrdup(attr_value); + ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL; + ns->_private = attr; + if (attr->prev) { + attr->prev->next = attr->next; + } else { + node->properties = attr->next; + } + if (attr->next) { + attr->next->prev = attr->prev; + } + } + } + + /* The default namespace is handled separately from the other namespaces in C14N. + * The default namespace is explicitly looked up while the other namespaces are + * deduplicated and compared to a list of visible namespaces. */ + if (node->ns && !node->ns->prefix) { + /* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c + * can return the current namespace. 
*/ + zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns); + node->ns = xmlSearchNs(node->doc, node, NULL); + } else if (node->ns) { + dom_add_synthetic_ns_decl(links, node, node->ns); + } + + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + if (attr->ns && !php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { + dom_add_synthetic_ns_decl_for_attr(links, node, attr->ns); + } + } + } +} + +void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) +{ + dom_relink_ns_decls_element(links, root); + + xmlNodePtr base = root; + xmlNodePtr node = base->children; + while (node != NULL) { + dom_relink_ns_decls_element(links, node); + node = php_dom_next_in_tree_order(node, base); + } +} + +void dom_unlink_ns_decls(HashTable *links) +{ + ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) { + if (h & 1) { + xmlNodePtr node = (xmlNodePtr) (h ^ 1); + node->ns = Z_PTR_P(data); + } else { + xmlNodePtr node = (xmlNodePtr) h; + while (Z_LVAL_P(data)-- > 0) { + xmlNsPtr ns = node->nsDef; + node->nsDef = ns->next; + + xmlAttrPtr attr = ns->_private; + if (attr) { + if (attr->prev) { + attr->prev->next = attr; + } else { + node->properties = attr; + } + if (attr->next) { + attr->next->prev = attr; + } + } + + xmlFreeNs(ns); + } + } + } ZEND_HASH_FOREACH_END(); +} + + #endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/node.c b/ext/dom/node.c index 505f6ee452c3..df806bddfae7 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -2103,146 +2103,6 @@ PHP_METHOD(DOMNode, lookupNamespaceURI) } /* }}} end dom_node_lookup_namespace_uri */ -/* Allocate, track and prepend a temporary nsDef entry for C14N. - * Returns the new xmlNsPtr for the caller to fill in href/prefix/_private, - * or NULL on allocation failure. 
*/ -static xmlNsPtr dom_alloc_ns_decl(HashTable *links, xmlNodePtr node) -{ - xmlNsPtr ns = xmlMalloc(sizeof(*ns)); - if (!ns) { - return NULL; - } - - zval *zv = zend_hash_index_lookup(links, (zend_ulong) node); - if (Z_ISNULL_P(zv)) { - ZVAL_LONG(zv, 1); - } else { - Z_LVAL_P(zv)++; - } - - memset(ns, 0, sizeof(*ns)); - ns->type = XML_LOCAL_NAMESPACE; - ns->next = node->nsDef; - node->nsDef = ns; - - return ns; -} - -/* Mint a temporary nsDef entry so C14N finds namespaces that live on node->ns - * but have no matching xmlns attribute (typical for createElementNS). */ -static void dom_add_synthetic_ns_decl(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) -{ - xmlNsPtr ns = dom_alloc_ns_decl(links, node); - if (!ns) { - return; - } - - ns->href = xmlStrdup(src_ns->href); - ns->prefix = src_ns->prefix ? xmlStrdup(src_ns->prefix) : NULL; -} - -/* Same, but for attribute namespaces, which may collide by prefix with the - * element's own ns or with a sibling attribute's ns. */ -static void dom_add_synthetic_ns_decl_for_attr(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) -{ - for (xmlNsPtr existing = node->nsDef; existing; existing = existing->next) { - if (xmlStrEqual(existing->prefix, src_ns->prefix)) { - return; - } - } - - dom_add_synthetic_ns_decl(links, node, src_ns); -} - -static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node) -{ - if (node->type == XML_ELEMENT_NODE) { - for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { - if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { - xmlNsPtr ns = dom_alloc_ns_decl(links, node); - if (!ns) { - return; - } - - bool should_free; - xmlChar *attr_value = php_libxml_attr_value(attr, &should_free); - - ns->href = should_free ? attr_value : xmlStrdup(attr_value); - ns->prefix = attr->ns->prefix ? 
xmlStrdup(attr->name) : NULL; - ns->_private = attr; - if (attr->prev) { - attr->prev->next = attr->next; - } else { - node->properties = attr->next; - } - if (attr->next) { - attr->next->prev = attr->prev; - } - } - } - - /* The default namespace is handled separately from the other namespaces in C14N. - * The default namespace is explicitly looked up while the other namespaces are - * deduplicated and compared to a list of visible namespaces. */ - if (node->ns && !node->ns->prefix) { - /* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c - * can return the current namespace. */ - zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns); - node->ns = xmlSearchNs(node->doc, node, NULL); - } else if (node->ns) { - dom_add_synthetic_ns_decl(links, node, node->ns); - } - - for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { - if (attr->ns && !php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { - dom_add_synthetic_ns_decl_for_attr(links, node, attr->ns); - } - } - } -} - -static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) -{ - dom_relink_ns_decls_element(links, root); - - xmlNodePtr base = root; - xmlNodePtr node = base->children; - while (node != NULL) { - dom_relink_ns_decls_element(links, node); - node = php_dom_next_in_tree_order(node, base); - } -} - -static void dom_unlink_ns_decls(HashTable *links) -{ - ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) { - if (h & 1) { - xmlNodePtr node = (xmlNodePtr) (h ^ 1); - node->ns = Z_PTR_P(data); - } else { - xmlNodePtr node = (xmlNodePtr) h; - while (Z_LVAL_P(data)-- > 0) { - xmlNsPtr ns = node->nsDef; - node->nsDef = ns->next; - - xmlAttrPtr attr = ns->_private; - if (attr) { - if (attr->prev) { - attr->prev->next = attr; - } else { - node->properties = attr; - } - if (attr->next) { - attr->next->prev = attr; - } - } - - xmlFreeNs(ns); - } - } - } ZEND_HASH_FOREACH_END(); -} - static int 
dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent) { xmlNodePtr root = user_data; diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 22c738b20e0f..13f49879bb38 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -187,6 +187,13 @@ bool php_dom_create_nullable_object(xmlNodePtr obj, zval *return_value, dom_obje xmlNodePtr dom_clone_node(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node, xmlDocPtr doc, bool recursive); void dom_set_document_ref_pointers(xmlNodePtr node, php_libxml_ref_obj *document); void dom_set_document_ref_pointers_attr(xmlAttrPtr attr, php_libxml_ref_obj *document); + +/* Temporarily materialize namespace declarations as nsDef entries on the tree so + * that libxml's native validators/canonicalizers can resolve prefixed QNames that + * appear in element/attribute *content*. Modern DOM keeps declarations off the + * tree (node->nsDef == NULL), which xmlSearchNs() cannot follow. Internal only. */ +void dom_relink_ns_decls(HashTable *links, xmlNodePtr root); +void dom_unlink_ns_decls(HashTable *links); zval *dom_element_class_list_zval(dom_object *obj); typedef enum { diff --git a/ext/dom/tests/gh22219.phpt b/ext/dom/tests/gh22219.phpt new file mode 100644 index 000000000000..637a47039c35 --- /dev/null +++ b/ext/dom/tests/gh22219.phpt @@ -0,0 +1,54 @@ +--TEST-- +GH-22219 (Dom\XMLDocument::schemaValidate fails to resolve xs:QName value from an in-scope prefix) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + +XML; + +// The 'ref' prefix is declared on an element but only used inside the xs:QName +// attribute value, never as an element or attribute namespace. 
+$xsd = << + + + + + + + + + + + + + +XSD; + +libxml_use_internal_errors(true); + +$modern = Dom\XMLDocument::createFromString($xml, LIBXML_NSCLEAN); +var_dump($modern->schemaValidateSource($xsd)); + +$legacy = new DOMDocument(); +$legacy->loadXML($xml, LIBXML_NSCLEAN); +var_dump($legacy->schemaValidateSource($xsd)); + +foreach (libxml_get_errors() as $error) { + echo trim($error->message), PHP_EOL; +} +?> +--EXPECT-- +bool(true) +bool(true)