diff --git a/ext/dom/document.c b/ext/dom/document.c index b0274c676354..9c6f12143ad1 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1749,6 +1749,35 @@ static int dom_perform_xinclude(xmlDocPtr docp, dom_object *intern, zend_long fl return err; } +/* For modern DOM, namespace declarations are stored as attributes (node->nsDef + * is NULL), so libxml's native validators can't resolve prefixed QNames found in + * content (e.g. an xs:QName attribute value). Temporarily relink them, mirroring + * what C14N does in dom_canonicalization(). */ +typedef struct { + HashTable links; + bool active; +} dom_validate_ns_guard; + +static void dom_validate_ns_guard_begin(dom_validate_ns_guard *guard, xmlDocPtr docp) +{ + guard->active = php_dom_follow_spec_node((const xmlNode *) docp); + if (guard->active) { + zend_hash_init(&guard->links, 0, NULL, NULL, false); + xmlNodePtr root_element = xmlDocGetRootElement(docp); + if (root_element) { + dom_relink_ns_decls(&guard->links, root_element); + } + } +} + +static void dom_validate_ns_guard_end(dom_validate_ns_guard *guard) +{ + if (guard->active) { + dom_unlink_ns_decls(&guard->links); + zend_hash_destroy(&guard->links); + } +} + /* {{{ Substitutues xincludes in a DomDocument */ PHP_METHOD(DOMDocument, xinclude) { @@ -1822,8 +1851,11 @@ PHP_METHOD(DOMDocument, validate) cvp->userData = NULL; cvp->error = (xmlValidityErrorFunc) php_libxml_error_handler; cvp->warning = (xmlValidityErrorFunc) php_libxml_error_handler; - - if (xmlValidateDocument(cvp, docp)) { + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); + int dtd_valid = xmlValidateDocument(cvp, docp); + dom_validate_ns_guard_end(&guard); + if (dtd_valid) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -1920,7 +1952,10 @@ static void dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type) PHP_LIBXML_SANITIZE_GLOBALS(validate); xmlSchemaSetValidOptions(vptr, valid_opts); xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, 
php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = xmlSchemaValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlSchemaFree(sptr); xmlSchemaFreeValidCtxt(vptr); PHP_LIBXML_RESTORE_GLOBALS(validate); @@ -2018,7 +2053,10 @@ static void dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int type } xmlRelaxNGSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = xmlRelaxNGValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlRelaxNGFree(sptr); xmlRelaxNGFreeValidCtxt(vptr); diff --git a/ext/dom/namespace_compat.c b/ext/dom/namespace_compat.c index 377abbd01dc7..420759b369ef 100644 --- a/ext/dom/namespace_compat.c +++ b/ext/dom/namespace_compat.c @@ -492,4 +492,145 @@ PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns } } +/* Allocate, track and prepend a temporary nsDef entry on the given node. + * Returns the new xmlNsPtr for the caller to fill in href/prefix/_private, + * or NULL on allocation failure. */ +static xmlNsPtr dom_alloc_ns_decl(HashTable *links, xmlNodePtr node) +{ + xmlNsPtr ns = xmlMalloc(sizeof(*ns)); + if (!ns) { + return NULL; + } + + zval *zv = zend_hash_index_lookup(links, (zend_ulong) node); + if (Z_ISNULL_P(zv)) { + ZVAL_LONG(zv, 1); + } else { + Z_LVAL_P(zv)++; + } + + memset(ns, 0, sizeof(*ns)); + ns->type = XML_LOCAL_NAMESPACE; + ns->next = node->nsDef; + node->nsDef = ns; + + return ns; +} + +/* Mint a temporary nsDef entry so C14N and the validators find namespaces that live + * on node->ns but have no matching xmlns attribute (typical for createElementNS). */ +static void dom_add_synthetic_ns_decl(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) +{ + xmlNsPtr ns = dom_alloc_ns_decl(links, node); + if (!ns) { + return; + } + + ns->href = xmlStrdup(src_ns->href); + ns->prefix = src_ns->prefix ? xmlStrdup(src_ns->prefix) : NULL; +} + +/* Same, but for attribute namespaces, which may collide by prefix with the + * element's own ns or with a sibling attribute's ns. 
*/ +static void dom_add_synthetic_ns_decl_for_attr(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) +{ + for (xmlNsPtr existing = node->nsDef; existing; existing = existing->next) { + if (xmlStrEqual(existing->prefix, src_ns->prefix)) { + return; + } + } + + dom_add_synthetic_ns_decl(links, node, src_ns); +} + +static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node) +{ + if (node->type == XML_ELEMENT_NODE) { + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { + xmlNsPtr ns = dom_alloc_ns_decl(links, node); + if (!ns) { + return; + } + + bool should_free; + xmlChar *attr_value = php_libxml_attr_value(attr, &should_free); + + ns->href = should_free ? attr_value : xmlStrdup(attr_value); + ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL; + ns->_private = attr; + if (attr->prev) { + attr->prev->next = attr->next; + } else { + node->properties = attr->next; + } + if (attr->next) { + attr->next->prev = attr->prev; + } + } + } + + /* The default namespace is handled separately from the other namespaces in C14N. + * The default namespace is explicitly looked up while the other namespaces are + * deduplicated and compared to a list of visible namespaces. */ + if (node->ns && !node->ns->prefix) { + /* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c + * can return the current namespace. 
*/ + zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns); + node->ns = xmlSearchNs(node->doc, node, NULL); + } else if (node->ns) { + dom_add_synthetic_ns_decl(links, node, node->ns); + } + + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + if (attr->ns && !php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { + dom_add_synthetic_ns_decl_for_attr(links, node, attr->ns); + } + } + } +} + +void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) +{ + dom_relink_ns_decls_element(links, root); + + xmlNodePtr base = root; + xmlNodePtr node = base->children; + while (node != NULL) { + dom_relink_ns_decls_element(links, node); + node = php_dom_next_in_tree_order(node, base); + } +} + +void dom_unlink_ns_decls(HashTable *links) +{ + ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) { + if (h & 1) { + xmlNodePtr node = (xmlNodePtr) (h ^ 1); + node->ns = Z_PTR_P(data); + } else { + xmlNodePtr node = (xmlNodePtr) h; + while (Z_LVAL_P(data)-- > 0) { + xmlNsPtr ns = node->nsDef; + node->nsDef = ns->next; + + xmlAttrPtr attr = ns->_private; + if (attr) { + if (attr->prev) { + attr->prev->next = attr; + } else { + node->properties = attr; + } + if (attr->next) { + attr->next->prev = attr; + } + } + + xmlFreeNs(ns); + } + } + } ZEND_HASH_FOREACH_END(); +} + + #endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/node.c b/ext/dom/node.c index ad30a3fa67e1..4cfc6c5af0d2 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -2101,146 +2101,6 @@ PHP_METHOD(DOMNode, lookupNamespaceURI) } /* }}} end dom_node_lookup_namespace_uri */ -/* Allocate, track and prepend a temporary nsDef entry for C14N. - * Returns the new xmlNsPtr for the caller to fill in href/prefix/_private, - * or NULL on allocation failure. 
*/ -static xmlNsPtr dom_alloc_ns_decl(HashTable *links, xmlNodePtr node) -{ - xmlNsPtr ns = xmlMalloc(sizeof(*ns)); - if (!ns) { - return NULL; - } - - zval *zv = zend_hash_index_lookup(links, (zend_ulong) node); - if (Z_ISNULL_P(zv)) { - ZVAL_LONG(zv, 1); - } else { - Z_LVAL_P(zv)++; - } - - memset(ns, 0, sizeof(*ns)); - ns->type = XML_LOCAL_NAMESPACE; - ns->next = node->nsDef; - node->nsDef = ns; - - return ns; -} - -/* Mint a temporary nsDef entry so C14N finds namespaces that live on node->ns - * but have no matching xmlns attribute (typical for createElementNS). */ -static void dom_add_synthetic_ns_decl(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) -{ - xmlNsPtr ns = dom_alloc_ns_decl(links, node); - if (!ns) { - return; - } - - ns->href = xmlStrdup(src_ns->href); - ns->prefix = src_ns->prefix ? xmlStrdup(src_ns->prefix) : NULL; -} - -/* Same, but for attribute namespaces, which may collide by prefix with the - * element's own ns or with a sibling attribute's ns. */ -static void dom_add_synthetic_ns_decl_for_attr(HashTable *links, xmlNodePtr node, xmlNsPtr src_ns) -{ - for (xmlNsPtr existing = node->nsDef; existing; existing = existing->next) { - if (xmlStrEqual(existing->prefix, src_ns->prefix)) { - return; - } - } - - dom_add_synthetic_ns_decl(links, node, src_ns); -} - -static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node) -{ - if (node->type == XML_ELEMENT_NODE) { - for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { - if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { - xmlNsPtr ns = dom_alloc_ns_decl(links, node); - if (!ns) { - return; - } - - bool should_free; - xmlChar *attr_value = php_libxml_attr_value(attr, &should_free); - - ns->href = should_free ? attr_value : xmlStrdup(attr_value); - ns->prefix = attr->ns->prefix ? 
xmlStrdup(attr->name) : NULL; - ns->_private = attr; - if (attr->prev) { - attr->prev->next = attr->next; - } else { - node->properties = attr->next; - } - if (attr->next) { - attr->next->prev = attr->prev; - } - } - } - - /* The default namespace is handled separately from the other namespaces in C14N. - * The default namespace is explicitly looked up while the other namespaces are - * deduplicated and compared to a list of visible namespaces. */ - if (node->ns && !node->ns->prefix) { - /* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c - * can return the current namespace. */ - zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns); - node->ns = xmlSearchNs(node->doc, node, NULL); - } else if (node->ns) { - dom_add_synthetic_ns_decl(links, node, node->ns); - } - - for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { - if (attr->ns && !php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) { - dom_add_synthetic_ns_decl_for_attr(links, node, attr->ns); - } - } - } -} - -static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) -{ - dom_relink_ns_decls_element(links, root); - - xmlNodePtr base = root; - xmlNodePtr node = base->children; - while (node != NULL) { - dom_relink_ns_decls_element(links, node); - node = php_dom_next_in_tree_order(node, base); - } -} - -static void dom_unlink_ns_decls(HashTable *links) -{ - ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) { - if (h & 1) { - xmlNodePtr node = (xmlNodePtr) (h ^ 1); - node->ns = Z_PTR_P(data); - } else { - xmlNodePtr node = (xmlNodePtr) h; - while (Z_LVAL_P(data)-- > 0) { - xmlNsPtr ns = node->nsDef; - node->nsDef = ns->next; - - xmlAttrPtr attr = ns->_private; - if (attr) { - if (attr->prev) { - attr->prev->next = attr; - } else { - node->properties = attr; - } - if (attr->next) { - attr->next->prev = attr; - } - } - - xmlFreeNs(ns); - } - } - } ZEND_HASH_FOREACH_END(); -} - static int 
dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent) { xmlNodePtr root = user_data; diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 2cb306f1f5d1..1b19aeee3baa 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -161,6 +161,12 @@ void dom_set_document_ref_pointers_attr(xmlAttrPtr attr, php_libxml_ref_obj *doc /* Prop getters by offset */ zval *dom_get_prop_checked_offset(dom_object *obj, uint32_t offset, const char *name); +/* Temporarily materialize namespace declarations as nsDef entries on the tree so + * that libxml's native validators/canonicalizers can resolve prefixed QNames that + * appear in element/attribute *content*. Modern DOM keeps declarations off the + * tree (node->nsDef == NULL), which xmlSearchNs() cannot follow. Internal only. */ +void dom_relink_ns_decls(HashTable *links, xmlNodePtr root); +void dom_unlink_ns_decls(HashTable *links); zval *dom_element_class_list_zval(dom_object *obj); zval *dom_parent_node_children(dom_object *obj); diff --git a/ext/dom/tests/gh22219.phpt b/ext/dom/tests/gh22219.phpt new file mode 100644 index 000000000000..637a47039c35 --- /dev/null +++ b/ext/dom/tests/gh22219.phpt @@ -0,0 +1,54 @@ +--TEST-- +GH-22219 (Dom\XMLDocument::schemaValidate fails to resolve xs:QName value from an in-scope prefix) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + +XML; + +// The 'ref' prefix is declared on but only used inside the xs:QName +// attribute value, never as an element or attribute namespace. +$xsd = << + + + + + + + + + + + + + +XSD; + +libxml_use_internal_errors(true); + +$modern = Dom\XMLDocument::createFromString($xml, LIBXML_NSCLEAN); +var_dump($modern->schemaValidateSource($xsd)); + +$legacy = new DOMDocument(); +$legacy->loadXML($xml, LIBXML_NSCLEAN); +var_dump($legacy->schemaValidateSource($xsd)); + +foreach (libxml_get_errors() as $error) { + echo trim($error->message), PHP_EOL; +} +?> +--EXPECT-- +bool(true) +bool(true)