From 86e81afd00229e8c72b97189645f777b7ae22326 Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Mon, 27 Apr 2026 23:01:05 +0200 Subject: [PATCH 01/59] Test --- .gitignore | 3 + conf.py | 8 +- plugins/ros_related_packages.py | 189 ++++++++++++ ...ingle-Package-Define-And-Use-Interface.rst | 257 ++++++++++++++++ source/_static/custom.css | 33 ++ source/_static/related_packages.js | 285 ++++++++++++++++++ 6 files changed, 774 insertions(+), 1 deletion(-) create mode 100644 plugins/ros_related_packages.py create mode 100644 source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst create mode 100644 source/_static/related_packages.js diff --git a/.gitignore b/.gitignore index 652f1b03313..9a7719c1dbd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ _build/ __pycache__ ros2doc/ .DS_Store + +# Downloaded at HTML build time for browser-side package lists (large). +source/_static/rosdistro_cache/*.yaml.gz diff --git a/conf.py b/conf.py index d6fab26cd14..562eb01c0ef 100644 --- a/conf.py +++ b/conf.py @@ -89,6 +89,7 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'ros_related_packages', ] # Intersphinx mapping @@ -180,7 +181,12 @@ # Relative to html_static_path html_css_files = ['custom.css', 'adopters.css'] -html_js_files = ['adopters.js'] +html_js_files = [ + ('https://cdn.jsdelivr.net/npm/pako@2.1.0/dist/pako.min.js', {'defer': ''}), + ('https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/dist/js-yaml.min.js', {'defer': ''}), + 'adopters.js', + 'related_packages.js', +] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/ros_related_packages.py b/plugins/ros_related_packages.py new file mode 100644 index 00000000000..4140aa6b2bf --- /dev/null +++ b/plugins/ros_related_packages.py @@ -0,0 +1,189 @@ +# Copyright 2026 Open Robotics and contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sphinx directive for runtime ROS distro package lists (filtered in the browser).""" + +from __future__ import annotations + +import html +import os +import urllib.error +import urllib.request +from typing import List + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util import logging as sphinx_logging +from sphinx.util.docutils import SphinxDirective + +LOGGER = sphinx_logging.getLogger(__name__) + +ROSDISTRO_CACHE_TEMPLATE = ( + 'https://repo.ros2.org/rosdistro_cache/{distro}-cache.yaml.gz' +) + + +def _normalize_field_name(raw: str) -> str: + """Normalize a docinfo field label for comparison (e.g. 
``Build-type`` → ``build-type``).""" + name = raw.strip().lower().rstrip(':') + return name.replace(' ', '-') + + +def _field_value_from_doctree(document: nodes.document, wanted: str) -> str | None: + """Return the body of the first matching docinfo/rst field in the document.""" + wanted_norm = _normalize_field_name(wanted) + for field in document.traverse(nodes.field): + children = getattr(field, 'children', ()) or () + if len(children) < 2: + continue + label = children[0].astext() + if _normalize_field_name(label) != wanted_norm: + continue + return children[1].astext().strip() + return None + + +def _meta_get(metadata: dict, *names: str) -> str | None: + """Look up document metadata using several possible keys (Sphinx/docutils variants).""" + for name in names: + for key, val in metadata.items(): + if not val: + continue + if _normalize_field_name(str(key)) == _normalize_field_name(name): + return str(val).strip() + return None + + +def _meta_content_from_docutils(document: nodes.document, meta_name: str) -> str | None: + """Read ``docutils.nodes.meta`` emitted by ``.. meta::`` (typically ```` HTML meta tags). + + Hyphenated names work in rST as ``.. meta::`` fields, e.g. ``:build-type: ament_cmake``. + """ + for node in document.traverse(nodes.meta): + if node.get('name') != meta_name: + continue + raw = node.get('content') + if raw: + return str(raw).strip() + return None + + +def _positive_int_option(argument: str) -> int: + """Parse a positive integer option for the directive.""" + if argument is None: + raise ValueError('option requires a number') + value = int(argument) + if value < 1: + raise ValueError('must be positive') + return value + + +class RosRelatedPackagesDirective(SphinxDirective): + """Emit a placeholder ``div`` filled at runtime by ``related_packages.js``. + + Filter criteria (currently ``build-type``) should be supplied as **HTML meta tags** + via Docutils ``.. meta::`` so values appear in ```` and not in the page body:: + + .. 
meta:: + :build-type: ament_cmake + + Fallbacks: Sphinx ``env.metadata`` / a visible rST field list ``:build-type:``. + Optional ``:build-type:`` on this directive overrides document metadata. + """ + + has_content = False + required_arguments = 0 + optional_arguments = 0 + option_spec = { + 'build-type': directives.unchanged, + 'max': _positive_int_option, + } + + def run(self) -> List[nodes.Node]: + build_type_opt = self.options.get('build-type') + if build_type_opt: + build_type = build_type_opt.strip() + else: + meta = self.env.metadata.get(self.env.docname, {}) + build_type = ( + _meta_content_from_docutils(self.state.document, 'build-type') + or _meta_get(meta, 'build-type', 'build_type') + or _field_value_from_doctree(self.state.document, 'build-type') + or '' + ) + + if not build_type: + raise self.error( + 'ros-related-packages: define build type with `.. meta::` and ' + '`:build-type: ament_cmake` (recommended), or a `:build-type:` field list, ' + 'or pass `:build-type:` on this directive.' 
+ ) + + max_pkgs = self.options.get('max', 10) + + macros = getattr(self.env.config, 'macros', {}) or {} + distro = macros.get('DISTRO', 'rolling') + + escaped_type = html.escape(build_type, quote=True) + escaped_distro = html.escape(distro, quote=True) + + html_body = ( + '' + ) + return [nodes.raw('', html_body, format='html')] + + +def download_rosdistro_cache(app, builder) -> None: + """Fetch the gzipped rosdistro cache into ``source/_static`` for same-origin loads.""" + if builder.format != 'html': + return + + macros = getattr(app.config, 'macros', {}) or {} + distro = macros.get('DISTRO', 'rolling') + + dest_dir = os.path.join(app.confdir, 'source', '_static', 'rosdistro_cache') + os.makedirs(dest_dir, exist_ok=True) + dest_path = os.path.join(dest_dir, f'{distro}-cache.yaml.gz') + url = ROSDISTRO_CACHE_TEMPLATE.format(distro=distro) + + request = urllib.request.Request(url, headers={'User-Agent': 'ros2-documentation-build/1.0'}) + try: + with urllib.request.urlopen(request, timeout=120) as response: + data = response.read() + with open(dest_path, 'wb') as handle: + handle.write(data) + except (urllib.error.URLError, OSError, TimeoutError) as exc: + LOGGER.warning( + 'Could not download rosdistro cache from %s (%s). ' + 'Related package lists may not work until the file exists at %s', + url, + exc, + dest_path, + ) + + +def setup(app): + app.add_directive('ros-related-packages', RosRelatedPackagesDirective) + app.connect('builder-inited', download_rosdistro_cache) + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst b/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst new file mode 100644 index 00000000000..b203d592aab --- /dev/null +++ b/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst @@ -0,0 +1,257 @@ +.. redirect-from:: + + How-To-Guides/Implementing-custom-interfaces + +.. 
meta:: + :build-type: ament_cmake + +Implementing custom interfaces - how-to +======================================= + +.. centered:: **When predefined interface definitions are not enough, you need to create custom interfaces. + In this article, you will learn how to define and build interfaces with different field types. + This will help you implement custom interfaces in ROS to suit your needs.** + +:: + + Area: ROS-framework | Content-type: how-to | Experience: beginner, intermediate + +.. contents:: Contents + :depth: 2 + :local: + +Summary +------- + +Interfaces define how nodes exchange data. +ROS offers three main interface types: + +* Topics (``.msg`` files) +* Services (``.srv`` files) +* Actions (``.action`` files) + +`Learn more about interfaces `__ + +While predefined interface definitions are useful at the beginning, you soon realize that they can't meet all your needs. +That's why the ability to create custom interfaces is essential. + +Creating custom interfaces involves preparing a package, specifying interface definitions, and registering the interfaces in ``package.xml`` and ``CMakeLists.txt``. +Using custom interfaces involves configuring a node to include the interfaces in its source, and configuring the node to build with the interfaces in ``CMakeLists.txt``. + +.. tip:: + + The best practice is to declare interfaces in dedicated interface packages, but sometimes it may be more convenient for you to declare, create and use an interface all in one package. + +Prerequisites +------------- + +#. Install :doc:`ROS 2 <../Installation>`, and create your :doc:`workspace <../Tutorials/Beginner-Client-Libraries/Creating-A-Workspace/Creating-A-Workspace>`. +#. Make sure you understand how to :doc:`create packages <../Tutorials/Beginner-Client-Libraries/Creating-Your-First-ROS2-Package>`. + +Steps +----- + +.. note:: + + For our examples, we are using the ``msg`` interface type, but the steps below apply to all interface types. + +#. 
In your workspace ``src`` folder, create a ``more_interfaces`` CMake package with a folder for interface definitions. + For example: + + .. code-block:: console + + $ ros2 pkg create --build-type ament_cmake more_interfaces + $ mkdir -p more_interfaces/msg + + .. note:: + + In ROS 2, interfaces can only be defined in CMake packages. + You can also use `ament_cmake_python `__ to include Python libraries and nodes in a CMake package. + +#. In your interface definitions folder, create a file in which you provide the definitions for the interface. + For example, for a message interface, you can create an ``AddressBook.msg`` file that collects personal data: + + .. code-block:: text + + uint8 PHONE_TYPE_HOME=0 + uint8 PHONE_TYPE_WORK=1 + uint8 PHONE_TYPE_MOBILE=2 + string first_name + string last_name + string phone_number + uint8 phone_type + +#. In ``package.xml``, add the following code to register your package as part of interface groups: + ``rosidl_default_generators``: Needed to generate the code during the build. + ``rosidl_default_runtime``: Needed only at run time. + + .. code-block:: xml + + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + +#. In ``CMakeLists.txt``, add the required code to make the runtime libraries available and to generate source files from your interface definition. + For example: + + .. code-block:: cmake + + find_package(rosidl_default_generators REQUIRED) + set(msg_files "msg/AddressBook.msg") + rosidl_generate_interfaces(${PROJECT_NAME} ${msg_files}) + ament_export_dependencies(rosidl_default_runtime) + +#. In the ``more_interfaces/src`` folder, create a node to interact with your new interface. + For example, for a message interface, create ``publish_address_book.cpp`` with code to publish the message periodically. + + .. 
code-block:: c++ + + #include + #include + + #include "rclcpp/rclcpp.hpp" + #include "more_interfaces/msg/address_book.hpp" + + using namespace std::chrono_literals; + + class AddressBookPublisher : public rclcpp::Node + { + public: + AddressBookPublisher() + : Node("address_book_publisher") + { + address_book_publisher_ = + this->create_publisher("address_book", 10); + + auto publish_msg = [this]() -> void { + auto message = more_interfaces::msg::AddressBook(); + + message.first_name = "John"; + message.last_name = "Doe"; + message.phone_number = "1234567890"; + message.phone_type = message.PHONE_TYPE_MOBILE; + + std::cout << "Publishing Contact\nFirst:" << message.first_name << + " Last:" << message.last_name << std::endl; + + this->address_book_publisher_->publish(message); + }; + timer_ = this->create_wall_timer(1s, publish_msg); + } + + private: + rclcpp::Publisher::SharedPtr address_book_publisher_; + rclcpp::TimerBase::SharedPtr timer_; + }; + + + int main(int argc, char * argv[]) + { + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + + return 0; + } + +#. In ``CMakeLists.txt``, create a new target so the node builds correctly. + For example: + + .. code-block:: cmake + + find_package(rclcpp REQUIRED) + add_executable(publish_address_book src/publish_address_book.cpp) + target_link_libraries(publish_address_book rclcpp::rclcpp) + install(TARGETS publish_address_book DESTINATION lib/${PROJECT_NAME}) + +#. In ``CMakeLists.txt``, link the node to your interface. + For example: + + .. code-block:: cmake + + rosidl_get_typesupport_target(cpp_typesupport_target ${PROJECT_NAME} rosidl_typesupport_cpp) + target_link_libraries(publish_address_book "${cpp_typesupport_target}") + +#. To test your new interface, do the following: + + a) In your workspace root, build the package. + + b) Source the workspace and run the node that uses the interface. + + For example: + + .. tabs:: + + .. group-tab:: Linux + + .. 
code-block:: console + + $ cd ~/ros2_ws + $ colcon build --packages-up-to more_interfaces + $ source install/local_setup.bash + $ ros2 run more_interfaces publish_address_book + + .. group-tab:: macOS + + .. code-block:: console + + $ cd ~/ros2_ws + $ colcon build --packages-up-to more_interfaces + $ . install/local_setup.bash + $ ros2 run more_interfaces publish_address_book + + .. group-tab:: Windows + + .. code-block:: console + + $ cd /ros2_ws + $ colcon build --merge-install --packages-up-to more_interfaces + $ call install/local_setup.bat + $ ros2 run more_interfaces publish_address_book + + Or using Powershell: + + .. code-block:: console + + $ install/local_setup.ps1 + $ ros2 run more_interfaces publish_address_book + + c) Check the interface or interact with it. + + For example, for a message interface, you could open another terminal and use the following code: + + .. tabs:: + + .. group-tab:: Linux + + .. code-block:: console + + $ source install/setup.bash + $ ros2 topic echo /address_book + + .. group-tab:: macOS + + .. code-block:: console + + $ . install/setup.bash + $ ros2 topic echo /address_book + + .. group-tab:: Windows + + .. code-block:: console + + $ call install/setup.bat + $ ros2 topic echo /address_book + + Or using Powershell: + + .. code-block:: console + + $ install/setup.ps1 + $ ros2 topic echo /address_book + +Related content +--------------- + +.. 
ros-related-packages:: + diff --git a/source/_static/custom.css b/source/_static/custom.css index 4252f921bb8..529f9c5d8bd 100644 --- a/source/_static/custom.css +++ b/source/_static/custom.css @@ -1,3 +1,36 @@ .wy-nav-content { max-width: 64rem; } + +/* Runtime “related packages” list (see ros_related_packages extension) */ +.related-packages { + margin-top: 0.75rem; +} + +.related-packages--loading .related-packages__status { + color: #666; + font-style: italic; +} + +.related-packages__list { + margin: 0.35em 0 0; + padding-left: 1.25rem; +} + +.related-packages__list li { + margin: 0.35em 0; +} + +.related-packages__list a { + font-weight: 500; +} + +.related-packages__empty, +.related-packages--error .related-packages__status { + color: #555; + margin: 0.35em 0 0; +} + +.related-packages--error .related-packages__status { + color: #a94442; +} diff --git a/source/_static/related_packages.js b/source/_static/related_packages.js new file mode 100644 index 00000000000..99a1bc40949 --- /dev/null +++ b/source/_static/related_packages.js @@ -0,0 +1,285 @@ +/** + * Populate ``.js-related-packages`` widgets from the rosdistro cache YAML. + * + * Depends on global ``pako`` (gzip) and ``yaml`` / ``jsyaml`` (js-yaml), loaded + * earlier via html_js_files in conf.py. + */ +(function () { + 'use strict'; + + /** @type {Record>>} */ + var cacheByDistro = {}; + + /** + * Resolve the js-yaml API regardless of how the bundle exposes it. + * + * @returns {{ load: function(string): unknown }} + */ + function yamlApi() { + var g = typeof window !== 'undefined' ? window : globalThis; + /* js-yaml UMD sets ``globalThis.jsyaml`` (see dist/js-yaml.min.js). */ + if (g.jsyaml && typeof g.jsyaml.load === 'function') { + return g.jsyaml; + } + if (g.yaml && typeof g.yaml.load === 'function') { + return g.yaml; + } + throw new Error('js-yaml is not loaded'); + } + + /** + * Directory containing ``related_packages.js`` (ends with slash or empty). 
+ * + * @returns {string} + */ + function scriptBaseUrl() { + var nodes = document.getElementsByTagName('script'); + var i; + var src; + for (i = nodes.length - 1; i >= 0; i--) { + src = nodes[i].src; + if (src && src.indexOf('related_packages.js') !== -1) { + return src.replace(/related_packages\.js([?#].*)?$/i, ''); + } + } + return ''; + } + + /** + * @param {string} distro + * @returns {string|null} + */ + function bundledCacheUrl(distro) { + var base = scriptBaseUrl(); + if (!base) { + return null; + } + return base + 'rosdistro_cache/' + distro + '-cache.yaml.gz'; + } + + /** + * @param {string} distro + * @returns {Promise>} + */ + function loadXmls(distro) { + if (cacheByDistro[distro]) { + return cacheByDistro[distro]; + } + cacheByDistro[distro] = fetchAndParse(distro); + return cacheByDistro[distro]; + } + + /** + * @param {string} distro + * @returns {Promise>} + */ + function fetchAndParse(distro) { + var remote = + 'https://repo.ros2.org/rosdistro_cache/' + encodeURIComponent(distro) + '-cache.yaml.gz'; + var urls = []; + var bundled = bundledCacheUrl(distro); + if (bundled) { + urls.push(bundled); + } + urls.push(remote); + + return tryUrls(urls); + } + + /** + * @param {string[]} urls + * @returns {Promise>} + */ + function tryUrls(urls) { + var i = 0; + + function next(lastErr) { + if (i >= urls.length) { + return Promise.reject(lastErr || new Error('failed to load rosdistro cache')); + } + var url = urls[i]; + i += 1; + return fetch(url, { cache: 'no-cache' }) + .then(function (res) { + if (!res.ok) { + throw new Error('HTTP ' + res.status + ' for ' + url); + } + return res.arrayBuffer(); + }) + .then(function (buf) { + var g = typeof window !== 'undefined' ? 
window : globalThis; + var inflated = g.pako.inflate(new Uint8Array(buf), { to: 'string' }); + var data = yamlApi().load(inflated); + var xmls = data && data.release_package_xmls; + if (!xmls || typeof xmls !== 'object') { + throw new Error('release_package_xmls missing in rosdistro cache'); + } + return /** @type {Record} */ (xmls); + }) + .catch(function (err) { + return next(err); + }); + } + + return next(null); + } + + /** + * @param {string} xmlStr + * @returns {string[]} + */ + function extractBuildTypes(xmlStr) { + var out = []; + var re = /]*>([^<]+)<\/build_type>/gi; + var m; + while ((m = re.exec(xmlStr)) !== null) { + out.push(m[1].trim()); + } + return out; + } + + /** + * @param {string} xmlStr + * @param {string} want + * @returns {boolean} + */ + function matchesBuildType(xmlStr, want) { + var types = extractBuildTypes(xmlStr); + var k; + for (k = 0; k < types.length; k += 1) { + if (types[k] === want) { + return true; + } + } + return false; + } + + /** + * @param {string} distro + * @param {string} pkg + * @returns {string} + */ + function docsPackageUrl(distro, pkg) { + return ( + 'https://docs.ros.org/en/' + + encodeURIComponent(distro) + + '/p/' + + encodeURIComponent(pkg) + + '/' + ); + } + + /** + * @param {HTMLElement} el + * @param {Error} err + */ + function showError(el, err) { + el.classList.remove('related-packages--loading'); + el.classList.add('related-packages--error'); + el.innerHTML = + ''; + if (typeof console !== 'undefined' && console.warn) { + console.warn('related_packages:', err); + } + } + + /** + * @param {HTMLElement} el + * @param {Record} xmls + */ + function fillWidget(el, xmls) { + var want = el.getAttribute('data-build-type') || ''; + var max = parseInt(el.getAttribute('data-max') || '10', 10); + var distro = el.getAttribute('data-distro') || 'rolling'; + + var names = Object.keys(xmls).filter(function (name) { + var xmlStr = xmls[name]; + if (typeof xmlStr !== 'string') { + return false; + } + return 
matchesBuildType(xmlStr, want); + }); + names.sort(function (a, b) { + return a.localeCompare(b); + }); + var picked = names.slice(0, max); + + var ul = document.createElement('ul'); + ul.className = 'related-packages__list'; + var j; + for (j = 0; j < picked.length; j += 1) { + var pkg = picked[j]; + var li = document.createElement('li'); + var a = document.createElement('a'); + a.href = docsPackageUrl(distro, pkg); + a.textContent = pkg; + a.rel = 'noopener noreferrer'; + li.appendChild(a); + ul.appendChild(li); + } + + el.innerHTML = ''; + el.classList.remove('related-packages--loading'); + + if (picked.length === 0) { + var p = document.createElement('p'); + p.className = 'related-packages__empty'; + p.textContent = 'No packages matched this filter.'; + el.appendChild(p); + } else { + el.appendChild(ul); + } + } + + function fillAll() { + var widgets = document.querySelectorAll('.js-related-packages'); + if (!widgets.length) { + return; + } + + /** @type {Record} */ + var byDistro = {}; + var idx; + for (idx = 0; idx < widgets.length; idx += 1) { + var el = widgets[idx]; + var d = el.getAttribute('data-distro') || 'rolling'; + if (!byDistro[d]) { + byDistro[d] = []; + } + byDistro[d].push(el); + } + + var distroKeys = Object.keys(byDistro); + var di; + for (di = 0; di < distroKeys.length; di += 1) { + (function (distro) { + var group = byDistro[distro]; + loadXmls(distro).then( + function (xmls) { + var gi; + for (gi = 0; gi < group.length; gi += 1) { + fillWidget(group[gi], xmls); + } + }, + function (err) { + var ei; + for (ei = 0; ei < group.length; ei += 1) { + showError(group[ei], err); + } + } + ); + })(distroKeys[di]); + } + } + + if (typeof document !== 'undefined') { + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', fillAll); + } else { + fillAll(); + } + } +})(); From 929e4fd99080fd3ee2321a2c6144370a6fdea0f9 Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Mon, 27 Apr 2026 23:09:01 +0200 Subject: [PATCH 
02/59] Fix --- plugins/ros_related_packages.py | 23 +++++++++++++-- source/How-To-Guides.rst | 1 + source/_static/related_packages.js | 45 +++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/plugins/ros_related_packages.py b/plugins/ros_related_packages.py index 4140aa6b2bf..cfd4d467ed6 100644 --- a/plugins/ros_related_packages.py +++ b/plugins/ros_related_packages.py @@ -79,6 +79,16 @@ def _meta_content_from_docutils(document: nodes.document, meta_name: str) -> str return None +def _bundled_cache_href(docname: str, distro: str) -> str: + """Relative URL from this page's HTML file to the downloaded gzip in ``_static/``. + + Sphinx emits sibling paths like ``_static/`` under the HTML root (including per-version + directories for multiversion builds). Depth follows ``docname`` segments (slashes). + """ + depth = docname.count('/') + return ('../' * depth) + f'_static/rosdistro_cache/{distro}-cache.yaml.gz' + + def _positive_int_option(argument: str) -> int: """Parse a positive integer option for the directive.""" if argument is None: @@ -137,12 +147,15 @@ def run(self) -> List[nodes.Node]: escaped_type = html.escape(build_type, quote=True) escaped_distro = html.escape(distro, quote=True) + bundled_href = _bundled_cache_href(self.env.docname, distro) + escaped_bundled = html.escape(bundled_href, quote=True) html_body = ( '' @@ -150,9 +163,13 @@ def run(self) -> List[nodes.Node]: return [nodes.raw('', html_body, format='html')] -def download_rosdistro_cache(app, builder) -> None: - """Fetch the gzipped rosdistro cache into ``source/_static`` for same-origin loads.""" - if builder.format != 'html': +def download_rosdistro_cache(app) -> None: + """Fetch the gzipped rosdistro cache into ``source/_static`` for same-origin loads. + + Sphinx 8+ passes only ``app`` to ``builder-inited``; the builder is ``app.builder``. 
+ """ + builder = app.builder + if builder is None or builder.format != 'html': return macros = getattr(app.config, 'macros', {}) or {} diff --git a/source/How-To-Guides.rst b/source/How-To-Guides.rst index c8f3cb4627c..15707a74d71 100644 --- a/source/How-To-Guides.rst +++ b/source/How-To-Guides.rst @@ -22,6 +22,7 @@ If you are new and looking to learn the ropes, start with the :doc:`Tutorials >} */ - function loadXmls(distro) { - if (cacheByDistro[distro]) { - return cacheByDistro[distro]; + function loadXmls(distro, sampleWidget) { + var bundledKey = + distro + + '|' + + (sampleWidget ? sampleWidget.getAttribute('data-bundled-cache-href') || '' : ''); + if (cacheByDistro[bundledKey]) { + return cacheByDistro[bundledKey]; } - cacheByDistro[distro] = fetchAndParse(distro); - return cacheByDistro[distro]; + cacheByDistro[bundledKey] = fetchAndParse(distro, resolveBundledAbsoluteUrl(sampleWidget, distro)); + return cacheByDistro[bundledKey]; } /** * @param {string} distro + * @param {string|null} bundledAbsolute resolved same-origin URL to gzip, if any * @returns {Promise>} */ - function fetchAndParse(distro) { + function fetchAndParse(distro, bundledAbsolute) { var remote = 'https://repo.ros2.org/rosdistro_cache/' + encodeURIComponent(distro) + '-cache.yaml.gz'; var urls = []; - var bundled = bundledCacheUrl(distro); - if (bundled) { - urls.push(bundled); + if (bundledAbsolute) { + urls.push(bundledAbsolute); } urls.push(remote); @@ -117,6 +141,7 @@ return /** @type {Record} */ (xmls); }) .catch(function (err) { + /* Try next URL (e.g. bundled 404 then HTTPS remote — remote may hit CORS). 
*/ return next(err); }); } @@ -257,7 +282,7 @@ for (di = 0; di < distroKeys.length; di += 1) { (function (distro) { var group = byDistro[distro]; - loadXmls(distro).then( + loadXmls(distro, group[0]).then( function (xmls) { var gi; for (gi = 0; gi < group.length; gi += 1) { From 3d437e4deeffc7c42c993916c690e1f2d7beff01 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Fri, 1 May 2026 16:35:49 +0100 Subject: [PATCH 03/59] OPENR-89: Simple test for adding an enhance job and run script on changed files --- .github/scripts/enhance_topics.py | 4 ++ .github/workflows/test.yml | 83 +++++++++++++++++++------------ Makefile | 3 ++ 3 files changed, 57 insertions(+), 33 deletions(-) create mode 100644 .github/scripts/enhance_topics.py diff --git a/.github/scripts/enhance_topics.py b/.github/scripts/enhance_topics.py new file mode 100644 index 00000000000..70d137d09d3 --- /dev/null +++ b/.github/scripts/enhance_topics.py @@ -0,0 +1,4 @@ +import sys + +for arg in sys.argv[1:]: + print(f"Enhancing topic: {arg}") \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..e412c58b4c8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,6 +3,23 @@ name: Test on: pull_request jobs: + enhance: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Enhance topics + env: + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: make enhance-topics + test: runs-on: ubuntu-24.04 steps: @@ -114,36 +131,36 @@ jobs: 3. 
Open `html-artifacts-${{ github.event.pull_request.number }}/index.html` in your favorite browser edit-mode: replace - multi-build: - needs: [test, lint, spellcheck] - runs-on: ubuntu-24.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - - # Create a fake remote ref matching the target branch name - - name: Setup branch for multiversion - run: | - TARGET_BRANCH="${{ github.base_ref }}" - echo "PR target branch: $TARGET_BRANCH" - - # Create a remote ref that sphinx-multiversion will find - git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD - - # Verify the ref was created - echo "Created refs:" - git show-ref | grep "$TARGET_BRANCH" - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Setup Graphviz - uses: ts-graphviz/setup-graphviz@v2 - - - name: Install dependencies with pip - run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt - - - name: Build the docs - run: make multiversion + # multi-build: + # needs: [test, lint, spellcheck] + # runs-on: ubuntu-24.04 + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # + # # Create a fake remote ref matching the target branch name + # - name: Setup branch for multiversion + # run: | + # TARGET_BRANCH="${{ github.base_ref }}" + # echo "PR target branch: $TARGET_BRANCH" + # + # # Create a remote ref that sphinx-multiversion will find + # git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD + # + # # Verify the ref was created + # echo "Created refs:" + # git show-ref | grep "$TARGET_BRANCH" + # + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.12' + # + # - name: Setup Graphviz + # uses: ts-graphviz/setup-graphviz@v2 + # + # - name: Install dependencies with pip + # run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + # + # - name: Build the docs + # run: make multiversion diff --git a/Makefile b/Makefile index 
f2d90d3a3a8..5a85e9dfe64 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ multiversion: Makefile %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) +enhance-topics: + git diff --name-only "$(PR_BASE_SHA)" "$(PR_HEAD_SHA)" | xargs -r $(PYTHON) scripts/enhance_topics.py + lint: ./sphinx-lint-with-ros source From 73517017851a3cede7e65c021053ccb07fcd60ea Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:38:10 +0100 Subject: [PATCH 04/59] OPENR-89: Return the test workflow to original state --- .github/workflows/test.yml | 83 +++++++++++++++----------------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e412c58b4c8..5cea1c262d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,23 +3,6 @@ name: Test on: pull_request jobs: - enhance: - runs-on: ubuntu-24.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Enhance topics - env: - PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} - PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: make enhance-topics - test: runs-on: ubuntu-24.04 steps: @@ -131,36 +114,36 @@ jobs: 3. 
Open `html-artifacts-${{ github.event.pull_request.number }}/index.html` in your favorite browser edit-mode: replace - # multi-build: - # needs: [test, lint, spellcheck] - # runs-on: ubuntu-24.04 - # steps: - # - name: Checkout - # uses: actions/checkout@v4 - # - # # Create a fake remote ref matching the target branch name - # - name: Setup branch for multiversion - # run: | - # TARGET_BRANCH="${{ github.base_ref }}" - # echo "PR target branch: $TARGET_BRANCH" - # - # # Create a remote ref that sphinx-multiversion will find - # git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD - # - # # Verify the ref was created - # echo "Created refs:" - # git show-ref | grep "$TARGET_BRANCH" - # - # - name: Setup Python - # uses: actions/setup-python@v5 - # with: - # python-version: '3.12' - # - # - name: Setup Graphviz - # uses: ts-graphviz/setup-graphviz@v2 - # - # - name: Install dependencies with pip - # run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt - # - # - name: Build the docs - # run: make multiversion + multi-build: + needs: [test, lint, spellcheck] + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + # Create a fake remote ref matching the target branch name + - name: Setup branch for multiversion + run: | + TARGET_BRANCH="${{ github.base_ref }}" + echo "PR target branch: $TARGET_BRANCH" + + # Create a remote ref that sphinx-multiversion will find + git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD + + # Verify the ref was created + echo "Created refs:" + git show-ref | grep "$TARGET_BRANCH" + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 + + - name: Install dependencies with pip + run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + + - name: Build the docs + run: make multiversion From 20efaf83513d6371ed414d673515d9410059bcde Mon Sep 17 
00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:40:06 +0100 Subject: [PATCH 05/59] OPENR-89: Use new workflow based on push. List out files which remain to be enhanced. --- .github/workflows/enhance.yml | 23 +++++++++++++++++++++++ Makefile | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/enhance.yml diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml new file mode 100644 index 00000000000..db354226907 --- /dev/null +++ b/.github/workflows/enhance.yml @@ -0,0 +1,23 @@ +name: Enhance content + +on: push + +jobs: + enhance: + # Runs only on forks when contributor pushes to their fork + if: github.event.repository.fork == true + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Enhance topics + env: + BASE_SHA: ${{ github.event.before }} + HEAD_SHA: ${{ github.event.after }} + run: make enhance-topics \ No newline at end of file diff --git a/Makefile b/Makefile index 5a85e9dfe64..f411c155a1e 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ multiversion: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) enhance-topics: - git diff --name-only "$(PR_BASE_SHA)" "$(PR_HEAD_SHA)" | xargs -r $(PYTHON) scripts/enhance_topics.py + git diff --name-only --diff-filter=d $(BASE_SHA) $(HEAD_SHA) | xargs -r $(PYTHON) scripts/enhance_topics.py lint: ./sphinx-lint-with-ros source From 737797225e679dab0a9f1467be48fb631aa8b095 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:53:58 +0100 Subject: [PATCH 06/59] OPENR-89: Fix to fetch history so that SHAs exist --- .github/workflows/enhance.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml index db354226907..e03ce90d395 100644 --- a/.github/workflows/enhance.yml +++ b/.github/workflows/enhance.yml @@ -9,10 +9,14 @@ 
jobs: runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v4 + # Using checkout v5, as v4 was warning that it will soon be deprecated (Node 20) + uses: actions/checkout@v5 + with: + fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v5 + # Using setup-python v6, as v5 has same warning as above + uses: actions/setup-python@v6 with: python-version: '3.12' From 39eb9ac74ded0cd83fbda038aea47cb734ea4a5d Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:55:52 +0100 Subject: [PATCH 07/59] OPENR-89: Test script in wrong location --- {.github/scripts => scripts}/enhance_topics.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {.github/scripts => scripts}/enhance_topics.py (100%) diff --git a/.github/scripts/enhance_topics.py b/scripts/enhance_topics.py similarity index 100% rename from .github/scripts/enhance_topics.py rename to scripts/enhance_topics.py From 292e4551a4b4e1e8310e86bf039ed9cd98a62bbb Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 18:00:14 +0100 Subject: [PATCH 08/59] OPENR-89: Create RST-specific parsing and updates in new module, and update topic enhance script --- constraints.txt | 4 + requirements.txt | 4 + scripts/enhance_data.py | 213 +++++++++++++++++++++++++ scripts/enhance_topics.py | 323 +++++++++++++++++++++++++++++++++++++- scripts/rst_utils.py | 124 +++++++++++++++ 5 files changed, 666 insertions(+), 2 deletions(-) create mode 100644 scripts/enhance_data.py create mode 100644 scripts/rst_utils.py diff --git a/constraints.txt b/constraints.txt index 56ae59259be..2ba36b535b7 100644 --- a/constraints.txt +++ b/constraints.txt @@ -11,11 +11,13 @@ imagesize==1.4.1 iniconfig==2.1.0 Jinja2==3.1.6 MarkupSafe==3.0.3 +openai==2.33.0 packaging==25.0 pluggy==1.6.0 polib==1.2.0 Pygments==2.19.2 pytest==8.4.2 +python-dotenv==1.1.0 PyYAML==6.0.3 regex==2025.9.18 requests==2.32.5 @@ -39,4 +41,6 @@ sphinxcontrib-mermaid==1.0.0 sphinxcontrib-qthelp==2.0.0 
sphinxcontrib-serializinghtml==2.0.0 stevedore==5.5.0 +tenacity==9.1.4 +timeout-decorator==0.5.0 urllib3==2.5.0 diff --git a/requirements.txt b/requirements.txt index f952c4882fb..21c4c057505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ codespell doc8 docutils +openai +python-dotenv pip pytest sphinx @@ -12,3 +14,5 @@ sphinx-tabs sphinx-tamer sphinxcontrib-googleanalytics sphinxcontrib-mermaid +tenacity +timeout-decorator diff --git a/scripts/enhance_data.py b/scripts/enhance_data.py new file mode 100644 index 00000000000..e7e7928c6b2 --- /dev/null +++ b/scripts/enhance_data.py @@ -0,0 +1,213 @@ +""" +Data structures and pure functions for tracking enhancement results and computing metrics. + +This module provides a functional-programming-oriented core for managing analysis results +and deriving metrics. It is independent of the domain logic (e.g. RST file handling, OpenAI integration) +and can be reused in other contexts. +""" + +from typing import NamedTuple, Dict, Set, List, Optional + + +class EnhanceMetrics(NamedTuple): + """ + Immutable data structure representing analysis metrics derived from enhancement results. + + Attributes: + counts_by_analysis: Dictionary mapping analysis types to their value counts. + Example: {"content-type": {"task": 5, "concept": 3, "reference": 2}} + files_with_results_count: Number of files that had analysis results. + updated_files_count: Number of files that had metadata successfully updated. + """ + counts_by_analysis: Dict[str, Dict[str, int]] + files_with_results_count: int + updated_files_count: int + + def get_total_analysis_count(self) -> int: + """ + Calculate the total number of analysis results across all analysis types. + + Note: Files with multiple analysis types contribute multiple counts. + For unique file count, use files_with_results_count instead. + + Returns: + Total count of all analysis results across all analysis types. 
+ """ + return sum(sum(counts.values()) for counts in self.counts_by_analysis.values()) + + +class EnhanceData(NamedTuple): + """ + Immutable data structure representing enhancement results. + + Attributes: + results: Dictionary mapping filename to analysis results. + Format: {filename: {analysis_type: result_value}} + updated_files: Set of filenames that had metadata successfully updated. + """ + results: Dict[str, Dict[str, str]] + updated_files: Set[str] + + +def create_enhance_data() -> EnhanceData: + """ + Initialise an empty EnhanceData structure. + + Returns: + Empty EnhanceData with no results or updated files. + """ + return EnhanceData(results={}, updated_files=set()) + + +def add_analysis_result(data: EnhanceData, filename: str, analysis_type: str, result: str) -> EnhanceData: + """ + Add an analysis result to the enhancement data. + + Returns a new EnhanceData instance with the added result. + + Args: + data: Current enhancement data. + filename: Name of the file. + analysis_type: Type of analysis (e.g., "content-type"). + result: Analysis result value. + + Returns: + New EnhanceData with the result added. + """ + new_results = {**data.results} # Shallow copy: replace one filename entry immutably + file_results = {**new_results.get(filename, {})} # Preserve other analysis keys for this file + file_results[analysis_type] = result + new_results[filename] = file_results + return EnhanceData(results=new_results, updated_files=data.updated_files) # ``updated_files`` unchanged here + + +def mark_file_updated(data: EnhanceData, filename: str) -> EnhanceData: + """ + Mark a file as having been successfully updated with metadata. + + Returns a new EnhanceData instance with the file added to updated_files. + + Args: + data: Current enhancement data. + filename: Name of the file that was updated. + + Returns: + New EnhanceData with the file marked as updated. 
+ """ + return EnhanceData(results=data.results, updated_files=data.updated_files | {filename}) # Set union adds one basename + + +def calculate_metrics(data: EnhanceData) -> EnhanceMetrics: + """ + Derive metrics from enhancement data. + + Pure function that transforms EnhanceData into EnhanceMetrics for analysis and reporting. + + Args: + data: Current enhancement data. + + Returns: + EnhanceMetrics containing counts, file counts, and update counts. + """ + counts_by_analysis: Dict[str, Dict[str, int]] = {} + + for file_results in data.results.values(): + if file_results: + for analysis_type, result_value in file_results.items(): + clean_value = result_value.strip().lower() # Normalise so ``Task`` and ``task`` aggregate together + if analysis_type not in counts_by_analysis: + counts_by_analysis[analysis_type] = {} + counts_by_analysis[analysis_type][clean_value] = counts_by_analysis[analysis_type].get(clean_value, 0) + 1 + + files_with_results_count = sum(1 for file_results in data.results.values() if file_results) # Files with at least one non-empty result dict + + return EnhanceMetrics( + counts_by_analysis=counts_by_analysis, + files_with_results_count=files_with_results_count, + updated_files_count=len(data.updated_files) # Distinct files whose RST was rewritten on disk + ) + + +def get_files_with_results(data: EnhanceData) -> List[str]: + """ + Get list of filenames that had analysis results. + + Args: + data: Current enhancement data. + + Returns: + List of filenames with at least one analysis result. + """ + return [filename for filename, file_results in data.results.items() if file_results] + + +def get_updated_files(data: EnhanceData) -> List[str]: + """ + Get list of filenames that had metadata successfully updated. + + Args: + data: Current enhancement data. + + Returns: + List of filenames that were updated with metadata. 
+ """ + return list(data.updated_files) + + +def is_file_updated(data: EnhanceData, filename: str) -> bool: + """ + Check if a file was successfully updated with metadata. + + Args: + data: Current enhancement data. + filename: Name of the file to check. + + Returns: + True if the file was updated, False otherwise. + """ + return filename in data.updated_files + + +def get_analysis_types(data: EnhanceData) -> List[str]: + """ + Get list of all analysis types performed. + + Args: + data: Current enhancement data. + + Returns: + List of unique analysis types found in results. + """ + analysis_types: Set[str] = set() + for file_results in data.results.values(): + analysis_types.update(file_results.keys()) + return list(analysis_types) + + +def get_result_for_file(data: EnhanceData, filename: str, analysis_type: str) -> Optional[str]: + """ + Get analysis result for a specific file and analysis type. + + Args: + data: Current enhancement data. + filename: Name of the file. + analysis_type: Type of analysis (e.g., "content-type"). + + Returns: + Analysis result or None if not found. + """ + return data.results.get(filename, {}).get(analysis_type) + + +def get_results_for_file(data: EnhanceData, filename: str) -> Dict[str, str]: + """ + Get all analysis results for a specific file. + + Args: + data: Current enhancement data. + filename: Name of the file. + + Returns: + Dictionary of analysis results for the file, or empty dict if not found. + """ + return data.results.get(filename, {}) # Consumed by ``update_meta_rst_files`` as ``.. 
meta::`` field names diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 70d137d09d3..01572d0e6e6 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -1,4 +1,323 @@ +import logging import sys +import os +from typing import Optional -for arg in sys.argv[1:]: - print(f"Enhancing topic: {arg}") \ No newline at end of file +from dotenv import load_dotenv +from openai import OpenAI, RateLimitError, APIConnectionError, OpenAIError +from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type +from concurrent.futures import ThreadPoolExecutor + +from enhance_data import EnhanceData, create_enhance_data, add_analysis_result, calculate_metrics +from rst_utils import get_results_for_file, inject_metadata_to_content, mark_file_updated + +logger = logging.getLogger(__name__) + +# Define constants +GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls +# Maximum content length in characters for topic analysis , approximately 300k tokens (leaving 100k for instructions/output) +MAX_CONTENT_LENGTH = 1200000 +RST_EXTENSION = '.rst' # File extension for RST files + +# Define timeout and retry parameters for API calls +# - Individual API calls timeout after DEFAULT_TIMEOUT seconds +# - On rate limits/connection errors, retry up to MAX_RETRIES times +# - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) +DEFAULT_TIMEOUT = 60 # Default timeout in seconds for an individual API call +MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff +MIN_WAIT = 10 # Minimum wait time between retries in seconds +MAX_WAIT = 120 # Maximum wait time between retries in seconds + +# Content type classification prompt +CONTENT_TYPE_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied HTML documents. You can distinguish between three types of content: task, concept, and reference. 
+ +*Concept topics* +Concept topics explain or define ideas. These topics often include background information that users need to understand before they start working with a specific product. Concept topics help the users understand the product, its purpose and benefits, before using the product. Concept topics do the following: describe a system, product, or a solution, outline a process, introduce tools or features, explain features, components, characteristics, restrictions, or capabilities, define terms in more detail than a simple glossary. + +*Task topics* +Task topics help achieve a specific goal by presenting instructions as 'procedures'. The first paragraph of the topic usually provides an overview and the benefits or importance of the task. A task is usually a numbered list of individual steps that help users achieve the goal. + +*Reference topics* +Reference topics provide quick access to information that users need to perform a task effectively. For example, lists all necessary links. Information in the main body of a reference topic may also be presented in a list or table format, for quick access and easy readability. + +When analyzed content is a mixture of different content types, classify based on the majority of content. + +Finally, generate a single-word lowercase output which is the recognized content type, with no additional styling, characters, or formatting.""" + +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True +) +def analyze_content(client: OpenAI, content: str, prompt: str, timeout: int = DEFAULT_TIMEOUT) -> str: + """ + Analyse content using OpenAI's API with retry and timeout logic. + Uses ThreadPoolExecutor for cross-platform timeout handling and retries for transient API errors. + + Args: + client (OpenAI): OpenAI client instance. + content (str): Preprocessed content. 
+ prompt (str): Prompt for the AI model. + timeout (int): Maximum time to wait for response in seconds. + + Returns: + str: Analysis result from the AI model, or empty string if analysis fails. + + Raises: + TimeoutError: If the API call exceeds the specified timeout. + RateLimitError: If API rate limits are exceeded (will trigger retry). + APIConnectionError: If connection fails (will trigger retry). + """ + # Log the content length before potential truncation + logger.debug(f"Processing content of length: {len(content)} characters") + + # Truncate content if its too long + if len(content) > MAX_CONTENT_LENGTH: + logger.warning(f"Content truncated to {MAX_CONTENT_LENGTH} characters for analysis.") + content = content[:MAX_CONTENT_LENGTH] + + def _make_api_call() -> str: + """ + Inner function to handle the OpenAI API call. + Separated to allow for clean timeout handling via ThreadPoolExecutor. + + Returns: + str: The model's response content + + Raises: + RateLimitError, APIConnectionError: Propagated for retry handling + """ + try: + logger.debug("Sending request to OpenAI API...") + completion = client.chat.completions.create( + model=GPT_MODEL, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": f"Content:\n\n{content}"} + ] + ) + result = completion.choices[0].message.content + logger.debug("Successfully received response from OpenAI API") + return result if result is not None else "" + except (RateLimitError, APIConnectionError) as e: + logger.warning(f"Retryable error occurred: {str(e)}") + raise # Re-raise for retry decorator to handle + + # Use ThreadPoolExecutor for cross-platform timeout handling + with ThreadPoolExecutor() as executor: + try: + future = executor.submit(_make_api_call) + return future.result(timeout=timeout) + except TimeoutError: + logger.error(f"API call timed out after {timeout} seconds") + raise # Re-raise the original timeout error + +def analyze_files(files: list[str], client: OpenAI, prompts: 
dict[str, str], timeout: int = DEFAULT_TIMEOUT) -> EnhanceData: + """ + Process a list of files and analyse their content using each of the passed prompts. + + Args: + files (list[str]): List of paths to files. + client (OpenAI): OpenAI client instance. + prompts (dict[str, str]): Dictionary of prompts for the AI model. + timeout (int): Maximum time to wait for each API call in seconds. + + Returns: + EnhanceData: Enhancement data structure containing analysis results and update tracking. + """ + data = create_enhance_data() + + logger.debug("============================") + logger.debug("Performing content analysis:") + logger.debug("============================") + + for file_path in files: # Iterate through each file in the list + logger.debug(f"Analysing file: {file_path}") + + # Read the content of the file + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + except (OSError, PermissionError) as e: + logger.error("Error reading file %s: %s", file_path, e) + continue + except UnicodeDecodeError as e: + logger.error("Unicode decode error reading file %s: %s", file_path, e) + continue + + # Check if the content is not empty + if content.strip(): + filename = os.path.basename(file_path) # Extract filename from file path + for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary + logger.debug(f"Running analysis: {prompt_name}") + try: + # Analyse the content using API with timeout and retry logic + result = analyze_content( + client, + content, + prompt, + timeout=timeout + ) + if result: + # Add the analysis result to the data structure + data = add_analysis_result(data, filename, prompt_name, result) + else: + logger.warning(f"No result for {filename} with prompt name: {prompt_name}") + + except (RateLimitError, APIConnectionError) as e: + # Exhausted all retries due to rate limits or connection errors + logger.error(f"Failed to analyse {filename} with prompt {prompt_name} after {MAX_RETRIES} 
retries: {e}") + continue + except TimeoutError as e: + # Timeout error due to an individual API call timing out + logger.error(f"Analysis timed out for {filename} with prompt {prompt_name}: {e}") + continue + except (OpenAIError, ValueError) as e: + # Other API errors and value errors + logger.error(f"Failed to analyse {filename} with prompt {prompt_name}: {e}") + continue + else: + logger.info(f"No analysable content found for {file_path}") + + metrics = calculate_metrics(data) + logger.info(f"Analysed {metrics.files_with_results_count} out of {len(files)} files with the configured prompts.") + return data + + +def get_openai_client() -> OpenAI: + """ + Create an OpenAI client with proper authentication. + + The API key is sourced in the following order: + 1. Environment variable OPENAI_API_KEY + 2. .env file in the project root + + Returns: + OpenAI: Authenticated OpenAI client instance + + Raises: + AuthenticationError: If no valid API key is found + """ + # Load environment variables from .env file if present + load_dotenv() + + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + raise OpenAIError("OpenAI API key not found. Set OPENAI_API_KEY environment variable.") + + return OpenAI(api_key=api_key) + +def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: + """ + Enhance RST files with metadata based on content analysis. + + Args: + files (list[str]): Paths to files to enhance. + client (OpenAI, optional): OpenAI client instance. If None, creates new instance. + + Returns: + EnhanceData: Enhancement data structure containing analysis results and update tracking. + + Raises: + OpenAIError: If no valid API key is found when creating a new client. 
+ """ + try: + client = client or get_openai_client() + except OpenAIError as e: + logger.error(f"Failed to initialise OpenAI client: {e}") + return create_enhance_data() + + # TODO: Make this config-driven, so that we can easily add more prompts and analysis types + prompts: dict[str, str] = {"content-type": CONTENT_TYPE_PROMPT} + + data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model + data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` + + return data + +def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: + """ + Process a list of files and update them with passed metadata. + + Args: + files (list[str]): List of paths to files. + data (EnhanceData): Enhancement data structure containing metadata for files. + + Returns: + EnhanceData: Updated enhancement data with files marked as updated. + """ + + logger.debug("===========================") + logger.debug("Updating metadata in files:") + logger.debug("===========================") + + current_data = data # Thread results through ``mark_file_updated`` immutably + + for file_path in files: + logger.debug("Updating metadata in file: %s", file_path) + filename = os.path.basename(file_path) # Keys in ``EnhanceData.results`` are basenames only + metadata = get_results_for_file(current_data, filename) + + # Confirm the metadata is not empty for the file, else skip + if not metadata: + logger.info("Skipping %s as it has no results for enhancement", filename) + continue + + logger.debug("Metadata found for %s, proceeding with updates.", filename) + + try: + with open(file_path, encoding="utf-8") as file: + content = file.read() # Full document; helpers locate or synthesise ``.. 
meta::`` + except (OSError, PermissionError) as exc: + logger.error("Error reading RST file %s: %s", file_path, exc) + continue + except UnicodeDecodeError as exc: + logger.error("Unicode decode error reading RST file %s: %s", file_path, exc) + continue + + new_content, changed = inject_metadata_to_content(content, metadata) + + # Confirm that at least one metadata has been changed for the file, else skip + if not changed: + logger.debug("No metadata changes applied for %s", filename) + continue # All keys already present or no additions—do not touch the file + + try: + with open(file_path, "w", encoding="utf-8") as file: + file.write(new_content) # Full-document rewrite (same path as read) + except (OSError, PermissionError) as exc: + logger.error("Error writing RST file %s: %s", file_path, exc) + continue + except UnicodeEncodeError as exc: + logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) + continue + + current_data = mark_file_updated(current_data, filename) # Record success for metrics only after a clean write + logger.debug("Updated file with supplied metadata: %s", filename) + logger.debug("-" * 50) + + metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote + logger.info("Enhanced %s files' metadata out of %s files processed.", metrics.updated_files_count, len(files)) + return current_data + +def main() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s %(name)s: %(message)s", + ) + + # Collect filenames from command line arguments + rst_files = sys.argv[1:] + if not rst_files: + logger.error("No input files provided. 
Pass a list of RST files as arguments.") + sys.exit(1) + + # Enhance the metadata in the RST files and return the enhancement data with updated files + data = enhance_metadata(rst_files) + # Log the metrics for the enhancement data + metrics = calculate_metrics(data) + logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files with results.") + +if __name__ == "__main__": + main() diff --git a/scripts/rst_utils.py b/scripts/rst_utils.py new file mode 100644 index 00000000000..c40d3cd6022 --- /dev/null +++ b/scripts/rst_utils.py @@ -0,0 +1,124 @@ +""" +Utilities for editing reStructuredText source, in particular ``.. meta::`` directives. +""" + +import logging +import os +import re + +from enhance_data import ( + EnhanceData, + calculate_metrics, + get_results_for_file, + mark_file_updated, +) + +logger = logging.getLogger(__name__) + + +def _find_meta_block(content: str) -> tuple[int, int, int, str, str]: + """ + Locate the first ``.. meta::`` directive in RST source. + + The directive block consists of the explicit marker line followed by + contiguous indented lines; a blank line or a less-indented line ends the + block (per reStructuredText directive block rules). + + Returns: + Tuple of ``(start, marker_end, block_end, inner, indent)``. + If no directive is found, ``start``, ``marker_end``, and ``block_end`` + are ``-1``, ``inner`` is ``''``, and ``indent`` defaults to three spaces. + """ + # Explicit markup + directive name; block body starts on the following line only + match = re.search(r"^\.\.\s+meta::\s*\n", content, re.MULTILINE) + if not match: + return -1, -1, -1, "", " " + + start = match.start() # Byte index of ``.. 
meta::`` (for whole-directive splice) + marker_end = match.end() # First character after the marker line's newline + indent = " " # Default field indent when the block is empty or we prepend a new block + inner_parts: list[str] = [] + consumed = 0 # Length of directive body in ``content`` (may omit final ``\n`` on last line) + remainder = content[marker_end:] # Scan forward only inside this file slice + + for line in remainder.splitlines(keepends=True): + if line.strip() == "": + break # Blank line terminates the directive block + if not line.startswith((" ", "\t")): + break # Body element at column 0 ends the block + if not inner_parts: + ws_len = len(line) - len(line.lstrip(" \t")) + indent = line[:ws_len] # Reuse the author's indent for new ``:name:`` lines + inner_parts.append(line) + consumed += len(line) + + block_end = marker_end + consumed # Exclusive end of the directive in ``content`` + inner = "".join(inner_parts) + # EOF without ``\n`` yields a last ``splitlines`` element with no newline—append one before new fields + if inner and not inner.endswith("\n"): + inner += "\n" + return start, marker_end, block_end, inner, indent + + +def _get_existing_meta_names(meta_block_inner: str) -> set[str]: + """ + Collect field names from the body of a ``.. meta::`` directive. + + Each line of the form ``:name: value`` contributes ``name`` (Docutils also + allows forms such as ``:name attr=value:``; the captured segment matches + that usage). 
+ """ + names: set[str] = set() + # Field list lines only; group 1 is the name segment (includes ``attr=value`` forms before the final ``:``) + for field_match in re.finditer(r"^[ \t]+:([^:\n]+?):", meta_block_inner, re.MULTILINE): + names.add(field_match.group(1).strip()) + return names + + +def _normalise_meta_field_value(value: str) -> str: + """Collapse whitespace so the meta field body stays a single logical line.""" + return " ".join(value.split()) # Docutils treats the field body as one string; keep it one physical line + + +def inject_metadata_to_content(content: str, metadata: dict[str, str]) -> tuple[str, bool]: + """ + Insert or append ``.. meta::`` field entries for the given name/value pairs. + + Appends to an existing ``.. meta::`` block when present; otherwise prepends + a new block at the start of the document (leading whitespace is stripped so + the directive is the first element). Skips keys that already appear in the + block. + + Returns: + Updated source and whether any change was made. + """ + start, marker_end, block_end, inner, indent = _find_meta_block(content) + names = _get_existing_meta_names(inner) # Snapshot before we add keys from this same batch + additions: list[str] = [] + + for key, raw_value in metadata.items(): + if key in names: + logger.warning( + "Existing meta field %r in .. meta:: block; skipping", + key, + ) + continue + value = _normalise_meta_field_value(raw_value) + additions.append(f"{indent}:{key}: {value}\n") + names.add(key) # Prevent duplicate inserts if ``metadata`` repeats a key + + if not additions: + return content, False # Nothing new to write; leave the file untouched + + new_inner = inner + "".join(additions) # Existing fields unchanged, then appended lines + + if start >= 0: + # Replace only the directive body slice; ``marker_end``/``block_end`` bracket the original inner + new_content = content[:marker_end] + new_inner + content[block_end:] + else: + # No ``.. 
meta::`` yet: insert at document start; strip leading whitespace so the block is truly first + remainder = content.lstrip() + new_content = ".. meta::\n" + "".join(additions) + "\n" + remainder # Blank line after block separates it from the body + + return new_content, True + From 6fd04f8751792ed7e7d5e122f573bc265304a859 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 18:07:25 +0100 Subject: [PATCH 09/59] OPENR-89: Fix bug using base filename for enhance results data --- scripts/enhance_topics.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 01572d0e6e6..45145e444df 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -148,7 +148,6 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): - filename = os.path.basename(file_path) # Extract filename from file path for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary logger.debug(f"Running analysis: {prompt_name}") try: @@ -161,21 +160,21 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim ) if result: # Add the analysis result to the data structure - data = add_analysis_result(data, filename, prompt_name, result) + data = add_analysis_result(data, file_path, prompt_name, result) else: - logger.warning(f"No result for {filename} with prompt name: {prompt_name}") + logger.warning(f"No result for {file_path} with prompt name: {prompt_name}") except (RateLimitError, APIConnectionError) as e: # Exhausted all retries due to rate limits or connection errors - logger.error(f"Failed to analyse {filename} with prompt {prompt_name} after {MAX_RETRIES} retries: {e}") + logger.error(f"Failed to analyse {file_path} with prompt {prompt_name} after {MAX_RETRIES} retries: {e}") continue except TimeoutError as e: # Timeout error due to an 
individual API call timing out - logger.error(f"Analysis timed out for {filename} with prompt {prompt_name}: {e}") + logger.error(f"Analysis timed out for {file_path} with prompt {prompt_name}: {e}") continue except (OpenAIError, ValueError) as e: # Other API errors and value errors - logger.error(f"Failed to analyse {filename} with prompt {prompt_name}: {e}") + logger.error(f"Failed to analyse {file_path} with prompt {prompt_name}: {e}") continue else: logger.info(f"No analysable content found for {file_path}") @@ -256,15 +255,14 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: for file_path in files: logger.debug("Updating metadata in file: %s", file_path) - filename = os.path.basename(file_path) # Keys in ``EnhanceData.results`` are basenames only - metadata = get_results_for_file(current_data, filename) + metadata = get_results_for_file(current_data, file_path) # Confirm the metadata is not empty for the file, else skip if not metadata: - logger.info("Skipping %s as it has no results for enhancement", filename) + logger.info("Skipping %s as it has no results for enhancement", file_path) continue - logger.debug("Metadata found for %s, proceeding with updates.", filename) + logger.debug("Metadata found for %s, proceeding with updates.", file_path) try: with open(file_path, encoding="utf-8") as file: @@ -280,7 +278,7 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: # Confirm that at least one metadata has been changed for the file, else skip if not changed: - logger.debug("No metadata changes applied for %s", filename) + logger.debug("No metadata changes applied for %s", file_path) continue # All keys already present or no additions—do not touch the file try: @@ -293,8 +291,8 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) continue - current_data = mark_file_updated(current_data, filename) # 
Record success for metrics only after a clean write - logger.debug("Updated file with supplied metadata: %s", filename) + current_data = mark_file_updated(current_data, file_path) # Record success for metrics only after a clean write + logger.debug("Updated file with supplied metadata: %s", file_path) logger.debug("-" * 50) metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote From 93734f450bff3a2b1f9e352243d2ac308ded5935 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 10:18:00 +0100 Subject: [PATCH 10/59] OPENR-89: Install python dependencies in workflow --- .github/workflows/enhance.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml index e03ce90d395..d60fb72a88c 100644 --- a/.github/workflows/enhance.yml +++ b/.github/workflows/enhance.yml @@ -20,6 +20,9 @@ jobs: with: python-version: '3.12' + - name: Install dependencies + run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + - name: Enhance topics env: BASE_SHA: ${{ github.event.before }} From 88bf36a23b12d11af4e03e5d8abf503d9e1eefa6 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 11:54:30 +0100 Subject: [PATCH 11/59] OPENR-89: Prompt adjustments --- .gitignore | 1 + scripts/enhance_topics.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 652f1b03313..24c42db21a7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ _build/ __pycache__ ros2doc/ .DS_Store +.env diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 45145e444df..d2f39c6770f 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -28,21 +28,17 @@ MIN_WAIT = 10 # Minimum wait time between retries in seconds MAX_WAIT = 120 # Maximum wait time between retries in seconds -# Content type classification prompt -CONTENT_TYPE_PROMPT = """You are a content analyst, 
and your role is to analyze text content within supplied HTML documents. You can distinguish between three types of content: task, concept, and reference. +KEYWORDS_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. -*Concept topics* -Concept topics explain or define ideas. These topics often include background information that users need to understand before they start working with a specific product. Concept topics help the users understand the product, its purpose and benefits, before using the product. Concept topics do the following: describe a system, product, or a solution, outline a process, introduce tools or features, explain features, components, characteristics, restrictions, or capabilities, define terms in more detail than a simple glossary. +Your role is to extract 3 to 5 keywords from the content for use in metadata. The keywords should be single words that are the most important and relevant words to the content topic. -*Task topics* -Task topics help achieve a specific goal by presenting instructions as 'procedures'. The first paragraph of the topic usually provides an overview and the benefits or importance of the task. A task is usually a numbered list of individual steps that help users achieve the goal. +Finally, generate a comma-separated list of these keywords, in lowercase, with no additional styling, characters, or formatting.""" -*Reference topics* -Reference topics provide quick access to information that users need to perform a task effectively. For example, lists all necessary links. Information in the main body of a reference topic may also be presented in a list or table format, for quick access and easy readability. +DESCRIPTION_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. -When analyzed content is a mixture of different content types, classify based on the majority of content. 
+Your role is to create a concise description of the content for use in metadata. The description should be a single sentence (of a maximum of 130 characters) that captures the main idea of the content. -Finally, generate a single-word lowercase output which is the recognized content type, with no additional styling, characters, or formatting.""" +Finally, generate this description, with no additional styling, characters, or formatting.""" @retry( retry=retry_if_exception_type((RateLimitError, APIConnectionError)), @@ -228,7 +224,7 @@ def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> Enhan return create_enhance_data() # TODO: Make this config-driven, so that we can easily add more prompts and analysis types - prompts: dict[str, str] = {"content-type": CONTENT_TYPE_PROMPT} + prompts: dict[str, str] = {"description": DESCRIPTION_PROMPT, "keywords": KEYWORDS_PROMPT} data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` From 2b8021e363c99e873095896c8dc184244303a315 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 12:17:46 +0100 Subject: [PATCH 12/59] OPENR-89: Only make API calls for fields which do not exist --- scripts/enhance_topics.py | 12 ++++++++++-- scripts/rst_utils.py | 22 ++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index d2f39c6770f..2c17ce67ee4 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -8,8 +8,8 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type from concurrent.futures import ThreadPoolExecutor -from enhance_data import EnhanceData, create_enhance_data, add_analysis_result, calculate_metrics -from rst_utils import get_results_for_file, inject_metadata_to_content, mark_file_updated +from enhance_data 
import EnhanceData, add_analysis_result, calculate_metrics, create_enhance_data, get_results_for_file, mark_file_updated +from rst_utils import get_meta_names_from_content, inject_metadata_to_content logger = logging.getLogger(__name__) @@ -144,7 +144,15 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): + existing_meta_names = get_meta_names_from_content(content) for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary + if prompt_name in existing_meta_names: + logger.info( + "Skipping analysis for %s: meta field %r already present in .. meta::", + file_path, + prompt_name, + ) + continue logger.debug(f"Running analysis: {prompt_name}") try: # Analyse the content using API with timeout and retry logic diff --git a/scripts/rst_utils.py b/scripts/rst_utils.py index c40d3cd6022..6599f7a97d9 100644 --- a/scripts/rst_utils.py +++ b/scripts/rst_utils.py @@ -3,16 +3,8 @@ """ import logging -import os import re -from enhance_data import ( - EnhanceData, - calculate_metrics, - get_results_for_file, - mark_file_updated, -) - logger = logging.getLogger(__name__) @@ -60,7 +52,7 @@ def _find_meta_block(content: str) -> tuple[int, int, int, str, str]: return start, marker_end, block_end, inner, indent -def _get_existing_meta_names(meta_block_inner: str) -> set[str]: +def _extract_meta_names_from_block(meta_block_inner: str) -> set[str]: """ Collect field names from the body of a ``.. meta::`` directive. @@ -75,6 +67,16 @@ def _get_existing_meta_names(meta_block_inner: str) -> set[str]: return names +def get_meta_names_from_content(content: str) -> set[str]: + """ + Return the set of field names already present in the first ``.. meta::`` block. + + If no ``.. meta::`` directive exists, returns an empty set. 
+ """ + _start, _marker_end, _block_end, inner, _indent = _find_meta_block(content) + return _extract_meta_names_from_block(inner) + + def _normalise_meta_field_value(value: str) -> str: """Collapse whitespace so the meta field body stays a single logical line.""" return " ".join(value.split()) # Docutils treats the field body as one string; keep it one physical line @@ -93,7 +95,7 @@ def inject_metadata_to_content(content: str, metadata: dict[str, str]) -> tuple[ Updated source and whether any change was made. """ start, marker_end, block_end, inner, indent = _find_meta_block(content) - names = _get_existing_meta_names(inner) # Snapshot before we add keys from this same batch + names = _extract_meta_names_from_block(inner) # Snapshot before we add keys from this same batch additions: list[str] = [] for key, raw_value in metadata.items(): From 9badeb9cf194ab05da57bd572f07462f82ab5dc9 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 13:21:50 +0100 Subject: [PATCH 13/59] OPENR-89: Enhance only RST files --- scripts/enhance_data.py | 54 +++++++++++++++++++++------------------ scripts/enhance_topics.py | 40 +++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 35 deletions(-) diff --git a/scripts/enhance_data.py b/scripts/enhance_data.py index e7e7928c6b2..dea61b02600 100644 --- a/scripts/enhance_data.py +++ b/scripts/enhance_data.py @@ -22,18 +22,6 @@ class EnhanceMetrics(NamedTuple): counts_by_analysis: Dict[str, Dict[str, int]] files_with_results_count: int updated_files_count: int - - def get_total_analysis_count(self) -> int: - """ - Calculate the total number of analysis results across all analysis types. - - Note: Files with multiple analysis types contribute multiple counts. - For unique file count, use files_with_results_count instead. - - Returns: - Total count of all analysis results across all analysis types. 
- """ - return sum(sum(counts.values()) for counts in self.counts_by_analysis.values()) class EnhanceData(NamedTuple): @@ -41,14 +29,30 @@ class EnhanceData(NamedTuple): Immutable data structure representing enhancement results. Attributes: - results: Dictionary mapping filename to analysis results. - Format: {filename: {analysis_type: result_value}} - updated_files: Set of filenames that had metadata successfully updated. + results: Dictionary mapping file paths to analysis results. + Format: {file_path: {analysis_type: result_value}} + updated_files: Set of file paths that had metadata successfully updated. """ results: Dict[str, Dict[str, str]] updated_files: Set[str] +def get_total_analysis_count(metrics: EnhanceMetrics) -> int: + """ + Calculate the total number of analysis results across all analysis types. + + Note: Files with multiple analysis types contribute multiple counts. + For unique file count, use metrics.files_with_results_count instead. + + Args: + metrics: The metrics structure to analyse. + + Returns: + Total count of all analysis results across all analysis types. + """ + return sum(sum(counts.values()) for counts in metrics.counts_by_analysis.values()) + + def create_enhance_data() -> EnhanceData: """ Initialise an empty EnhanceData structure. @@ -67,7 +71,7 @@ def add_analysis_result(data: EnhanceData, filename: str, analysis_type: str, re Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). analysis_type: Type of analysis (e.g., "content-type"). result: Analysis result value. @@ -89,12 +93,12 @@ def mark_file_updated(data: EnhanceData, filename: str) -> EnhanceData: Args: data: Current enhancement data. - filename: Name of the file that was updated. + filename: Path to the file that was updated (relative to repository root). Returns: New EnhanceData with the file marked as updated. 
""" - return EnhanceData(results=data.results, updated_files=data.updated_files | {filename}) # Set union adds one basename + return EnhanceData(results=data.results, updated_files=data.updated_files | {filename}) # Set union adds one file path def calculate_metrics(data: EnhanceData) -> EnhanceMetrics: @@ -130,26 +134,26 @@ def calculate_metrics(data: EnhanceData) -> EnhanceMetrics: def get_files_with_results(data: EnhanceData) -> List[str]: """ - Get list of filenames that had analysis results. + Get list of file paths that had analysis results. Args: data: Current enhancement data. Returns: - List of filenames with at least one analysis result. + List of file paths with at least one analysis result. """ return [filename for filename, file_results in data.results.items() if file_results] def get_updated_files(data: EnhanceData) -> List[str]: """ - Get list of filenames that had metadata successfully updated. + Get list of file paths that had metadata successfully updated. Args: data: Current enhancement data. Returns: - List of filenames that were updated with metadata. + List of file paths that were updated with metadata. """ return list(data.updated_files) @@ -160,7 +164,7 @@ def is_file_updated(data: EnhanceData, filename: str) -> bool: Args: data: Current enhancement data. - filename: Name of the file to check. + filename: Path to the file to check (relative to repository root). Returns: True if the file was updated, False otherwise. @@ -190,7 +194,7 @@ def get_result_for_file(data: EnhanceData, filename: str, analysis_type: str) -> Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). analysis_type: Type of analysis (e.g., "content-type"). Returns: @@ -205,7 +209,7 @@ def get_results_for_file(data: EnhanceData, filename: str) -> Dict[str, str]: Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). 
Returns: Dictionary of analysis results for the file, or empty dict if not found. diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 2c17ce67ee4..6b0cb1c079c 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -213,7 +213,7 @@ def get_openai_client() -> OpenAI: def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: """ - Enhance RST files with metadata based on content analysis. + Enhance files with metadata based on content analysis. Args: files (list[str]): Paths to files to enhance. @@ -272,10 +272,10 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: with open(file_path, encoding="utf-8") as file: content = file.read() # Full document; helpers locate or synthesise ``.. meta::`` except (OSError, PermissionError) as exc: - logger.error("Error reading RST file %s: %s", file_path, exc) + logger.error("Error reading file %s: %s", file_path, exc) continue except UnicodeDecodeError as exc: - logger.error("Unicode decode error reading RST file %s: %s", file_path, exc) + logger.error("Unicode decode error reading file %s: %s", file_path, exc) continue new_content, changed = inject_metadata_to_content(content, metadata) @@ -289,10 +289,10 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: with open(file_path, "w", encoding="utf-8") as file: file.write(new_content) # Full-document rewrite (same path as read) except (OSError, PermissionError) as exc: - logger.error("Error writing RST file %s: %s", file_path, exc) + logger.error("Error writing file %s: %s", file_path, exc) continue except UnicodeEncodeError as exc: - logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) + logger.error("Unicode encode error while writing file %s: %s", file_path, exc) continue current_data = mark_file_updated(current_data, file_path) # Record success for metrics only after a clean write @@ -304,22 +304,42 @@ def 
update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: return current_data def main() -> None: + """ + Main entry point for the script. + + - Parses command-line arguments to collect input file paths. + - Filters the provided files to include only reStructuredText (.rst) files. + - Enhances the metadata of each RST file using AI-based analysis (keywords and description). + - Writes updated metadata back to files and logs processing metrics. + + Usage: + python enhance_topics.py ... + + Only files with the .rst extension will be processed. + Logs the number of files successfully enhanced. + """ + logging.basicConfig( level=logging.INFO, format="%(levelname)s %(name)s: %(message)s", ) - # Collect filenames from command line arguments - rst_files = sys.argv[1:] + # Collect filenames from command line arguments and filter for RST files + input_files = sys.argv[1:] + rst_files = [f for f in input_files if f.lower().endswith(RST_EXTENSION)] + if not rst_files: - logger.error("No input files provided. Pass a list of RST files as arguments.") - sys.exit(1) + if input_files: + logger.info("No RST files found among provided arguments. Skipping enhancement.") + else: + logger.error("No input files provided. 
Pass a list of RST files as arguments.") + sys.exit(0) # Enhance the metadata in the RST files and return the enhancement data with updated files data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files with results.") + logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files processed.") if __name__ == "__main__": main() From 0ededa3c3c81f622f857eabeb1e1343466a237a1 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 15:16:34 +0100 Subject: [PATCH 14/59] OPENR-89: Small tweaks to logging and constants --- scripts/enhance_topics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 6b0cb1c079c..6c13dd29eb9 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -23,7 +23,7 @@ # - Individual API calls timeout after DEFAULT_TIMEOUT seconds # - On rate limits/connection errors, retry up to MAX_RETRIES times # - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) -DEFAULT_TIMEOUT = 60 # Default timeout in seconds for an individual API call +DEFAULT_TIMEOUT = 30 # Default timeout in seconds for an individual API call MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff MIN_WAIT = 10 # Minimum wait time between retries in seconds MAX_WAIT = 120 # Maximum wait time between retries in seconds @@ -300,7 +300,7 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.debug("-" * 50) metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote - logger.info("Enhanced %s files' metadata out of %s files processed.", metrics.updated_files_count, len(files)) + logger.info("Updated metadata in %s files out of %s files processed.", 
metrics.updated_files_count, len(files)) return current_data def main() -> None: @@ -317,7 +317,7 @@ def main() -> None: Only files with the .rst extension will be processed. Logs the number of files successfully enhanced. - """ + """b logging.basicConfig( level=logging.INFO, @@ -339,7 +339,7 @@ def main() -> None: data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files processed.") + logger.info(f"Enhanced files: {metrics.files_with_results_count} with analysis results, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") if __name__ == "__main__": main() From 80eb658c1dfef672c37124c37802bdbeac31a31d Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Wed, 6 May 2026 16:23:38 +0200 Subject: [PATCH 15/59] Fix --- conf.py | 4 ++-- source/_static/vendor/js-yaml.min.js | 2 ++ source/_static/vendor/pako.min.js | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 source/_static/vendor/js-yaml.min.js create mode 100644 source/_static/vendor/pako.min.js diff --git a/conf.py b/conf.py index 562eb01c0ef..37c1dbdec27 100644 --- a/conf.py +++ b/conf.py @@ -182,8 +182,8 @@ # Relative to html_static_path html_css_files = ['custom.css', 'adopters.css'] html_js_files = [ - ('https://cdn.jsdelivr.net/npm/pako@2.1.0/dist/pako.min.js', {'defer': ''}), - ('https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/dist/js-yaml.min.js', {'defer': ''}), + ('vendor/pako.min.js', {'defer': ''}), + ('vendor/js-yaml.min.js', {'defer': ''}), 'adopters.js', 'related_packages.js', ] diff --git a/source/_static/vendor/js-yaml.min.js b/source/_static/vendor/js-yaml.min.js new file mode 100644 index 00000000000..bdd8eef542b --- /dev/null +++ b/source/_static/vendor/js-yaml.min.js @@ -0,0 +1,2 @@ +/*! 
js-yaml 4.1.0 https://github.com/nodeca/js-yaml @license MIT */ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).jsyaml={})}(this,(function(e){"use strict";function t(e){return null==e}var n={isNothing:t,isObject:function(e){return"object"==typeof e&&null!==e},toArray:function(e){return Array.isArray(e)?e:t(e)?[]:[e]},repeat:function(e,t){var n,i="";for(n=0;nl&&(t=i-l+(o=" ... ").length),n-i>l&&(n=i+l-(a=" ...").length),{str:o+e.slice(t,n).replace(/\t/g,"→")+a,pos:i-t+o.length}}function l(e,t){return n.repeat(" ",t-e.length)+e}var c=function(e,t){if(t=Object.create(t||null),!e.buffer)return null;t.maxLength||(t.maxLength=79),"number"!=typeof t.indent&&(t.indent=1),"number"!=typeof t.linesBefore&&(t.linesBefore=3),"number"!=typeof t.linesAfter&&(t.linesAfter=2);for(var i,r=/\r?\n|\r|\0/g,o=[0],c=[],s=-1;i=r.exec(e.buffer);)c.push(i.index),o.push(i.index+i[0].length),e.position<=i.index&&s<0&&(s=o.length-2);s<0&&(s=o.length-1);var u,p,f="",d=Math.min(e.line+t.linesAfter,c.length).toString().length,h=t.maxLength-(t.indent+d+3);for(u=1;u<=t.linesBefore&&!(s-u<0);u++)p=a(e.buffer,o[s-u],c[s-u],e.position-(o[s]-o[s-u]),h),f=n.repeat(" ",t.indent)+l((e.line-u+1).toString(),d)+" | "+p.str+"\n"+f;for(p=a(e.buffer,o[s],c[s],e.position,h),f+=n.repeat(" ",t.indent)+l((e.line+1).toString(),d)+" | "+p.str+"\n",f+=n.repeat("-",t.indent+d+3+p.pos)+"^\n",u=1;u<=t.linesAfter&&!(s+u>=c.length);u++)p=a(e.buffer,o[s+u],c[s+u],e.position-(o[s]-o[s+u]),h),f+=n.repeat(" ",t.indent)+l((e.line+u+1).toString(),d)+" | "+p.str+"\n";return f.replace(/\n$/,"")},s=["kind","multi","resolve","construct","instanceOf","predicate","represent","representName","defaultStyle","styleAliases"],u=["scalar","sequence","mapping"];var p=function(e,t){if(t=t||{},Object.keys(t).forEach((function(t){if(-1===s.indexOf(t))throw new o('Unknown option "'+t+'" is met in 
definition of "'+e+'" YAML type.')})),this.options=t,this.tag=e,this.kind=t.kind||null,this.resolve=t.resolve||function(){return!0},this.construct=t.construct||function(e){return e},this.instanceOf=t.instanceOf||null,this.predicate=t.predicate||null,this.represent=t.represent||null,this.representName=t.representName||null,this.defaultStyle=t.defaultStyle||null,this.multi=t.multi||!1,this.styleAliases=function(e){var t={};return null!==e&&Object.keys(e).forEach((function(n){e[n].forEach((function(e){t[String(e)]=n}))})),t}(t.styleAliases||null),-1===u.indexOf(this.kind))throw new o('Unknown kind "'+this.kind+'" is specified for "'+e+'" YAML type.')};function f(e,t){var n=[];return e[t].forEach((function(e){var t=n.length;n.forEach((function(n,i){n.tag===e.tag&&n.kind===e.kind&&n.multi===e.multi&&(t=i)})),n[t]=e})),n}function d(e){return this.extend(e)}d.prototype.extend=function(e){var t=[],n=[];if(e instanceof p)n.push(e);else if(Array.isArray(e))n=n.concat(e);else{if(!e||!Array.isArray(e.implicit)&&!Array.isArray(e.explicit))throw new o("Schema.extend argument should be a Type, [ Type ], or a schema definition ({ implicit: [...], explicit: [...] })");e.implicit&&(t=t.concat(e.implicit)),e.explicit&&(n=n.concat(e.explicit))}t.forEach((function(e){if(!(e instanceof p))throw new o("Specified list of YAML types (or a single Type object) contains a non-Type object.");if(e.loadKind&&"scalar"!==e.loadKind)throw new o("There is a non-scalar type in the implicit list of a schema. Implicit resolving of such types is not supported.");if(e.multi)throw new o("There is a multi type in the implicit list of a schema. 
Multi tags can only be listed as explicit.")})),n.forEach((function(e){if(!(e instanceof p))throw new o("Specified list of YAML types (or a single Type object) contains a non-Type object.")}));var i=Object.create(d.prototype);return i.implicit=(this.implicit||[]).concat(t),i.explicit=(this.explicit||[]).concat(n),i.compiledImplicit=f(i,"implicit"),i.compiledExplicit=f(i,"explicit"),i.compiledTypeMap=function(){var e,t,n={scalar:{},sequence:{},mapping:{},fallback:{},multi:{scalar:[],sequence:[],mapping:[],fallback:[]}};function i(e){e.multi?(n.multi[e.kind].push(e),n.multi.fallback.push(e)):n[e.kind][e.tag]=n.fallback[e.tag]=e}for(e=0,t=arguments.length;e=0?"0b"+e.toString(2):"-0b"+e.toString(2).slice(1)},octal:function(e){return e>=0?"0o"+e.toString(8):"-0o"+e.toString(8).slice(1)},decimal:function(e){return e.toString(10)},hexadecimal:function(e){return e>=0?"0x"+e.toString(16).toUpperCase():"-0x"+e.toString(16).toUpperCase().slice(1)}},defaultStyle:"decimal",styleAliases:{binary:[2,"bin"],octal:[8,"oct"],decimal:[10,"dec"],hexadecimal:[16,"hex"]}}),x=new RegExp("^(?:[-+]?(?:[0-9][0-9_]*)(?:\\.[0-9_]*)?(?:[eE][-+]?[0-9]+)?|\\.[0-9_]+(?:[eE][-+]?[0-9]+)?|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");var I=/^[-+]?[0-9]+e/;var S=new p("tag:yaml.org,2002:float",{kind:"scalar",resolve:function(e){return null!==e&&!(!x.test(e)||"_"===e[e.length-1])},construct:function(e){var t,n;return n="-"===(t=e.replace(/_/g,"").toLowerCase())[0]?-1:1,"+-".indexOf(t[0])>=0&&(t=t.slice(1)),".inf"===t?1===n?Number.POSITIVE_INFINITY:Number.NEGATIVE_INFINITY:".nan"===t?NaN:n*parseFloat(t,10)},predicate:function(e){return"[object Number]"===Object.prototype.toString.call(e)&&(e%1!=0||n.isNegativeZero(e))},represent:function(e,t){var i;if(isNaN(e))switch(t){case"lowercase":return".nan";case"uppercase":return".NAN";case"camelcase":return".NaN"}else if(Number.POSITIVE_INFINITY===e)switch(t){case"lowercase":return".inf";case"uppercase":return".INF";case"camelcase":return".Inf"}else 
if(Number.NEGATIVE_INFINITY===e)switch(t){case"lowercase":return"-.inf";case"uppercase":return"-.INF";case"camelcase":return"-.Inf"}else if(n.isNegativeZero(e))return"-0.0";return i=e.toString(10),I.test(i)?i.replace("e",".e"):i},defaultStyle:"lowercase"}),O=b.extend({implicit:[A,v,C,S]}),j=O,T=new RegExp("^([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])$"),N=new RegExp("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)(?:[Tt]|[ \\t]+)([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(?:\\.([0-9]*))?(?:[ \\t]*(Z|([-+])([0-9][0-9]?)(?::([0-9][0-9]))?))?$");var F=new p("tag:yaml.org,2002:timestamp",{kind:"scalar",resolve:function(e){return null!==e&&(null!==T.exec(e)||null!==N.exec(e))},construct:function(e){var t,n,i,r,o,a,l,c,s=0,u=null;if(null===(t=T.exec(e))&&(t=N.exec(e)),null===t)throw new Error("Date resolve error");if(n=+t[1],i=+t[2]-1,r=+t[3],!t[4])return new Date(Date.UTC(n,i,r));if(o=+t[4],a=+t[5],l=+t[6],t[7]){for(s=t[7].slice(0,3);s.length<3;)s+="0";s=+s}return t[9]&&(u=6e4*(60*+t[10]+ +(t[11]||0)),"-"===t[9]&&(u=-u)),c=new Date(Date.UTC(n,i,r,o,a,l,s)),u&&c.setTime(c.getTime()-u),c},instanceOf:Date,represent:function(e){return e.toISOString()}});var E=new p("tag:yaml.org,2002:merge",{kind:"scalar",resolve:function(e){return"<<"===e||null===e}}),M="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r";var L=new p("tag:yaml.org,2002:binary",{kind:"scalar",resolve:function(e){if(null===e)return!1;var t,n,i=0,r=e.length,o=M;for(n=0;n64)){if(t<0)return!1;i+=6}return i%8==0},construct:function(e){var t,n,i=e.replace(/[\r\n=]/g,""),r=i.length,o=M,a=0,l=[];for(t=0;t>16&255),l.push(a>>8&255),l.push(255&a)),a=a<<6|o.indexOf(i.charAt(t));return 0===(n=r%4*6)?(l.push(a>>16&255),l.push(a>>8&255),l.push(255&a)):18===n?(l.push(a>>10&255),l.push(a>>2&255)):12===n&&l.push(a>>4&255),new Uint8Array(l)},predicate:function(e){return"[object Uint8Array]"===Object.prototype.toString.call(e)},represent:function(e){var 
t,n,i="",r=0,o=e.length,a=M;for(t=0;t>18&63],i+=a[r>>12&63],i+=a[r>>6&63],i+=a[63&r]),r=(r<<8)+e[t];return 0===(n=o%3)?(i+=a[r>>18&63],i+=a[r>>12&63],i+=a[r>>6&63],i+=a[63&r]):2===n?(i+=a[r>>10&63],i+=a[r>>4&63],i+=a[r<<2&63],i+=a[64]):1===n&&(i+=a[r>>2&63],i+=a[r<<4&63],i+=a[64],i+=a[64]),i}}),_=Object.prototype.hasOwnProperty,D=Object.prototype.toString;var U=new p("tag:yaml.org,2002:omap",{kind:"sequence",resolve:function(e){if(null===e)return!0;var t,n,i,r,o,a=[],l=e;for(t=0,n=l.length;t>10),56320+(e-65536&1023))}for(var ie=new Array(256),re=new Array(256),oe=0;oe<256;oe++)ie[oe]=te(oe)?1:0,re[oe]=te(oe);function ae(e,t){this.input=e,this.filename=t.filename||null,this.schema=t.schema||K,this.onWarning=t.onWarning||null,this.legacy=t.legacy||!1,this.json=t.json||!1,this.listener=t.listener||null,this.implicitTypes=this.schema.compiledImplicit,this.typeMap=this.schema.compiledTypeMap,this.length=e.length,this.position=0,this.line=0,this.lineStart=0,this.lineIndent=0,this.firstTabInLine=-1,this.documents=[]}function le(e,t){var n={name:e.filename,buffer:e.input.slice(0,-1),position:e.position,line:e.line,column:e.position-e.lineStart};return n.snippet=c(n),new o(t,n)}function ce(e,t){throw le(e,t)}function se(e,t){e.onWarning&&e.onWarning.call(null,le(e,t))}var ue={YAML:function(e,t,n){var i,r,o;null!==e.version&&ce(e,"duplication of %YAML directive"),1!==n.length&&ce(e,"YAML directive accepts exactly one argument"),null===(i=/^([0-9]+)\.([0-9]+)$/.exec(n[0]))&&ce(e,"ill-formed argument of the YAML directive"),r=parseInt(i[1],10),o=parseInt(i[2],10),1!==r&&ce(e,"unacceptable YAML version of the document"),e.version=n[0],e.checkLineBreaks=o<2,1!==o&&2!==o&&se(e,"unsupported YAML version of the document")},TAG:function(e,t,n){var i,r;2!==n.length&&ce(e,"TAG directive accepts exactly two arguments"),i=n[0],r=n[1],G.test(i)||ce(e,"ill-formed tag handle (first argument) of the TAG directive"),P.call(e.tagMap,i)&&ce(e,'there is a previously declared suffix for "'+i+'" 
tag handle'),V.test(r)||ce(e,"ill-formed tag prefix (second argument) of the TAG directive");try{r=decodeURIComponent(r)}catch(t){ce(e,"tag prefix is malformed: "+r)}e.tagMap[i]=r}};function pe(e,t,n,i){var r,o,a,l;if(t1&&(e.result+=n.repeat("\n",t-1))}function be(e,t){var n,i,r=e.tag,o=e.anchor,a=[],l=!1;if(-1!==e.firstTabInLine)return!1;for(null!==e.anchor&&(e.anchorMap[e.anchor]=a),i=e.input.charCodeAt(e.position);0!==i&&(-1!==e.firstTabInLine&&(e.position=e.firstTabInLine,ce(e,"tab characters must not be used in indentation")),45===i)&&z(e.input.charCodeAt(e.position+1));)if(l=!0,e.position++,ge(e,!0,-1)&&e.lineIndent<=t)a.push(null),i=e.input.charCodeAt(e.position);else if(n=e.line,we(e,t,3,!1,!0),a.push(e.result),ge(e,!0,-1),i=e.input.charCodeAt(e.position),(e.line===n||e.lineIndent>t)&&0!==i)ce(e,"bad indentation of a sequence entry");else if(e.lineIndentt?g=1:e.lineIndent===t?g=0:e.lineIndentt?g=1:e.lineIndent===t?g=0:e.lineIndentt)&&(y&&(a=e.line,l=e.lineStart,c=e.position),we(e,t,4,!0,r)&&(y?g=e.result:m=e.result),y||(de(e,f,d,h,g,m,a,l,c),h=g=m=null),ge(e,!0,-1),s=e.input.charCodeAt(e.position)),(e.line===o||e.lineIndent>t)&&0!==s)ce(e,"bad indentation of a mapping entry");else if(e.lineIndent=0))break;0===o?ce(e,"bad explicit indentation width of a block scalar; it cannot be less than one"):u?ce(e,"repeat of an indentation width identifier"):(p=t+o-1,u=!0)}if(Q(a)){do{a=e.input.charCodeAt(++e.position)}while(Q(a));if(35===a)do{a=e.input.charCodeAt(++e.position)}while(!J(a)&&0!==a)}for(;0!==a;){for(he(e),e.lineIndent=0,a=e.input.charCodeAt(e.position);(!u||e.lineIndentp&&(p=e.lineIndent),J(a))f++;else{if(e.lineIndent0){for(r=a,o=0;r>0;r--)(a=ee(l=e.input.charCodeAt(++e.position)))>=0?o=(o<<4)+a:ce(e,"expected hexadecimal character");e.result+=ne(o),e.position++}else ce(e,"unknown escape sequence");n=i=e.position}else J(l)?(pe(e,n,i,!0),ye(e,ge(e,!1,t)),n=i=e.position):e.position===e.lineStart&&me(e)?ce(e,"unexpected end of the document within a double 
quoted scalar"):(e.position++,i=e.position)}ce(e,"unexpected end of the stream within a double quoted scalar")}(e,d)?y=!0:!function(e){var t,n,i;if(42!==(i=e.input.charCodeAt(e.position)))return!1;for(i=e.input.charCodeAt(++e.position),t=e.position;0!==i&&!z(i)&&!X(i);)i=e.input.charCodeAt(++e.position);return e.position===t&&ce(e,"name of an alias node must contain at least one character"),n=e.input.slice(t,e.position),P.call(e.anchorMap,n)||ce(e,'unidentified alias "'+n+'"'),e.result=e.anchorMap[n],ge(e,!0,-1),!0}(e)?function(e,t,n){var i,r,o,a,l,c,s,u,p=e.kind,f=e.result;if(z(u=e.input.charCodeAt(e.position))||X(u)||35===u||38===u||42===u||33===u||124===u||62===u||39===u||34===u||37===u||64===u||96===u)return!1;if((63===u||45===u)&&(z(i=e.input.charCodeAt(e.position+1))||n&&X(i)))return!1;for(e.kind="scalar",e.result="",r=o=e.position,a=!1;0!==u;){if(58===u){if(z(i=e.input.charCodeAt(e.position+1))||n&&X(i))break}else if(35===u){if(z(e.input.charCodeAt(e.position-1)))break}else{if(e.position===e.lineStart&&me(e)||n&&X(u))break;if(J(u)){if(l=e.line,c=e.lineStart,s=e.lineIndent,ge(e,!1,-1),e.lineIndent>=t){a=!0,u=e.input.charCodeAt(e.position);continue}e.position=o,e.line=l,e.lineStart=c,e.lineIndent=s;break}}a&&(pe(e,r,o,!1),ye(e,e.line-l),r=o=e.position,a=!1),Q(u)||(o=e.position+1),u=e.input.charCodeAt(++e.position)}return pe(e,r,o,!1),!!e.result||(e.kind=p,e.result=f,!1)}(e,d,1===i)&&(y=!0,null===e.tag&&(e.tag="?")):(y=!0,null===e.tag&&null===e.anchor||ce(e,"alias node should not have any properties")),null!==e.anchor&&(e.anchorMap[e.anchor]=e.result)):0===g&&(y=c&&be(e,h))),null===e.tag)null!==e.anchor&&(e.anchorMap[e.anchor]=e.result);else if("?"===e.tag){for(null!==e.result&&"scalar"!==e.kind&&ce(e,'unacceptable node kind for ! 
tag; it should be "scalar", not "'+e.kind+'"'),s=0,u=e.implicitTypes.length;s"),null!==e.result&&f.kind!==e.kind&&ce(e,"unacceptable node kind for !<"+e.tag+'> tag; it should be "'+f.kind+'", not "'+e.kind+'"'),f.resolve(e.result,e.tag)?(e.result=f.construct(e.result,e.tag),null!==e.anchor&&(e.anchorMap[e.anchor]=e.result)):ce(e,"cannot resolve a node with !<"+e.tag+"> explicit tag")}return null!==e.listener&&e.listener("close",e),null!==e.tag||null!==e.anchor||y}function ke(e){var t,n,i,r,o=e.position,a=!1;for(e.version=null,e.checkLineBreaks=e.legacy,e.tagMap=Object.create(null),e.anchorMap=Object.create(null);0!==(r=e.input.charCodeAt(e.position))&&(ge(e,!0,-1),r=e.input.charCodeAt(e.position),!(e.lineIndent>0||37!==r));){for(a=!0,r=e.input.charCodeAt(++e.position),t=e.position;0!==r&&!z(r);)r=e.input.charCodeAt(++e.position);for(i=[],(n=e.input.slice(t,e.position)).length<1&&ce(e,"directive name must not be less than one character in length");0!==r;){for(;Q(r);)r=e.input.charCodeAt(++e.position);if(35===r){do{r=e.input.charCodeAt(++e.position)}while(0!==r&&!J(r));break}if(J(r))break;for(t=e.position;0!==r&&!z(r);)r=e.input.charCodeAt(++e.position);i.push(e.input.slice(t,e.position))}0!==r&&he(e),P.call(ue,n)?ue[n](e,n,i):se(e,'unknown document directive "'+n+'"')}ge(e,!0,-1),0===e.lineIndent&&45===e.input.charCodeAt(e.position)&&45===e.input.charCodeAt(e.position+1)&&45===e.input.charCodeAt(e.position+2)?(e.position+=3,ge(e,!0,-1)):a&&ce(e,"directives end mark is expected"),we(e,e.lineIndent-1,4,!1,!0),ge(e,!0,-1),e.checkLineBreaks&&H.test(e.input.slice(o,e.position))&&se(e,"non-ASCII line breaks are interpreted as content"),e.documents.push(e.result),e.position===e.lineStart&&me(e)?46===e.input.charCodeAt(e.position)&&(e.position+=3,ge(e,!0,-1)):e.position=55296&&i<=56319&&t+1=56320&&n<=57343?1024*(i-55296)+n-56320+65536:i}function Re(e){return/^\n* /.test(e)}function Be(e,t,n,i,r,o,a,l){var 
c,s,u=0,p=null,f=!1,d=!1,h=-1!==i,g=-1,m=De(s=Ye(e,0))&&s!==Oe&&!_e(s)&&45!==s&&63!==s&&58!==s&&44!==s&&91!==s&&93!==s&&123!==s&&125!==s&&35!==s&&38!==s&&42!==s&&33!==s&&124!==s&&61!==s&&62!==s&&39!==s&&34!==s&&37!==s&&64!==s&&96!==s&&function(e){return!_e(e)&&58!==e}(Ye(e,e.length-1));if(t||a)for(c=0;c=65536?c+=2:c++){if(!De(u=Ye(e,c)))return 5;m=m&&qe(u,p,l),p=u}else{for(c=0;c=65536?c+=2:c++){if(10===(u=Ye(e,c)))f=!0,h&&(d=d||c-g-1>i&&" "!==e[g+1],g=c);else if(!De(u))return 5;m=m&&qe(u,p,l),p=u}d=d||h&&c-g-1>i&&" "!==e[g+1]}return f||d?n>9&&Re(e)?5:a?2===o?5:2:d?4:3:!m||a||r(e)?2===o?5:2:1}function Ke(e,t,n,i,r){e.dump=function(){if(0===t.length)return 2===e.quotingType?'""':"''";if(!e.noCompatMode&&(-1!==Te.indexOf(t)||Ne.test(t)))return 2===e.quotingType?'"'+t+'"':"'"+t+"'";var a=e.indent*Math.max(1,n),l=-1===e.lineWidth?-1:Math.max(Math.min(e.lineWidth,40),e.lineWidth-a),c=i||e.flowLevel>-1&&n>=e.flowLevel;switch(Be(t,c,e.indent,l,(function(t){return function(e,t){var n,i;for(n=0,i=e.implicitTypes.length;n"+Pe(t,e.indent)+We(Me(function(e,t){var n,i,r=/(\n+)([^\n]*)/g,o=(l=e.indexOf("\n"),l=-1!==l?l:e.length,r.lastIndex=l,He(e.slice(0,l),t)),a="\n"===e[0]||" "===e[0];var l;for(;i=r.exec(e);){var c=i[1],s=i[2];n=" "===s[0],o+=c+(a||n||""===s?"":"\n")+He(s,t),a=n}return o}(t,l),a));case 5:return'"'+function(e){for(var t,n="",i=0,r=0;r=65536?r+=2:r++)i=Ye(e,r),!(t=je[i])&&De(i)?(n+=e[r],i>=65536&&(n+=e[r+1])):n+=t||Fe(i);return n}(t)+'"';default:throw new o("impossible error: invalid scalar style")}}()}function Pe(e,t){var n=Re(e)?String(t):"",i="\n"===e[e.length-1];return n+(i&&("\n"===e[e.length-2]||"\n"===e)?"+":i?"":"-")+"\n"}function We(e){return"\n"===e[e.length-1]?e.slice(0,-1):e}function He(e,t){if(""===e||" "===e[0])return e;for(var n,i,r=/ [^ ]/g,o=0,a=0,l=0,c="";n=r.exec(e);)(l=n.index)-o>t&&(i=a>o?a:l,c+="\n"+e.slice(o,i),o=i+1),a=l;return c+="\n",e.length-o>t&&a>o?c+=e.slice(o,a)+"\n"+e.slice(a+1):c+=e.slice(o),c.slice(1)}function $e(e,t,n,i){var 
r,o,a,l="",c=e.tag;for(r=0,o=n.length;r tag resolver accepts not "'+s+'" style');i=c.represent[s](t,s)}e.dump=i}return!0}return!1}function Ve(e,t,n,i,r,a,l){e.tag=null,e.dump=n,Ge(e,n,!1)||Ge(e,n,!0);var c,s=Ie.call(e.dump),u=i;i&&(i=e.flowLevel<0||e.flowLevel>t);var p,f,d="[object Object]"===s||"[object Array]"===s;if(d&&(f=-1!==(p=e.duplicates.indexOf(n))),(null!==e.tag&&"?"!==e.tag||f||2!==e.indent&&t>0)&&(r=!1),f&&e.usedDuplicates[p])e.dump="*ref_"+p;else{if(d&&f&&!e.usedDuplicates[p]&&(e.usedDuplicates[p]=!0),"[object Object]"===s)i&&0!==Object.keys(e.dump).length?(!function(e,t,n,i){var r,a,l,c,s,u,p="",f=e.tag,d=Object.keys(n);if(!0===e.sortKeys)d.sort();else if("function"==typeof e.sortKeys)d.sort(e.sortKeys);else if(e.sortKeys)throw new o("sortKeys must be a boolean or a function");for(r=0,a=d.length;r1024)&&(e.dump&&10===e.dump.charCodeAt(0)?u+="?":u+="? "),u+=e.dump,s&&(u+=Le(e,t)),Ve(e,t+1,c,!0,s)&&(e.dump&&10===e.dump.charCodeAt(0)?u+=":":u+=": ",p+=u+=e.dump));e.tag=f,e.dump=p||"{}"}(e,t,e.dump,r),f&&(e.dump="&ref_"+p+e.dump)):(!function(e,t,n){var i,r,o,a,l,c="",s=e.tag,u=Object.keys(n);for(i=0,r=u.length;i1024&&(l+="? 
"),l+=e.dump+(e.condenseFlow?'"':"")+":"+(e.condenseFlow?"":" "),Ve(e,t,a,!1,!1)&&(c+=l+=e.dump));e.tag=s,e.dump="{"+c+"}"}(e,t,e.dump),f&&(e.dump="&ref_"+p+" "+e.dump));else if("[object Array]"===s)i&&0!==e.dump.length?(e.noArrayIndent&&!l&&t>0?$e(e,t-1,e.dump,r):$e(e,t,e.dump,r),f&&(e.dump="&ref_"+p+e.dump)):(!function(e,t,n){var i,r,o,a="",l=e.tag;for(i=0,r=n.length;i",e.dump=c+" "+e.dump)}return!0}function Ze(e,t){var n,i,r=[],o=[];for(Je(e,r,o),n=0,i=o.length;n=0;)t[e]=0}const a=256,i=286,n=30,s=15,r=new Uint8Array([0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0]),o=new Uint8Array([0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13]),l=new Uint8Array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7]),h=new Uint8Array([16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15]),d=new Array(576);e(d);const _=new Array(60);e(_);const f=new Array(512);e(f);const c=new Array(256);e(c);const u=new Array(29);e(u);const w=new Array(n);function m(t,e,a,i,n){this.static_tree=t,this.extra_bits=e,this.extra_base=a,this.elems=i,this.max_length=n,this.has_stree=t&&t.length}let b,g,p;function k(t,e){this.dyn_tree=t,this.max_code=0,this.stat_desc=e}e(w);const v=t=>t<256?f[t]:f[256+(t>>>7)],y=(t,e)=>{t.pending_buf[t.pending++]=255&e,t.pending_buf[t.pending++]=e>>>8&255},x=(t,e,a)=>{t.bi_valid>16-a?(t.bi_buf|=e<>16-t.bi_valid,t.bi_valid+=a-16):(t.bi_buf|=e<{x(t,a[2*e],a[2*e+1])},A=(t,e)=>{let a=0;do{a|=1&t,t>>>=1,a<<=1}while(--e>0);return a>>>1},E=(t,e,a)=>{const i=new Array(16);let n,r,o=0;for(n=1;n<=s;n++)o=o+a[n-1]<<1,i[n]=o;for(r=0;r<=e;r++){let e=t[2*r+1];0!==e&&(t[2*r]=A(i[e]++,e))}},R=t=>{let e;for(e=0;e{t.bi_valid>8?y(t,t.bi_buf):t.bi_valid>0&&(t.pending_buf[t.pending++]=t.bi_buf),t.bi_buf=0,t.bi_valid=0},U=(t,e,a,i)=>{const n=2*e,s=2*a;return t[n]{const i=t.heap[a];let n=a<<1;for(;n<=t.heap_len&&(n{let 
n,s,l,h,d=0;if(0!==t.sym_next)do{n=255&t.pending_buf[t.sym_buf+d++],n+=(255&t.pending_buf[t.sym_buf+d++])<<8,s=t.pending_buf[t.sym_buf+d++],0===n?z(t,s,e):(l=c[s],z(t,l+a+1,e),h=r[l],0!==h&&(s-=u[l],x(t,s,h)),n--,l=v(n),z(t,l,i),h=o[l],0!==h&&(n-=w[l],x(t,n,h)))}while(d{const a=e.dyn_tree,i=e.stat_desc.static_tree,n=e.stat_desc.has_stree,r=e.stat_desc.elems;let o,l,h,d=-1;for(t.heap_len=0,t.heap_max=573,o=0;o>1;o>=1;o--)S(t,a,o);h=r;do{o=t.heap[1],t.heap[1]=t.heap[t.heap_len--],S(t,a,1),l=t.heap[1],t.heap[--t.heap_max]=o,t.heap[--t.heap_max]=l,a[2*h]=a[2*o]+a[2*l],t.depth[h]=(t.depth[o]>=t.depth[l]?t.depth[o]:t.depth[l])+1,a[2*o+1]=a[2*l+1]=h,t.heap[1]=h++,S(t,a,1)}while(t.heap_len>=2);t.heap[--t.heap_max]=t.heap[1],((t,e)=>{const a=e.dyn_tree,i=e.max_code,n=e.stat_desc.static_tree,r=e.stat_desc.has_stree,o=e.stat_desc.extra_bits,l=e.stat_desc.extra_base,h=e.stat_desc.max_length;let d,_,f,c,u,w,m=0;for(c=0;c<=s;c++)t.bl_count[c]=0;for(a[2*t.heap[t.heap_max]+1]=0,d=t.heap_max+1;d<573;d++)_=t.heap[d],c=a[2*a[2*_+1]+1]+1,c>h&&(c=h,m++),a[2*_+1]=c,_>i||(t.bl_count[c]++,u=0,_>=l&&(u=o[_-l]),w=a[2*_],t.opt_len+=w*(c+u),r&&(t.static_len+=w*(n[2*_+1]+u)));if(0!==m){do{for(c=h-1;0===t.bl_count[c];)c--;t.bl_count[c]--,t.bl_count[c+1]+=2,t.bl_count[h]--,m-=2}while(m>0);for(c=h;0!==c;c--)for(_=t.bl_count[c];0!==_;)f=t.heap[--d],f>i||(a[2*f+1]!==c&&(t.opt_len+=(c-a[2*f+1])*a[2*f],a[2*f+1]=c),_--)}})(t,e),E(a,d,t.bl_count)},O=(t,e,a)=>{let i,n,s=-1,r=e[1],o=0,l=7,h=4;for(0===r&&(l=138,h=3),e[2*(a+1)+1]=65535,i=0;i<=a;i++)n=r,r=e[2*(i+1)+1],++o{let i,n,s=-1,r=e[1],o=0,l=7,h=4;for(0===r&&(l=138,h=3),i=0;i<=a;i++)if(n=r,r=e[2*(i+1)+1],!(++o{x(t,0+(i?1:0),3),Z(t),y(t,a),y(t,~a),a&&t.pending_buf.set(t.window.subarray(e,e+a),t.pending),t.pending+=a};var N=(t,e,i,n)=>{let s,r,o=0;t.level>0?(2===t.strm.data_type&&(t.strm.data_type=(t=>{let e,i=4093624447;for(e=0;e<=31;e++,i>>>=1)if(1&i&&0!==t.dyn_ltree[2*e])return 0;if(0!==t.dyn_ltree[18]||0!==t.dyn_ltree[20]||0!==t.dyn_ltree[26])return 
1;for(e=32;e{let e;for(O(t,t.dyn_ltree,t.l_desc.max_code),O(t,t.dyn_dtree,t.d_desc.max_code),T(t,t.bl_desc),e=18;e>=3&&0===t.bl_tree[2*h[e]+1];e--);return t.opt_len+=3*(e+1)+5+5+4,e})(t),s=t.opt_len+3+7>>>3,r=t.static_len+3+7>>>3,r<=s&&(s=r)):s=r=i+5,i+4<=s&&-1!==e?L(t,e,i,n):4===t.strategy||r===s?(x(t,2+(n?1:0),3),D(t,d,_)):(x(t,4+(n?1:0),3),((t,e,a,i)=>{let n;for(x(t,e-257,5),x(t,a-1,5),x(t,i-4,4),n=0;n{F||((()=>{let t,e,a,h,k;const v=new Array(16);for(a=0,h=0;h<28;h++)for(u[h]=a,t=0;t<1<>=7;h(t.pending_buf[t.sym_buf+t.sym_next++]=e,t.pending_buf[t.sym_buf+t.sym_next++]=e>>8,t.pending_buf[t.sym_buf+t.sym_next++]=i,0===e?t.dyn_ltree[2*i]++:(t.matches++,e--,t.dyn_ltree[2*(c[i]+a+1)]++,t.dyn_dtree[2*v(e)]++),t.sym_next===t.sym_end),_tr_align:t=>{x(t,2,3),z(t,256,d),(t=>{16===t.bi_valid?(y(t,t.bi_buf),t.bi_buf=0,t.bi_valid=0):t.bi_valid>=8&&(t.pending_buf[t.pending++]=255&t.bi_buf,t.bi_buf>>=8,t.bi_valid-=8)})(t)}};var C=(t,e,a,i)=>{let n=65535&t|0,s=t>>>16&65535|0,r=0;for(;0!==a;){r=a>2e3?2e3:a,a-=r;do{n=n+e[i++]|0,s=s+n|0}while(--r);n%=65521,s%=65521}return n|s<<16|0};const M=new Uint32Array((()=>{let t,e=[];for(var a=0;a<256;a++){t=a;for(var i=0;i<8;i++)t=1&t?3988292384^t>>>1:t>>>1;e[a]=t}return e})());var H=(t,e,a,i)=>{const n=M,s=i+a;t^=-1;for(let a=i;a>>8^n[255&(t^e[a])];return-1^t},j={2:"need dictionary",1:"stream end",0:"","-1":"file error","-2":"stream error","-3":"data error","-4":"insufficient memory","-5":"buffer error","-6":"incompatible 
version"},K={Z_NO_FLUSH:0,Z_PARTIAL_FLUSH:1,Z_SYNC_FLUSH:2,Z_FULL_FLUSH:3,Z_FINISH:4,Z_BLOCK:5,Z_TREES:6,Z_OK:0,Z_STREAM_END:1,Z_NEED_DICT:2,Z_ERRNO:-1,Z_STREAM_ERROR:-2,Z_DATA_ERROR:-3,Z_MEM_ERROR:-4,Z_BUF_ERROR:-5,Z_NO_COMPRESSION:0,Z_BEST_SPEED:1,Z_BEST_COMPRESSION:9,Z_DEFAULT_COMPRESSION:-1,Z_FILTERED:1,Z_HUFFMAN_ONLY:2,Z_RLE:3,Z_FIXED:4,Z_DEFAULT_STRATEGY:0,Z_BINARY:0,Z_TEXT:1,Z_UNKNOWN:2,Z_DEFLATED:8};const{_tr_init:P,_tr_stored_block:Y,_tr_flush_block:G,_tr_tally:X,_tr_align:W}=B,{Z_NO_FLUSH:q,Z_PARTIAL_FLUSH:J,Z_FULL_FLUSH:Q,Z_FINISH:V,Z_BLOCK:$,Z_OK:tt,Z_STREAM_END:et,Z_STREAM_ERROR:at,Z_DATA_ERROR:it,Z_BUF_ERROR:nt,Z_DEFAULT_COMPRESSION:st,Z_FILTERED:rt,Z_HUFFMAN_ONLY:ot,Z_RLE:lt,Z_FIXED:ht,Z_DEFAULT_STRATEGY:dt,Z_UNKNOWN:_t,Z_DEFLATED:ft}=K,ct=258,ut=262,wt=42,mt=113,bt=666,gt=(t,e)=>(t.msg=j[e],e),pt=t=>2*t-(t>4?9:0),kt=t=>{let e=t.length;for(;--e>=0;)t[e]=0},vt=t=>{let e,a,i,n=t.w_size;e=t.hash_size,i=e;do{a=t.head[--i],t.head[i]=a>=n?a-n:0}while(--e);e=n,i=e;do{a=t.prev[--i],t.prev[i]=a>=n?a-n:0}while(--e)};let yt=(t,e,a)=>(e<{const e=t.state;let a=e.pending;a>t.avail_out&&(a=t.avail_out),0!==a&&(t.output.set(e.pending_buf.subarray(e.pending_out,e.pending_out+a),t.next_out),t.next_out+=a,e.pending_out+=a,t.total_out+=a,t.avail_out-=a,e.pending-=a,0===e.pending&&(e.pending_out=0))},zt=(t,e)=>{G(t,t.block_start>=0?t.block_start:-1,t.strstart-t.block_start,e),t.block_start=t.strstart,xt(t.strm)},At=(t,e)=>{t.pending_buf[t.pending++]=e},Et=(t,e)=>{t.pending_buf[t.pending++]=e>>>8&255,t.pending_buf[t.pending++]=255&e},Rt=(t,e,a,i)=>{let n=t.avail_in;return n>i&&(n=i),0===n?0:(t.avail_in-=n,e.set(t.input.subarray(t.next_in,t.next_in+n),a),1===t.state.wrap?t.adler=C(t.adler,e,n,a):2===t.state.wrap&&(t.adler=H(t.adler,e,n,a)),t.next_in+=n,t.total_in+=n,n)},Zt=(t,e)=>{let a,i,n=t.max_chain_length,s=t.strstart,r=t.prev_length,o=t.nice_match;const l=t.strstart>t.w_size-ut?t.strstart-(t.w_size-ut):0,h=t.window,d=t.w_mask,_=t.prev,f=t.strstart+ct;let 
c=h[s+r-1],u=h[s+r];t.prev_length>=t.good_match&&(n>>=2),o>t.lookahead&&(o=t.lookahead);do{if(a=e,h[a+r]===u&&h[a+r-1]===c&&h[a]===h[s]&&h[++a]===h[s+1]){s+=2,a++;do{}while(h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&h[++s]===h[++a]&&sr){if(t.match_start=e,r=i,i>=o)break;c=h[s+r-1],u=h[s+r]}}}while((e=_[e&d])>l&&0!=--n);return r<=t.lookahead?r:t.lookahead},Ut=t=>{const e=t.w_size;let a,i,n;do{if(i=t.window_size-t.lookahead-t.strstart,t.strstart>=e+(e-ut)&&(t.window.set(t.window.subarray(e,e+e-i),0),t.match_start-=e,t.strstart-=e,t.block_start-=e,t.insert>t.strstart&&(t.insert=t.strstart),vt(t),i+=e),0===t.strm.avail_in)break;if(a=Rt(t.strm,t.window,t.strstart+t.lookahead,i),t.lookahead+=a,t.lookahead+t.insert>=3)for(n=t.strstart-t.insert,t.ins_h=t.window[n],t.ins_h=yt(t,t.ins_h,t.window[n+1]);t.insert&&(t.ins_h=yt(t,t.ins_h,t.window[n+3-1]),t.prev[n&t.w_mask]=t.head[t.ins_h],t.head[t.ins_h]=n,n++,t.insert--,!(t.lookahead+t.insert<3)););}while(t.lookahead{let a,i,n,s=t.pending_buf_size-5>t.w_size?t.w_size:t.pending_buf_size-5,r=0,o=t.strm.avail_in;do{if(a=65535,n=t.bi_valid+42>>3,t.strm.avail_outi+t.strm.avail_in&&(a=i+t.strm.avail_in),a>n&&(a=n),a>8,t.pending_buf[t.pending-2]=~a,t.pending_buf[t.pending-1]=~a>>8,xt(t.strm),i&&(i>a&&(i=a),t.strm.output.set(t.window.subarray(t.block_start,t.block_start+i),t.strm.next_out),t.strm.next_out+=i,t.strm.avail_out-=i,t.strm.total_out+=i,t.block_start+=i,a-=i),a&&(Rt(t.strm,t.strm.output,t.strm.next_out,a),t.strm.next_out+=a,t.strm.avail_out-=a,t.strm.total_out+=a)}while(0===r);return 
o-=t.strm.avail_in,o&&(o>=t.w_size?(t.matches=2,t.window.set(t.strm.input.subarray(t.strm.next_in-t.w_size,t.strm.next_in),0),t.strstart=t.w_size,t.insert=t.strstart):(t.window_size-t.strstart<=o&&(t.strstart-=t.w_size,t.window.set(t.window.subarray(t.w_size,t.w_size+t.strstart),0),t.matches<2&&t.matches++,t.insert>t.strstart&&(t.insert=t.strstart)),t.window.set(t.strm.input.subarray(t.strm.next_in-o,t.strm.next_in),t.strstart),t.strstart+=o,t.insert+=o>t.w_size-t.insert?t.w_size-t.insert:o),t.block_start=t.strstart),t.high_watern&&t.block_start>=t.w_size&&(t.block_start-=t.w_size,t.strstart-=t.w_size,t.window.set(t.window.subarray(t.w_size,t.w_size+t.strstart),0),t.matches<2&&t.matches++,n+=t.w_size,t.insert>t.strstart&&(t.insert=t.strstart)),n>t.strm.avail_in&&(n=t.strm.avail_in),n&&(Rt(t.strm,t.window,t.strstart,n),t.strstart+=n,t.insert+=n>t.w_size-t.insert?t.w_size-t.insert:n),t.high_water>3,n=t.pending_buf_size-n>65535?65535:t.pending_buf_size-n,s=n>t.w_size?t.w_size:n,i=t.strstart-t.block_start,(i>=s||(i||e===V)&&e!==q&&0===t.strm.avail_in&&i<=n)&&(a=i>n?n:i,r=e===V&&0===t.strm.avail_in&&a===i?1:0,Y(t,t.block_start,a,r),t.block_start+=a,xt(t.strm)),r?3:1)},Dt=(t,e)=>{let a,i;for(;;){if(t.lookahead=3&&(t.ins_h=yt(t,t.ins_h,t.window[t.strstart+3-1]),a=t.prev[t.strstart&t.w_mask]=t.head[t.ins_h],t.head[t.ins_h]=t.strstart),0!==a&&t.strstart-a<=t.w_size-ut&&(t.match_length=Zt(t,a)),t.match_length>=3)if(i=X(t,t.strstart-t.match_start,t.match_length-3),t.lookahead-=t.match_length,t.match_length<=t.max_lazy_match&&t.lookahead>=3){t.match_length--;do{t.strstart++,t.ins_h=yt(t,t.ins_h,t.window[t.strstart+3-1]),a=t.prev[t.strstart&t.w_mask]=t.head[t.ins_h],t.head[t.ins_h]=t.strstart}while(0!=--t.match_length);t.strstart++}else t.strstart+=t.match_length,t.match_length=0,t.ins_h=t.window[t.strstart],t.ins_h=yt(t,t.ins_h,t.window[t.strstart+1]);else i=X(t,0,t.window[t.strstart]),t.lookahead--,t.strstart++;if(i&&(zt(t,!1),0===t.strm.avail_out))return 1}return 
t.insert=t.strstart<2?t.strstart:2,e===V?(zt(t,!0),0===t.strm.avail_out?3:4):t.sym_next&&(zt(t,!1),0===t.strm.avail_out)?1:2},Tt=(t,e)=>{let a,i,n;for(;;){if(t.lookahead=3&&(t.ins_h=yt(t,t.ins_h,t.window[t.strstart+3-1]),a=t.prev[t.strstart&t.w_mask]=t.head[t.ins_h],t.head[t.ins_h]=t.strstart),t.prev_length=t.match_length,t.prev_match=t.match_start,t.match_length=2,0!==a&&t.prev_length4096)&&(t.match_length=2)),t.prev_length>=3&&t.match_length<=t.prev_length){n=t.strstart+t.lookahead-3,i=X(t,t.strstart-1-t.prev_match,t.prev_length-3),t.lookahead-=t.prev_length-1,t.prev_length-=2;do{++t.strstart<=n&&(t.ins_h=yt(t,t.ins_h,t.window[t.strstart+3-1]),a=t.prev[t.strstart&t.w_mask]=t.head[t.ins_h],t.head[t.ins_h]=t.strstart)}while(0!=--t.prev_length);if(t.match_available=0,t.match_length=2,t.strstart++,i&&(zt(t,!1),0===t.strm.avail_out))return 1}else if(t.match_available){if(i=X(t,0,t.window[t.strstart-1]),i&&zt(t,!1),t.strstart++,t.lookahead--,0===t.strm.avail_out)return 1}else t.match_available=1,t.strstart++,t.lookahead--}return t.match_available&&(i=X(t,0,t.window[t.strstart-1]),t.match_available=0),t.insert=t.strstart<2?t.strstart:2,e===V?(zt(t,!0),0===t.strm.avail_out?3:4):t.sym_next&&(zt(t,!1),0===t.strm.avail_out)?1:2};function Ot(t,e,a,i,n){this.good_length=t,this.max_lazy=e,this.nice_length=a,this.max_chain=i,this.func=n}const It=[new Ot(0,0,0,0,St),new Ot(4,4,8,4,Dt),new Ot(4,5,16,8,Dt),new Ot(4,6,32,32,Dt),new Ot(4,4,16,16,Tt),new Ot(8,16,32,32,Tt),new Ot(8,16,128,128,Tt),new Ot(8,32,128,256,Tt),new Ot(32,128,258,1024,Tt),new Ot(32,258,258,4096,Tt)];function 
Ft(){this.strm=null,this.status=0,this.pending_buf=null,this.pending_buf_size=0,this.pending_out=0,this.pending=0,this.wrap=0,this.gzhead=null,this.gzindex=0,this.method=ft,this.last_flush=-1,this.w_size=0,this.w_bits=0,this.w_mask=0,this.window=null,this.window_size=0,this.prev=null,this.head=null,this.ins_h=0,this.hash_size=0,this.hash_bits=0,this.hash_mask=0,this.hash_shift=0,this.block_start=0,this.match_length=0,this.prev_match=0,this.match_available=0,this.strstart=0,this.match_start=0,this.lookahead=0,this.prev_length=0,this.max_chain_length=0,this.max_lazy_match=0,this.level=0,this.strategy=0,this.good_match=0,this.nice_match=0,this.dyn_ltree=new Uint16Array(1146),this.dyn_dtree=new Uint16Array(122),this.bl_tree=new Uint16Array(78),kt(this.dyn_ltree),kt(this.dyn_dtree),kt(this.bl_tree),this.l_desc=null,this.d_desc=null,this.bl_desc=null,this.bl_count=new Uint16Array(16),this.heap=new Uint16Array(573),kt(this.heap),this.heap_len=0,this.heap_max=0,this.depth=new Uint16Array(573),kt(this.depth),this.sym_buf=0,this.lit_bufsize=0,this.sym_next=0,this.sym_end=0,this.opt_len=0,this.static_len=0,this.matches=0,this.insert=0,this.bi_buf=0,this.bi_valid=0}const Lt=t=>{if(!t)return 1;const e=t.state;return!e||e.strm!==t||e.status!==wt&&57!==e.status&&69!==e.status&&73!==e.status&&91!==e.status&&103!==e.status&&e.status!==mt&&e.status!==bt?1:0},Nt=t=>{if(Lt(t))return gt(t,at);t.total_in=t.total_out=0,t.data_type=_t;const e=t.state;return e.pending=0,e.pending_out=0,e.wrap<0&&(e.wrap=-e.wrap),e.status=2===e.wrap?57:e.wrap?wt:mt,t.adler=2===e.wrap?0:1,e.last_flush=-2,P(e),tt},Bt=t=>{const e=Nt(t);var a;return e===tt&&((a=t.state).window_size=2*a.w_size,kt(a.head),a.max_lazy_match=It[a.level].max_lazy,a.good_match=It[a.level].good_length,a.nice_match=It[a.level].nice_length,a.max_chain_length=It[a.level].max_chain,a.strstart=0,a.block_start=0,a.lookahead=0,a.insert=0,a.match_length=a.prev_length=2,a.match_available=0,a.ins_h=0),e},Ct=(t,e,a,i,n,s)=>{if(!t)return at;let 
r=1;if(e===st&&(e=6),i<0?(r=0,i=-i):i>15&&(r=2,i-=16),n<1||n>9||a!==ft||i<8||i>15||e<0||e>9||s<0||s>ht||8===i&&1!==r)return gt(t,at);8===i&&(i=9);const o=new Ft;return t.state=o,o.strm=t,o.status=wt,o.wrap=r,o.gzhead=null,o.w_bits=i,o.w_size=1<Ct(t,e,ft,15,8,dt),deflateInit2:Ct,deflateReset:Bt,deflateResetKeep:Nt,deflateSetHeader:(t,e)=>Lt(t)||2!==t.state.wrap?at:(t.state.gzhead=e,tt),deflate:(t,e)=>{if(Lt(t)||e>$||e<0)return t?gt(t,at):at;const a=t.state;if(!t.output||0!==t.avail_in&&!t.input||a.status===bt&&e!==V)return gt(t,0===t.avail_out?nt:at);const i=a.last_flush;if(a.last_flush=e,0!==a.pending){if(xt(t),0===t.avail_out)return a.last_flush=-1,tt}else if(0===t.avail_in&&pt(e)<=pt(i)&&e!==V)return gt(t,nt);if(a.status===bt&&0!==t.avail_in)return gt(t,nt);if(a.status===wt&&0===a.wrap&&(a.status=mt),a.status===wt){let e=ft+(a.w_bits-8<<4)<<8,i=-1;if(i=a.strategy>=ot||a.level<2?0:a.level<6?1:6===a.level?2:3,e|=i<<6,0!==a.strstart&&(e|=32),e+=31-e%31,Et(a,e),0!==a.strstart&&(Et(a,t.adler>>>16),Et(a,65535&t.adler)),t.adler=1,a.status=mt,xt(t),0!==a.pending)return a.last_flush=-1,tt}if(57===a.status)if(t.adler=0,At(a,31),At(a,139),At(a,8),a.gzhead)At(a,(a.gzhead.text?1:0)+(a.gzhead.hcrc?2:0)+(a.gzhead.extra?4:0)+(a.gzhead.name?8:0)+(a.gzhead.comment?16:0)),At(a,255&a.gzhead.time),At(a,a.gzhead.time>>8&255),At(a,a.gzhead.time>>16&255),At(a,a.gzhead.time>>24&255),At(a,9===a.level?2:a.strategy>=ot||a.level<2?4:0),At(a,255&a.gzhead.os),a.gzhead.extra&&a.gzhead.extra.length&&(At(a,255&a.gzhead.extra.length),At(a,a.gzhead.extra.length>>8&255)),a.gzhead.hcrc&&(t.adler=H(t.adler,a.pending_buf,a.pending,0)),a.gzindex=0,a.status=69;else if(At(a,0),At(a,0),At(a,0),At(a,0),At(a,0),At(a,9===a.level?2:a.strategy>=ot||a.level<2?4:0),At(a,3),a.status=mt,xt(t),0!==a.pending)return a.last_flush=-1,tt;if(69===a.status){if(a.gzhead.extra){let e=a.pending,i=(65535&a.gzhead.extra.length)-a.gzindex;for(;a.pending+i>a.pending_buf_size;){let 
n=a.pending_buf_size-a.pending;if(a.pending_buf.set(a.gzhead.extra.subarray(a.gzindex,a.gzindex+n),a.pending),a.pending=a.pending_buf_size,a.gzhead.hcrc&&a.pending>e&&(t.adler=H(t.adler,a.pending_buf,a.pending-e,e)),a.gzindex+=n,xt(t),0!==a.pending)return a.last_flush=-1,tt;e=0,i-=n}let n=new Uint8Array(a.gzhead.extra);a.pending_buf.set(n.subarray(a.gzindex,a.gzindex+i),a.pending),a.pending+=i,a.gzhead.hcrc&&a.pending>e&&(t.adler=H(t.adler,a.pending_buf,a.pending-e,e)),a.gzindex=0}a.status=73}if(73===a.status){if(a.gzhead.name){let e,i=a.pending;do{if(a.pending===a.pending_buf_size){if(a.gzhead.hcrc&&a.pending>i&&(t.adler=H(t.adler,a.pending_buf,a.pending-i,i)),xt(t),0!==a.pending)return a.last_flush=-1,tt;i=0}e=a.gzindexi&&(t.adler=H(t.adler,a.pending_buf,a.pending-i,i)),a.gzindex=0}a.status=91}if(91===a.status){if(a.gzhead.comment){let e,i=a.pending;do{if(a.pending===a.pending_buf_size){if(a.gzhead.hcrc&&a.pending>i&&(t.adler=H(t.adler,a.pending_buf,a.pending-i,i)),xt(t),0!==a.pending)return a.last_flush=-1,tt;i=0}e=a.gzindexi&&(t.adler=H(t.adler,a.pending_buf,a.pending-i,i))}a.status=103}if(103===a.status){if(a.gzhead.hcrc){if(a.pending+2>a.pending_buf_size&&(xt(t),0!==a.pending))return a.last_flush=-1,tt;At(a,255&t.adler),At(a,t.adler>>8&255),t.adler=0}if(a.status=mt,xt(t),0!==a.pending)return a.last_flush=-1,tt}if(0!==t.avail_in||0!==a.lookahead||e!==q&&a.status!==bt){let i=0===a.level?St(a,e):a.strategy===ot?((t,e)=>{let a;for(;;){if(0===t.lookahead&&(Ut(t),0===t.lookahead)){if(e===q)return 1;break}if(t.match_length=0,a=X(t,0,t.window[t.strstart]),t.lookahead--,t.strstart++,a&&(zt(t,!1),0===t.strm.avail_out))return 1}return t.insert=0,e===V?(zt(t,!0),0===t.strm.avail_out?3:4):t.sym_next&&(zt(t,!1),0===t.strm.avail_out)?1:2})(a,e):a.strategy===lt?((t,e)=>{let a,i,n,s;const r=t.window;for(;;){if(t.lookahead<=ct){if(Ut(t),t.lookahead<=ct&&e===q)return 
1;if(0===t.lookahead)break}if(t.match_length=0,t.lookahead>=3&&t.strstart>0&&(n=t.strstart-1,i=r[n],i===r[++n]&&i===r[++n]&&i===r[++n])){s=t.strstart+ct;do{}while(i===r[++n]&&i===r[++n]&&i===r[++n]&&i===r[++n]&&i===r[++n]&&i===r[++n]&&i===r[++n]&&i===r[++n]&&nt.lookahead&&(t.match_length=t.lookahead)}if(t.match_length>=3?(a=X(t,1,t.match_length-3),t.lookahead-=t.match_length,t.strstart+=t.match_length,t.match_length=0):(a=X(t,0,t.window[t.strstart]),t.lookahead--,t.strstart++),a&&(zt(t,!1),0===t.strm.avail_out))return 1}return t.insert=0,e===V?(zt(t,!0),0===t.strm.avail_out?3:4):t.sym_next&&(zt(t,!1),0===t.strm.avail_out)?1:2})(a,e):It[a.level].func(a,e);if(3!==i&&4!==i||(a.status=bt),1===i||3===i)return 0===t.avail_out&&(a.last_flush=-1),tt;if(2===i&&(e===J?W(a):e!==$&&(Y(a,0,0,!1),e===Q&&(kt(a.head),0===a.lookahead&&(a.strstart=0,a.block_start=0,a.insert=0))),xt(t),0===t.avail_out))return a.last_flush=-1,tt}return e!==V?tt:a.wrap<=0?et:(2===a.wrap?(At(a,255&t.adler),At(a,t.adler>>8&255),At(a,t.adler>>16&255),At(a,t.adler>>24&255),At(a,255&t.total_in),At(a,t.total_in>>8&255),At(a,t.total_in>>16&255),At(a,t.total_in>>24&255)):(Et(a,t.adler>>>16),Et(a,65535&t.adler)),xt(t),a.wrap>0&&(a.wrap=-a.wrap),0!==a.pending?tt:et)},deflateEnd:t=>{if(Lt(t))return at;const e=t.state.status;return t.state=null,e===mt?gt(t,it):tt},deflateSetDictionary:(t,e)=>{let a=e.length;if(Lt(t))return at;const i=t.state,n=i.wrap;if(2===n||1===n&&i.status!==wt||i.lookahead)return at;if(1===n&&(t.adler=C(t.adler,e,a,0)),i.wrap=0,a>=i.w_size){0===n&&(kt(i.head),i.strstart=0,i.block_start=0,i.insert=0);let t=new Uint8Array(i.w_size);t.set(e.subarray(a-i.w_size,a),0),e=t,a=i.w_size}const s=t.avail_in,r=t.next_in,o=t.input;for(t.avail_in=a,t.next_in=0,t.input=e,Ut(i);i.lookahead>=3;){let t=i.strstart,e=i.lookahead-2;do{i.ins_h=yt(i,i.ins_h,i.window[t+3-1]),i.prev[t&i.w_mask]=i.head[i.ins_h],i.head[i.ins_h]=t,t++}while(--e);i.strstart=t,i.lookahead=2,Ut(i)}return 
i.strstart+=i.lookahead,i.block_start=i.strstart,i.insert=i.lookahead,i.lookahead=0,i.match_length=i.prev_length=2,i.match_available=0,t.next_in=r,t.input=o,t.avail_in=s,i.wrap=n,tt},deflateInfo:"pako deflate (from Nodeca project)"};const Ht=(t,e)=>Object.prototype.hasOwnProperty.call(t,e);var jt=function(t){const e=Array.prototype.slice.call(arguments,1);for(;e.length;){const a=e.shift();if(a){if("object"!=typeof a)throw new TypeError(a+"must be non-object");for(const e in a)Ht(a,e)&&(t[e]=a[e])}}return t},Kt=t=>{let e=0;for(let a=0,i=t.length;a=252?6:t>=248?5:t>=240?4:t>=224?3:t>=192?2:1;Yt[254]=Yt[254]=1;var Gt=t=>{if("function"==typeof TextEncoder&&TextEncoder.prototype.encode)return(new TextEncoder).encode(t);let e,a,i,n,s,r=t.length,o=0;for(n=0;n>>6,e[s++]=128|63&a):a<65536?(e[s++]=224|a>>>12,e[s++]=128|a>>>6&63,e[s++]=128|63&a):(e[s++]=240|a>>>18,e[s++]=128|a>>>12&63,e[s++]=128|a>>>6&63,e[s++]=128|63&a);return e},Xt=(t,e)=>{const a=e||t.length;if("function"==typeof TextDecoder&&TextDecoder.prototype.decode)return(new TextDecoder).decode(t.subarray(0,e));let i,n;const s=new Array(2*a);for(n=0,i=0;i4)s[n++]=65533,i+=r-1;else{for(e&=2===r?31:3===r?15:7;r>1&&i1?s[n++]=65533:e<65536?s[n++]=e:(e-=65536,s[n++]=55296|e>>10&1023,s[n++]=56320|1023&e)}}return((t,e)=>{if(e<65534&&t.subarray&&Pt)return String.fromCharCode.apply(null,t.length===e?t:t.subarray(0,e));let a="";for(let i=0;i{(e=e||t.length)>t.length&&(e=t.length);let a=e-1;for(;a>=0&&128==(192&t[a]);)a--;return a<0||0===a?e:a+Yt[t[a]]>e?a:e};var qt=function(){this.input=null,this.next_in=0,this.avail_in=0,this.total_in=0,this.output=null,this.next_out=0,this.avail_out=0,this.total_out=0,this.msg="",this.state=null,this.data_type=2,this.adler=0};const Jt=Object.prototype.toString,{Z_NO_FLUSH:Qt,Z_SYNC_FLUSH:Vt,Z_FULL_FLUSH:$t,Z_FINISH:te,Z_OK:ee,Z_STREAM_END:ae,Z_DEFAULT_COMPRESSION:ie,Z_DEFAULT_STRATEGY:ne,Z_DEFLATED:se}=K;function 
re(t){this.options=jt({level:ie,method:se,chunkSize:16384,windowBits:15,memLevel:8,strategy:ne},t||{});let e=this.options;e.raw&&e.windowBits>0?e.windowBits=-e.windowBits:e.gzip&&e.windowBits>0&&e.windowBits<16&&(e.windowBits+=16),this.err=0,this.msg="",this.ended=!1,this.chunks=[],this.strm=new qt,this.strm.avail_out=0;let a=Mt.deflateInit2(this.strm,e.level,e.method,e.windowBits,e.memLevel,e.strategy);if(a!==ee)throw new Error(j[a]);if(e.header&&Mt.deflateSetHeader(this.strm,e.header),e.dictionary){let t;if(t="string"==typeof e.dictionary?Gt(e.dictionary):"[object ArrayBuffer]"===Jt.call(e.dictionary)?new Uint8Array(e.dictionary):e.dictionary,a=Mt.deflateSetDictionary(this.strm,t),a!==ee)throw new Error(j[a]);this._dict_set=!0}}function oe(t,e){const a=new re(e);if(a.push(t,!0),a.err)throw a.msg||j[a.err];return a.result}re.prototype.push=function(t,e){const a=this.strm,i=this.options.chunkSize;let n,s;if(this.ended)return!1;for(s=e===~~e?e:!0===e?te:Qt,"string"==typeof t?a.input=Gt(t):"[object ArrayBuffer]"===Jt.call(t)?a.input=new Uint8Array(t):a.input=t,a.next_in=0,a.avail_in=a.input.length;;)if(0===a.avail_out&&(a.output=new Uint8Array(i),a.next_out=0,a.avail_out=i),(s===Vt||s===$t)&&a.avail_out<=6)this.onData(a.output.subarray(0,a.next_out)),a.avail_out=0;else{if(n=Mt.deflate(a,s),n===ae)return a.next_out>0&&this.onData(a.output.subarray(0,a.next_out)),n=Mt.deflateEnd(this.strm),this.onEnd(n),this.ended=!0,n===ee;if(0!==a.avail_out){if(s>0&&a.next_out>0)this.onData(a.output.subarray(0,a.next_out)),a.avail_out=0;else if(0===a.avail_in)break}else this.onData(a.output)}return!0},re.prototype.onData=function(t){this.chunks.push(t)},re.prototype.onEnd=function(t){t===ee&&(this.result=Kt(this.chunks)),this.chunks=[],this.err=t,this.msg=this.strm.msg};var le={Deflate:re,deflate:oe,deflateRaw:function(t,e){return(e=e||{}).raw=!0,oe(t,e)},gzip:function(t,e){return(e=e||{}).gzip=!0,oe(t,e)},constants:K};const he=16209;var de=function(t,e){let 
a,i,n,s,r,o,l,h,d,_,f,c,u,w,m,b,g,p,k,v,y,x,z,A;const E=t.state;a=t.next_in,z=t.input,i=a+(t.avail_in-5),n=t.next_out,A=t.output,s=n-(e-t.avail_out),r=n+(t.avail_out-257),o=E.dmax,l=E.wsize,h=E.whave,d=E.wnext,_=E.window,f=E.hold,c=E.bits,u=E.lencode,w=E.distcode,m=(1<>>24,f>>>=p,c-=p,p=g>>>16&255,0===p)A[n++]=65535&g;else{if(!(16&p)){if(0==(64&p)){g=u[(65535&g)+(f&(1<>>=p,c-=p),c<15&&(f+=z[a++]<>>24,f>>>=p,c-=p,p=g>>>16&255,!(16&p)){if(0==(64&p)){g=w[(65535&g)+(f&(1<o){t.msg="invalid distance too far back",E.mode=he;break t}if(f>>>=p,c-=p,p=n-s,v>p){if(p=v-p,p>h&&E.sane){t.msg="invalid distance too far back",E.mode=he;break t}if(y=0,x=_,0===d){if(y+=l-p,p2;)A[n++]=x[y++],A[n++]=x[y++],A[n++]=x[y++],k-=3;k&&(A[n++]=x[y++],k>1&&(A[n++]=x[y++]))}else{y=n-v;do{A[n++]=A[y++],A[n++]=A[y++],A[n++]=A[y++],k-=3}while(k>2);k&&(A[n++]=A[y++],k>1&&(A[n++]=A[y++]))}break}}break}}while(a>3,a-=k,c-=k<<3,f&=(1<{const l=o.bits;let h,d,_,f,c,u,w=0,m=0,b=0,g=0,p=0,k=0,v=0,y=0,x=0,z=0,A=null;const E=new Uint16Array(16),R=new Uint16Array(16);let Z,U,S,D=null;for(w=0;w<=_e;w++)E[w]=0;for(m=0;m=1&&0===E[g];g--);if(p>g&&(p=g),0===g)return n[s++]=20971520,n[s++]=20971520,o.bits=1,0;for(b=1;b0&&(0===t||1!==g))return-1;for(R[1]=0,w=1;w<_e;w++)R[w+1]=R[w]+E[w];for(m=0;m852||2===t&&x>592)return 1;for(;;){Z=w-v,r[m]+1=u?(U=D[r[m]-u],S=A[r[m]-u]):(U=96,S=0),h=1<>v)+d]=Z<<24|U<<16|S|0}while(0!==d);for(h=1<>=1;if(0!==h?(z&=h-1,z+=h):z=0,m++,0==--E[w]){if(w===g)break;w=e[a+r[m]]}if(w>p&&(z&f)!==_){for(0===v&&(v=p),c+=b,k=w-v,y=1<852||2===t&&x>592)return 1;_=z&f,n[_]=p<<24|k<<16|c-s|0}}return 0!==z&&(n[c+z]=w-v<<24|64<<16|0),o.bits=p,0};const{Z_FINISH:be,Z_BLOCK:ge,Z_TREES:pe,Z_OK:ke,Z_STREAM_END:ve,Z_NEED_DICT:ye,Z_STREAM_ERROR:xe,Z_DATA_ERROR:ze,Z_MEM_ERROR:Ae,Z_BUF_ERROR:Ee,Z_DEFLATED:Re}=K,Ze=16180,Ue=16190,Se=16191,De=16192,Te=16194,Oe=16199,Ie=16200,Fe=16206,Le=16209,Ne=t=>(t>>>24&255)+(t>>>8&65280)+((65280&t)<<8)+((255&t)<<24);function 
Be(){this.strm=null,this.mode=0,this.last=!1,this.wrap=0,this.havedict=!1,this.flags=0,this.dmax=0,this.check=0,this.total=0,this.head=null,this.wbits=0,this.wsize=0,this.whave=0,this.wnext=0,this.window=null,this.hold=0,this.bits=0,this.length=0,this.offset=0,this.extra=0,this.lencode=null,this.distcode=null,this.lenbits=0,this.distbits=0,this.ncode=0,this.nlen=0,this.ndist=0,this.have=0,this.next=null,this.lens=new Uint16Array(320),this.work=new Uint16Array(288),this.lendyn=null,this.distdyn=null,this.sane=0,this.back=0,this.was=0}const Ce=t=>{if(!t)return 1;const e=t.state;return!e||e.strm!==t||e.mode16211?1:0},Me=t=>{if(Ce(t))return xe;const e=t.state;return t.total_in=t.total_out=e.total=0,t.msg="",e.wrap&&(t.adler=1&e.wrap),e.mode=Ze,e.last=0,e.havedict=0,e.flags=-1,e.dmax=32768,e.head=null,e.hold=0,e.bits=0,e.lencode=e.lendyn=new Int32Array(852),e.distcode=e.distdyn=new Int32Array(592),e.sane=1,e.back=-1,ke},He=t=>{if(Ce(t))return xe;const e=t.state;return e.wsize=0,e.whave=0,e.wnext=0,Me(t)},je=(t,e)=>{let a;if(Ce(t))return xe;const i=t.state;return e<0?(a=0,e=-e):(a=5+(e>>4),e<48&&(e&=15)),e&&(e<8||e>15)?xe:(null!==i.window&&i.wbits!==e&&(i.window=null),i.wrap=a,i.wbits=e,He(t))},Ke=(t,e)=>{if(!t)return xe;const a=new Be;t.state=a,a.strm=t,a.window=null,a.mode=Ze;const i=je(t,e);return i!==ke&&(t.state=null),i};let Pe,Ye,Ge=!0;const Xe=t=>{if(Ge){Pe=new Int32Array(512),Ye=new Int32Array(32);let e=0;for(;e<144;)t.lens[e++]=8;for(;e<256;)t.lens[e++]=9;for(;e<280;)t.lens[e++]=7;for(;e<288;)t.lens[e++]=8;for(me(1,t.lens,0,288,Pe,0,t.work,{bits:9}),e=0;e<32;)t.lens[e++]=5;me(2,t.lens,0,32,Ye,0,t.work,{bits:5}),Ge=!1}t.lencode=Pe,t.lenbits=9,t.distcode=Ye,t.distbits=5},We=(t,e,a,i)=>{let n;const s=t.state;return 
null===s.window&&(s.wsize=1<=s.wsize?(s.window.set(e.subarray(a-s.wsize,a),0),s.wnext=0,s.whave=s.wsize):(n=s.wsize-s.wnext,n>i&&(n=i),s.window.set(e.subarray(a-i,a-i+n),s.wnext),(i-=n)?(s.window.set(e.subarray(a-i,a),0),s.wnext=i,s.whave=s.wsize):(s.wnext+=n,s.wnext===s.wsize&&(s.wnext=0),s.whaveKe(t,15),inflateInit2:Ke,inflate:(t,e)=>{let a,i,n,s,r,o,l,h,d,_,f,c,u,w,m,b,g,p,k,v,y,x,z=0;const A=new Uint8Array(4);let E,R;const Z=new Uint8Array([16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15]);if(Ce(t)||!t.output||!t.input&&0!==t.avail_in)return xe;a=t.state,a.mode===Se&&(a.mode=De),r=t.next_out,n=t.output,l=t.avail_out,s=t.next_in,i=t.input,o=t.avail_in,h=a.hold,d=a.bits,_=o,f=l,x=ke;t:for(;;)switch(a.mode){case Ze:if(0===a.wrap){a.mode=De;break}for(;d<16;){if(0===o)break t;o--,h+=i[s++]<>>8&255,a.check=H(a.check,A,2,0),h=0,d=0,a.mode=16181;break}if(a.head&&(a.head.done=!1),!(1&a.wrap)||(((255&h)<<8)+(h>>8))%31){t.msg="incorrect header check",a.mode=Le;break}if((15&h)!==Re){t.msg="unknown compression method",a.mode=Le;break}if(h>>>=4,d-=4,y=8+(15&h),0===a.wbits&&(a.wbits=y),y>15||y>a.wbits){t.msg="invalid window size",a.mode=Le;break}a.dmax=1<>8&1),512&a.flags&&4&a.wrap&&(A[0]=255&h,A[1]=h>>>8&255,a.check=H(a.check,A,2,0)),h=0,d=0,a.mode=16182;case 16182:for(;d<32;){if(0===o)break t;o--,h+=i[s++]<>>8&255,A[2]=h>>>16&255,A[3]=h>>>24&255,a.check=H(a.check,A,4,0)),h=0,d=0,a.mode=16183;case 16183:for(;d<16;){if(0===o)break t;o--,h+=i[s++]<>8),512&a.flags&&4&a.wrap&&(A[0]=255&h,A[1]=h>>>8&255,a.check=H(a.check,A,2,0)),h=0,d=0,a.mode=16184;case 16184:if(1024&a.flags){for(;d<16;){if(0===o)break t;o--,h+=i[s++]<>>8&255,a.check=H(a.check,A,2,0)),h=0,d=0}else a.head&&(a.head.extra=null);a.mode=16185;case 16185:if(1024&a.flags&&(c=a.length,c>o&&(c=o),c&&(a.head&&(y=a.head.extra_len-a.length,a.head.extra||(a.head.extra=new 
Uint8Array(a.head.extra_len)),a.head.extra.set(i.subarray(s,s+c),y)),512&a.flags&&4&a.wrap&&(a.check=H(a.check,i,c,s)),o-=c,s+=c,a.length-=c),a.length))break t;a.length=0,a.mode=16186;case 16186:if(2048&a.flags){if(0===o)break t;c=0;do{y=i[s+c++],a.head&&y&&a.length<65536&&(a.head.name+=String.fromCharCode(y))}while(y&&c>9&1,a.head.done=!0),t.adler=a.check=0,a.mode=Se;break;case 16189:for(;d<32;){if(0===o)break t;o--,h+=i[s++]<>>=7&d,d-=7&d,a.mode=Fe;break}for(;d<3;){if(0===o)break t;o--,h+=i[s++]<>>=1,d-=1,3&h){case 0:a.mode=16193;break;case 1:if(Xe(a),a.mode=Oe,e===pe){h>>>=2,d-=2;break t}break;case 2:a.mode=16196;break;case 3:t.msg="invalid block type",a.mode=Le}h>>>=2,d-=2;break;case 16193:for(h>>>=7&d,d-=7&d;d<32;){if(0===o)break t;o--,h+=i[s++]<>>16^65535)){t.msg="invalid stored block lengths",a.mode=Le;break}if(a.length=65535&h,h=0,d=0,a.mode=Te,e===pe)break t;case Te:a.mode=16195;case 16195:if(c=a.length,c){if(c>o&&(c=o),c>l&&(c=l),0===c)break t;n.set(i.subarray(s,s+c),r),o-=c,s+=c,l-=c,r+=c,a.length-=c;break}a.mode=Se;break;case 16196:for(;d<14;){if(0===o)break t;o--,h+=i[s++]<>>=5,d-=5,a.ndist=1+(31&h),h>>>=5,d-=5,a.ncode=4+(15&h),h>>>=4,d-=4,a.nlen>286||a.ndist>30){t.msg="too many length or distance symbols",a.mode=Le;break}a.have=0,a.mode=16197;case 16197:for(;a.have>>=3,d-=3}for(;a.have<19;)a.lens[Z[a.have++]]=0;if(a.lencode=a.lendyn,a.lenbits=7,E={bits:a.lenbits},x=me(0,a.lens,0,19,a.lencode,0,a.work,E),a.lenbits=E.bits,x){t.msg="invalid code lengths set",a.mode=Le;break}a.have=0,a.mode=16198;case 16198:for(;a.have>>24,b=z>>>16&255,g=65535&z,!(m<=d);){if(0===o)break t;o--,h+=i[s++]<>>=m,d-=m,a.lens[a.have++]=g;else{if(16===g){for(R=m+2;d>>=m,d-=m,0===a.have){t.msg="invalid bit length repeat",a.mode=Le;break}y=a.lens[a.have-1],c=3+(3&h),h>>>=2,d-=2}else if(17===g){for(R=m+3;d>>=m,d-=m,y=0,c=3+(7&h),h>>>=3,d-=3}else{for(R=m+7;d>>=m,d-=m,y=0,c=11+(127&h),h>>>=7,d-=7}if(a.have+c>a.nlen+a.ndist){t.msg="invalid bit length 
repeat",a.mode=Le;break}for(;c--;)a.lens[a.have++]=y}}if(a.mode===Le)break;if(0===a.lens[256]){t.msg="invalid code -- missing end-of-block",a.mode=Le;break}if(a.lenbits=9,E={bits:a.lenbits},x=me(1,a.lens,0,a.nlen,a.lencode,0,a.work,E),a.lenbits=E.bits,x){t.msg="invalid literal/lengths set",a.mode=Le;break}if(a.distbits=6,a.distcode=a.distdyn,E={bits:a.distbits},x=me(2,a.lens,a.nlen,a.ndist,a.distcode,0,a.work,E),a.distbits=E.bits,x){t.msg="invalid distances set",a.mode=Le;break}if(a.mode=Oe,e===pe)break t;case Oe:a.mode=Ie;case Ie:if(o>=6&&l>=258){t.next_out=r,t.avail_out=l,t.next_in=s,t.avail_in=o,a.hold=h,a.bits=d,de(t,f),r=t.next_out,n=t.output,l=t.avail_out,s=t.next_in,i=t.input,o=t.avail_in,h=a.hold,d=a.bits,a.mode===Se&&(a.back=-1);break}for(a.back=0;z=a.lencode[h&(1<>>24,b=z>>>16&255,g=65535&z,!(m<=d);){if(0===o)break t;o--,h+=i[s++]<>p)],m=z>>>24,b=z>>>16&255,g=65535&z,!(p+m<=d);){if(0===o)break t;o--,h+=i[s++]<>>=p,d-=p,a.back+=p}if(h>>>=m,d-=m,a.back+=m,a.length=g,0===b){a.mode=16205;break}if(32&b){a.back=-1,a.mode=Se;break}if(64&b){t.msg="invalid literal/length code",a.mode=Le;break}a.extra=15&b,a.mode=16201;case 16201:if(a.extra){for(R=a.extra;d>>=a.extra,d-=a.extra,a.back+=a.extra}a.was=a.length,a.mode=16202;case 16202:for(;z=a.distcode[h&(1<>>24,b=z>>>16&255,g=65535&z,!(m<=d);){if(0===o)break t;o--,h+=i[s++]<>p)],m=z>>>24,b=z>>>16&255,g=65535&z,!(p+m<=d);){if(0===o)break t;o--,h+=i[s++]<>>=p,d-=p,a.back+=p}if(h>>>=m,d-=m,a.back+=m,64&b){t.msg="invalid distance code",a.mode=Le;break}a.offset=g,a.extra=15&b,a.mode=16203;case 16203:if(a.extra){for(R=a.extra;d>>=a.extra,d-=a.extra,a.back+=a.extra}if(a.offset>a.dmax){t.msg="invalid distance too far back",a.mode=Le;break}a.mode=16204;case 16204:if(0===l)break t;if(c=f-l,a.offset>c){if(c=a.offset-c,c>a.whave&&a.sane){t.msg="invalid distance too far back",a.mode=Le;break}c>a.wnext?(c-=a.wnext,u=a.wsize-c):u=a.wnext-c,c>a.length&&(c=a.length),w=a.window}else 
w=n,u=r-a.offset,c=a.length;c>l&&(c=l),l-=c,a.length-=c;do{n[r++]=w[u++]}while(--c);0===a.length&&(a.mode=Ie);break;case 16205:if(0===l)break t;n[r++]=a.length,l--,a.mode=Ie;break;case Fe:if(a.wrap){for(;d<32;){if(0===o)break t;o--,h|=i[s++]<{if(Ce(t))return xe;let e=t.state;return e.window&&(e.window=null),t.state=null,ke},inflateGetHeader:(t,e)=>{if(Ce(t))return xe;const a=t.state;return 0==(2&a.wrap)?xe:(a.head=e,e.done=!1,ke)},inflateSetDictionary:(t,e)=>{const a=e.length;let i,n,s;return Ce(t)?xe:(i=t.state,0!==i.wrap&&i.mode!==Ue?xe:i.mode===Ue&&(n=1,n=C(n,e,a,0),n!==i.check)?ze:(s=We(t,e,a,a),s?(i.mode=16210,Ae):(i.havedict=1,ke)))},inflateInfo:"pako inflate (from Nodeca project)"};var Je=function(){this.text=0,this.time=0,this.xflags=0,this.os=0,this.extra=null,this.extra_len=0,this.name="",this.comment="",this.hcrc=0,this.done=!1};const Qe=Object.prototype.toString,{Z_NO_FLUSH:Ve,Z_FINISH:$e,Z_OK:ta,Z_STREAM_END:ea,Z_NEED_DICT:aa,Z_STREAM_ERROR:ia,Z_DATA_ERROR:na,Z_MEM_ERROR:sa}=K;function ra(t){this.options=jt({chunkSize:65536,windowBits:15,to:""},t||{});const e=this.options;e.raw&&e.windowBits>=0&&e.windowBits<16&&(e.windowBits=-e.windowBits,0===e.windowBits&&(e.windowBits=-15)),!(e.windowBits>=0&&e.windowBits<16)||t&&t.windowBits||(e.windowBits+=32),e.windowBits>15&&e.windowBits<48&&0==(15&e.windowBits)&&(e.windowBits|=15),this.err=0,this.msg="",this.ended=!1,this.chunks=[],this.strm=new qt,this.strm.avail_out=0;let a=qe.inflateInit2(this.strm,e.windowBits);if(a!==ta)throw new Error(j[a]);if(this.header=new Je,qe.inflateGetHeader(this.strm,this.header),e.dictionary&&("string"==typeof e.dictionary?e.dictionary=Gt(e.dictionary):"[object ArrayBuffer]"===Qe.call(e.dictionary)&&(e.dictionary=new Uint8Array(e.dictionary)),e.raw&&(a=qe.inflateSetDictionary(this.strm,e.dictionary),a!==ta)))throw new Error(j[a])}function oa(t,e){const a=new ra(e);if(a.push(t),a.err)throw a.msg||j[a.err];return a.result}ra.prototype.push=function(t,e){const 
a=this.strm,i=this.options.chunkSize,n=this.options.dictionary;let s,r,o;if(this.ended)return!1;for(r=e===~~e?e:!0===e?$e:Ve,"[object ArrayBuffer]"===Qe.call(t)?a.input=new Uint8Array(t):a.input=t,a.next_in=0,a.avail_in=a.input.length;;){for(0===a.avail_out&&(a.output=new Uint8Array(i),a.next_out=0,a.avail_out=i),s=qe.inflate(a,r),s===aa&&n&&(s=qe.inflateSetDictionary(a,n),s===ta?s=qe.inflate(a,r):s===na&&(s=aa));a.avail_in>0&&s===ea&&a.state.wrap>0&&0!==t[a.next_in];)qe.inflateReset(a),s=qe.inflate(a,r);switch(s){case ia:case na:case aa:case sa:return this.onEnd(s),this.ended=!0,!1}if(o=a.avail_out,a.next_out&&(0===a.avail_out||s===ea))if("string"===this.options.to){let t=Wt(a.output,a.next_out),e=a.next_out-t,n=Xt(a.output,t);a.next_out=e,a.avail_out=i-e,e&&a.output.set(a.output.subarray(t,t+e),0),this.onData(n)}else this.onData(a.output.length===a.next_out?a.output:a.output.subarray(0,a.next_out));if(s!==ta||0!==o){if(s===ea)return s=qe.inflateEnd(this.strm),this.onEnd(s),this.ended=!0,!0;if(0===a.avail_in)break}}return!0},ra.prototype.onData=function(t){this.chunks.push(t)},ra.prototype.onEnd=function(t){t===ta&&("string"===this.options.to?this.result=this.chunks.join(""):this.result=Kt(this.chunks)),this.chunks=[],this.err=t,this.msg=this.strm.msg};var la={Inflate:ra,inflate:oa,inflateRaw:function(t,e){return(e=e||{}).raw=!0,oa(t,e)},ungzip:oa,constants:K};const{Deflate:ha,deflate:da,deflateRaw:_a,gzip:fa}=le,{Inflate:ca,inflate:ua,inflateRaw:wa,ungzip:ma}=la;var ba=ha,ga=da,pa=_a,ka=fa,va=ca,ya=ua,xa=wa,za=ma,Aa=K,Ea={Deflate:ba,deflate:ga,deflateRaw:pa,gzip:ka,Inflate:va,inflate:ya,inflateRaw:xa,ungzip:za,constants:Aa};t.Deflate=ba,t.Inflate=va,t.constants=Aa,t.default=Ea,t.deflate=ga,t.deflateRaw=pa,t.gzip=ka,t.inflate=ya,t.inflateRaw=xa,t.ungzip=za,Object.defineProperty(t,"__esModule",{value:!0})})); From 2900dddc56d1d5596eadb4fb96fd12d59ab24999 Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Wed, 6 May 2026 16:44:20 +0200 Subject: [PATCH 16/59] Add 
proxy logic --- conf.py | 8 ++ plugins/ros_related_packages.py | 12 +++ source/_static/related_packages.js | 54 ++++++++-- tools/rosdistro_cache_proxy.py | 152 +++++++++++++++++++++++++++++ 4 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 tools/rosdistro_cache_proxy.py diff --git a/conf.py b/conf.py index 37c1dbdec27..643c9fa50fc 100644 --- a/conf.py +++ b/conf.py @@ -188,6 +188,14 @@ 'related_packages.js', ] +# Optional runtime proxy endpoint for freshest rosdistro cache data. +# Use same-origin endpoint to avoid browser CORS limits, for example: +# ros_related_packages_proxy_url = '/api/rosdistro-cache/{distro}-cache.yaml.gz' +# For local testing you can export environment variable: +# ROS_RELATED_PACKAGES_PROXY_URL=http://127.0.0.1:9000/api/rosdistro-cache/{distro}-cache.yaml.gz +# Leave empty to skip proxy and use bundled _static fallback. +ros_related_packages_proxy_url = os.environ.get('ROS_RELATED_PACKAGES_PROXY_URL', '') + # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. 
diff --git a/plugins/ros_related_packages.py b/plugins/ros_related_packages.py index cfd4d467ed6..8321f18397c 100644 --- a/plugins/ros_related_packages.py +++ b/plugins/ros_related_packages.py @@ -89,6 +89,13 @@ def _bundled_cache_href(docname: str, distro: str) -> str: return ('../' * depth) + f'_static/rosdistro_cache/{distro}-cache.yaml.gz' +def _proxy_cache_href(proxy_template: str, distro: str) -> str: + """Build runtime proxy URL from template, replacing ``{distro}``.""" + if not proxy_template: + return '' + return proxy_template.replace('{distro}', distro) + + def _positive_int_option(argument: str) -> int: """Parse a positive integer option for the directive.""" if argument is None: @@ -149,6 +156,9 @@ def run(self) -> List[nodes.Node]: escaped_distro = html.escape(distro, quote=True) bundled_href = _bundled_cache_href(self.env.docname, distro) escaped_bundled = html.escape(bundled_href, quote=True) + proxy_template = getattr(self.env.config, 'ros_related_packages_proxy_url', '') + proxy_href = _proxy_cache_href(proxy_template, distro) + escaped_proxy = html.escape(proxy_href, quote=True) html_body = ( '' @@ -197,6 +208,7 @@ def download_rosdistro_cache(app) -> None: def setup(app): + app.add_config_value('ros_related_packages_proxy_url', '', 'html') app.add_directive('ros-related-packages', RosRelatedPackagesDirective) app.connect('builder-inited', download_rosdistro_cache) return { diff --git a/source/_static/related_packages.js b/source/_static/related_packages.js index c9c9fa49dd9..0d8e8f25320 100644 --- a/source/_static/related_packages.js +++ b/source/_static/related_packages.js @@ -76,35 +76,61 @@ return bundledCacheUrl(distro); } + /** + * Proxy URL configured by Sphinx via data attribute. 
+ * + * @param {HTMLElement|null} widget + * @param {string} distro + * @returns {string|null} + */ + function resolveProxyUrl(widget, distro) { + var templateUrl = widget && widget.getAttribute('data-proxy-cache-href'); + if (!templateUrl) { + return null; + } + return templateUrl.replace('{distro}', encodeURIComponent(distro)); + } + /** * @param {string} distro * @param {HTMLElement|null} sampleWidget widget from this page (for data-bundled-cache-href) * @returns {Promise>} */ function loadXmls(distro, sampleWidget) { - var bundledKey = + var cacheKey = distro + '|' + + (sampleWidget ? sampleWidget.getAttribute('data-proxy-cache-href') || '' : '') + + '|' + (sampleWidget ? sampleWidget.getAttribute('data-bundled-cache-href') || '' : ''); - if (cacheByDistro[bundledKey]) { - return cacheByDistro[bundledKey]; + if (cacheByDistro[cacheKey]) { + return cacheByDistro[cacheKey]; } - cacheByDistro[bundledKey] = fetchAndParse(distro, resolveBundledAbsoluteUrl(sampleWidget, distro)); - return cacheByDistro[bundledKey]; + cacheByDistro[cacheKey] = fetchAndParse( + distro, + resolveProxyUrl(sampleWidget, distro), + resolveBundledAbsoluteUrl(sampleWidget, distro) + ); + return cacheByDistro[cacheKey]; } /** * @param {string} distro + * @param {string|null} proxyUrl same-origin backend proxy endpoint (freshest) * @param {string|null} bundledAbsolute resolved same-origin URL to gzip, if any * @returns {Promise>} */ - function fetchAndParse(distro, bundledAbsolute) { + function fetchAndParse(distro, proxyUrl, bundledAbsolute) { var remote = 'https://repo.ros2.org/rosdistro_cache/' + encodeURIComponent(distro) + '-cache.yaml.gz'; var urls = []; + if (proxyUrl) { + urls.push(proxyUrl); + } if (bundledAbsolute) { urls.push(bundledAbsolute); } + /* Final fallback may still fail in browsers due to upstream CORS. 
*/ urls.push(remote); return tryUrls(urls); @@ -123,8 +149,19 @@ } var url = urls[i]; i += 1; - return fetch(url, { cache: 'no-cache' }) + var controller = typeof AbortController !== 'undefined' ? new AbortController() : null; + var timer = null; + if (controller && i === 1) { + /* Keep proxy attempt snappy so fallback isn't delayed. */ + timer = setTimeout(function () { + controller.abort(); + }, 6000); + } + return fetch(url, { cache: 'no-cache', signal: controller ? controller.signal : undefined }) .then(function (res) { + if (timer) { + clearTimeout(timer); + } if (!res.ok) { throw new Error('HTTP ' + res.status + ' for ' + url); } @@ -141,6 +178,9 @@ return /** @type {Record} */ (xmls); }) .catch(function (err) { + if (timer) { + clearTimeout(timer); + } /* Try next URL (e.g. bundled 404 then HTTPS remote — remote may hit CORS). */ return next(err); }); diff --git a/tools/rosdistro_cache_proxy.py b/tools/rosdistro_cache_proxy.py new file mode 100644 index 00000000000..df5c1a03654 --- /dev/null +++ b/tools/rosdistro_cache_proxy.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +"""Tiny local proxy for ROS distro cache gzip files. + +Why this exists: +- Browsers may not be able to fetch repo.ros2.org directly due to CORS. +- This proxy is same-origin relative to your local docs server workflow. +- It enables "proxy-first, bundled-fallback" runtime behavior. 
+ +Endpoint: + /api/rosdistro-cache/-cache.yaml.gz + +Example: + python tools/rosdistro_cache_proxy.py --port 9000 + # Then point conf.py setting to: + # ros_related_packages_proxy_url = 'http://127.0.0.1:9000/api/rosdistro-cache/{distro}-cache.yaml.gz' +""" + +from __future__ import annotations + +import argparse +import gzip +import re +import time +import urllib.error +import urllib.request +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from typing import Dict, Tuple + +UPSTREAM_TEMPLATE = 'https://repo.ros2.org/rosdistro_cache/{distro}-cache.yaml.gz' +DISTRO_RE = re.compile(r'^[a-z0-9][a-z0-9_-]*$', re.IGNORECASE) +PATH_RE = re.compile(r'^/api/rosdistro-cache/([a-z0-9_-]+)-cache\.yaml\.gz$', re.IGNORECASE) + + +class CacheStore: + """Simple in-memory TTL cache for gzip bytes by distro.""" + + def __init__(self, ttl_seconds: int) -> None: + self._ttl = max(0, ttl_seconds) + self._data: Dict[str, Tuple[float, bytes]] = {} + + def get(self, distro: str) -> bytes | None: + record = self._data.get(distro) + if record is None: + return None + expires_at, payload = record + if time.time() >= expires_at: + self._data.pop(distro, None) + return None + return payload + + def put(self, distro: str, payload: bytes) -> None: + self._data[distro] = (time.time() + self._ttl, payload) + + +class ProxyHandler(BaseHTTPRequestHandler): + """HTTP handler serving rosdistro cache gzip responses.""" + + server_version = 'RostdistroCacheProxy/1.0' + cache: CacheStore + timeout_seconds: int + + def do_GET(self) -> None: # noqa: N802 (BaseHTTPRequestHandler interface) + match = PATH_RE.match(self.path) + if not match: + self.send_error(404, 'Unknown path') + return + + distro = match.group(1).lower() + if not DISTRO_RE.match(distro): + self.send_error(400, 'Invalid distro name') + return + + payload = self.cache.get(distro) + if payload is None: + try: + payload = self._fetch_upstream(distro) + except urllib.error.HTTPError as exc: + self.send_error(exc.code, 
f'Upstream HTTP error: {exc.reason}') + return + except urllib.error.URLError as exc: + self.send_error(502, f'Upstream URL error: {exc.reason}') + return + except TimeoutError: + self.send_error(504, 'Upstream timeout') + return + except ValueError as exc: + self.send_error(502, f'Bad upstream payload: {exc}') + return + self.cache.put(distro, payload) + + self.send_response(200) + self.send_header('Content-Type', 'application/gzip') + self.send_header('Cache-Control', 'public, max-age=300') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def log_message(self, fmt: str, *args) -> None: + """Compact log format.""" + super().log_message('[proxy] ' + fmt, *args) + + def _fetch_upstream(self, distro: str) -> bytes: + url = UPSTREAM_TEMPLATE.format(distro=distro) + request = urllib.request.Request(url, headers={'User-Agent': 'ros2-docs-cache-proxy/1.0'}) + with urllib.request.urlopen(request, timeout=self.timeout_seconds) as response: + payload = response.read() + + # Quick sanity check: must be valid gzip bytes. 
+ try: + gzip.decompress(payload) + except OSError as exc: + raise ValueError('response is not valid gzip') from exc + return payload + + +def main() -> None: + parser = argparse.ArgumentParser(description='Local proxy for rosdistro cache gz files.') + parser.add_argument('--host', default='127.0.0.1', help='Listen host (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=9000, help='Listen port (default: 9000)') + parser.add_argument( + '--cache-ttl', + type=int, + default=300, + help='In-memory cache TTL seconds (default: 300)', + ) + parser.add_argument( + '--upstream-timeout', + type=int, + default=20, + help='Upstream timeout seconds (default: 20)', + ) + args = parser.parse_args() + + cache = CacheStore(ttl_seconds=args.cache_ttl) + + def handler_factory(*factory_args, **factory_kwargs): + handler = ProxyHandler(*factory_args, **factory_kwargs) + handler.cache = cache + handler.timeout_seconds = args.upstream_timeout + return handler + + server = ThreadingHTTPServer((args.host, args.port), handler_factory) + print( + f'Proxy running on http://{args.host}:{args.port} ' + '(endpoint: /api/rosdistro-cache/-cache.yaml.gz)' + ) + server.serve_forever() + + +if __name__ == '__main__': + main() + From e6389f8c664a985c1a7958dfe8cc470c9502f964 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Wed, 6 May 2026 18:28:52 +0100 Subject: [PATCH 17/59] OPENR-89: Adding some sanity check validation for generated values - moderation API and basic language check --- scripts/enhance_topics.py | 139 +++++++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 10 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 6c13dd29eb9..0a3b5745672 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -1,4 +1,5 @@ import logging +import re import sys import os from typing import Optional @@ -15,7 +16,7 @@ # Define constants GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls -# Maximum 
content length in characters for topic analysis , approximately 300k tokens (leaving 100k for instructions/output) +# Maximum content length in characters, approximately 300k tokens (leaving 100k for instructions/output) MAX_CONTENT_LENGTH = 1200000 RST_EXTENSION = '.rst' # File extension for RST files @@ -40,6 +41,10 @@ Finally, generate this description, with no additional styling, characters, or formatting.""" +ENGLISH_LANGUAGE_CHECK_PROMPT = """You are a validation assistant, and your role is to determine whether the following text is written entirely in English. Common technical terms, acronyms, and internationally recognised proper nouns are acceptable if they are normally used in English technical documentation. + +Answer ONLY with the single word yes or no in lowercase, with no punctuation, explanation, or additional text.""" + @retry( retry=retry_if_exception_type((RateLimitError, APIConnectionError)), stop=stop_after_attempt(MAX_RETRIES), @@ -109,6 +114,113 @@ def _make_api_call() -> str: logger.error(f"API call timed out after {timeout} seconds") raise # Re-raise the original timeout error +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True +) +def validate_content(client: OpenAI, generated: str, timeout: int = DEFAULT_TIMEOUT) -> bool: + """ + Validate generated content using the moderation API and a separate English-language check. + + Intended for any model-generated text before it is persisted (metadata today; other content later). + Uses ThreadPoolExecutor for cross-platform timeout handling and retries for transient API errors. + + Args: + client (OpenAI): OpenAI client instance. + generated (str): Model-generated text to validate. + timeout (int): Maximum time to wait for the combined validation calls in seconds. 
+ + Returns: + bool: True if content passes moderation and the language check; False otherwise. + + Raises: + TimeoutError: If the validation calls exceed the specified timeout. + RateLimitError: If API rate limits are exceeded (will trigger retry). + APIConnectionError: If connection fails (will trigger retry). + """ + if not generated.strip(): + logger.debug("Validation skipped: empty generated content") + return False + + text = generated + if len(text) > MAX_CONTENT_LENGTH: + logger.warning( + "Generated text truncated to %s characters for validation.", + MAX_CONTENT_LENGTH, + ) + text = text[:MAX_CONTENT_LENGTH] + + def _run_validation() -> bool: + """ + Run moderation and English checks sequentially. + + Returns: + bool: True if both checks pass. + + Raises: + RateLimitError, APIConnectionError: Propagated for retry handling. + """ + try: + logger.debug("Sending generated text to moderation API...") + moderation = client.moderations.create(input=text) + except (RateLimitError, APIConnectionError) as e: + logger.warning("Retryable error during moderation: %s", e) + raise + + if not moderation.results: + logger.warning("Moderation API returned no results; treating as validation failure") + return False + + result0 = moderation.results[0] + if result0.flagged: + categories = [ + name + for name, flagged in result0.categories.model_dump().items() + if flagged + ] + logger.warning( + "Content failed moderation (flagged). 
Categories: %s", + ", ".join(categories) if categories else "unknown", + ) + return False + + try: + logger.debug("Sending generated text for English-language validation...") + completion = client.chat.completions.create( + model=GPT_MODEL, + messages=[ + {"role": "system", "content": ENGLISH_LANGUAGE_CHECK_PROMPT}, + {"role": "user", "content": f"Text:\n\n{text}"}, + ], + ) + except (RateLimitError, APIConnectionError) as e: + logger.warning("Retryable error during language validation: %s", e) + raise + + answer = completion.choices[0].message.content + raw = (answer or "").strip().lower() + # Accept a single leading yes/no token even if the model adds stray whitespace + match = re.match(r"^(yes|no)\b", raw) + if not match or match.group(1) != "yes": + logger.warning( + "Content failed English-language validation (model answer: %r)", + answer, + ) + return False + + logger.debug("Generated content passed moderation and English-language validation") + return True + + with ThreadPoolExecutor() as executor: + try: + future = executor.submit(_run_validation) + return future.result(timeout=timeout) + except TimeoutError: + logger.error("Validation timed out after %s seconds", timeout) + raise + def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], timeout: int = DEFAULT_TIMEOUT) -> EnhanceData: """ Process a list of files and analyse their content using each of the passed prompts. @@ -144,10 +256,11 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): + # Check if the content has any meta fields already existing_meta_names = get_meta_names_from_content(content) for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary if prompt_name in existing_meta_names: - logger.info( + logger.warning( "Skipping analysis for %s: meta field %r already present in .. 
meta::", file_path, prompt_name, @@ -163,8 +276,15 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim timeout=timeout ) if result: - # Add the analysis result to the data structure - data = add_analysis_result(data, file_path, prompt_name, result) + if validate_content(client, result, timeout=timeout): + # Add the analysis result to the data structure + data = add_analysis_result(data, file_path, prompt_name, result) + else: + logger.warning( + "Validation failed for generated %s in %s; result not stored", + prompt_name, + file_path, + ) else: logger.warning(f"No result for {file_path} with prompt name: {prompt_name}") @@ -183,8 +303,6 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim else: logger.info(f"No analysable content found for {file_path}") - metrics = calculate_metrics(data) - logger.info(f"Analysed {metrics.files_with_results_count} out of {len(files)} files with the configured prompts.") return data @@ -234,7 +352,7 @@ def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> Enhan # TODO: Make this config-driven, so that we can easily add more prompts and analysis types prompts: dict[str, str] = {"description": DESCRIPTION_PROMPT, "keywords": KEYWORDS_PROMPT} - data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model + data = analyze_files(files, client, prompts) # Populate and validate ``EnhanceData.results`` from the model data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` return data @@ -299,7 +417,8 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.debug("Updated file with supplied metadata: %s", file_path) logger.debug("-" * 50) - metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote + # ``files_with_results_count`` reflects files with at least one valid analysis result, and ``updated_files_count`` 
reflects files we rewrote + metrics = calculate_metrics(current_data) logger.info("Updated metadata in %s files out of %s files processed.", metrics.updated_files_count, len(files)) return current_data @@ -317,7 +436,7 @@ def main() -> None: Only files with the .rst extension will be processed. Logs the number of files successfully enhanced. - """b + """ logging.basicConfig( level=logging.INFO, @@ -339,7 +458,7 @@ def main() -> None: data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced files: {metrics.files_with_results_count} with analysis results, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") + logger.info(f"Enhanced files: {metrics.files_with_results_count} with at least one valid analysis result, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") if __name__ == "__main__": main() From b0513cde510edc837d3a5b4f1e4413e32221bdb3 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Wed, 6 May 2026 18:29:31 +0100 Subject: [PATCH 18/59] OPENR-89: Add some unit tests for central enhance topics module --- scripts/test/__init__.py | 0 scripts/test/test_enhance_topics.py | 191 ++++++++++++++++++ .../test/test_enhance_topics_validation.py | 61 ++++++ 3 files changed, 252 insertions(+) create mode 100644 scripts/test/__init__.py create mode 100644 scripts/test/test_enhance_topics.py create mode 100644 scripts/test/test_enhance_topics_validation.py diff --git a/scripts/test/__init__.py b/scripts/test/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/test/test_enhance_topics.py b/scripts/test/test_enhance_topics.py new file mode 100644 index 00000000000..4b8bdbd64aa --- /dev/null +++ b/scripts/test/test_enhance_topics.py @@ -0,0 +1,191 @@ +import pytest +from unittest.mock import MagicMock, patch, mock_open +import sys +import os +from openai import OpenAIError + +# Add the scripts 
directory to sys.path to allow importing enhance_topics +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from enhance_topics import ( + analyze_content, + get_openai_client, + analyze_files, + update_meta_files, + enhance_metadata, + MAX_CONTENT_LENGTH +) +from enhance_data import EnhanceData + +@pytest.fixture +def mock_client(): + """Provides a mocked OpenAI client.""" + return MagicMock() + +# --- Tests for analyze_content --- + +def test_analyze_content_success(mock_client): + """Test successful content analysis.""" + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='Analysis result'))] + mock_client.chat.completions.create.return_value = mock_completion + + result = analyze_content(mock_client, "Some content", "Some prompt") + assert result == 'Analysis result' + mock_client.chat.completions.create.assert_called_once() + +def test_analyze_content_truncation(mock_client): + """Test that content is truncated if it exceeds MAX_CONTENT_LENGTH.""" + long_content = "a" * (MAX_CONTENT_LENGTH + 100) + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='Result'))] + mock_client.chat.completions.create.return_value = mock_completion + + analyze_content(mock_client, long_content, "Prompt") + + # Check the call arguments to ensure content was truncated + args, kwargs = mock_client.chat.completions.create.call_args + sent_content = kwargs['messages'][1]['content'] + assert len(sent_content) <= MAX_CONTENT_LENGTH + len("Content:\n\n") + +def test_analyze_content_empty_response(mock_client): + """Test handling of empty response from API.""" + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content=None))] + mock_client.chat.completions.create.return_value = mock_completion + + result = analyze_content(mock_client, "Content", "Prompt") + assert result == "" + +# --- Tests for get_openai_client --- + 
+@patch('enhance_topics.load_dotenv') +@patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}) +def test_get_openai_client_success(mock_load_dotenv): + """Test successful client initialisation.""" + client = get_openai_client() + assert client.api_key == "test-key" + +@patch('enhance_topics.load_dotenv') +@patch.dict(os.environ, {}, clear=True) +def test_get_openai_client_missing_key(mock_load_dotenv): + """Test error when API key is missing.""" + with pytest.raises(OpenAIError, match="OpenAI API key not found"): + get_openai_client() + +# --- Tests for analyze_files --- + +@patch('enhance_topics.get_meta_names_from_content') +@patch('enhance_topics.analyze_content') +@patch('enhance_topics.validate_content') +@patch('enhance_topics.add_analysis_result') +@patch('enhance_topics.create_enhance_data') +def test_analyze_files_basic_flow( + mock_create_data, + mock_add_result, + mock_validate, + mock_analyze, + mock_get_meta, + mock_client +): + """Test the basic flow of analyze_files.""" + mock_create_data.return_value = EnhanceData(results={}, updated_files=set()) + mock_get_meta.return_value = [] # No existing metadata + mock_analyze.return_value = "Generated result" + mock_validate.return_value = True + mock_add_result.return_value = EnhanceData( + results={"file1.rst": {"description": "res"}}, + updated_files=set() + ) + + files = ["file1.rst"] + prompts = {"description": "desc prompt"} + + with patch("builtins.open", mock_open(read_data="File content")): + analyze_files(files, mock_client, prompts) + + mock_analyze.assert_called_once() + mock_validate.assert_called_once() + mock_add_result.assert_called_once() + +@patch('enhance_topics.get_meta_names_from_content') +def test_analyze_files_skips_existing_meta(mock_get_meta, mock_client): + """Test that files with existing metadata are skipped.""" + mock_get_meta.return_value = ["description"] # Description already exists + + files = ["file1.rst"] + prompts = {"description": "desc prompt"} + + with 
patch("builtins.open", mock_open(read_data="File content")): + with patch('enhance_topics.analyze_content') as mock_analyze: + analyze_files(files, mock_client, prompts) + mock_analyze.assert_not_called() + +# --- Tests for update_meta_files --- + +@patch('enhance_topics.get_results_for_file') +@patch('enhance_topics.inject_metadata_to_content') +@patch('enhance_topics.mark_file_updated') +def test_update_meta_files_writes_on_change( + mock_mark_updated, + mock_inject, + mock_get_results, + mock_client +): + """Test that files are written only when metadata changes.""" + mock_get_results.return_value = {"description": "new desc"} + mock_inject.return_value = ("New content", True) # Changed is True + mock_mark_updated.return_value = EnhanceData( + results={}, + updated_files={"file1.rst"} + ) + + data = EnhanceData( + results={"file1.rst": {"description": "new desc"}}, + updated_files=set() + ) + + m_open = mock_open(read_data="Old content") + with patch("builtins.open", m_open): + update_meta_files(["file1.rst"], data) + + # Verify write was called + m_open().write.assert_called_once_with("New content") + mock_mark_updated.assert_called_once() + +@patch('enhance_topics.get_results_for_file') +@patch('enhance_topics.inject_metadata_to_content') +def test_update_meta_files_skips_no_change(mock_inject, mock_get_results): + """Test that files are NOT written when no metadata changes.""" + mock_get_results.return_value = {"description": "same desc"} + mock_inject.return_value = ("Old content", False) # Changed is False + + data = EnhanceData( + results={"file1.rst": {"description": "same desc"}}, + updated_files=set() + ) + + m_open = mock_open(read_data="Old content") + with patch("builtins.open", m_open): + update_meta_files(["file1.rst"], data) + + # Verify write was NOT called + m_open().write.assert_not_called() + +# --- Tests for enhance_metadata --- + +@patch('enhance_topics.get_openai_client') +@patch('enhance_topics.analyze_files') 
+@patch('enhance_topics.update_meta_files') +def test_enhance_metadata_orchestration(mock_update, mock_analyze, mock_get_client): + """Test the orchestration in enhance_metadata.""" + mock_get_client.return_value = MagicMock() + mock_analyze.return_value = EnhanceData(results={"f": {"d": "r"}}, updated_files=set()) + mock_update.return_value = EnhanceData(results={"f": {"d": "r"}}, updated_files={"f"}) + + result = enhance_metadata(["file1.rst"]) + + assert result.updated_files == {"f"} + mock_get_client.assert_called_once() + mock_analyze.assert_called_once() + mock_update.assert_called_once() diff --git a/scripts/test/test_enhance_topics_validation.py b/scripts/test/test_enhance_topics_validation.py new file mode 100644 index 00000000000..54309bbf340 --- /dev/null +++ b/scripts/test/test_enhance_topics_validation.py @@ -0,0 +1,61 @@ +import pytest +from unittest.mock import MagicMock +import sys +import os + +# Add the scripts directory to sys.path to allow importing enhance_topics +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from enhance_topics import validate_content + +@pytest.fixture +def mock_client(): + """Provides a mocked OpenAI client.""" + return MagicMock() + +def test_validate_content_success(mock_client): + """Test that valid English content passes both moderation and language checks.""" + # Mock Moderation: Not flagged + mock_result = MagicMock() + mock_result.flagged = False + mock_client.moderations.create.return_value.results = [mock_result] + + # Mock Chat: Returns 'yes' + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='yes'))] + mock_client.chat.completions.create.return_value = mock_completion + + assert validate_content(mock_client, "This is a valid English sentence.") is True + +def test_validate_content_moderation_fail(mock_client): + """Test that content flagged by moderation returns False.""" + # Mock Moderation: Flagged + mock_result = MagicMock() + 
mock_result.flagged = True + # Mock categories.model_dump() for the logger + mock_result.categories.model_dump.return_value = {"hate": True, "violence": False} + mock_client.moderations.create.return_value.results = [mock_result] + + assert validate_content(mock_client, "Some offensive content.") is False + # Verify chat.completions was NOT called (short-circuit) + mock_client.chat.completions.create.assert_not_called() + +def test_validate_content_language_fail(mock_client): + """Test that non-English content (as determined by the LLM) returns False.""" + # Mock Moderation: Not flagged + mock_result = MagicMock() + mock_result.flagged = False + mock_client.moderations.create.return_value.results = [mock_result] + + # Mock Chat: Returns 'no' + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='no'))] + mock_client.chat.completions.create.return_value = mock_completion + + assert validate_content(mock_client, "Ceci n'est pas anglais.") is False + +def test_validate_content_empty_input(mock_client): + """Test that empty or whitespace-only input returns False immediately.""" + assert validate_content(mock_client, "") is False + assert validate_content(mock_client, " ") is False + mock_client.moderations.create.assert_not_called() From c16efda822cbdbe9795a14cbe40aec11073cebec Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Wed, 6 May 2026 22:43:35 +0200 Subject: [PATCH 19/59] Add introduction --- source/_static/related_packages.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/_static/related_packages.js b/source/_static/related_packages.js index 0d8e8f25320..26218254597 100644 --- a/source/_static/related_packages.js +++ b/source/_static/related_packages.js @@ -295,6 +295,10 @@ p.textContent = 'No packages matched this filter.'; el.appendChild(p); } else { + var intro = document.createElement('p'); + intro.className = 'related-packages__intro'; + intro.textContent = 'Packages/reference: '; + 
el.appendChild(intro); el.appendChild(ul); } } From e86ac48e90ec0d21c127c25dcb8f0a3a67cdb1bf Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Thu, 7 May 2026 14:02:53 +0200 Subject: [PATCH 20/59] Add related articles --- conf.py | 1 + plugins/ros_related_articles.py | 224 ++++++++++++++++++ .../Migrating-Interfaces.rst | 4 + ...ingle-Package-Define-And-Use-Interface.rst | 4 + .../Creating-A-Workspace.rst | 4 + .../Creating-Your-First-ROS2-Package.rst | 9 + 6 files changed, 246 insertions(+) create mode 100644 plugins/ros_related_articles.py diff --git a/conf.py b/conf.py index 643c9fa50fc..1be0735f286 100644 --- a/conf.py +++ b/conf.py @@ -90,6 +90,7 @@ 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', 'ros_related_packages', + 'ros_related_articles', ] # Intersphinx mapping diff --git a/plugins/ros_related_articles.py b/plugins/ros_related_articles.py new file mode 100644 index 00000000000..be55a54a6e0 --- /dev/null +++ b/plugins/ros_related_articles.py @@ -0,0 +1,224 @@ +# Copyright 2026 Open Robotics and contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sphinx directive for build-time related article lists.""" + +from __future__ import annotations + +from typing import List, TypedDict + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util.docutils import SphinxDirective + + +def _normalize_field_name(raw: str) -> str: + """Normalize a metadata key for comparison (e.g. 
``Experience`` -> ``experience``).""" + name = raw.strip().lower().rstrip(':') + return name.replace(' ', '-') + + +def _field_value_from_doctree(document: nodes.document, wanted: str) -> str | None: + """Return the body of the first matching docinfo/rST field in the document.""" + wanted_norm = _normalize_field_name(wanted) + for field in document.traverse(nodes.field): + children = getattr(field, 'children', ()) or () + if len(children) < 2: + continue + label = children[0].astext() + if _normalize_field_name(label) != wanted_norm: + continue + return children[1].astext().strip() + return None + + +def _meta_get(metadata: dict, *names: str) -> str | None: + """Look up metadata using several possible keys (Sphinx/docutils variants).""" + for name in names: + for key, val in metadata.items(): + if not val: + continue + if _normalize_field_name(str(key)) == _normalize_field_name(name): + return str(val).strip() + return None + + +def _meta_content_from_docutils(document: nodes.document, meta_name: str) -> str | None: + """Read ``docutils.nodes.meta`` emitted by ``.. 
meta::``.""" + for node in document.traverse(nodes.meta): + if node.get('name') != meta_name: + continue + raw = node.get('content') + if raw: + return str(raw).strip() + return None + + +def _positive_int_option(argument: str) -> int: + """Parse a positive integer option for the directive.""" + if argument is None: + raise ValueError('option requires a number') + value = int(argument) + if value < 1: + raise ValueError('must be positive') + return value + + +class RelatedArticle(TypedDict): + docname: str + title: str + area: str + experience: str + + +def _normalized_value(raw: str) -> str: + """Normalize metadata value for stable matching.""" + return ' '.join(raw.strip().lower().split()) + + +class RosRelatedArticlesNode(nodes.General, nodes.Element): + """Placeholder node replaced during ``doctree-resolved``.""" + + +class RosRelatedArticlesDirective(SphinxDirective): + """Emit a placeholder for static related-article links. + + Uses page metadata values from ``.. meta::``: + + .. code-block:: rst + + .. meta:: + :area: Tutorials + :experience: Beginner + """ + + has_content = False + required_arguments = 0 + optional_arguments = 0 + option_spec = {'max': _positive_int_option} + + def run(self) -> List[nodes.Node]: + meta = self.env.metadata.get(self.env.docname, {}) + area = ( + _meta_content_from_docutils(self.state.document, 'area') + or _meta_get(meta, 'area') + or _field_value_from_doctree(self.state.document, 'area') + or '' + ) + experience = ( + _meta_content_from_docutils(self.state.document, 'experience') + or _meta_get(meta, 'experience') + or _field_value_from_doctree(self.state.document, 'experience') + or '' + ) + + if not area or not experience: + raise self.error( + 'ros-related-articles: define both `area` and `experience` ' + 'with `.. meta::` (recommended), or field list metadata.' 
+ ) + + node = RosRelatedArticlesNode() + node['area'] = area + node['experience'] = experience + node['max'] = self.options.get('max', 10) + return [node] + + +def _collect_article_index(env) -> List[RelatedArticle]: + """Build an index of docs that declare both ``area`` and ``experience`` metadata.""" + records: List[RelatedArticle] = [] + for docname in sorted(env.found_docs): + doctree = env.get_doctree(docname) + meta = env.metadata.get(docname, {}) + area = ( + _meta_content_from_docutils(doctree, 'area') + or _meta_get(meta, 'area') + or _field_value_from_doctree(doctree, 'area') + or '' + ) + experience = ( + _meta_content_from_docutils(doctree, 'experience') + or _meta_get(meta, 'experience') + or _field_value_from_doctree(doctree, 'experience') + or '' + ) + if not area or not experience: + continue + title_node = env.titles.get(docname) + title = title_node.astext().strip() if title_node else docname + records.append({ + 'docname': docname, + 'title': title, + 'area': _normalized_value(area), + 'experience': _normalized_value(experience), + }) + return records + + +def build_related_articles_index(app, env) -> None: + """Build metadata map once after Sphinx has read all source documents.""" + env.ros_related_articles_index = _collect_article_index(env) + + +def resolve_related_articles(app, doctree, fromdocname) -> None: + """Replace placeholders with static paragraph + list markup.""" + index: List[RelatedArticle] = getattr(app.env, 'ros_related_articles_index', []) + for node in list(doctree.traverse(RosRelatedArticlesNode)): + area = _normalized_value(str(node.get('area', ''))) + experience = _normalized_value(str(node.get('experience', ''))) + max_items = int(node.get('max', 10)) + + matches = [ + item for item in index + if item['docname'] != fromdocname + and item['area'] == area + and item['experience'] == experience + ] + matches.sort(key=lambda item: item['title'].lower()) + matches = matches[:max_items] + + if not matches: + 
node.replace_self([]) + continue + + container = nodes.container(classes=['related-articles']) + intro = nodes.paragraph() + intro += nodes.Text('Related articles:') + container += intro + + bullets = nodes.bullet_list() + for item in matches: + refuri = app.builder.get_relative_uri(fromdocname, item['docname']) + link = nodes.reference('', item['title'], refuri=refuri) + entry = nodes.list_item() + para = nodes.paragraph() + para += link + entry += para + bullets += entry + container += bullets + + node.replace_self(container) + + +def setup(app): + app.add_directive('ros-related-articles', RosRelatedArticlesDirective) + app.add_node(RosRelatedArticlesNode) + app.connect('env-updated', build_related_articles_index) + app.connect('doctree-resolved', resolve_related_articles) + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/source/How-To-Guides/Migrating-from-ROS1/Migrating-Interfaces.rst b/source/How-To-Guides/Migrating-from-ROS1/Migrating-Interfaces.rst index fc10d714179..44e9f6e0d89 100644 --- a/source/How-To-Guides/Migrating-from-ROS1/Migrating-Interfaces.rst +++ b/source/How-To-Guides/Migrating-from-ROS1/Migrating-Interfaces.rst @@ -1,6 +1,10 @@ Migrating Interfaces ==================== +.. meta:: + :area: ROS-framework + :experience: beginner, intermediate + .. contents:: Table of Contents :depth: 2 :local: diff --git a/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst b/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst index b203d592aab..5a46dae0d9a 100644 --- a/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst +++ b/source/How-To-Guides/Single-Package-Define-And-Use-Interface.rst @@ -4,6 +4,8 @@ .. meta:: :build-type: ament_cmake + :area: ROS-framework + :experience: beginner, intermediate Implementing custom interfaces - how-to ======================================= @@ -253,5 +255,7 @@ Steps Related content --------------- +.. 
ros-related-articles:: + .. ros-related-packages:: diff --git a/source/Tutorials/Beginner-Client-Libraries/Creating-A-Workspace/Creating-A-Workspace.rst b/source/Tutorials/Beginner-Client-Libraries/Creating-A-Workspace/Creating-A-Workspace.rst index 2ade50673cc..bbbd706b927 100644 --- a/source/Tutorials/Beginner-Client-Libraries/Creating-A-Workspace/Creating-A-Workspace.rst +++ b/source/Tutorials/Beginner-Client-Libraries/Creating-A-Workspace/Creating-A-Workspace.rst @@ -4,6 +4,10 @@ .. _ROS2Workspace: +.. meta:: + :area: ROS-framework + :experience: beginner, intermediate + Creating a workspace ==================== diff --git a/source/Tutorials/Beginner-Client-Libraries/Creating-Your-First-ROS2-Package.rst b/source/Tutorials/Beginner-Client-Libraries/Creating-Your-First-ROS2-Package.rst index 3104af34ef0..55720a2ab5a 100644 --- a/source/Tutorials/Beginner-Client-Libraries/Creating-Your-First-ROS2-Package.rst +++ b/source/Tutorials/Beginner-Client-Libraries/Creating-Your-First-ROS2-Package.rst @@ -4,6 +4,10 @@ .. _CreatePkg: +.. meta:: + :area: ROS-framework + :experience: beginner, intermediate + Creating a package ================== @@ -533,3 +537,8 @@ Next steps Next, let's add something meaningful to a package. You'll start with a simple publisher/subscriber system, which you can choose to write in either :doc:`C++ <./Writing-A-Simple-Cpp-Publisher-And-Subscriber>` or :doc:`Python <./Writing-A-Simple-Py-Publisher-And-Subscriber>`. + +Related content +--------------- + +.. 
ros-related-articles:: From 15e718597d1894f1604dc9e5ff42c0ac66057519 Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Thu, 7 May 2026 14:05:43 +0200 Subject: [PATCH 21/59] Remove custom CSS --- source/_static/custom.css | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/source/_static/custom.css b/source/_static/custom.css index 529f9c5d8bd..4252f921bb8 100644 --- a/source/_static/custom.css +++ b/source/_static/custom.css @@ -1,36 +1,3 @@ .wy-nav-content { max-width: 64rem; } - -/* Runtime “related packages” list (see ros_related_packages extension) */ -.related-packages { - margin-top: 0.75rem; -} - -.related-packages--loading .related-packages__status { - color: #666; - font-style: italic; -} - -.related-packages__list { - margin: 0.35em 0 0; - padding-left: 1.25rem; -} - -.related-packages__list li { - margin: 0.35em 0; -} - -.related-packages__list a { - font-weight: 500; -} - -.related-packages__empty, -.related-packages--error .related-packages__status { - color: #555; - margin: 0.35em 0 0; -} - -.related-packages--error .related-packages__status { - color: #a94442; -} From 24a433692a7c0fb32d0565c9a28e549aaeb8d772 Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Thu, 7 May 2026 15:00:07 +0200 Subject: [PATCH 22/59] Proxy fix --- tools/rosdistro_cache_proxy.py | 96 +++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/tools/rosdistro_cache_proxy.py b/tools/rosdistro_cache_proxy.py index df5c1a03654..6074587c677 100644 --- a/tools/rosdistro_cache_proxy.py +++ b/tools/rosdistro_cache_proxy.py @@ -20,6 +20,7 @@ import argparse import gzip import re +import traceback import time import urllib.error import urllib.request @@ -59,41 +60,62 @@ class ProxyHandler(BaseHTTPRequestHandler): cache: CacheStore timeout_seconds: int + def _send_cors_headers(self) -> None: + """Allow browser fetches from local docs hosts on another port.""" + 
self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + self.send_header('Access-Control-Allow-Headers', 'Content-Type') + + def do_OPTIONS(self) -> None: # noqa: N802 (BaseHTTPRequestHandler interface) + self.send_response(204) + self._send_cors_headers() + self.end_headers() + def do_GET(self) -> None: # noqa: N802 (BaseHTTPRequestHandler interface) - match = PATH_RE.match(self.path) - if not match: - self.send_error(404, 'Unknown path') - return - - distro = match.group(1).lower() - if not DISTRO_RE.match(distro): - self.send_error(400, 'Invalid distro name') - return - - payload = self.cache.get(distro) - if payload is None: - try: - payload = self._fetch_upstream(distro) - except urllib.error.HTTPError as exc: - self.send_error(exc.code, f'Upstream HTTP error: {exc.reason}') - return - except urllib.error.URLError as exc: - self.send_error(502, f'Upstream URL error: {exc.reason}') - return - except TimeoutError: - self.send_error(504, 'Upstream timeout') + try: + match = PATH_RE.match(self.path) + if not match: + self.send_error(404, 'Unknown path') return - except ValueError as exc: - self.send_error(502, f'Bad upstream payload: {exc}') + + distro = match.group(1).lower() + if not DISTRO_RE.match(distro): + self.send_error(400, 'Invalid distro name') return - self.cache.put(distro, payload) - self.send_response(200) - self.send_header('Content-Type', 'application/gzip') - self.send_header('Cache-Control', 'public, max-age=300') - self.send_header('Content-Length', str(len(payload))) - self.end_headers() - self.wfile.write(payload) + if not hasattr(self, 'cache'): + raise RuntimeError('Proxy handler is missing cache configuration') + if not hasattr(self, 'timeout_seconds'): + raise RuntimeError('Proxy handler is missing timeout configuration') + + payload = self.cache.get(distro) + if payload is None: + try: + payload = self._fetch_upstream(distro) + except urllib.error.HTTPError as exc: + 
self.send_error(exc.code, f'Upstream HTTP error: {exc.reason}') + return + except urllib.error.URLError as exc: + self.send_error(502, f'Upstream URL error: {exc.reason}') + return + except TimeoutError: + self.send_error(504, 'Upstream timeout') + return + except ValueError as exc: + self.send_error(502, f'Bad upstream payload: {exc}') + return + self.cache.put(distro, payload) + + self.send_response(200) + self._send_cors_headers() + self.send_header('Content-Type', 'application/gzip') + self.send_header('Cache-Control', 'public, max-age=300') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + except Exception as exc: # pragma: no cover - defensive safety net for local proxy. + traceback.print_exc() + self.send_error(500, f'Proxy internal error: {exc}') def log_message(self, fmt: str, *args) -> None: """Compact log format.""" @@ -133,13 +155,13 @@ def main() -> None: cache = CacheStore(ttl_seconds=args.cache_ttl) - def handler_factory(*factory_args, **factory_kwargs): - handler = ProxyHandler(*factory_args, **factory_kwargs) - handler.cache = cache - handler.timeout_seconds = args.upstream_timeout - return handler + class ConfiguredProxyHandler(ProxyHandler): + """Proxy handler class with shared cache and timeout configuration.""" + + ConfiguredProxyHandler.cache = cache + ConfiguredProxyHandler.timeout_seconds = args.upstream_timeout - server = ThreadingHTTPServer((args.host, args.port), handler_factory) + server = ThreadingHTTPServer((args.host, args.port), ConfiguredProxyHandler) print( f'Proxy running on http://{args.host}:{args.port} ' '(endpoint: /api/rosdistro-cache/-cache.yaml.gz)' From a75df3023bbd8ab00d4971f3034f9fc3e3a91169 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Fri, 15 May 2026 15:20:05 +0100 Subject: [PATCH 23/59] OPENR-89: Add short description custom directive --- conf.py | 1 + plugins/short_description.py | 32 ++++++++++++++++++++++++++++++++ source/About-ROS.rst | 5 +++-- 
source/_static/custom.css | 7 +++++++ 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 plugins/short_description.py diff --git a/conf.py b/conf.py index 2a9def973fd..6d9eaab8730 100644 --- a/conf.py +++ b/conf.py @@ -89,6 +89,7 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'short_description', ] # Intersphinx mapping diff --git a/plugins/short_description.py b/plugins/short_description.py new file mode 100644 index 00000000000..9b275626162 --- /dev/null +++ b/plugins/short_description.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from docutils import nodes +from sphinx.util.docutils import SphinxDirective + + +class ShortDescriptionDirective(SphinxDirective): + """Directive to render the short description of an article.""" + + has_content = True + required_arguments = 0 + optional_arguments = 0 + option_spec = {} + + def run(self) -> list[nodes.Node]: + # Create a container node to hold the parsed content + node = nodes.container() + node['classes'].append('short-description') + + # Parse the directive content into the container node + self.state.nested_parse(self.content, self.content_offset, node) + + return [node] + + +def setup(app): + app.add_directive('short-description', ShortDescriptionDirective) + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '0.1.0', + } \ No newline at end of file diff --git a/source/About-ROS.rst b/source/About-ROS.rst index 05fe7db14e9..3f9ce041162 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,8 +3,9 @@ About ROS ========= -ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. -This article introduces the main areas of the ecosystem and outlines their intended use. +.. 
short-description:: + ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. + This article introduces the main areas of the ecosystem and outlines their intended use. **Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** diff --git a/source/_static/custom.css b/source/_static/custom.css index 4252f921bb8..99f8209fdbb 100644 --- a/source/_static/custom.css +++ b/source/_static/custom.css @@ -1,3 +1,10 @@ .wy-nav-content { max-width: 64rem; } + +.short-description p{ + font-size: 1.25rem; + line-height: 1.5; + color: #777777; + margin-bottom: 1.5rem; +} From f506799b17d0e44b7e5fe186fd0c37917dcac260 Mon Sep 17 00:00:00 2001 From: GeorgeL Date: Mon, 18 May 2026 16:28:20 +0100 Subject: [PATCH 24/59] add pagefind from prototype --- .github/workflows/test.yml | 16 + Makefile | 6 + conf.py | 29 +- plugins/meta_util.py | 70 ++++ plugins/pagefind_meta.py | 222 +++++++++++++ plugins/showmeta.py | 120 +++++++ requirements.txt | 4 + source/About-ROS.rst | 14 +- source/_static/pagefind-docsearch.css | 219 ++++++++++++ source/_templates/layout.html | 9 + source/_templates/searchbox.html | 462 ++++++++++++++++++++++++++ source/search_results.rst | 21 ++ 12 files changed, 1190 insertions(+), 2 deletions(-) create mode 100644 plugins/meta_util.py create mode 100644 plugins/pagefind_meta.py create mode 100644 plugins/showmeta.py create mode 100644 source/_static/pagefind-docsearch.css create mode 100644 source/_templates/layout.html create mode 100644 source/_templates/searchbox.html create mode 100644 source/search_results.rst diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..5a890815fcd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,6 +77,14 @@ jobs: - name: Build the docs run: make html + + - name: Setup Node.js (Pagefind) + uses: 
actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind - name: Upload document artifacts uses: actions/upload-artifact@v4 @@ -147,3 +155,11 @@ jobs: - name: Build the docs run: make multiversion + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind diff --git a/Makefile b/Makefile index f411c155a1e..f5a18e791ba 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,12 @@ multiversion: Makefile sphinx-multiversion $(OPTS) "$(SOURCE)" build/html @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py + +# Pagefind static search index (requires Node.js / npx). Run after html or multiversion. +PAGEFIND_VERSION ?= 1.5.2 +pagefind: + npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" + %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) diff --git a/conf.py b/conf.py index 2a9def973fd..2955a31393a 100644 --- a/conf.py +++ b/conf.py @@ -89,8 +89,34 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'pagefind_meta', + 'showmeta', ] +# Pagefind mergeIndex: optional per-package API doc bundles. +# Enable only when upstream sites publish Pagefind at .../en/{distro}/p/{pkg}/pagefind +pagefind_merge_enabled = False +pagefind_merge_package_pkgs = [] +pagefind_merge_index_base = 'https://docs.ros.org' +pagefind_merge_index_overrides = {} +pagefind_merge_filter_per_pkg = None +pagefind_merge_index_weight_per_pkg = None + +# Optional display labels for Pagefind filter UI (key → label). Unlisted keys use title-case. 
+pagefind_filter_labels = { + 'contentType': 'Content Type', +} + +pagefind_result_meta_order = [ + 'product', + 'distro', + 'area', + 'capability', + 'contentType', + 'experience', +] + + # Intersphinx mapping intersphinx_mapping = { @@ -168,6 +194,7 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', + 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -181,7 +208,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css'] +html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py new file mode 100644 index 00000000000..32aef4d2f3b --- /dev/null +++ b/plugins/meta_util.py @@ -0,0 +1,70 @@ +# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind +""" +Collect every ``.. meta::`` field from the doctree, sanitize keys, and expand +``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). + +Sphinx / the HTML theme may also emit plain ```` tags for the same fields. +The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may +split comma-separated values into multiple tags for faceted search. +""" + +from __future__ import annotations + +import re +from typing import Dict, List, Optional + +from docutils import nodes + +# HTML ```` names should be conservative; allow common patterns. 
+_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') + + +def sanitize_meta_key(raw: str) -> Optional[str]: + s = str(raw).strip() + if not s or not _META_NAME_RE.match(s): + return None + return s + + +def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: + """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" + if doctree is None: + return {} + + out: Dict[str, str] = {} + for meta in doctree.findall(nodes.meta): + if meta.get('http-equiv'): + continue + content = meta.get('content') + if not content: + continue + key: Optional[str] = None + name = meta.get('name') + if name: + key = sanitize_meta_key(str(name)) + else: + prop = meta.get('property') + if prop: + key = sanitize_meta_key(str(prop)) + if not key: + continue + out[key] = str(content).strip() + return out + + +def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: + """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" + result = text + for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): + result = result.replace(f'{{{key}}}', value) + return result + + +def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: + """Apply ``expand_meta_macros`` to every meta value.""" + return {k: expand_meta_macros(v, macros) for k, v in meta.items()} + + +def split_meta_values(value: str) -> List[str]: + """Return comma-separated metadata values as individual Pagefind values.""" + return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py new file mode 100644 index 00000000000..690f6363d6b --- /dev/null +++ b/plugins/pagefind_meta.py @@ -0,0 +1,222 @@ +# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation +""" +Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` +from every ``.. meta::`` field on the page (passthrough, no whitelist). 
+ +Sphinx / the HTML theme typically also emits plain ```` tags for the same +``.. meta::`` fields. We intentionally emit an additional block with +``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting +works; crawlers may see duplicate name/content pairs for non-split fields. +""" + +from __future__ import annotations + +import html +import re +from typing import Any, Dict, List, Optional, Tuple + +from docutils import nodes + +from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values + + +def _macros_flat(app) -> Dict[str, str]: + macros = getattr(app.config, 'macros', {}) or {} + return {str(k): str(v) for k, v in macros.items()} + + +def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: + raw = all_doctree_meta(doctree) + return expand_all_meta_values(raw, _macros_flat(app)) + + +def _default_filter_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: + labels = getattr(app.config, 'pagefind_filter_labels', None) or {} + out: List[List[str]] = [] + for k in sorted_keys: + if isinstance(labels, dict) and labels.get(k): + lbl = str(labels[k]) + else: + lbl = _default_filter_label(k) + out.append([k, lbl]) + return out + + +def _pagefind_data_meta_attr(values: Dict[str, str]) -> str: + """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" + parts: List[str] = [] + for key in sorted(values.keys()): + for value in split_meta_values(values.get(key, '')): + parts.append(f'{key}:{value}') + inner = ', '.join(parts) + return html.escape(inner, quote=True) + + +def _seo_and_filter_metas(values: Dict[str, str]) -> str: + """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" + lines: List[str] = [] + for key in sorted(values.keys()): + esc_name = 
html.escape(key, quote=True) + for value in split_meta_values(values.get(key, '')): + esc_val = html.escape(value, quote=True) + lines.append( + f'' + ) + return '\n '.join(lines) + + +def _ensure_meta_keys_store(env) -> Dict[str, Any]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + env.pagefind_meta_keys_by_doc = {} + return env.pagefind_meta_keys_by_doc + + +def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + return + raw = all_doctree_meta(doctree) + store = _ensure_meta_keys_store(app.env) + store[docname] = set(raw.keys()) + + +def _purge_meta_keys(app, env, docname: str) -> None: + if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: + del env.pagefind_meta_keys_by_doc[docname] + + +def _merge_meta_keys(app, env, docnames, other) -> None: + """Merge per-document meta key sets from a parallel read worker environment.""" + if not hasattr(other, 'pagefind_meta_keys_by_doc'): + return + store = _ensure_meta_keys_store(env) + for docname, keys in other.pagefind_meta_keys_by_doc.items(): + store[docname] = set(keys) + + +def _union_meta_keys(env) -> List[str]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + return [] + union: set[str] = set() + for keys in env.pagefind_meta_keys_by_doc.values(): + union |= set(keys) + return sorted(union) + + +def _pagefind_bundle_prefix(pagename: str) -> str: + """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. + + Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. + ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. 
+ """ + depth = pagename.count('/') + if depth == 0: + return './pagefind/' + return ('../' * depth) + 'pagefind/' + + +def _pagefind_component_urls(pagename: str) -> Tuple[str, str]: + """(css_href, js_href) relative to current page.""" + prefix = _pagefind_bundle_prefix(pagename) + return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' + + +def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: + """Build mergeIndex list from conf (pinned docs.ros.org template).""" + pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) + if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): + return [] + base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') + overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} + out: List[Dict[str, Any]] = [] + for pkg in pkgs: + key = f'{distro}/{pkg}' + if key in overrides: + bundle = overrides[key] + else: + bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' + entry: Dict[str, Any] = {'bundlePath': bundle} + mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) + if isinstance(mf, dict) and pkg in mf: + entry['mergeFilter'] = mf[pkg] + iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) + if isinstance(iw, dict) and pkg in iw: + entry['indexWeight'] = iw[pkg] + out.append(entry) + return out + + +def _html_page_context( + app, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree, +) -> None: + sorted_keys = _union_meta_keys(app.env) + metadata_fields = _metadata_fields_for_keys(app, sorted_keys) + filter_csv = ','.join(sorted_keys) + + empty = { + 'pagefind_seo_filter_metas': '', + 'pagefind_data_meta_attr': '', + 'pagefind_bundle_prefix': './pagefind/', + 'pagefind_component_css': './pagefind/pagefind-component-ui.css', + 'pagefind_component_js': './pagefind/pagefind-component-ui.js', + 'pagefind_merge_index': [], + 'pagefind_filter_keys_csv': 
filter_csv, + 'pagefind_metadata_fields': metadata_fields, + 'pagefind_result_meta_order': list( + getattr(app.config, 'pagefind_result_meta_order', []) or [] + ), + } + context.update(empty) + + if app.builder.format != 'html' or templatename is None: + return + if not templatename.endswith('.html'): + return + + default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') + values = _resolved_page_meta(app, doctree) + + seo_filters = _seo_and_filter_metas(values) + data_attr = _pagefind_data_meta_attr(values) + css_href, js_href = _pagefind_component_urls(pagename) + bundle_prefix = _pagefind_bundle_prefix(pagename) + + merge_distro = values.get('distro') or str(default_distro) + merge = _merge_index_entries(app, merge_distro) + context['pagefind_seo_filter_metas'] = seo_filters + context['pagefind_data_meta_attr'] = data_attr + context['pagefind_bundle_prefix'] = bundle_prefix + context['pagefind_component_css'] = css_href + context['pagefind_component_js'] = js_href + context['pagefind_merge_index'] = merge + + +def setup(app) -> Dict[str, Any]: + app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') + app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') + app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') + app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') + app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') + app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') + + app.connect('html-page-context', _html_page_context) + app.connect('doctree-resolved', _collect_meta_keys) + app.connect('env-purge-doc', _purge_meta_keys) + app.connect('env-merge-info', _merge_meta_keys) + + return { + 
'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/plugins/showmeta.py b/plugins/showmeta.py new file mode 100644 index 00000000000..f11b140429c --- /dev/null +++ b/plugins/showmeta.py @@ -0,0 +1,120 @@ +# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary +""" +Render selected ``.. meta::`` fields in the document body with author-controlled +order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). +""" + +from __future__ import annotations + +import html as html_module +import re +from typing import List + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util.docutils import SphinxDirective + +from meta_util import all_doctree_meta, expand_all_meta_values + + +def _macros_flat(app) -> dict[str, str]: + return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} + + +def _default_showmeta_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +class showmeta_node(nodes.General, nodes.Element): + """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" + + +class ShowMetaDirective(SphinxDirective): + """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" + + has_content = False + option_spec = { + 'order': directives.unchanged, + 'labels': directives.unchanged, + } + + def run(self) -> List[nodes.Node]: + node = showmeta_node() + node['order'] = self.options.get('order', '') + node['labels'] = self.options.get('labels', '') + self.set_source_info(node) + return [node] + + +def visit_skip_showmeta(self, node: showmeta_node) -> None: + raise nodes.SkipNode + + +def depart_showmeta_noop(self, node: showmeta_node) -> None: + pass + + +def _parse_labels(raw: str) -> dict[str, str]: + out: dict[str, str] = {} + for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: + key, _, value = part.partition('=') + key, value = key.strip(), value.strip() + if key: + out[key] = value + return out + + +def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + for node in list(doctree.findall(showmeta_node)): + node.parent.remove(node) + return + + macros = _macros_flat(app) + meta = expand_all_meta_values(all_doctree_meta(doctree), macros) + + for node in list(doctree.findall(showmeta_node)): + order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] + labels_map = _parse_labels(node.get('labels', '')) + if not order: + node.parent.remove(node) + continue + + parts: List[str] = [] + for key in order: + val = meta.get(key, '').strip() + if not val: + continue + label_base = labels_map.get(key) or _default_showmeta_label(key) + label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' + parts.append( + f'{html_module.escape(label_display)} ' + f'{html_module.escape(val)}' + ) + + if not parts: + node.parent.remove(node) + else: + inner = ' | '.join(parts) + raw = nodes.raw( + '', + f'

{inner}

', + format='html', + ) + node.replace_self(raw) + + +def setup(app): + app.add_node( + showmeta_node, + html=(visit_skip_showmeta, depart_showmeta_noop), + latex=(visit_skip_showmeta, depart_showmeta_noop), + ) + app.add_directive('showmeta', ShowMetaDirective) + app.connect('doctree-resolved', replace_showmeta_nodes) + return { + 'version': '1.0.0', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/requirements.txt b/requirements.txt index 21c4c057505..71bd6e769fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,7 @@ +# Non-Python build dependency (install separately; used by `make pagefind`): +# Node.js 18+ with npx — https://nodejs.org/ +# Verify: node -v && npx -v + codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index 05fe7db14e9..e377fa738ef 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,10 +3,22 @@ About ROS ========= +.. meta:: + :contentType: about + :experience: beginner + :area: framework, tools, capabilities + :capability: simulation + :distro: {DISTRO} + :product: {PRODUCT} + + + ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** +.. showmeta:: + :order: product, area, capability, contentType, experience + :labels: product=Product, area=Area, capability=Capability, contentType=Content type, experience=Level .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css new file mode 100644 index 00000000000..5932d5ceec0 --- /dev/null +++ b/source/_static/pagefind-docsearch.css @@ -0,0 +1,219 @@ +/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ +.ros2-pagefind-search { + margin: 0.5rem 0 1rem; +} + +.ros2-pagefind-search pagefind-modal-trigger { + display: block; + width: 100%; +} + +/* Light styling for the trigger button (Pagefind exposes light DOM button) */ +.ros2-pagefind-search pagefind-modal-trigger::part(button), +.ros2-pagefind-search button { + align-items: center; + background: var(--wy-menu-vertical-background-color, #fcfcfc); + border: 1px solid #ccc; + border-radius: 40px; + color: var(--wy-menu-vertical-color, #404040); + cursor: pointer; + display: flex; + font-size: 0.85rem; + gap: 0.35rem; + justify-content: space-between; + min-height: 2.25rem; + padding: 0.35rem 0.6rem 0.35rem 0.75rem; + text-align: left; + width: 100%; +} + +.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, +.ros2-pagefind-search button:hover { + border-color: #999; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); +} + +/* Keyboard hint styling (Algolia DocSearch-like) */ +.ros2-pagefind-search .DocSearch-Button-Keys, +.ros2-pagefind-search pagefind-modal-trigger::part(keys) { + display: flex; + gap: 0.2rem; +} + +.ros2-pagefind-search kbd, +.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { + align-items: center; + background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); + border: 0; + border-radius: 3px; + box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); + color: #969faf; + display: flex; + font-size: 0.65rem; + font-weight: 600; + line-height: 1; + min-height: 1.25rem; + min-width: 1.25rem; + padding: 0 0.3rem; + justify-content: center; +} + +.wy-nav-side-scroll .ros2-pagefind-search { + padding-right: 0.5rem; +} + 
+.ros-page-meta-summary, +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin: -0.25rem 0 1rem !important; + padding: 0.45rem 0.75rem !important; + border-left: 4px solid #6c757d !important; + background: #f8f9fa !important; + color: #495057 !important; + font-size: 0.85rem !important; +} + +.ros2-pagefind-search dialog.pf-modal { + width: clamp(900px, 60vw, 1200px) !important; + max-width: 92vw !important; + min-width: min(900px, 92vw) !important; +} + +.ros2-pagefind-search .ros-search-two-col, +#ros-search-page .ros-search-two-col { + display: grid; + grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); + gap: 1rem; + min-height: 0; + width: 100%; +} + +.ros2-pagefind-search .ros-search-facets, +.ros2-pagefind-search .ros-search-results { + max-height: 62vh; + overflow: auto; + min-width: 0; +} + +#ros-search-page .ros-search-facets, +#ros-search-page .ros-search-results { + min-width: 0; +} + +.ros2-pagefind-search .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +#ros-search-page .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, +.ros2-pagefind-search .ros-search-results pagefind-summary, +.ros2-pagefind-search .ros-search-results pagefind-results, +#ros-search-page .ros-search-facets pagefind-filter-pane, +#ros-search-page .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-results { + display: block; +} + +.ros2-pagefind-search .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-summary { + margin-bottom: 0.75rem; +} + +.ros2-pagefind-search .pf-result-link, +#ros-search-page .pf-result-link { + font-size: 1rem; + font-weight: 700; + line-height: 1.25; +} + +.ros2-pagefind-search .pf-result-excerpt, +.ros2-pagefind-search .pf-result-preview, +#ros-search-page 
.pf-result-excerpt, +#ros-search-page .pf-result-preview { + font-size: 0.85rem; + line-height: 1.35; +} + +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin-top: 0.35rem !important; + margin-bottom: 0.45rem !important; + border-radius: 0 !important; + display: block !important; + line-height: 1.35 !important; +} + +.ros2-pagefind-search .pf-result-meta-block b, +#ros-search-page .pf-result-meta-block b, +dialog.pf-modal .pf-result-meta-block b { + color: #495057 !important; + font-weight: 600 !important; +} + +/* Full-page search results (search_results.rst) */ +.ros-search-page { + padding: 0 0 2rem; +} + +.ros-search-page-input-row { + margin-bottom: 1.5rem; +} + +.ros-search-page-input-row pagefind-input { + display: block; + width: 100%; +} + +.ros-search-page-two-col .ros-search-facets, +.ros-search-page-two-col .ros-search-results { + max-height: none; + overflow: visible; +} + +/* + Force Pagefind's per-result IntersectionObserver to use this + element as its root. The component walks up the DOM looking for an ancestor + whose computed overflow-y is not "visible" or "hidden"; without this, no + ancestor matches on a dedicated search page (everything renders with default + overflow), the observer never fires, and result cards remain skeletons. + + Setting overflow-y: auto with no max-height gives the observer a valid root + without producing any visible scrollbar - the element grows to fit content + naturally and the page itself remains the scroll context for the user. 
+*/ +#ros-search-page pagefind-results { + overflow-y: auto !important; +} + +@media (max-width: 980px) { + .ros2-pagefind-search .ros-search-two-col, + #ros-search-page .ros-search-two-col { + grid-template-columns: 1fr; + } + + .ros2-pagefind-search .ros-search-facets, + .ros2-pagefind-search .ros-search-results { + max-height: none; + } + + .ros2-pagefind-search .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } + + #ros-search-page .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } +} diff --git a/source/_templates/layout.html b/source/_templates/layout.html new file mode 100644 index 00000000000..94830854a69 --- /dev/null +++ b/source/_templates/layout.html @@ -0,0 +1,9 @@ +{% extends "!layout.html" %} +{% block extrahead %} + {{ super() }} + {% if pagefind_seo_filter_metas %} + + {{ pagefind_seo_filter_metas|safe }} + + {% endif %} +{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html new file mode 100644 index 00000000000..c63231694fa --- /dev/null +++ b/source/_templates/searchbox.html @@ -0,0 +1,462 @@ +{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} + + + + diff --git a/source/search_results.rst b/source/search_results.rst new file mode 100644 index 00000000000..ac2620b9d19 --- /dev/null +++ b/source/search_results.rst @@ -0,0 +1,21 @@ +:orphan: + +Search Results +============== + +.. raw:: html + +
+
+ +
+
+ +
+ + +
+
+
From 25a2cff7160b13b86b7215e5ec0194d3fd80b034 Mon Sep 17 00:00:00 2001 From: GeorgeL Date: Mon, 18 May 2026 16:54:52 +0100 Subject: [PATCH 25/59] Revert "add pagefind from prototype" This reverts commit f506799b17d0e44b7e5fe186fd0c37917dcac260. --- .github/workflows/test.yml | 16 - Makefile | 6 - conf.py | 29 +- plugins/meta_util.py | 70 ---- plugins/pagefind_meta.py | 222 ------------- plugins/showmeta.py | 120 ------- requirements.txt | 4 - source/About-ROS.rst | 14 +- source/_static/pagefind-docsearch.css | 219 ------------ source/_templates/layout.html | 9 - source/_templates/searchbox.html | 462 -------------------------- source/search_results.rst | 21 -- 12 files changed, 2 insertions(+), 1190 deletions(-) delete mode 100644 plugins/meta_util.py delete mode 100644 plugins/pagefind_meta.py delete mode 100644 plugins/showmeta.py delete mode 100644 source/_static/pagefind-docsearch.css delete mode 100644 source/_templates/layout.html delete mode 100644 source/_templates/searchbox.html delete mode 100644 source/search_results.rst diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5a890815fcd..5cea1c262d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,14 +77,6 @@ jobs: - name: Build the docs run: make html - - - name: Setup Node.js (Pagefind) - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Index HTML with Pagefind - run: make pagefind - name: Upload document artifacts uses: actions/upload-artifact@v4 @@ -155,11 +147,3 @@ jobs: - name: Build the docs run: make multiversion - - - name: Setup Node.js (Pagefind) - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Index HTML with Pagefind - run: make pagefind diff --git a/Makefile b/Makefile index f5a18e791ba..f411c155a1e 100644 --- a/Makefile +++ b/Makefile @@ -20,12 +20,6 @@ multiversion: Makefile sphinx-multiversion $(OPTS) "$(SOURCE)" build/html @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py - -# 
Pagefind static search index (requires Node.js / npx). Run after html or multiversion. -PAGEFIND_VERSION ?= 1.5.2 -pagefind: - npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" - %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) diff --git a/conf.py b/conf.py index 2955a31393a..2a9def973fd 100644 --- a/conf.py +++ b/conf.py @@ -89,34 +89,8 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', - 'pagefind_meta', - 'showmeta', ] -# Pagefind mergeIndex: optional per-package API doc bundles. -# Enable only when upstream sites publish Pagefind at .../en/{distro}/p/{pkg}/pagefind -pagefind_merge_enabled = False -pagefind_merge_package_pkgs = [] -pagefind_merge_index_base = 'https://docs.ros.org' -pagefind_merge_index_overrides = {} -pagefind_merge_filter_per_pkg = None -pagefind_merge_index_weight_per_pkg = None - -# Optional display labels for Pagefind filter UI (key → label). Unlisted keys use title-case. -pagefind_filter_labels = { - 'contentType': 'Content Type', -} - -pagefind_result_meta_order = [ - 'product', - 'distro', - 'area', - 'capability', - 'contentType', - 'experience', -] - - # Intersphinx mapping intersphinx_mapping = { @@ -194,7 +168,6 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', - 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -208,7 +181,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] +html_css_files = ['custom.css', 'adopters.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py deleted file mode 100644 index 32aef4d2f3b..00000000000 --- a/plugins/meta_util.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind -""" -Collect every ``.. 
meta::`` field from the doctree, sanitize keys, and expand -``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). - -Sphinx / the HTML theme may also emit plain ```` tags for the same fields. -The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may -split comma-separated values into multiple tags for faceted search. -""" - -from __future__ import annotations - -import re -from typing import Dict, List, Optional - -from docutils import nodes - -# HTML ```` names should be conservative; allow common patterns. -_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') - - -def sanitize_meta_key(raw: str) -> Optional[str]: - s = str(raw).strip() - if not s or not _META_NAME_RE.match(s): - return None - return s - - -def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: - """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" - if doctree is None: - return {} - - out: Dict[str, str] = {} - for meta in doctree.findall(nodes.meta): - if meta.get('http-equiv'): - continue - content = meta.get('content') - if not content: - continue - key: Optional[str] = None - name = meta.get('name') - if name: - key = sanitize_meta_key(str(name)) - else: - prop = meta.get('property') - if prop: - key = sanitize_meta_key(str(prop)) - if not key: - continue - out[key] = str(content).strip() - return out - - -def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: - """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" - result = text - for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): - result = result.replace(f'{{{key}}}', value) - return result - - -def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: - """Apply ``expand_meta_macros`` to every meta value.""" - return {k: expand_meta_macros(v, macros) for k, v in meta.items()} - - -def split_meta_values(value: str) -> 
List[str]: - """Return comma-separated metadata values as individual Pagefind values.""" - return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py deleted file mode 100644 index 690f6363d6b..00000000000 --- a/plugins/pagefind_meta.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation -""" -Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` -from every ``.. meta::`` field on the page (passthrough, no whitelist). - -Sphinx / the HTML theme typically also emits plain ```` tags for the same -``.. meta::`` fields. We intentionally emit an additional block with -``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting -works; crawlers may see duplicate name/content pairs for non-split fields. -""" - -from __future__ import annotations - -import html -import re -from typing import Any, Dict, List, Optional, Tuple - -from docutils import nodes - -from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values - - -def _macros_flat(app) -> Dict[str, str]: - macros = getattr(app.config, 'macros', {}) or {} - return {str(k): str(v) for k, v in macros.items()} - - -def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: - raw = all_doctree_meta(doctree) - return expand_all_meta_values(raw, _macros_flat(app)) - - -def _default_filter_label(key: str) -> str: - spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) - return spaced.replace('_', ' ').replace('-', ' ').strip().title() - - -def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: - labels = getattr(app.config, 'pagefind_filter_labels', None) or {} - out: List[List[str]] = [] - for k in sorted_keys: - if isinstance(labels, dict) and labels.get(k): - lbl = str(labels[k]) - else: - lbl = _default_filter_label(k) - out.append([k, lbl]) - return out - - -def 
_pagefind_data_meta_attr(values: Dict[str, str]) -> str: - """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" - parts: List[str] = [] - for key in sorted(values.keys()): - for value in split_meta_values(values.get(key, '')): - parts.append(f'{key}:{value}') - inner = ', '.join(parts) - return html.escape(inner, quote=True) - - -def _seo_and_filter_metas(values: Dict[str, str]) -> str: - """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" - lines: List[str] = [] - for key in sorted(values.keys()): - esc_name = html.escape(key, quote=True) - for value in split_meta_values(values.get(key, '')): - esc_val = html.escape(value, quote=True) - lines.append( - f'' - ) - return '\n '.join(lines) - - -def _ensure_meta_keys_store(env) -> Dict[str, Any]: - if not hasattr(env, 'pagefind_meta_keys_by_doc'): - env.pagefind_meta_keys_by_doc = {} - return env.pagefind_meta_keys_by_doc - - -def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: - if app.builder.format != 'html': - return - raw = all_doctree_meta(doctree) - store = _ensure_meta_keys_store(app.env) - store[docname] = set(raw.keys()) - - -def _purge_meta_keys(app, env, docname: str) -> None: - if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: - del env.pagefind_meta_keys_by_doc[docname] - - -def _merge_meta_keys(app, env, docnames, other) -> None: - """Merge per-document meta key sets from a parallel read worker environment.""" - if not hasattr(other, 'pagefind_meta_keys_by_doc'): - return - store = _ensure_meta_keys_store(env) - for docname, keys in other.pagefind_meta_keys_by_doc.items(): - store[docname] = set(keys) - - -def _union_meta_keys(env) -> List[str]: - if not hasattr(env, 'pagefind_meta_keys_by_doc'): - return [] - union: set[str] = set() - for keys in env.pagefind_meta_keys_by_doc.values(): - union |= set(keys) - return sorted(union) - - -def _pagefind_bundle_prefix(pagename: 
str) -> str: - """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. - - Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. - ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. - """ - depth = pagename.count('/') - if depth == 0: - return './pagefind/' - return ('../' * depth) + 'pagefind/' - - -def _pagefind_component_urls(pagename: str) -> Tuple[str, str]: - """(css_href, js_href) relative to current page.""" - prefix = _pagefind_bundle_prefix(pagename) - return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' - - -def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: - """Build mergeIndex list from conf (pinned docs.ros.org template).""" - pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) - if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): - return [] - base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') - overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} - out: List[Dict[str, Any]] = [] - for pkg in pkgs: - key = f'{distro}/{pkg}' - if key in overrides: - bundle = overrides[key] - else: - bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' - entry: Dict[str, Any] = {'bundlePath': bundle} - mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) - if isinstance(mf, dict) and pkg in mf: - entry['mergeFilter'] = mf[pkg] - iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) - if isinstance(iw, dict) and pkg in iw: - entry['indexWeight'] = iw[pkg] - out.append(entry) - return out - - -def _html_page_context( - app, - pagename: str, - templatename: str, - context: Dict[str, Any], - doctree, -) -> None: - sorted_keys = _union_meta_keys(app.env) - metadata_fields = _metadata_fields_for_keys(app, sorted_keys) - filter_csv = ','.join(sorted_keys) - - empty = { - 'pagefind_seo_filter_metas': '', - 
'pagefind_data_meta_attr': '', - 'pagefind_bundle_prefix': './pagefind/', - 'pagefind_component_css': './pagefind/pagefind-component-ui.css', - 'pagefind_component_js': './pagefind/pagefind-component-ui.js', - 'pagefind_merge_index': [], - 'pagefind_filter_keys_csv': filter_csv, - 'pagefind_metadata_fields': metadata_fields, - 'pagefind_result_meta_order': list( - getattr(app.config, 'pagefind_result_meta_order', []) or [] - ), - } - context.update(empty) - - if app.builder.format != 'html' or templatename is None: - return - if not templatename.endswith('.html'): - return - - default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') - values = _resolved_page_meta(app, doctree) - - seo_filters = _seo_and_filter_metas(values) - data_attr = _pagefind_data_meta_attr(values) - css_href, js_href = _pagefind_component_urls(pagename) - bundle_prefix = _pagefind_bundle_prefix(pagename) - - merge_distro = values.get('distro') or str(default_distro) - merge = _merge_index_entries(app, merge_distro) - context['pagefind_seo_filter_metas'] = seo_filters - context['pagefind_data_meta_attr'] = data_attr - context['pagefind_bundle_prefix'] = bundle_prefix - context['pagefind_component_css'] = css_href - context['pagefind_component_js'] = js_href - context['pagefind_merge_index'] = merge - - -def setup(app) -> Dict[str, Any]: - app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') - app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') - app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') - app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') - app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') - app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') - app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') - 
app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') - - app.connect('html-page-context', _html_page_context) - app.connect('doctree-resolved', _collect_meta_keys) - app.connect('env-purge-doc', _purge_meta_keys) - app.connect('env-merge-info', _merge_meta_keys) - - return { - 'parallel_read_safe': True, - 'parallel_write_safe': True, - 'version': '1.0.0', - } diff --git a/plugins/showmeta.py b/plugins/showmeta.py deleted file mode 100644 index f11b140429c..00000000000 --- a/plugins/showmeta.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary -""" -Render selected ``.. meta::`` fields in the document body with author-controlled -order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). -""" - -from __future__ import annotations - -import html as html_module -import re -from typing import List - -from docutils import nodes -from docutils.parsers.rst import directives -from sphinx.util.docutils import SphinxDirective - -from meta_util import all_doctree_meta, expand_all_meta_values - - -def _macros_flat(app) -> dict[str, str]: - return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} - - -def _default_showmeta_label(key: str) -> str: - spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) - return spaced.replace('_', ' ').replace('-', ' ').strip().title() - - -class showmeta_node(nodes.General, nodes.Element): - """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" - - -class ShowMetaDirective(SphinxDirective): - """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" - - has_content = False - option_spec = { - 'order': directives.unchanged, - 'labels': directives.unchanged, - } - - def run(self) -> List[nodes.Node]: - node = showmeta_node() - node['order'] = self.options.get('order', '') - node['labels'] = self.options.get('labels', '') - self.set_source_info(node) - return [node] - - -def visit_skip_showmeta(self, node: showmeta_node) -> None: - raise nodes.SkipNode - - -def depart_showmeta_noop(self, node: showmeta_node) -> None: - pass - - -def _parse_labels(raw: str) -> dict[str, str]: - out: dict[str, str] = {} - for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: - key, _, value = part.partition('=') - key, value = key.strip(), value.strip() - if key: - out[key] = value - return out - - -def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: - if app.builder.format != 'html': - for node in list(doctree.findall(showmeta_node)): - node.parent.remove(node) - return - - macros = _macros_flat(app) - meta = expand_all_meta_values(all_doctree_meta(doctree), macros) - - for node in list(doctree.findall(showmeta_node)): - order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] - labels_map = _parse_labels(node.get('labels', '')) - if not order: - node.parent.remove(node) - continue - - parts: List[str] = [] - for key in order: - val = meta.get(key, '').strip() - if not val: - continue - label_base = labels_map.get(key) or _default_showmeta_label(key) - label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' - parts.append( - f'{html_module.escape(label_display)} ' - f'{html_module.escape(val)}' - ) - - if not parts: - node.parent.remove(node) - else: - inner = ' | '.join(parts) - raw = nodes.raw( - '', - f'

{inner}

', - format='html', - ) - node.replace_self(raw) - - -def setup(app): - app.add_node( - showmeta_node, - html=(visit_skip_showmeta, depart_showmeta_noop), - latex=(visit_skip_showmeta, depart_showmeta_noop), - ) - app.add_directive('showmeta', ShowMetaDirective) - app.connect('doctree-resolved', replace_showmeta_nodes) - return { - 'version': '1.0.0', - 'parallel_read_safe': True, - 'parallel_write_safe': True, - } diff --git a/requirements.txt b/requirements.txt index 71bd6e769fe..21c4c057505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,3 @@ -# Non-Python build dependency (install separately; used by `make pagefind`): -# Node.js 18+ with npx — https://nodejs.org/ -# Verify: node -v && npx -v - codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index e377fa738ef..05fe7db14e9 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,22 +3,10 @@ About ROS ========= -.. meta:: - :contentType: about - :experience: beginner - :area: framework, tools, capabilities - :capability: simulation - :distro: {DISTRO} - :product: {PRODUCT} - - - ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -.. showmeta:: - :order: product, area, capability, contentType, experience - :labels: product=Product, area=Area, capability=Capability, contentType=Content type, experience=Level +**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css deleted file mode 100644 index 5932d5ceec0..00000000000 --- a/source/_static/pagefind-docsearch.css +++ /dev/null @@ -1,219 +0,0 @@ -/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ -.ros2-pagefind-search { - margin: 0.5rem 0 1rem; -} - -.ros2-pagefind-search pagefind-modal-trigger { - display: block; - width: 100%; -} - -/* Light styling for the trigger button (Pagefind exposes light DOM button) */ -.ros2-pagefind-search pagefind-modal-trigger::part(button), -.ros2-pagefind-search button { - align-items: center; - background: var(--wy-menu-vertical-background-color, #fcfcfc); - border: 1px solid #ccc; - border-radius: 40px; - color: var(--wy-menu-vertical-color, #404040); - cursor: pointer; - display: flex; - font-size: 0.85rem; - gap: 0.35rem; - justify-content: space-between; - min-height: 2.25rem; - padding: 0.35rem 0.6rem 0.35rem 0.75rem; - text-align: left; - width: 100%; -} - -.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, -.ros2-pagefind-search button:hover { - border-color: #999; - box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); -} - -/* Keyboard hint styling (Algolia DocSearch-like) */ -.ros2-pagefind-search .DocSearch-Button-Keys, -.ros2-pagefind-search pagefind-modal-trigger::part(keys) { - display: flex; - gap: 0.2rem; -} - -.ros2-pagefind-search kbd, -.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { - align-items: center; - background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); - border: 0; - border-radius: 3px; - box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); - color: #969faf; - display: flex; - font-size: 0.65rem; - font-weight: 600; - line-height: 1; - min-height: 1.25rem; - min-width: 1.25rem; - padding: 0 0.3rem; - justify-content: center; -} - -.wy-nav-side-scroll .ros2-pagefind-search { - padding-right: 0.5rem; -} - 
-.ros-page-meta-summary, -.ros2-pagefind-search .pf-result-meta-block, -#ros-search-page .pf-result-meta-block, -dialog.pf-modal .pf-result-meta-block { - margin: -0.25rem 0 1rem !important; - padding: 0.45rem 0.75rem !important; - border-left: 4px solid #6c757d !important; - background: #f8f9fa !important; - color: #495057 !important; - font-size: 0.85rem !important; -} - -.ros2-pagefind-search dialog.pf-modal { - width: clamp(900px, 60vw, 1200px) !important; - max-width: 92vw !important; - min-width: min(900px, 92vw) !important; -} - -.ros2-pagefind-search .ros-search-two-col, -#ros-search-page .ros-search-two-col { - display: grid; - grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); - gap: 1rem; - min-height: 0; - width: 100%; -} - -.ros2-pagefind-search .ros-search-facets, -.ros2-pagefind-search .ros-search-results { - max-height: 62vh; - overflow: auto; - min-width: 0; -} - -#ros-search-page .ros-search-facets, -#ros-search-page .ros-search-results { - min-width: 0; -} - -.ros2-pagefind-search .ros-search-facets { - border-right: 1px solid #e9ecef; - padding-right: 0.75rem; -} - -#ros-search-page .ros-search-facets { - border-right: 1px solid #e9ecef; - padding-right: 0.75rem; -} - -.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, -.ros2-pagefind-search .ros-search-results pagefind-summary, -.ros2-pagefind-search .ros-search-results pagefind-results, -#ros-search-page .ros-search-facets pagefind-filter-pane, -#ros-search-page .ros-search-results pagefind-summary, -#ros-search-page .ros-search-results pagefind-results { - display: block; -} - -.ros2-pagefind-search .ros-search-results pagefind-summary, -#ros-search-page .ros-search-results pagefind-summary { - margin-bottom: 0.75rem; -} - -.ros2-pagefind-search .pf-result-link, -#ros-search-page .pf-result-link { - font-size: 1rem; - font-weight: 700; - line-height: 1.25; -} - -.ros2-pagefind-search .pf-result-excerpt, -.ros2-pagefind-search .pf-result-preview, -#ros-search-page 
.pf-result-excerpt, -#ros-search-page .pf-result-preview { - font-size: 0.85rem; - line-height: 1.35; -} - -.ros2-pagefind-search .pf-result-meta-block, -#ros-search-page .pf-result-meta-block, -dialog.pf-modal .pf-result-meta-block { - margin-top: 0.35rem !important; - margin-bottom: 0.45rem !important; - border-radius: 0 !important; - display: block !important; - line-height: 1.35 !important; -} - -.ros2-pagefind-search .pf-result-meta-block b, -#ros-search-page .pf-result-meta-block b, -dialog.pf-modal .pf-result-meta-block b { - color: #495057 !important; - font-weight: 600 !important; -} - -/* Full-page search results (search_results.rst) */ -.ros-search-page { - padding: 0 0 2rem; -} - -.ros-search-page-input-row { - margin-bottom: 1.5rem; -} - -.ros-search-page-input-row pagefind-input { - display: block; - width: 100%; -} - -.ros-search-page-two-col .ros-search-facets, -.ros-search-page-two-col .ros-search-results { - max-height: none; - overflow: visible; -} - -/* - Force Pagefind's per-result IntersectionObserver to use this - element as its root. The component walks up the DOM looking for an ancestor - whose computed overflow-y is not "visible" or "hidden"; without this, no - ancestor matches on a dedicated search page (everything renders with default - overflow), the observer never fires, and result cards remain skeletons. - - Setting overflow-y: auto with no max-height gives the observer a valid root - without producing any visible scrollbar - the element grows to fit content - naturally and the page itself remains the scroll context for the user. 
-*/ -#ros-search-page pagefind-results { - overflow-y: auto !important; -} - -@media (max-width: 980px) { - .ros2-pagefind-search .ros-search-two-col, - #ros-search-page .ros-search-two-col { - grid-template-columns: 1fr; - } - - .ros2-pagefind-search .ros-search-facets, - .ros2-pagefind-search .ros-search-results { - max-height: none; - } - - .ros2-pagefind-search .ros-search-facets { - border-right: 0; - border-bottom: 1px solid #e9ecef; - margin-bottom: 0.75rem; - padding: 0 0 0.75rem; - } - - #ros-search-page .ros-search-facets { - border-right: 0; - border-bottom: 1px solid #e9ecef; - margin-bottom: 0.75rem; - padding: 0 0 0.75rem; - } -} diff --git a/source/_templates/layout.html b/source/_templates/layout.html deleted file mode 100644 index 94830854a69..00000000000 --- a/source/_templates/layout.html +++ /dev/null @@ -1,9 +0,0 @@ -{% extends "!layout.html" %} -{% block extrahead %} - {{ super() }} - {% if pagefind_seo_filter_metas %} - - {{ pagefind_seo_filter_metas|safe }} - - {% endif %} -{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html deleted file mode 100644 index c63231694fa..00000000000 --- a/source/_templates/searchbox.html +++ /dev/null @@ -1,462 +0,0 @@ -{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} - - - - diff --git a/source/search_results.rst b/source/search_results.rst deleted file mode 100644 index ac2620b9d19..00000000000 --- a/source/search_results.rst +++ /dev/null @@ -1,21 +0,0 @@ -:orphan: - -Search Results -============== - -.. raw:: html - -
-
- -
-
- -
- - -
-
-
From c9b316eb62df6f42dc6a1f52da66197e40f44d7f Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 18 May 2026 17:14:37 +0100 Subject: [PATCH 26/59] OPENR-89: First attempt at short desc enhancement --- Makefile | 2 +- scripts/enhance_data.py | 7 +- scripts/enhance_topics.py | 389 +++++++++++++----- scripts/openai_retrieval.py | 278 +++++++++++++ scripts/rst_utils.py | 154 ++++++- scripts/test/test_enhance_topics.py | 56 ++- .../test/test_rst_utils_short_description.py | 86 ++++ 7 files changed, 850 insertions(+), 122 deletions(-) create mode 100644 scripts/openai_retrieval.py create mode 100644 scripts/test/test_rst_utils_short_description.py diff --git a/Makefile b/Makefile index f411c155a1e..cdc64e2e78c 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ multiversion: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) enhance-topics: - git diff --name-only --diff-filter=d $(BASE_SHA) $(HEAD_SHA) | xargs -r $(PYTHON) scripts/enhance_topics.py + git diff --name-only --diff-filter=d HEAD | xargs -r $(PYTHON) scripts/enhance_topics.py lint: ./sphinx-lint-with-ros source diff --git a/scripts/enhance_data.py b/scripts/enhance_data.py index dea61b02600..4e91d1f3472 100644 --- a/scripts/enhance_data.py +++ b/scripts/enhance_data.py @@ -78,8 +78,11 @@ def add_analysis_result(data: EnhanceData, filename: str, analysis_type: str, re Returns: New EnhanceData with the result added. """ - new_results = {**data.results} # Shallow copy: replace one filename entry immutably - file_results = {**new_results.get(filename, {})} # Preserve other analysis keys for this file + + # Creates a new EnhanceData object with the analysis result added for the given file and analysis type, + # making copies so that original data is not changed (keeping EnhanceData immutable). 
+ new_results = {**data.results} + file_results = {**new_results.get(filename, {})} file_results[analysis_type] = result new_results[filename] = file_results return EnhanceData(results=new_results, updated_files=data.updated_files) # ``updated_files`` unchanged here diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 0a3b5745672..62ad88012f2 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -2,15 +2,36 @@ import re import sys import os -from typing import Optional +from dataclasses import dataclass +from typing import Callable, Optional from dotenv import load_dotenv from openai import OpenAI, RateLimitError, APIConnectionError, OpenAIError from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type from concurrent.futures import ThreadPoolExecutor -from enhance_data import EnhanceData, add_analysis_result, calculate_metrics, create_enhance_data, get_results_for_file, mark_file_updated -from rst_utils import get_meta_names_from_content, inject_metadata_to_content +from enhance_data import ( + EnhanceData, + add_analysis_result, + calculate_metrics, + create_enhance_data, + get_results_for_file, + mark_file_updated, +) +from openai_retrieval import ( + ASSISTANT_RUN_TIMEOUT, + RetrievalResources, + analyze_with_file_search, + cleanup_short_description_resources, + create_short_description_assistant, + ensure_example_vector_store, +) +from rst_utils import ( + get_meta_names_from_content, + has_short_description_content, + inject_metadata_to_content, + inject_short_description_to_content, +) logger = logging.getLogger(__name__) @@ -29,6 +50,37 @@ MIN_WAIT = 10 # Minimum wait time between retries in seconds MAX_WAIT = 120 # Maximum wait time between retries in seconds +# Example RST paths (relative to repository root) indexed into the vector store for file_search +SHORT_DESCRIPTION_EXAMPLE_PATHS = [ + "source/About-ROS.rst", +] + +# Define prompts for the AI model + 
+SHORT_DESCRIPTION_PROMPT = """You are a Technical Author in the technology industry working on documenting a robotics product, and your role is to analyze RST content within supplied documents. +You'll then create new content based on this analysis for a new draft article, which I can use to supplement that article. + +## Examples +Use file_search to read through the following RST files in their entirety as examples of completed articles: + +- About-ROS.rst +- First-Steps.rst +- Interfaces-Topics-Services-Actions.rst + +## Short Description +For each article in this set of examples, analyse the content associated with the "short-description" directive, and what it constitutes in relation to the article it describes. For example, in the First-Steps article, the 3 sentences which begin as follows comprise the specified short description: + +* "Interfaces in ROS..." +* "This article explains the..." +* "With this information..." + +This short description content does not include the single line of text commencing with "**Area...", or the "contents" (Table of Contents) directive. + +When you have identified the short description in all example articles, remember the formatting and how the paragraph is constructed, including tone/style and length. We call this the article Short Description. + +Finally, generate the short description for the new article given in the user message, with no additional styling, characters, or formatting. +""" + KEYWORDS_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. Your role is to extract 3 to 5 keywords from the content for use in metadata. The keywords should be single words that are the most important and relevant words to the content topic. 
@@ -45,6 +97,40 @@ Answer ONLY with the single word yes or no in lowercase, with no punctuation, explanation, or additional text.""" +@dataclass(frozen=True) +class EnhancementTask: + """One analysable enhancement (metadata field or short description) applied per file.""" + + key: str + should_skip: Callable[[str], bool] + analyze: Callable[[OpenAI, str, int], str] + timeout: int = DEFAULT_TIMEOUT + + +def _metadata_enhancement_task(key: str, prompt: str) -> EnhancementTask: + """Build a task that writes to ``.. meta::`` under the given field name.""" + + def should_skip(content: str) -> bool: + return key in get_meta_names_from_content(content) + + def analyze(cl: OpenAI, content: str, to: int) -> str: + return analyze_content(cl, content, prompt, timeout=to) + + return EnhancementTask(key=key, should_skip=should_skip, analyze=analyze, timeout=DEFAULT_TIMEOUT) + + +def _short_description_enhancement_task(assistant_id: str) -> EnhancementTask: + """Build a task that writes to the ``.. short-description::`` directive body.""" + + def analyze(cl: OpenAI, content: str, to: int) -> str: + return analyze_with_file_search(cl, assistant_id, content, timeout=to) + + def should_skip(content: str) -> bool: + return has_short_description_content(content) + + return EnhancementTask("short-description", should_skip, analyze, ASSISTANT_RUN_TIMEOUT) + + @retry( retry=retry_if_exception_type((RateLimitError, APIConnectionError)), stop=stop_after_attempt(MAX_RETRIES), @@ -124,7 +210,7 @@ def validate_content(client: OpenAI, generated: str, timeout: int = DEFAULT_TIME """ Validate generated content using the moderation API and a separate English-language check. - Intended for any model-generated text before it is persisted (metadata today; other content later). + Intended for any model-generated text before it is persisted. Uses ThreadPoolExecutor for cross-platform timeout handling and retries for transient API errors. 
Args: @@ -221,31 +307,30 @@ def _run_validation() -> bool: logger.error("Validation timed out after %s seconds", timeout) raise -def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], timeout: int = DEFAULT_TIMEOUT) -> EnhanceData: +def analyze_files(files: list[str], client: OpenAI, tasks: list[EnhancementTask]) -> EnhanceData: """ - Process a list of files and analyse their content using each of the passed prompts. + Process a list of files and analyse their content using each enhancement task. Args: files (list[str]): List of paths to files. client (OpenAI): OpenAI client instance. - prompts (dict[str, str]): Dictionary of prompts for the AI model. - timeout (int): Maximum time to wait for each API call in seconds. + tasks (list[EnhancementTask]): Enhancement tasks to run per file. Returns: EnhanceData: Enhancement data structure containing analysis results and update tracking. """ data = create_enhance_data() - + logger.debug("============================") logger.debug("Performing content analysis:") logger.debug("============================") - for file_path in files: # Iterate through each file in the list - logger.debug(f"Analysing file: {file_path}") + for file_path in files: # Iterate through each file in the list + logger.debug("Analysing file: %s", file_path) # Read the content of the file try: - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: content = f.read() except (OSError, PermissionError) as e: logger.error("Error reading file %s: %s", file_path, e) @@ -255,53 +340,43 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim continue # Check if the content is not empty - if content.strip(): - # Check if the content has any meta fields already - existing_meta_names = get_meta_names_from_content(content) - for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary - if prompt_name in existing_meta_names: - 
logger.warning( - "Skipping analysis for %s: meta field %r already present in .. meta::", - file_path, - prompt_name, - ) - continue - logger.debug(f"Running analysis: {prompt_name}") - try: - # Analyse the content using API with timeout and retry logic - result = analyze_content( - client, - content, - prompt, - timeout=timeout - ) - if result: - if validate_content(client, result, timeout=timeout): - # Add the analysis result to the data structure - data = add_analysis_result(data, file_path, prompt_name, result) - else: - logger.warning( - "Validation failed for generated %s in %s; result not stored", - prompt_name, - file_path, - ) + if not content.strip(): + logger.info("No analysable content found for %s", file_path) + continue + + # Iterate through each task and run the analysis + for task in tasks: + if task.should_skip(content): + logger.warning( + "Skipping analysis for %s: task %r (content already satisfies skip rule)", + file_path, + task.key, + ) + continue + logger.debug("Running analysis: %s", task.key) + try: + result = task.analyze(client, content, task.timeout) + if result: + if validate_content(client, result, timeout=DEFAULT_TIMEOUT): + data = add_analysis_result(data, file_path, task.key, result) else: - logger.warning(f"No result for {file_path} with prompt name: {prompt_name}") - - except (RateLimitError, APIConnectionError) as e: - # Exhausted all retries due to rate limits or connection errors - logger.error(f"Failed to analyse {file_path} with prompt {prompt_name} after {MAX_RETRIES} retries: {e}") - continue - except TimeoutError as e: - # Timeout error due to an individual API call timing out - logger.error(f"Analysis timed out for {file_path} with prompt {prompt_name}: {e}") - continue - except (OpenAIError, ValueError) as e: - # Other API errors and value errors - logger.error(f"Failed to analyse {file_path} with prompt {prompt_name}: {e}") - continue - else: - logger.info(f"No analysable content found for {file_path}") + 
logger.warning( + "Validation failed for generated %s in %s; result not stored", + task.key, + file_path, + ) + else: + logger.warning("No result for %s with task %r", file_path, task.key) + + except (RateLimitError, APIConnectionError) as e: + logger.error("Failed to analyse %s with task %r after %s retries: %s", file_path, task.key, MAX_RETRIES, e) + continue + except TimeoutError as e: + logger.error("Analysis timed out for %s with task %r: %s", file_path, task.key, e) + continue + except (OpenAIError, ValueError) as e: + logger.error("Failed to analyse %s with task %r: %s", file_path, task.key, e) + continue return data @@ -329,66 +404,58 @@ def get_openai_client() -> OpenAI: return OpenAI(api_key=api_key) -def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: - """ - Enhance files with metadata based on content analysis. - Args: - files (list[str]): Paths to files to enhance. - client (OpenAI, optional): OpenAI client instance. If None, creates new instance. +def _apply_metadata_results(content: str, results: dict[str, str]) -> tuple[str, bool]: + """Merge ``description`` / ``keywords`` results into ``.. meta::``.""" + # Create a subset of the results dictionary containing only the description and keywords + subset = {k: v for k, v in results.items() if k in ("description", "keywords")} + if not subset: + return content, False + return inject_metadata_to_content(content, subset) - Returns: - EnhanceData: Enhancement data structure containing analysis results and update tracking. - - Raises: - OpenAIError: If no valid API key is found when creating a new client. 
- """ - try: - client = client or get_openai_client() - except OpenAIError as e: - logger.error(f"Failed to initialise OpenAI client: {e}") - return create_enhance_data() - - # TODO: Make this config-driven, so that we can easily add more prompts and analysis types - prompts: dict[str, str] = {"description": DESCRIPTION_PROMPT, "keywords": KEYWORDS_PROMPT} - data = analyze_files(files, client, prompts) # Populate and validate ``EnhanceData.results`` from the model - data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` +def _apply_short_description_results(content: str, results: dict[str, str]) -> tuple[str, bool]: + """Insert or fill ``.. short-description::`` from analysis results.""" + # Get the short description result from the results dictionary + val = results.get("short-description") + # If the short description is not found or is empty, return the content and False + if not val or not val.strip(): + return content, False + return inject_short_description_to_content(content, val) - return data -def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: +def update_enhanced_files( + files: list[str], + data: EnhanceData, + apply_hooks: list[Callable[[str, dict[str, str]], tuple[str, bool]]], + log_label: str, +) -> EnhanceData: """ - Process a list of files and update them with passed metadata. + Process a list of files and apply enhancement hooks that may rewrite RST. - Args: - files (list[str]): List of paths to files. - data (EnhanceData): Enhancement data structure containing metadata for files. - - Returns: - EnhanceData: Updated enhancement data with files marked as updated. + Each hook receives the current file content and the per-file results dictionary, + and returns ``(new_content, changed)``. Hooks run in order; the file is written + once if any hook reported a change. 
""" - logger.debug("===========================") - logger.debug("Updating metadata in files:") + logger.debug("Updating %s in files:", log_label) logger.debug("===========================") - current_data = data # Thread results through ``mark_file_updated`` immutably + current_data = data for file_path in files: - logger.debug("Updating metadata in file: %s", file_path) - metadata = get_results_for_file(current_data, file_path) + logger.debug("Updating %s in file: %s", log_label, file_path) + file_results = get_results_for_file(current_data, file_path) - # Confirm the metadata is not empty for the file, else skip - if not metadata: + if not file_results: logger.info("Skipping %s as it has no results for enhancement", file_path) continue - logger.debug("Metadata found for %s, proceeding with updates.", file_path) + logger.debug("Results found for %s, proceeding with updates.", file_path) try: with open(file_path, encoding="utf-8") as file: - content = file.read() # Full document; helpers locate or synthesise ``.. 
meta::`` + content = file.read() except (OSError, PermissionError) as exc: logger.error("Error reading file %s: %s", file_path, exc) continue @@ -396,16 +463,23 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.error("Unicode decode error reading file %s: %s", file_path, exc) continue - new_content, changed = inject_metadata_to_content(content, metadata) - - # Confirm that at least one metadata has been changed for the file, else skip - if not changed: - logger.debug("No metadata changes applied for %s", file_path) - continue # All keys already present or no additions—do not touch the file + # Apply the enhancement hooks to the content + working = content + changed_any = False + # Iterate through each hook and apply it to the content + for hook in apply_hooks: + working, changed = hook(working, file_results) + changed_any = changed_any or changed + + # If no changes were made, log a message and continue + if not changed_any: + logger.debug("No %s changes applied for %s", log_label, file_path) + continue + # Write the updated content to the file try: with open(file_path, "w", encoding="utf-8") as file: - file.write(new_content) # Full-document rewrite (same path as read) + file.write(working) except (OSError, PermissionError) as exc: logger.error("Error writing file %s: %s", file_path, exc) continue @@ -413,15 +487,112 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.error("Unicode encode error while writing file %s: %s", file_path, exc) continue - current_data = mark_file_updated(current_data, file_path) # Record success for metrics only after a clean write - logger.debug("Updated file with supplied metadata: %s", file_path) + # Mark the file as updated in the enhancement data + current_data = mark_file_updated(current_data, file_path) + logger.debug("Updated file with %s: %s", log_label, file_path) logger.debug("-" * 50) - # ``files_with_results_count`` reflects files with at least one valid 
analysis result, and ``updated_files_count`` reflects files we rewrote metrics = calculate_metrics(current_data) - logger.info("Updated metadata in %s files out of %s files processed.", metrics.updated_files_count, len(files)) + logger.info( + "Updated %s in %s files out of %s files processed.", + log_label, + metrics.updated_files_count, + len(files), + ) return current_data + +def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: + """ + Enhance files with metadata based on content analysis. + + Args: + files (list[str]): Paths to files to enhance. + client (OpenAI, optional): OpenAI client instance. If None, creates new instance. + + Returns: + EnhanceData: Enhancement data structure containing analysis results and update tracking. + + Raises: + OpenAIError: If no valid API key is found when creating a new client. + """ + try: + client = client or get_openai_client() + except OpenAIError as e: + logger.error(f"Failed to initialise OpenAI client: {e}") + return create_enhance_data() + + # TODO: Make this config-driven, so that we can easily add more prompts and analysis types + tasks = [ + _metadata_enhancement_task("description", DESCRIPTION_PROMPT), + _metadata_enhancement_task("keywords", KEYWORDS_PROMPT), + ] + + data = analyze_files(files, client, tasks) + data = update_meta_files(files, data) + + return data + + +def enhance_short_descriptions(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: + """ + Enhance RST files with a ``.. short-description::`` body using an assistant with file_search. + + Example articles are taken from ``SHORT_DESCRIPTION_EXAMPLE_PATHS`` (indexed once per run). + Each target file is sent in its own thread; the vector store and assistant are deleted + afterwards. Not wired to ``main()``; import and call from a REPL or another script. + + Args: + files: Paths to RST files to enhance. + client: Optional pre-built OpenAI client. 
+ + Returns: + ``EnhanceData`` with results under the key ``short-description`` and ``updated_files`` set + after successful writes. + """ + try: + client = client or get_openai_client() + except OpenAIError as e: + logger.error("Failed to initialise OpenAI client: %s", e) + return create_enhance_data() + + resources: RetrievalResources | None = None + try: + vector_store_id = ensure_example_vector_store(client, SHORT_DESCRIPTION_EXAMPLE_PATHS) + assistant_id = create_short_description_assistant( + client, + vector_store_id, + SHORT_DESCRIPTION_PROMPT, + GPT_MODEL, + ) + resources = RetrievalResources(assistant_id, vector_store_id) + + tasks = [_short_description_enhancement_task(assistant_id)] + data = analyze_files(files, client, tasks) + data = update_enhanced_files( + files, + data, + [_apply_short_description_results], + "short description", + ) + return data + finally: + cleanup_short_description_resources(client, resources) + + +def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: + """ + Process a list of files and update them with passed metadata (``.. meta::`` fields). + + Args: + files (list[str]): List of paths to files. + data (EnhanceData): Enhancement data structure containing metadata for files. + + Returns: + EnhanceData: Updated enhancement data with files marked as updated. + """ + return update_enhanced_files(files, data, [_apply_metadata_results], "metadata") + def main() -> None: """ Main entry point for the script. diff --git a/scripts/openai_retrieval.py b/scripts/openai_retrieval.py new file mode 100644 index 00000000000..c07f57d2ec7 --- /dev/null +++ b/scripts/openai_retrieval.py @@ -0,0 +1,278 @@ +""" +OpenAI Assistants API helpers for short-description generation with file_search. + +Uses a vector store of example RST files so each target article is sent once per run, +without inlining full examples in every request. 
+""" + +from __future__ import annotations + +import logging +import time +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from typing import Iterable + +from openai import OpenAI, RateLimitError, APIConnectionError +from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type + +logger = logging.getLogger(__name__) + +# Align with enhance_topics retry policy for vector store / assistant creation +MAX_RETRIES = 10 +MIN_WAIT = 10 +MAX_WAIT = 120 + +# Maximum time for one assistant run (thread message + run + polling) +ASSISTANT_RUN_TIMEOUT = 120 +# Interval between run status polls +ASSISTANT_POLL_INTERVAL = 1.5 + +# Match enhance_topics.MAX_CONTENT_LENGTH for user message payload +MAX_CONTENT_LENGTH = 1_200_000 + +_SCRIPTS_DIR = Path(__file__).resolve().parent +REPO_ROOT = _SCRIPTS_DIR.parent + + +class RetrievalResources: + """IDs created for one enhance_short_descriptions run (for cleanup).""" + + __slots__ = ("assistant_id", "vector_store_id") + + def __init__(self, assistant_id: str, vector_store_id: str) -> None: + self.assistant_id = assistant_id + self.vector_store_id = vector_store_id + + +def _resolve_example_paths(example_paths: Iterable[str]) -> list[Path]: + paths: list[Path] = [] + for rel in example_paths: + p = (REPO_ROOT / rel).resolve() + if not p.is_file(): + raise FileNotFoundError(f"Example RST not found: {p}") + paths.append(p) + return paths + + +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True, +) +def ensure_example_vector_store(client: OpenAI, example_paths: Iterable[str]) -> str: + """ + Create a vector store, upload example RST files, and wait for indexing to finish. 
+ + Returns: + vector_store_id + """ + paths = _resolve_example_paths(example_paths) + logger.debug("Creating vector store for %s example file(s)", len(paths)) + vs = client.vector_stores.create(name="ros2-doc-short-description-examples") + + from contextlib import ExitStack + + with ExitStack() as stack: + streams = [stack.enter_context(open(p, "rb")) for p in paths] + batch = client.vector_stores.file_batches.upload_and_poll( + vector_store_id=vs.id, + files=streams, + ) + + if batch.status != "completed": + logger.error("Vector store file batch ended with status %r", batch.status) + raise RuntimeError(f"Vector store indexing did not complete: {batch.status}") + + logger.debug("Vector store %s ready (batch status=%s)", vs.id, batch.status) + return vs.id + + +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True, +) +def create_short_description_assistant( + client: OpenAI, + vector_store_id: str, + instructions: str, + model: str, +) -> str: + """Create an assistant with file_search over the given vector store. 
Returns assistant_id.""" + assistant = client.beta.assistants.create( + name="ROS documentation short description", + instructions=instructions, + model=model, + tools=[{"type": "file_search"}], + tool_resources={"file_search": {"vector_store_ids": [vector_store_id]}}, + ) + logger.debug("Created assistant %s", assistant.id) + return assistant.id + + +def _extract_assistant_message_text(client: OpenAI, thread_id: str) -> str: + messages = client.beta.threads.messages.list(thread_id=thread_id, order="desc", limit=10) + for msg in messages.data: + if msg.role != "assistant": + continue + parts: list[str] = [] + for block in msg.content: + if block.type == "text": + parts.append(block.text.value) + return "".join(parts).strip() + return "" + + +def _retrieve_run_with_backoff( + client: OpenAI, + thread_id: str, + run_id: str, + deadline: float, +) -> object: + """Retrieve run status; on rate limit / connection errors sleep and retry until deadline.""" + attempt = 0 + while True: + if time.monotonic() >= deadline: + raise TimeoutError(f"Assistant run {run_id}: deadline before status retrieve") + try: + return client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id) + except (RateLimitError, APIConnectionError) as exc: + attempt += 1 + wait = min(ASSISTANT_POLL_INTERVAL * (2**attempt), 30.0) + logger.warning("Run retrieve retry %s after %s: sleeping %.1fs", attempt, exc, wait) + time.sleep(wait) + + +def _poll_run_until_terminal( + client: OpenAI, + thread_id: str, + run_id: str, + deadline: float, +) -> object: + """Poll run status until terminal state or deadline. 
Returns final run object.""" + run = _retrieve_run_with_backoff(client, thread_id, run_id, deadline) + while run.status in ("queued", "in_progress", "cancelling"): + if time.monotonic() >= deadline: + raise TimeoutError( + f"Assistant run {run_id} still {run.status!r} after polling deadline", + ) + time.sleep(ASSISTANT_POLL_INTERVAL) + run = _retrieve_run_with_backoff(client, thread_id, run_id, deadline) + return run + + +def _run_assistant_once(client: OpenAI, assistant_id: str, content: str, run_timeout: int) -> str: + """Single attempt: new thread, user message, run, poll until terminal, read assistant text.""" + if len(content) > MAX_CONTENT_LENGTH: + logger.warning( + "Article RST truncated to %s characters for assistant message.", + MAX_CONTENT_LENGTH, + ) + content = content[:MAX_CONTENT_LENGTH] + + user_text = ( + "Article RST (generate only the short description prose per your instructions; " + "use file_search on the indexed examples for tone and structure):\n\n" + f"{content}" + ) + + thread = client.beta.threads.create() + client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=user_text, + ) + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + ) + deadline = time.monotonic() + float(run_timeout) + run = _poll_run_until_terminal(client, thread.id, run.id, deadline) + + if run.status == "completed": + text = _extract_assistant_message_text(client, thread.id) + logger.debug("Assistant run completed; response length %s", len(text)) + return text + + if run.status == "failed": + err = getattr(run, "last_error", None) + logger.error("Assistant run failed: %s", err) + return "" + + if run.status == "expired": + logger.error("Assistant run expired") + return "" + + if run.status == "cancelled": + logger.warning("Assistant run cancelled") + return "" + + if run.status == "requires_action": + logger.error("Assistant run requires_action (unexpected for file_search-only flow)") + return "" + + 
logger.error("Assistant run ended with unexpected status %r", run.status) + return "" + + +def analyze_with_file_search( + client: OpenAI, + assistant_id: str, + content: str, + timeout: int = ASSISTANT_RUN_TIMEOUT, +) -> str: + """ + Run the short-description assistant on one article's RST. + + Uses ThreadPoolExecutor so ``timeout`` bounds wall-clock time including polling + (same value as the internal poll deadline for the run). + """ + + def _bounded_attempt() -> str: + return _run_assistant_once(client, assistant_id, content, timeout) + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(_bounded_attempt) + try: + return future.result(timeout=timeout) + except TimeoutError: + logger.error("analyze_with_file_search timed out after %s seconds", timeout) + raise + + +def cleanup_short_description_resources(client: OpenAI, resources: RetrievalResources | None) -> None: + """Best-effort deletion of assistant and vector store (and hosted files in the store).""" + if resources is None: + return + + try: + client.beta.assistants.delete(resources.assistant_id) + logger.debug("Deleted assistant %s", resources.assistant_id) + except Exception as exc: # noqa: BLE001 — cleanup must not raise + logger.warning("Could not delete assistant %s: %s", resources.assistant_id, exc) + + try: + listed = client.vector_stores.files.list(vector_store_id=resources.vector_store_id) + file_entries = getattr(listed, "data", None) + if file_entries is None: + file_entries = list(listed) + for vs_file in file_entries: + fid = getattr(vs_file, "id", None) + if not fid: + continue + try: + client.files.delete(fid) + logger.debug("Deleted file %s from vector store", fid) + except Exception as exc: # noqa: BLE001 + logger.warning("Could not delete file %s: %s", fid, exc) + except Exception as exc: # noqa: BLE001 + logger.warning("Could not list vector store files for %s: %s", resources.vector_store_id, exc) + + try: + 
client.vector_stores.delete(resources.vector_store_id) + logger.debug("Deleted vector store %s", resources.vector_store_id) + except Exception as exc: # noqa: BLE001 + logger.warning("Could not delete vector store %s: %s", resources.vector_store_id, exc) diff --git a/scripts/rst_utils.py b/scripts/rst_utils.py index 6599f7a97d9..abb2e6eb0a4 100644 --- a/scripts/rst_utils.py +++ b/scripts/rst_utils.py @@ -1,5 +1,6 @@ """ -Utilities for editing reStructuredText source, in particular ``.. meta::`` directives. +Utilities for editing reStructuredText source, in particular ``.. meta::`` and +``.. short-description::`` directives. """ import logging @@ -124,3 +125,154 @@ def inject_metadata_to_content(content: str, metadata: dict[str, str]) -> tuple[ return new_content, True + +def _find_short_description_block(content: str) -> tuple[int, int, int, str, str]: + """ + Locate the first ``.. short-description::`` directive in RST source. + + Uses the same block-boundary rules as ``_find_meta_block``: the body is + contiguous indented lines until a blank line or a line starting at column 0. + + Returns: + Tuple of ``(start, marker_end, block_end, inner, indent)``. + If no directive is found, ``start``, ``marker_end``, and ``block_end`` + are ``-1``, ``inner`` is ``''``, and ``indent`` defaults to three spaces. 
+ """ + match = re.search(r"^\.\.\s+short-description::\s*\n", content, re.MULTILINE) + if not match: + return -1, -1, -1, "", " " + + start = match.start() + marker_end = match.end() + indent = " " + inner_parts: list[str] = [] + consumed = 0 + remainder = content[marker_end:] + + for line in remainder.splitlines(keepends=True): + if line.strip() == "": + break + if not line.startswith((" ", "\t")): + break + if not inner_parts: + ws_len = len(line) - len(line.lstrip(" \t")) + indent = line[:ws_len] + inner_parts.append(line) + consumed += len(line) + + block_end = marker_end + consumed + inner = "".join(inner_parts) + if inner and not inner.endswith("\n"): + inner += "\n" + return start, marker_end, block_end, inner, indent + + +def _short_description_inner_has_content(inner: str) -> bool: + """True when the directive body contains non-whitespace text.""" + for line in inner.splitlines(): + if line.strip(): + return True + return False + + +def has_short_description_content(content: str) -> bool: + """ + Return whether the document already has a non-empty ``.. short-description::`` body. + """ + _s, _m, _b, inner, _i = _find_short_description_block(content) + return _short_description_inner_has_content(inner) + + +def get_short_description_body(content: str) -> str | None: + """ + Return the normalised inner body text of the first ``.. short-description::`` block. + + Returns ``None`` if the directive is missing or the body is empty. 
+ """ + _s, _m, _b, inner, _i = _find_short_description_block(content) + if not _short_description_inner_has_content(inner): + return None + paragraphs: list[str] = [] + current: list[str] = [] + for line in inner.splitlines(): + stripped = line.strip() + if not stripped: + if current: + paragraphs.append(" ".join(current)) + current = [] + continue + current.append(stripped) + if current: + paragraphs.append(" ".join(current)) + return "\n\n".join(paragraphs) if paragraphs else None + + +def _format_short_description_inner(text: str, indent: str) -> str: + """Turn model output into RST directive body lines (indented paragraphs).""" + chunks = [p.strip() for p in text.split("\n\n") if p.strip()] + lines_out: list[str] = [] + for i, para in enumerate(chunks): + for line in para.split("\n"): + s = line.strip() + if s: + lines_out.append(f"{indent}{s}\n") + if i < len(chunks) - 1: + lines_out.append(f"{indent}\n") + return "".join(lines_out) + + +def _find_insertion_point_after_title(content: str) -> int: + """ + Return the index in ``content`` immediately after the first document title block. + + A title block is a non-blank text line followed by a line of ``=``, ``-``, or ``~`` + underline characters (classic reStructuredText transition marker). + If no title is found, returns ``0``. + """ + lines = content.splitlines(keepends=True) + i = 0 + while i + 1 < len(lines): + title_line = lines[i] + underline_line = lines[i + 1] + title_stripped = title_line.strip() + ul_match = re.match(r"^([=\-~]+)\s*$", underline_line.rstrip("\n")) + if title_stripped and ul_match is not None: + ul = ul_match.group(1) + if len(ul) >= len(title_stripped): + pos = 0 + for j in range(i + 2): + pos += len(lines[j]) + return pos + i += 1 + return 0 + + +def inject_short_description_to_content(content: str, text: str) -> tuple[str, bool]: + """ + Insert or fill the first ``.. short-description::`` directive with the given prose. 
+ + If the directive exists and already has body text, logs a warning and returns + the original content unchanged. If the directive exists with an empty body, + fills the body. If the directive is missing, inserts a new block after the + first detected document title (or at the start of the file if none). + + Returns: + Updated source and whether any change was made. + """ + start, marker_end, block_end, inner, indent = _find_short_description_block(content) + new_inner = _format_short_description_inner(text, indent) + + if start >= 0: + if _short_description_inner_has_content(inner): + logger.warning( + "Existing .. short-description:: body has content; skipping replacement", + ) + return content, False + new_content = content[:marker_end] + new_inner + content[block_end:] + return new_content, True + + insert_at = _find_insertion_point_after_title(content) + block = f"\n.. short-description::\n{new_inner}\n" + new_content = content[:insert_at] + block + content[insert_at:] + return new_content, True + diff --git a/scripts/test/test_enhance_topics.py b/scripts/test/test_enhance_topics.py index 4b8bdbd64aa..bb3703701c4 100644 --- a/scripts/test/test_enhance_topics.py +++ b/scripts/test/test_enhance_topics.py @@ -13,7 +13,9 @@ analyze_files, update_meta_files, enhance_metadata, - MAX_CONTENT_LENGTH + enhance_short_descriptions, + _metadata_enhancement_task, + MAX_CONTENT_LENGTH, ) from enhance_data import EnhanceData @@ -99,10 +101,10 @@ def test_analyze_files_basic_flow( ) files = ["file1.rst"] - prompts = {"description": "desc prompt"} - + tasks = [_metadata_enhancement_task("description", "desc prompt")] + with patch("builtins.open", mock_open(read_data="File content")): - analyze_files(files, mock_client, prompts) + analyze_files(files, mock_client, tasks) mock_analyze.assert_called_once() mock_validate.assert_called_once() @@ -111,14 +113,14 @@ def test_analyze_files_basic_flow( @patch('enhance_topics.get_meta_names_from_content') def 
test_analyze_files_skips_existing_meta(mock_get_meta, mock_client): """Test that files with existing metadata are skipped.""" - mock_get_meta.return_value = ["description"] # Description already exists - + mock_get_meta.return_value = {"description"} + files = ["file1.rst"] - prompts = {"description": "desc prompt"} - + tasks = [_metadata_enhancement_task("description", "desc prompt")] + with patch("builtins.open", mock_open(read_data="File content")): with patch('enhance_topics.analyze_content') as mock_analyze: - analyze_files(files, mock_client, prompts) + analyze_files(files, mock_client, tasks) mock_analyze.assert_not_called() # --- Tests for update_meta_files --- @@ -189,3 +191,39 @@ def test_enhance_metadata_orchestration(mock_update, mock_analyze, mock_get_clie mock_get_client.assert_called_once() mock_analyze.assert_called_once() mock_update.assert_called_once() + + +@patch("enhance_topics.cleanup_short_description_resources") +@patch("enhance_topics.update_enhanced_files") +@patch("enhance_topics.analyze_files") +@patch("enhance_topics.create_short_description_assistant") +@patch("enhance_topics.ensure_example_vector_store") +@patch("enhance_topics.get_openai_client") +def test_enhance_short_descriptions_orchestration( + mock_get_client, + mock_ensure_vs, + mock_create_asst, + mock_analyze, + mock_update, + mock_cleanup, +): + """Short-description path creates VS + assistant, analyses, updates, and cleans up.""" + mock_client = MagicMock() + mock_get_client.return_value = mock_client + mock_ensure_vs.return_value = "vs_1" + mock_create_asst.return_value = "asst_1" + empty = EnhanceData(results={}, updated_files=set()) + mock_analyze.return_value = empty + mock_update.return_value = empty + + enhance_short_descriptions(["article.rst"]) + + mock_ensure_vs.assert_called_once() + mock_create_asst.assert_called_once() + mock_analyze.assert_called_once() + mock_update.assert_called_once() + mock_cleanup.assert_called_once() + res = mock_cleanup.call_args[0][1] 
+ assert res is not None + assert res.assistant_id == "asst_1" + assert res.vector_store_id == "vs_1" diff --git a/scripts/test/test_rst_utils_short_description.py b/scripts/test/test_rst_utils_short_description.py new file mode 100644 index 00000000000..ec4c528edce --- /dev/null +++ b/scripts/test/test_rst_utils_short_description.py @@ -0,0 +1,86 @@ +"""Tests for ``.. short-description::`` helpers in ``rst_utils``.""" + +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from rst_utils import ( + get_short_description_body, + has_short_description_content, + inject_short_description_to_content, +) + + +def test_has_short_description_content_false_when_missing() -> None: + src = "Title\n=====\n\nBody.\n" + assert has_short_description_content(src) is False + + +def test_has_short_description_content_true_when_populated() -> None: + src = """Title +===== + +.. short-description:: + First sentence here. + Second sentence here. + +Next section +------------ +""" + assert has_short_description_content(src) is True + + +def test_get_short_description_body_normalises() -> None: + src = """.. short-description:: + Line one continued + same paragraph. + New paragraph line. + +Body. +""" + body = get_short_description_body(src) + assert body is not None + assert "Line one" in body + + +def test_inject_fills_empty_directive() -> None: + src = """Title +===== + +.. short-description:: + +**Area:** x + +""" + new_src, changed = inject_short_description_to_content(src, "One paragraph.\n\nTwo paragraph.") + assert changed is True + assert "One paragraph." in new_src + assert ".. short-description::" in new_src + assert "**Area:**" in new_src + + +def test_inject_skips_when_body_present() -> None: + src = """Title +===== + +.. short-description:: + Original text. 
+ +""" + new_src, changed = inject_short_description_to_content(src, "New prose.") + assert changed is False + assert new_src == src + + +def test_inject_inserts_after_title_when_missing() -> None: + src = """My Doc +====== + +Some intro text. +""" + new_src, changed = inject_short_description_to_content(src, "Intro summary.") + assert changed is True + assert ".. short-description::" in new_src + assert "Intro summary." in new_src + assert new_src.index("======") < new_src.index(".. short-description::") From 795f1a00a95d987e4a1bf522dc2a62038ba2fa83 Mon Sep 17 00:00:00 2001 From: 3di-techx Date: Tue, 19 May 2026 07:40:30 +0000 Subject: [PATCH 27/59] update RT Corporation url to fix build --- source/The-ROS2-Project/Adopters/adopters.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/The-ROS2-Project/Adopters/adopters.yaml b/source/The-ROS2-Project/Adopters/adopters.yaml index 30fd1772086..a064c11f8f1 100644 --- a/source/The-ROS2-Project/Adopters/adopters.yaml +++ b/source/The-ROS2-Project/Adopters/adopters.yaml @@ -95,7 +95,7 @@ adopters: description: "Development platform for autonomous mobile robots across logistics, construction, and retail." 
- organization: "RT Corporation" - organization_url: "https://rt-net.jp" + organization_url: "https://en.rt-net.jp" project: "CRANE-X7" project_url: "https://github.com/rt-net/crane_x7_ros" domain: From e504a359a924e9851406e59141964445cc8fca6f Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Wed, 20 May 2026 02:41:57 +0200 Subject: [PATCH 28/59] Fix proxy --- conf.py | 13 +- source/_static/related_packages.js | 6 + tools/rosdistro_cache_proxy.py | 23 +++- tools/serve_docs_with_proxy.py | 199 +++++++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 6 deletions(-) create mode 100644 tools/serve_docs_with_proxy.py diff --git a/conf.py b/conf.py index 1be0735f286..cbff75e65b4 100644 --- a/conf.py +++ b/conf.py @@ -190,11 +190,14 @@ ] # Optional runtime proxy endpoint for freshest rosdistro cache data. -# Use same-origin endpoint to avoid browser CORS limits, for example: -# ros_related_packages_proxy_url = '/api/rosdistro-cache/{distro}-cache.yaml.gz' -# For local testing you can export environment variable: -# ROS_RELATED_PACKAGES_PROXY_URL=http://127.0.0.1:9000/api/rosdistro-cache/{distro}-cache.yaml.gz -# Leave empty to skip proxy and use bundled _static fallback. +# Use a same-origin path (recommended), for example: +# /api/rosdistro-cache/{distro}-cache.yaml.gz +# Local testing: build with that path, then serve with: +# python tools/serve_docs_with_proxy.py +# (serves build/html and /api/... on http://127.0.0.1:8000). +# Separate-port proxy (http://127.0.0.1:9001/...) works only if that process is +# healthy; otherwise the bundled _static fallback is used (see browser console). +# Leave empty to skip proxy and use bundled _static fallback only. 
ros_related_packages_proxy_url = os.environ.get('ROS_RELATED_PACKAGES_PROXY_URL', '') # -- Options for HTMLHelp output ------------------------------------------ diff --git a/source/_static/related_packages.js b/source/_static/related_packages.js index 26218254597..f28638c0eae 100644 --- a/source/_static/related_packages.js +++ b/source/_static/related_packages.js @@ -175,12 +175,18 @@ if (!xmls || typeof xmls !== 'object') { throw new Error('release_package_xmls missing in rosdistro cache'); } + if (typeof console !== 'undefined' && console.info) { + console.info('related_packages: loaded rosdistro cache from', url); + } return /** @type {Record} */ (xmls); }) .catch(function (err) { if (timer) { clearTimeout(timer); } + if (typeof console !== 'undefined' && console.warn) { + console.warn('related_packages: failed', url, err); + } /* Try next URL (e.g. bundled 404 then HTTPS remote — remote may hit CORS). */ return next(err); }); diff --git a/tools/rosdistro_cache_proxy.py b/tools/rosdistro_cache_proxy.py index 6074587c677..f0334156de3 100644 --- a/tools/rosdistro_cache_proxy.py +++ b/tools/rosdistro_cache_proxy.py @@ -20,9 +20,11 @@ import argparse import gzip import re +import socket import traceback import time import urllib.error +import urllib.parse import urllib.request from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from typing import Dict, Tuple @@ -73,7 +75,8 @@ def do_OPTIONS(self) -> None: # noqa: N802 (BaseHTTPRequestHandler interface) def do_GET(self) -> None: # noqa: N802 (BaseHTTPRequestHandler interface) try: - match = PATH_RE.match(self.path) + path = urllib.parse.urlparse(self.path).path + match = PATH_RE.match(path) if not match: self.send_error(404, 'Unknown path') return @@ -135,6 +138,22 @@ def _fetch_upstream(self, distro: str) -> bytes: return payload +def _assert_port_free(host: str, port: int) -> None: + """Fail fast when another local server already owns the port (common on Windows).""" + probe = 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + if hasattr(socket, 'SO_EXCLUSIVEADDRUSE'): + probe.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + probe.bind((host, port)) + except OSError as exc: + raise SystemExit( + f'Port {port} on {host} is already in use ({exc}).\n' + 'Stop leftover python/http.server processes or pass --port with a free value.' + ) from exc + finally: + probe.close() + + def main() -> None: parser = argparse.ArgumentParser(description='Local proxy for rosdistro cache gz files.') parser.add_argument('--host', default='127.0.0.1', help='Listen host (default: 127.0.0.1)') @@ -153,6 +172,8 @@ def main() -> None: ) args = parser.parse_args() + _assert_port_free(args.host, args.port) + cache = CacheStore(ttl_seconds=args.cache_ttl) class ConfiguredProxyHandler(ProxyHandler): diff --git a/tools/serve_docs_with_proxy.py b/tools/serve_docs_with_proxy.py new file mode 100644 index 00000000000..922d09fba81 --- /dev/null +++ b/tools/serve_docs_with_proxy.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +"""Serve built Sphinx HTML and rosdistro cache API on one origin (local testing). + +Use this instead of ``python -m http.server`` when testing proxy-first related +packages. The browser can fetch ``/api/rosdistro-cache/-cache.yaml.gz`` +same-origin (no cross-port CORS quirks). + +Build docs with:: + + set ROS_RELATED_PACKAGES_PROXY_URL=/api/rosdistro-cache/{distro}-cache.yaml.gz + make html + +Then run:: + + python tools/serve_docs_with_proxy.py + +Open http://127.0.0.1:8000/... and check DevTools Network for a 200 on +``/api/rosdistro-cache/rolling-cache.yaml.gz``. 
+""" + +from __future__ import annotations + +import argparse +import gzip +import re +import socket +import urllib.error +import urllib.parse +import urllib.request +from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path + +UPSTREAM_TEMPLATE = 'https://repo.ros2.org/rosdistro_cache/{distro}-cache.yaml.gz' +DISTRO_RE = re.compile(r'^[a-z0-9][a-z0-9_-]*$', re.IGNORECASE) +PATH_RE = re.compile(r'^/api/rosdistro-cache/([a-z0-9_-]+)-cache\.yaml\.gz$', re.IGNORECASE) + + +class CacheStore: + """Simple in-memory TTL cache for gzip bytes by distro.""" + + def __init__(self, ttl_seconds: int) -> None: + import time + + self._time = time + self._ttl = max(0, ttl_seconds) + self._data: dict[str, tuple[float, bytes]] = {} + + def get(self, distro: str) -> bytes | None: + record = self._data.get(distro) + if record is None: + return None + expires_at, payload = record + if self._time.time() >= expires_at: + self._data.pop(distro, None) + return None + return payload + + def put(self, distro: str, payload: bytes) -> None: + self._data[distro] = (self._time.time() + self._ttl, payload) + + +def _fetch_upstream(distro: str, timeout_seconds: int) -> bytes: + url = UPSTREAM_TEMPLATE.format(distro=distro) + request = urllib.request.Request(url, headers={'User-Agent': 'ros2-docs-cache-proxy/1.0'}) + with urllib.request.urlopen(request, timeout=timeout_seconds) as response: + payload = response.read() + try: + gzip.decompress(payload) + except OSError as exc: + raise ValueError('response is not valid gzip') from exc + return payload + + +class DocsWithProxyHandler(SimpleHTTPRequestHandler): + """Static files from *directory*; ``/api/rosdistro-cache/...`` proxied upstream.""" + + cache: CacheStore + upstream_timeout: int + + def _send_cors_headers(self) -> None: + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + self.send_header('Access-Control-Allow-Headers', 
'Content-Type') + + def do_OPTIONS(self) -> None: # noqa: N802 + path = urllib.parse.urlparse(self.path).path + if PATH_RE.match(path): + self.send_response(204) + self._send_cors_headers() + self.end_headers() + return + super().do_OPTIONS() + + def do_GET(self) -> None: # noqa: N802 + path = urllib.parse.urlparse(self.path).path + match = PATH_RE.match(path) + if not match: + return super().do_GET() + + distro = match.group(1).lower() + if not DISTRO_RE.match(distro): + self.send_error(400, 'Invalid distro name') + return + + payload = self.cache.get(distro) + if payload is None: + try: + payload = _fetch_upstream(distro, self.upstream_timeout) + except urllib.error.HTTPError as exc: + self.send_error(exc.code, f'Upstream HTTP error: {exc.reason}') + return + except urllib.error.URLError as exc: + self.send_error(502, f'Upstream URL error: {exc.reason}') + return + except TimeoutError: + self.send_error(504, 'Upstream timeout') + return + except ValueError as exc: + self.send_error(502, f'Bad upstream payload: {exc}') + return + self.cache.put(distro, payload) + + self.send_response(200) + self._send_cors_headers() + self.send_header('Content-Type', 'application/gzip') + self.send_header('Cache-Control', 'public, max-age=300') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def log_message(self, fmt: str, *args) -> None: + super().log_message('[docs+proxy] ' + fmt, *args) + + +def _assert_port_free(host: str, port: int) -> None: + """Fail fast when another local server already owns the port (common on Windows).""" + probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + if hasattr(socket, 'SO_EXCLUSIVEADDRUSE'): + probe.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + probe.bind((host, port)) + except OSError as exc: + raise SystemExit( + f'Port {port} on {host} is already in use ({exc}).\n' + 'Stop leftover python/http.server processes or pass --port with a free value.' 
+ ) from exc + finally: + probe.close() + + +def main() -> None: + repo = Path(__file__).resolve().parents[1] + default_html = repo / 'build' / 'html' + + parser = argparse.ArgumentParser( + description='Serve build/html and /api/rosdistro-cache/ on one port.', + ) + parser.add_argument('--host', default='127.0.0.1', help='Listen host (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=8000, help='Listen port (default: 8000)') + parser.add_argument( + '--directory', + type=Path, + default=default_html, + help=f'HTML root (default: {default_html})', + ) + parser.add_argument('--cache-ttl', type=int, default=300, help='Proxy cache TTL seconds') + parser.add_argument('--upstream-timeout', type=int, default=20, help='Upstream timeout seconds') + args = parser.parse_args() + + html_dir = args.directory.resolve() + if not html_dir.is_dir(): + raise SystemExit(f'HTML directory not found: {html_dir}\nRun make html first.') + + _assert_port_free(args.host, args.port) + + cache_store = CacheStore(ttl_seconds=args.cache_ttl) + html_dir_str = str(html_dir) + + class ConfiguredHandler(DocsWithProxyHandler): + """Handler with shared cache and HTML root.""" + + def __init__(self, request, client_address, server): + super().__init__(request, client_address, server, directory=html_dir_str) + + ConfiguredHandler.cache = cache_store + ConfiguredHandler.upstream_timeout = args.upstream_timeout + + server = ThreadingHTTPServer((args.host, args.port), ConfiguredHandler) + print(f'Serving {html_dir}') + print(f'Open http://{args.host}:{args.port}/') + print('API: /api/rosdistro-cache/-cache.yaml.gz') + print( + 'Build with: ROS_RELATED_PACKAGES_PROXY_URL=/api/rosdistro-cache/{distro}-cache.yaml.gz' + ) + server.serve_forever() + + +if __name__ == '__main__': + main() From 8f1c64c5c5dc3c947ccc97a1e1870634ed086b1d Mon Sep 17 00:00:00 2001 From: Kacper Bojakowski Date: Wed, 20 May 2026 10:52:53 +0200 Subject: [PATCH 29/59] Update related_packages.js --- 
source/_static/related_packages.js | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/source/_static/related_packages.js b/source/_static/related_packages.js index f28638c0eae..8d7880e8f78 100644 --- a/source/_static/related_packages.js +++ b/source/_static/related_packages.js @@ -225,6 +225,37 @@ return false; } + /** + * @param {string} xmlStr + * @returns {string} + */ + function extractDescription(xmlStr) { + if (typeof DOMParser !== 'undefined') { + try { + var doc = new DOMParser().parseFromString(xmlStr, 'application/xml'); + var parseErr = doc.getElementsByTagName('parsererror'); + if (!parseErr.length) { + var nodes = doc.getElementsByTagName('description'); + if (nodes.length && nodes[0].textContent) { + return nodes[0].textContent.replace(/\s+/g, ' ').trim(); + } + } + } catch (err) { + /* Fall through to regex extraction. */ + } + } + + var match = /]*>([\s\S]*?)<\/description>/i.exec(xmlStr); + if (!match) { + return ''; + } + return match[1] + .replace(//g, '$1') + .replace(/<[^>]*>/g, ' ') + .replace(/\s+/g, ' ') + .trim(); + } + /** * @param {string} distro * @param {string} pkg @@ -285,10 +316,12 @@ var pkg = picked[j]; var li = document.createElement('li'); var a = document.createElement('a'); + var description = extractDescription(xmls[pkg] || ''); a.href = docsPackageUrl(distro, pkg); a.textContent = pkg; a.rel = 'noopener noreferrer'; li.appendChild(a); + li.appendChild(document.createTextNode(': ' + description)); ul.appendChild(li); } From 5886634ad55f6672d7935ad37f146d18427db4b7 Mon Sep 17 00:00:00 2001 From: 3di-techx Date: Wed, 20 May 2026 10:36:48 +0000 Subject: [PATCH 30/59] add page find --- .github/workflows/test.yml | 18 + Makefile | 16 +- README.md | 35 ++ conf.py | 26 +- plugins/meta_util.py | 70 ++++ plugins/pagefind_meta.py | 261 ++++++++++++ plugins/showmeta.py | 120 ++++++ requirements.txt | 4 + source/About-ROS.rst | 13 +- source/_static/pagefind-docsearch.css | 219 ++++++++++ 
source/_templates/layout.html | 9 + source/_templates/search.html | 37 ++ source/_templates/searchbox.html | 573 ++++++++++++++++++++++++++ 13 files changed, 1398 insertions(+), 3 deletions(-) create mode 100644 plugins/meta_util.py create mode 100644 plugins/pagefind_meta.py create mode 100644 plugins/showmeta.py create mode 100644 source/_static/pagefind-docsearch.css create mode 100644 source/_templates/layout.html create mode 100644 source/_templates/search.html create mode 100644 source/_templates/searchbox.html diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..8b40cfe0cca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -78,6 +78,15 @@ jobs: - name: Build the docs run: make html + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind + - name: Upload document artifacts uses: actions/upload-artifact@v4 id: artifact-upload-step @@ -147,3 +156,12 @@ jobs: - name: Build the docs run: make multiversion + + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind diff --git a/Makefile b/Makefile index f411c155a1e..8f76cc64c69 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,20 @@ multiversion: Makefile @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py +# Pagefind static search index (requires Node.js / npx). Run after html or multiversion. +PAGEFIND_VERSION ?= 1.5.2 +pagefind: + npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" + + +# Convenience: Sphinx build + Pagefind index (does not replace plain html / multiversion). +html-search: + $(MAKE) html + $(MAKE) pagefind + +multiversion-search: multiversion + $(MAKE) pagefind + %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) @@ -64,4 +78,4 @@ linkcheck: @echo @echo "Check finished. Report is in $(LINKCHECKDIR)." 
-.PHONY: help Makefile multiversion test test-tools linkcheck lint spellcheck check-dictionaries sort-dictionaries +.PHONY: help Makefile multiversion pagefind test test-tools linkcheck lint spellcheck check-dictionaries sort-dictionaries diff --git a/README.md b/README.md index 7c613ccbf1f..11f8010e39c 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,41 @@ To test building the multisite version deployed to the website use: **NB:** This will ignore local workspace changes and build from the branches. +### Pagefind search index + +After `make html` or `make multiversion`, run [Pagefind](https://pagefind.app/) so the built HTML under `build/html` is indexed and `build/html/pagefind/` is written (search bundle and Component UI assets). From the repo root: + +`make pagefind` + +Or use convenience targets that run Sphinx and Pagefind in one step: + +- `make html-search` — `make html` then `make pagefind` +- `make multiversion-search` — `make multiversion` then `make pagefind` + +Plain `make html` and `make multiversion` do **not** run Pagefind (Node.js is only required when you index search). + +This requires **Node.js** (for `npx`). Pin the CLI with `PAGEFIND_VERSION` in the Makefile if needed. + +To preview search locally, serve the site over HTTP (Pagefind may not load from `file://`), for example from the repo root: + +`python -m http.server 8000 --directory build/html` + +Then open `http://localhost:8000/` in a browser. + +#### Search results page verification + +After `make html` and `make pagefind`, serve `build/html` over HTTP and check: + +1. **Direct URL** — Open `http://localhost:8000/search.html?q=tutorial` (or the same path under a distro prefix for multiversion builds). The input should show the query and results should load (not stay empty or skeleton-only). +2. **Modal redirect** — From a nested page (e.g. a tutorial), open the sidebar search modal (Ctrl/Cmd+K), type a term, press Enter. 
 You should land on the search page with `?q=` set and matching results visible. +3. **Empty query** — Open `search.html` with no `q` parameter. The page should load without errors; no search is run until you type in the input. +4. **Result metadata** — Search for `Ubuntu deb` and open a result card. Metadata labels (e.g. Area, Content Type, Experience) should match that page’s `<head>` `<meta>` tags from its `.. meta::` block (e.g. `area: installation` on the Ubuntu deb install page), not URL-path guesses. + +In DevTools Network, confirm `pagefind/` bundle requests return 200 (not 404). + +The production [Jenkins doc job](https://build.ros.org/job/doc_ros2doc) should run the same `pagefind` step on `build/html` after Sphinx so deployed pages include the search bundle. + + ### Note for Windows (WSL) Users When building the documentation on windows using WSL, it is recommended to clone and work with this repository inside the Linux filesystem (for example, under `/home/<user>/`) rather than under `/mnt/c`. 
diff --git a/conf.py b/conf.py index 2a9def973fd..f172f9d4674 100644 --- a/conf.py +++ b/conf.py @@ -89,6 +89,29 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'pagefind_meta', + 'showmeta', +] + +pagefind_merge_enabled = False +pagefind_merge_package_pkgs = [] +pagefind_merge_index_base = 'https://docs.ros.org' +pagefind_merge_index_overrides = {} +pagefind_merge_filter_per_pkg = None +pagefind_merge_index_weight_per_pkg = None + +pagefind_filter_labels = { + 'contentType': 'Content type', +} + +pagefind_result_meta_order = [ + 'product', + 'distro', + 'area', + 'capability', + 'contentType', + 'experience', + ] # Intersphinx mapping @@ -168,6 +191,7 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', + 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -181,7 +205,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css'] +html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py new file mode 100644 index 00000000000..32aef4d2f3b --- /dev/null +++ b/plugins/meta_util.py @@ -0,0 +1,70 @@ +# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind +""" +Collect every ``.. meta::`` field from the doctree, sanitize keys, and expand +``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). + +Sphinx / the HTML theme may also emit plain ```` tags for the same fields. +The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may +split comma-separated values into multiple tags for faceted search. +""" + +from __future__ import annotations + +import re +from typing import Dict, List, Optional + +from docutils import nodes + +# HTML ```` names should be conservative; allow common patterns. 
+_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') + + +def sanitize_meta_key(raw: str) -> Optional[str]: + s = str(raw).strip() + if not s or not _META_NAME_RE.match(s): + return None + return s + + +def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: + """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" + if doctree is None: + return {} + + out: Dict[str, str] = {} + for meta in doctree.findall(nodes.meta): + if meta.get('http-equiv'): + continue + content = meta.get('content') + if not content: + continue + key: Optional[str] = None + name = meta.get('name') + if name: + key = sanitize_meta_key(str(name)) + else: + prop = meta.get('property') + if prop: + key = sanitize_meta_key(str(prop)) + if not key: + continue + out[key] = str(content).strip() + return out + + +def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: + """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" + result = text + for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): + result = result.replace(f'{{{key}}}', value) + return result + + +def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: + """Apply ``expand_meta_macros`` to every meta value.""" + return {k: expand_meta_macros(v, macros) for k, v in meta.items()} + + +def split_meta_values(value: str) -> List[str]: + """Return comma-separated metadata values as individual Pagefind values.""" + return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py new file mode 100644 index 00000000000..e5fc1188949 --- /dev/null +++ b/plugins/pagefind_meta.py @@ -0,0 +1,261 @@ +# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation +""" +Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` +from every ``.. meta::`` field on the page (passthrough, no whitelist). 
+ +Sphinx / the HTML theme typically also emits plain ```` tags for the same +``.. meta::`` fields. We intentionally emit an additional block with +``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting +works; crawlers may see duplicate name/content pairs for non-split fields. +""" + +from __future__ import annotations + +import html +import re +from pathlib import PurePosixPath +from typing import Any, Dict, List, Optional, Tuple + +from docutils import nodes + +from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values + + +def _macros_flat(app) -> Dict[str, str]: + macros = getattr(app.config, 'macros', {}) or {} + return {str(k): str(v) for k, v in macros.items()} + + +def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: + raw = all_doctree_meta(doctree) + return expand_all_meta_values(raw, _macros_flat(app)) + + +def _default_filter_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: + labels = getattr(app.config, 'pagefind_filter_labels', None) or {} + out: List[List[str]] = [] + for k in sorted_keys: + if isinstance(labels, dict) and labels.get(k): + lbl = str(labels[k]) + else: + lbl = _default_filter_label(k) + out.append([k, lbl]) + return out + + +def _pagefind_data_meta_attr(values: Dict[str, str]) -> str: + """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" + parts: List[str] = [] + for key in sorted(values.keys()): + for value in split_meta_values(values.get(key, '')): + parts.append(f'{key}:{value}') + inner = ', '.join(parts) + return html.escape(inner, quote=True) + + +def _seo_and_filter_metas(values: Dict[str, str]) -> str: + """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" + lines: List[str] = [] + for key in 
sorted(values.keys()): + esc_name = html.escape(key, quote=True) + for value in split_meta_values(values.get(key, '')): + esc_val = html.escape(value, quote=True) + lines.append( + f'' + ) + return '\n '.join(lines) + + +def _ensure_meta_keys_store(env) -> Dict[str, Any]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + env.pagefind_meta_keys_by_doc = {} + return env.pagefind_meta_keys_by_doc + + +def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + return + raw = all_doctree_meta(doctree) + store = _ensure_meta_keys_store(app.env) + store[docname] = set(raw.keys()) + + +def _purge_meta_keys(app, env, docname: str) -> None: + if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: + del env.pagefind_meta_keys_by_doc[docname] + + +def _merge_meta_keys(app, env, docnames, other) -> None: + """Merge per-document meta key sets from a parallel read worker environment.""" + if not hasattr(other, 'pagefind_meta_keys_by_doc'): + return + store = _ensure_meta_keys_store(env) + for docname, keys in other.pagefind_meta_keys_by_doc.items(): + store[docname] = set(keys) + + +def _union_meta_keys(env) -> List[str]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + return [] + union: set[str] = set() + for keys in env.pagefind_meta_keys_by_doc.values(): + union |= set(keys) + return sorted(union) + + +def _pagefind_bundle_prefix(app, pagename: str) -> str: + """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. + + Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. + ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. + + For ``sphinx-multiversion``, each distro is built with ``pagename`` relative to that + distro tree (e.g. ``index``), but HTML is served under ``/{smv_current_version}/``. 
+ The Pagefind bundle lives at the site root (``build/html/pagefind/``), so add one + ``../`` when ``smv_current_version`` is set. + """ + builder = getattr(app, 'builder', None) + if builder is not None: + target_uri = builder.get_target_uri(pagename, typ='html') + depth = len(PurePosixPath(target_uri).parent.parts) + else: + depth = pagename.count('/') + + version = getattr(app.config, 'smv_current_version', '') or '' + if version: + depth += 1 + + if depth == 0: + return './pagefind/' + return ('../' * depth) + 'pagefind/' + + +def _pagefind_component_urls(app, pagename: str) -> Tuple[str, str]: + """(css_href, js_href) relative to current page.""" + prefix = _pagefind_bundle_prefix(app, pagename) + return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' + + +def _search_results_href(app, pagename: str) -> str: + """Relative URL from the current page to Sphinx's ``search.html``. + + Uses the HTML builder's relative URI helper so multiversion pages under + ``/{distro}/`` link to ``/{distro}/search.html``, not site-root + ``/search.html`` (which may be wrong after ``make multiversion``). 
+ """ + builder = getattr(app, 'builder', None) + if builder is None: + return 'search.html' + try: + current = builder.get_target_uri(pagename, typ='html') + target = builder.get_target_uri('search', typ='html') + rel = builder.get_relative_uri(current, target) + if rel: + return rel + except (AttributeError, KeyError, ValueError): + pass + return 'search.html' + + +def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: + """Build mergeIndex list from conf (pinned docs.ros.org template).""" + pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) + if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): + return [] + base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') + overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} + out: List[Dict[str, Any]] = [] + for pkg in pkgs: + key = f'{distro}/{pkg}' + if key in overrides: + bundle = overrides[key] + else: + bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' + entry: Dict[str, Any] = {'bundlePath': bundle} + mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) + if isinstance(mf, dict) and pkg in mf: + entry['mergeFilter'] = mf[pkg] + iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) + if isinstance(iw, dict) and pkg in iw: + entry['indexWeight'] = iw[pkg] + out.append(entry) + return out + + +def _html_page_context( + app, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree, +) -> None: + sorted_keys = _union_meta_keys(app.env) + metadata_fields = _metadata_fields_for_keys(app, sorted_keys) + filter_csv = ','.join(sorted_keys) + + empty = { + 'pagefind_seo_filter_metas': '', + 'pagefind_data_meta_attr': '', + 'pagefind_bundle_prefix': './pagefind/', + 'pagefind_component_css': './pagefind/pagefind-component-ui.css', + 'pagefind_component_js': './pagefind/pagefind-component-ui.js', + 'pagefind_merge_index': [], + 'pagefind_filter_keys_csv': 
filter_csv, + 'pagefind_metadata_fields': metadata_fields, + 'pagefind_result_meta_order': list( + getattr(app.config, 'pagefind_result_meta_order', []) or [] + ), + 'pagefind_search_results_href': 'search.html', + } + context.update(empty) + + if app.builder.format != 'html' or templatename is None: + return + if not templatename.endswith('.html'): + return + + default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') + values = _resolved_page_meta(app, doctree) + + seo_filters = _seo_and_filter_metas(values) + data_attr = _pagefind_data_meta_attr(values) + css_href, js_href = _pagefind_component_urls(app, pagename) + bundle_prefix = _pagefind_bundle_prefix(app, pagename) + + merge_distro = values.get('distro') or str(default_distro) + merge = _merge_index_entries(app, merge_distro) + context['pagefind_seo_filter_metas'] = seo_filters + context['pagefind_data_meta_attr'] = data_attr + context['pagefind_bundle_prefix'] = bundle_prefix + context['pagefind_component_css'] = css_href + context['pagefind_component_js'] = js_href + context['pagefind_merge_index'] = merge + context['pagefind_search_results_href'] = _search_results_href(app, pagename) + + +def setup(app) -> Dict[str, Any]: + app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') + app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') + app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') + app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') + app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') + app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') + + app.connect('html-page-context', _html_page_context) + 
app.connect('doctree-resolved', _collect_meta_keys) + app.connect('env-purge-doc', _purge_meta_keys) + app.connect('env-merge-info', _merge_meta_keys) + + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/plugins/showmeta.py b/plugins/showmeta.py new file mode 100644 index 00000000000..f11b140429c --- /dev/null +++ b/plugins/showmeta.py @@ -0,0 +1,120 @@ +# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary +""" +Render selected ``.. meta::`` fields in the document body with author-controlled +order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). +""" + +from __future__ import annotations + +import html as html_module +import re +from typing import List + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util.docutils import SphinxDirective + +from meta_util import all_doctree_meta, expand_all_meta_values + + +def _macros_flat(app) -> dict[str, str]: + return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} + + +def _default_showmeta_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +class showmeta_node(nodes.General, nodes.Element): + """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" + + +class ShowMetaDirective(SphinxDirective): + """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" + + has_content = False + option_spec = { + 'order': directives.unchanged, + 'labels': directives.unchanged, + } + + def run(self) -> List[nodes.Node]: + node = showmeta_node() + node['order'] = self.options.get('order', '') + node['labels'] = self.options.get('labels', '') + self.set_source_info(node) + return [node] + + +def visit_skip_showmeta(self, node: showmeta_node) -> None: + raise nodes.SkipNode + + +def depart_showmeta_noop(self, node: showmeta_node) -> None: + pass + + +def _parse_labels(raw: str) -> dict[str, str]: + out: dict[str, str] = {} + for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: + key, _, value = part.partition('=') + key, value = key.strip(), value.strip() + if key: + out[key] = value + return out + + +def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + for node in list(doctree.findall(showmeta_node)): + node.parent.remove(node) + return + + macros = _macros_flat(app) + meta = expand_all_meta_values(all_doctree_meta(doctree), macros) + + for node in list(doctree.findall(showmeta_node)): + order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] + labels_map = _parse_labels(node.get('labels', '')) + if not order: + node.parent.remove(node) + continue + + parts: List[str] = [] + for key in order: + val = meta.get(key, '').strip() + if not val: + continue + label_base = labels_map.get(key) or _default_showmeta_label(key) + label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' + parts.append( + f'{html_module.escape(label_display)} ' + f'{html_module.escape(val)}' + ) + + if not parts: + node.parent.remove(node) + else: + inner = ' | '.join(parts) + raw = nodes.raw( + '', + f'

{inner}

', + format='html', + ) + node.replace_self(raw) + + +def setup(app): + app.add_node( + showmeta_node, + html=(visit_skip_showmeta, depart_showmeta_noop), + latex=(visit_skip_showmeta, depart_showmeta_noop), + ) + app.add_directive('showmeta', ShowMetaDirective) + app.connect('doctree-resolved', replace_showmeta_nodes) + return { + 'version': '1.0.0', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/requirements.txt b/requirements.txt index 21c4c057505..71bd6e769fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,7 @@ +# Non-Python build dependency (install separately; used by `make pagefind`): +# Node.js 18+ with npx — https://nodejs.org/ +# Verify: node -v && npx -v + codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index 05fe7db14e9..9f8afece373 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,10 +3,21 @@ About ROS ========= +.. meta:: + :contentType: about + :experience: beginner + :area: framework, tools, capabilities + :capability: simulation + :distro: {DISTRO} + :product: {PRODUCT} + + ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** +.. showmeta:: + :order: product, distro, area, capability, contentType, experience + :labels: product=Product, distro=Distribution, area=Area, capability=Capability, contentType=Content type, experience=Level .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css new file mode 100644 index 00000000000..1f507fc202d --- /dev/null +++ b/source/_static/pagefind-docsearch.css @@ -0,0 +1,219 @@ +/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ +.ros2-pagefind-search { + margin: 0.5rem 0 1rem; +} + +.ros2-pagefind-search pagefind-modal-trigger { + display: block; + width: 100%; +} + +/* Light styling for the trigger button (Pagefind exposes light DOM button) */ +.ros2-pagefind-search pagefind-modal-trigger::part(button), +.ros2-pagefind-search button { + align-items: center; + background: var(--wy-menu-vertical-background-color, #fcfcfc); + border: 1px solid #ccc; + border-radius: 40px; + color: var(--wy-menu-vertical-color, #404040); + cursor: pointer; + display: flex; + font-size: 0.85rem; + gap: 0.35rem; + justify-content: space-between; + min-height: 2.25rem; + padding: 0.35rem 0.6rem 0.35rem 0.75rem; + text-align: left; + width: 100%; +} + +.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, +.ros2-pagefind-search button:hover { + border-color: #999; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); +} + +/* Keyboard hint styling (Algolia DocSearch-like) */ +.ros2-pagefind-search .DocSearch-Button-Keys, +.ros2-pagefind-search pagefind-modal-trigger::part(keys) { + display: flex; + gap: 0.2rem; +} + +.ros2-pagefind-search kbd, +.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { + align-items: center; + background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); + border: 0; + border-radius: 3px; + box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); + color: #969faf; + display: flex; + font-size: 0.65rem; + font-weight: 600; + line-height: 1; + min-height: 1.25rem; + min-width: 1.25rem; + padding: 0 0.3rem; + justify-content: center; +} + +.wy-nav-side-scroll .ros2-pagefind-search { + padding-right: 0.5rem; +} + 
+.ros-page-meta-summary, +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin: -0.25rem 0 1rem !important; + padding: 0.45rem 0.75rem !important; + border-left: 4px solid #6c757d !important; + background: #f8f9fa !important; + color: #495057 !important; + font-size: 0.85rem !important; +} + +.ros2-pagefind-search dialog.pf-modal { + width: clamp(900px, 60vw, 1200px) !important; + max-width: 92vw !important; + min-width: min(900px, 92vw) !important; +} + +.ros2-pagefind-search .ros-search-two-col, +#ros-search-page .ros-search-two-col { + display: grid; + grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); + gap: 1rem; + min-height: 0; + width: 100%; +} + +.ros2-pagefind-search .ros-search-facets, +.ros2-pagefind-search .ros-search-results { + max-height: 62vh; + overflow: auto; + min-width: 0; +} + +#ros-search-page .ros-search-facets, +#ros-search-page .ros-search-results { + min-width: 0; +} + +.ros2-pagefind-search .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +#ros-search-page .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, +.ros2-pagefind-search .ros-search-results pagefind-summary, +.ros2-pagefind-search .ros-search-results pagefind-results, +#ros-search-page .ros-search-facets pagefind-filter-pane, +#ros-search-page .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-results { + display: block; +} + +.ros2-pagefind-search .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-summary { + margin-bottom: 0.75rem; +} + +.ros2-pagefind-search .pf-result-link, +#ros-search-page .pf-result-link { + font-size: 1rem; + font-weight: 700; + line-height: 1.25; +} + +.ros2-pagefind-search .pf-result-excerpt, +.ros2-pagefind-search .pf-result-preview, +#ros-search-page 
.pf-result-excerpt, +#ros-search-page .pf-result-preview { + font-size: 0.85rem; + line-height: 1.35; +} + +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin-top: 0.35rem !important; + margin-bottom: 0.45rem !important; + border-radius: 0 !important; + display: block !important; + line-height: 1.35 !important; +} + +.ros2-pagefind-search .pf-result-meta-block b, +#ros-search-page .pf-result-meta-block b, +dialog.pf-modal .pf-result-meta-block b { + color: #495057 !important; + font-weight: 600 !important; +} + +/* Full-page search results (search.html) */ +.ros-search-page { + padding: 0 0 2rem; +} + +.ros-search-page-input-row { + margin-bottom: 1.5rem; +} + +.ros-search-page-input-row pagefind-input { + display: block; + width: 100%; +} + +.ros-search-page-two-col .ros-search-facets, +.ros-search-page-two-col .ros-search-results { + max-height: none; + overflow: visible; +} + +/* + Force Pagefind's per-result IntersectionObserver to use this + element as its root. The component walks up the DOM looking for an ancestor + whose computed overflow-y is not "visible" or "hidden"; without this, no + ancestor matches on a dedicated search page (everything renders with default + overflow), the observer never fires, and result cards remain skeletons. + + Setting overflow-y: auto with no max-height gives the observer a valid root + without producing any visible scrollbar - the element grows to fit content + naturally and the page itself remains the scroll context for the user. 
+*/ +#ros-search-page pagefind-results { + overflow-y: auto !important; +} + +@media (max-width: 980px) { + .ros2-pagefind-search .ros-search-two-col, + #ros-search-page .ros-search-two-col { + grid-template-columns: 1fr; + } + + .ros2-pagefind-search .ros-search-facets, + .ros2-pagefind-search .ros-search-results { + max-height: none; + } + + .ros2-pagefind-search .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } + + #ros-search-page .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } +} diff --git a/source/_templates/layout.html b/source/_templates/layout.html new file mode 100644 index 00000000000..94830854a69 --- /dev/null +++ b/source/_templates/layout.html @@ -0,0 +1,9 @@ +{% extends "!layout.html" %} +{% block extrahead %} + {{ super() }} + {% if pagefind_seo_filter_metas %} + + {{ pagefind_seo_filter_metas|safe }} + + {% endif %} +{% endblock %} diff --git a/source/_templates/search.html b/source/_templates/search.html new file mode 100644 index 00000000000..520bfb9b715 --- /dev/null +++ b/source/_templates/search.html @@ -0,0 +1,37 @@ +{# + Override RTD/Sphinx search page: Pagefind full-page UI instead of searchtools.js. +#} +{%- extends "layout.html" %} +{% set title = _('Search') %} +{% set display_vcs_links = False %} +{%- block scripts %} + {{ super() }} +{%- endblock %} +{% block footer %} + {{ super() }} +{% endblock %} +{% block body %} + + +
+
+ +
+
+ +
+ + +
+
+
+{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html new file mode 100644 index 00000000000..4485c542d3c --- /dev/null +++ b/source/_templates/searchbox.html @@ -0,0 +1,573 @@ +{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} + + + + From 55716fda890c1cc1dca55346475f7b097949aef5 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Wed, 20 May 2026 12:22:55 +0100 Subject: [PATCH 31/59] OPENR-89: Next iteration of short description enhancement --- scripts/config.py | 75 +++++++++ scripts/enhance_topics.py | 232 ++++++++++++++-------------- scripts/openai_retrieval.py | 22 ++- scripts/test/test_enhance_topics.py | 71 ++++++++- 4 files changed, 269 insertions(+), 131 deletions(-) create mode 100644 scripts/config.py diff --git a/scripts/config.py b/scripts/config.py new file mode 100644 index 00000000000..33bd80c8216 --- /dev/null +++ b/scripts/config.py @@ -0,0 +1,75 @@ +""" +Central configuration for the enhancement scripts. + +Holds tuning constants and prompt strings used by ``enhance_topics`` and +``openai_retrieval``. Kept as a leaf module (no imports from sibling scripts) +so it can be imported freely without risk of circular dependencies. 
+""" + +# Define constants +GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls +# Maximum content length in characters, approximately 300k tokens (leaving 100k for instructions/output) +MAX_CONTENT_LENGTH = 1_200_000 +RST_EXTENSION = '.rst' # File extension for RST files + +# Define timeout and retry parameters for API calls +# - Individual API calls timeout after DEFAULT_TIMEOUT seconds +# - On rate limits/connection errors, retry up to MAX_RETRIES times +# - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) +DEFAULT_TIMEOUT = 30 # Default timeout in seconds for an individual API call +MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff +MIN_WAIT = 10 # Minimum wait time between retries in seconds +MAX_WAIT = 120 # Maximum wait time between retries in seconds + +# Assistant-run tuning (used by openai_retrieval) +# Maximum time for one assistant run (thread message + run + polling) +ASSISTANT_RUN_TIMEOUT = 120 +# Interval between run status polls +ASSISTANT_POLL_INTERVAL = 1.5 + +# Example RST paths (relative to repository root) indexed into the vector store for file_search +SHORT_DESCRIPTION_EXAMPLE_PATHS = [ + "source/About-ROS.rst", +] + +# Define prompts for the AI model + +SHORT_DESCRIPTION_PROMPT = """You are a technical author, and your role is to analyze RST content within supplied documents, and then create new, supplementary content for a new draft article based on this analysis. + +## Examples +Use file_search to read through the following RST files in their entirety as examples of completed articles: + +- About-ROS.rst +- First-Steps.rst +- Interfaces-Topics-Services-Actions.rst + +## Short Description +For each article in this set of examples, analyse the content associated with the "short-description" directive, and what it constitutes in relation to the article it describes. 
+For example, in the First-Steps article, the 3 sentences which begin as follows comprise the specified short description: + +* "Interfaces in ROS..." +* "This article explains the..." +* "With this information..." + +This short description content does not include the single line of text commencing with "**Area...", or the "contents" (Table of Contents) directive. + +When you have identified the short description in all example articles, remember the formatting and how the paragraph is constructed, including tone/style and length. We call this the article Short Description. + +Finally, generate the short description for the new article given in the user message, with no additional styling, characters, or formatting. +""" + +KEYWORDS_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. + +Your role is to extract 3 to 5 keywords from the content for use in metadata. The keywords should be single words that are the most important and relevant words to the content topic. + +Finally, generate a comma-separated list of these keywords, in lowercase, with no additional styling, characters, or formatting.""" + +DESCRIPTION_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. + +Your role is to create a concise description of the content for use in metadata. The description should be a single sentence (of a maximum of 130 characters) that captures the main idea of the content. + +Finally, generate this description, with no additional styling, characters, or formatting.""" + +ENGLISH_LANGUAGE_CHECK_PROMPT = """You are a validation assistant, and your role is to determine whether the following text is written entirely in English. Common technical terms, acronyms, and internationally recognised proper nouns are acceptable if they are normally used in English technical documentation. 
+ +Answer ONLY with the single word yes or no in lowercase, with no punctuation, explanation, or additional text.""" diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 62ad88012f2..c77519473d3 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -2,6 +2,7 @@ import re import sys import os +from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Callable, Optional @@ -10,6 +11,21 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type from concurrent.futures import ThreadPoolExecutor +from config import ( + ASSISTANT_RUN_TIMEOUT, + DEFAULT_TIMEOUT, + DESCRIPTION_PROMPT, + ENGLISH_LANGUAGE_CHECK_PROMPT, + GPT_MODEL, + KEYWORDS_PROMPT, + MAX_CONTENT_LENGTH, + MAX_RETRIES, + MAX_WAIT, + MIN_WAIT, + RST_EXTENSION, + SHORT_DESCRIPTION_EXAMPLE_PATHS, + SHORT_DESCRIPTION_PROMPT, +) from enhance_data import ( EnhanceData, add_analysis_result, @@ -19,7 +35,6 @@ mark_file_updated, ) from openai_retrieval import ( - ASSISTANT_RUN_TIMEOUT, RetrievalResources, analyze_with_file_search, cleanup_short_description_resources, @@ -35,67 +50,46 @@ logger = logging.getLogger(__name__) -# Define constants -GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls -# Maximum content length in characters, approximately 300k tokens (leaving 100k for instructions/output) -MAX_CONTENT_LENGTH = 1200000 -RST_EXTENSION = '.rst' # File extension for RST files - -# Define timeout and retry parameters for API calls -# - Individual API calls timeout after DEFAULT_TIMEOUT seconds -# - On rate limits/connection errors, retry up to MAX_RETRIES times -# - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) -DEFAULT_TIMEOUT = 30 # Default timeout in seconds for an individual API call -MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff -MIN_WAIT = 10 # Minimum wait time between retries in seconds -MAX_WAIT = 120 # Maximum wait time 
between retries in seconds - -# Example RST paths (relative to repository root) indexed into the vector store for file_search -SHORT_DESCRIPTION_EXAMPLE_PATHS = [ - "source/About-ROS.rst", -] - -# Define prompts for the AI model -SHORT_DESCRIPTION_PROMPT = """You are a Technical Author in the technology industry working on documenting a robotics product, and your role is to analyze RST content within supplied documents. -You'll then create new content based on this analysis for a new draft article, which I can use to supplement that article. - -## Examples -Use file_search to read through the following RST files in their entirety as examples of completed articles: - -- About-ROS.rst -- First-Steps.rst -- Interfaces-Topics-Services-Actions.rst - -## Short Description -For each article in this set of examples, analyse the content associated with the "short-description" directive, and what it constitutes in relation to the article it describes. For example, in the First-Steps article, the 3 sentences which begin as follows comprise the specified short description: - -* "Interfaces in ROS..." -* "This article explains the..." -* "With this information..." +@dataclass(frozen=True) +class AppliedContent: + """RST body after applying analysis results, and whether it differs from the input.""" -This short description content does not include the single line of text commencing with "**Area...", or the "contents" (Table of Contents) directive. + content: str + changed: bool -When you have identified the short description in all example articles, remember the formatting and how the paragraph is constructed, including tone/style and length. We call this the article Short Description. -Finally, generate the short description for the new article given in the user message, with no additional styling, characters, or formatting. 
-""" +class ApplyHook(ABC): + """Apply stored analysis results to an RST file body.""" -KEYWORDS_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. + @abstractmethod + def apply(self, content: str, results: dict[str, str]) -> AppliedContent: + """Return updated content and whether the source was modified.""" -Your role is to extract 3 to 5 keywords from the content for use in metadata. The keywords should be single words that are the most important and relevant words to the content topic. -Finally, generate a comma-separated list of these keywords, in lowercase, with no additional styling, characters, or formatting.""" +@dataclass(frozen=True) +class MetadataApplyHook(ApplyHook): + """Merge ``description`` / ``keywords`` results into ``.. meta::``.""" -DESCRIPTION_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. + def apply(self, content: str, results: dict[str, str]) -> AppliedContent: + subset = {k: v for k, v in results.items() if k in ("description", "keywords")} + if not subset: + return AppliedContent(content=content, changed=False) + new_content, changed = inject_metadata_to_content(content, subset) + return AppliedContent(content=new_content, changed=changed) -Your role is to create a concise description of the content for use in metadata. The description should be a single sentence (of a maximum of 130 characters) that captures the main idea of the content. -Finally, generate this description, with no additional styling, characters, or formatting.""" +@dataclass(frozen=True) +class ShortDescriptionApplyHook(ApplyHook): + """Insert or fill ``.. short-description::`` from analysis results.""" -ENGLISH_LANGUAGE_CHECK_PROMPT = """You are a validation assistant, and your role is to determine whether the following text is written entirely in English. 
Common technical terms, acronyms, and internationally recognised proper nouns are acceptable if they are normally used in English technical documentation. + def apply(self, content: str, results: dict[str, str]) -> AppliedContent: + val = results.get("short-description") + if not val or not val.strip(): + return AppliedContent(content=content, changed=False) + new_content, changed = inject_short_description_to_content(content, val) + return AppliedContent(content=new_content, changed=changed) -Answer ONLY with the single word yes or no in lowercase, with no punctuation, explanation, or additional text.""" @dataclass(frozen=True) class EnhancementTask: @@ -122,13 +116,13 @@ def analyze(cl: OpenAI, content: str, to: int) -> str: def _short_description_enhancement_task(assistant_id: str) -> EnhancementTask: """Build a task that writes to the ``.. short-description::`` directive body.""" - def analyze(cl: OpenAI, content: str, to: int) -> str: - return analyze_with_file_search(cl, assistant_id, content, timeout=to) - def should_skip(content: str) -> bool: return has_short_description_content(content) - return EnhancementTask("short-description", should_skip, analyze, ASSISTANT_RUN_TIMEOUT) + def analyze(cl: OpenAI, content: str, to: int) -> str: + return analyze_with_file_search(cl, assistant_id, content, timeout=to) + + return EnhancementTask(key="short-description", should_skip=should_skip, analyze=analyze, timeout=ASSISTANT_RUN_TIMEOUT) @retry( @@ -285,6 +279,7 @@ def _run_validation() -> bool: logger.warning("Retryable error during language validation: %s", e) raise + # Get the answer from the completion (should be yes or no) answer = completion.choices[0].message.content raw = (answer or "").strip().lower() # Accept a single leading yes/no token even if the model adds stray whitespace @@ -307,7 +302,12 @@ def _run_validation() -> bool: logger.error("Validation timed out after %s seconds", timeout) raise -def analyze_files(files: list[str], client: OpenAI, tasks: 
list[EnhancementTask]) -> EnhanceData: +def analyze_files( + files: list[str], + client: OpenAI, + tasks: list[EnhancementTask], + data: Optional[EnhanceData] = None, +) -> EnhanceData: """ Process a list of files and analyse their content using each enhancement task. @@ -315,11 +315,12 @@ def analyze_files(files: list[str], client: OpenAI, tasks: list[EnhancementTask] files (list[str]): List of paths to files. client (OpenAI): OpenAI client instance. tasks (list[EnhancementTask]): Enhancement tasks to run per file. + data (EnhanceData, optional): Accumulator for results; empty if omitted. Returns: EnhanceData: Enhancement data structure containing analysis results and update tracking. """ - data = create_enhance_data() + acc = data if data is not None else create_enhance_data() logger.debug("============================") logger.debug("Performing content analysis:") @@ -355,10 +356,13 @@ def analyze_files(files: list[str], client: OpenAI, tasks: list[EnhancementTask] continue logger.debug("Running analysis: %s", task.key) try: + # Analyse the content using the task's analyze function result = task.analyze(client, content, task.timeout) if result: + # Validate the generated content if validate_content(client, result, timeout=DEFAULT_TIMEOUT): - data = add_analysis_result(data, file_path, task.key, result) + # Add the analysis result to the enhancement data + acc = add_analysis_result(acc, file_path, task.key, result) else: logger.warning( "Validation failed for generated %s in %s; result not stored", @@ -366,6 +370,7 @@ def analyze_files(files: list[str], client: OpenAI, tasks: list[EnhancementTask] file_path, ) else: + # Log a warning if no result was generated logger.warning("No result for %s with task %r", file_path, task.key) except (RateLimitError, APIConnectionError) as e: @@ -378,7 +383,7 @@ def analyze_files(files: list[str], client: OpenAI, tasks: list[EnhancementTask] logger.error("Failed to analyse %s with task %r: %s", file_path, task.key, e) continue - 
return data + return acc def get_openai_client() -> OpenAI: @@ -405,37 +410,17 @@ def get_openai_client() -> OpenAI: return OpenAI(api_key=api_key) -def _apply_metadata_results(content: str, results: dict[str, str]) -> tuple[str, bool]: - """Merge ``description`` / ``keywords`` results into ``.. meta::``.""" - # Create a subset of the results dictionary containing only the description and keywords - subset = {k: v for k, v in results.items() if k in ("description", "keywords")} - if not subset: - return content, False - return inject_metadata_to_content(content, subset) - - -def _apply_short_description_results(content: str, results: dict[str, str]) -> tuple[str, bool]: - """Insert or fill ``.. short-description::`` from analysis results.""" - # Get the short description result from the results dictionary - val = results.get("short-description") - # If the short description is not found or is empty, return the content and False - if not val or not val.strip(): - return content, False - return inject_short_description_to_content(content, val) - - def update_enhanced_files( files: list[str], data: EnhanceData, - apply_hooks: list[Callable[[str, dict[str, str]], tuple[str, bool]]], + apply_hook: ApplyHook, log_label: str, ) -> EnhanceData: """ - Process a list of files and apply enhancement hooks that may rewrite RST. + Process a list of files and apply an enhancement hook that may rewrite RST. - Each hook receives the current file content and the per-file results dictionary, - and returns ``(new_content, changed)``. Hooks run in order; the file is written - once if any hook reported a change. + The hook receives the file content and the per-file results dictionary, and + returns ``AppliedContent``. The file is written when ``changed`` is true. 
""" logger.debug("===========================") logger.debug("Updating %s in files:", log_label) @@ -463,23 +448,16 @@ def update_enhanced_files( logger.error("Unicode decode error reading file %s: %s", file_path, exc) continue - # Apply the enhancement hooks to the content - working = content - changed_any = False - # Iterate through each hook and apply it to the content - for hook in apply_hooks: - working, changed = hook(working, file_results) - changed_any = changed_any or changed + applied = apply_hook.apply(content, file_results) - # If no changes were made, log a message and continue - if not changed_any: + if not applied.changed: logger.debug("No %s changes applied for %s", log_label, file_path) continue # Write the updated content to the file try: with open(file_path, "w", encoding="utf-8") as file: - file.write(working) + file.write(applied.content) except (OSError, PermissionError) as exc: logger.error("Error writing file %s: %s", file_path, exc) continue @@ -502,13 +480,18 @@ def update_enhanced_files( return current_data -def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: +def enhance_metadata( + files: list[str], + client: Optional[OpenAI] = None, + data: Optional[EnhanceData] = None, +) -> EnhanceData: """ Enhance files with metadata based on content analysis. Args: files (list[str]): Paths to files to enhance. client (OpenAI, optional): OpenAI client instance. If None, creates new instance. + data (EnhanceData, optional): Accumulator to extend (for multi-phase CLI runs). Returns: EnhanceData: Enhancement data structure containing analysis results and update tracking. @@ -516,45 +499,50 @@ def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> Enhan Raises: OpenAIError: If no valid API key is found when creating a new client. 
""" + acc = data if data is not None else create_enhance_data() try: client = client or get_openai_client() except OpenAIError as e: logger.error(f"Failed to initialise OpenAI client: {e}") - return create_enhance_data() + return acc - # TODO: Make this config-driven, so that we can easily add more prompts and analysis types + # Create the list of enhancement tasks for the metadata analysis tasks = [ _metadata_enhancement_task("description", DESCRIPTION_PROMPT), _metadata_enhancement_task("keywords", KEYWORDS_PROMPT), ] - data = analyze_files(files, client, tasks) - data = update_meta_files(files, data) + acc = analyze_files(files, client, tasks, acc) + return update_meta_files(files, acc) - return data - -def enhance_short_descriptions(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: +def enhance_short_descriptions( + files: list[str], + client: Optional[OpenAI] = None, + data: Optional[EnhanceData] = None, +) -> EnhanceData: """ Enhance RST files with a ``.. short-description::`` body using an assistant with file_search. Example articles are taken from ``SHORT_DESCRIPTION_EXAMPLE_PATHS`` (indexed once per run). Each target file is sent in its own thread; the vector store and assistant are deleted - afterwards. Not wired to ``main()``; import and call from a REPL or another script. + afterwards. Args: files: Paths to RST files to enhance. client: Optional pre-built OpenAI client. + data: Optional accumulator to extend (for multi-phase CLI runs). Returns: ``EnhanceData`` with results under the key ``short-description`` and ``updated_files`` set after successful writes. 
""" + acc = data if data is not None else create_enhance_data() try: client = client or get_openai_client() except OpenAIError as e: logger.error("Failed to initialise OpenAI client: %s", e) - return create_enhance_data() + return acc resources: RetrievalResources | None = None try: @@ -568,14 +556,13 @@ def enhance_short_descriptions(files: list[str], client: Optional[OpenAI] = None resources = RetrievalResources(assistant_id, vector_store_id) tasks = [_short_description_enhancement_task(assistant_id)] - data = analyze_files(files, client, tasks) - data = update_enhanced_files( + acc = analyze_files(files, client, tasks, acc) + return update_enhanced_files( files, - data, - [_apply_short_description_results], + acc, + ShortDescriptionApplyHook(), "short description", ) - return data finally: cleanup_short_description_resources(client, resources) @@ -591,7 +578,7 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: Returns: EnhanceData: Updated enhancement data with files marked as updated. """ - return update_enhanced_files(files, data, [_apply_metadata_results], "metadata") + return update_enhanced_files(files, data, MetadataApplyHook(), "metadata") def main() -> None: """ @@ -599,14 +586,13 @@ def main() -> None: - Parses command-line arguments to collect input file paths. - Filters the provided files to include only reStructuredText (.rst) files. - - Enhances the metadata of each RST file using AI-based analysis (keywords and description). - - Writes updated metadata back to files and logs processing metrics. + - Enhances ``.. meta::`` fields and ``.. short-description::`` bodies. + - Writes updates back to files and logs a single combined metrics summary. Usage: python enhance_topics.py ... - Only files with the .rst extension will be processed. - Logs the number of files successfully enhanced. + Only files with the .rst extension will be processed. 
""" logging.basicConfig( @@ -625,11 +611,25 @@ def main() -> None: logger.error("No input files provided. Pass a list of RST files as arguments.") sys.exit(0) - # Enhance the metadata in the RST files and return the enhancement data with updated files - data = enhance_metadata(rst_files) - # Log the metrics for the enhancement data + # Get the OpenAI client and create the enhancement data + try: + client = get_openai_client() + except OpenAIError as e: + logger.error("Failed to initialise OpenAI client: %s", e) + data = create_enhance_data() + else: + data = create_enhance_data() + data = enhance_metadata(rst_files, client, data) + data = enhance_short_descriptions(rst_files, client, data) + metrics = calculate_metrics(data) - logger.info(f"Enhanced files: {metrics.files_with_results_count} with at least one valid analysis result, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") + logger.info( + "Enhanced files: %s with at least one valid analysis result, and %s files updated, " + "out of %s RST files.", + metrics.files_with_results_count, + metrics.updated_files_count, + len(rst_files), + ) if __name__ == "__main__": main() diff --git a/scripts/openai_retrieval.py b/scripts/openai_retrieval.py index c07f57d2ec7..54bd08f0fbc 100644 --- a/scripts/openai_retrieval.py +++ b/scripts/openai_retrieval.py @@ -16,20 +16,16 @@ from openai import OpenAI, RateLimitError, APIConnectionError from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type -logger = logging.getLogger(__name__) - -# Align with enhance_topics retry policy for vector store / assistant creation -MAX_RETRIES = 10 -MIN_WAIT = 10 -MAX_WAIT = 120 - -# Maximum time for one assistant run (thread message + run + polling) -ASSISTANT_RUN_TIMEOUT = 120 -# Interval between run status polls -ASSISTANT_POLL_INTERVAL = 1.5 +from config import ( + ASSISTANT_POLL_INTERVAL, + ASSISTANT_RUN_TIMEOUT, + MAX_CONTENT_LENGTH, + MAX_RETRIES, + MAX_WAIT, + 
MIN_WAIT, +) -# Match enhance_topics.MAX_CONTENT_LENGTH for user message payload -MAX_CONTENT_LENGTH = 1_200_000 +logger = logging.getLogger(__name__) _SCRIPTS_DIR = Path(__file__).resolve().parent REPO_ROOT = _SCRIPTS_DIR.parent diff --git a/scripts/test/test_enhance_topics.py b/scripts/test/test_enhance_topics.py index bb3703701c4..8b1984aaeeb 100644 --- a/scripts/test/test_enhance_topics.py +++ b/scripts/test/test_enhance_topics.py @@ -7,6 +7,7 @@ # Add the scripts directory to sys.path to allow importing enhance_topics sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from config import MAX_CONTENT_LENGTH from enhance_topics import ( analyze_content, get_openai_client, @@ -14,10 +15,10 @@ update_meta_files, enhance_metadata, enhance_short_descriptions, + main, _metadata_enhancement_task, - MAX_CONTENT_LENGTH, ) -from enhance_data import EnhanceData +from enhance_data import EnhanceData, create_enhance_data, calculate_metrics @pytest.fixture def mock_client(): @@ -174,6 +175,32 @@ def test_update_meta_files_skips_no_change(mock_inject, mock_get_results): # Verify write was NOT called m_open().write.assert_not_called() +@patch("enhance_topics.get_meta_names_from_content") +@patch("enhance_topics.analyze_content") +@patch("enhance_topics.validate_content") +def test_analyze_files_accumulates_onto_initial_data( + mock_validate, + mock_analyze, + mock_get_meta, + mock_client, +): + """Passing an accumulator extends per-file results via add_analysis_result.""" + mock_get_meta.return_value = [] + mock_analyze.return_value = "Generated description" + mock_validate.return_value = True + initial = EnhanceData( + results={"file1.rst": {"keywords": "existing"}}, + updated_files=set(), + ) + tasks = [_metadata_enhancement_task("description", "desc prompt")] + + with patch("builtins.open", mock_open(read_data="File content")): + result = analyze_files(["file1.rst"], mock_client, tasks, initial) + + assert result.results["file1.rst"]["keywords"] == 
"existing" + assert result.results["file1.rst"]["description"] == "Generated description" + + # --- Tests for enhance_metadata --- @patch('enhance_topics.get_openai_client') @@ -227,3 +254,43 @@ def test_enhance_short_descriptions_orchestration( assert res is not None assert res.assistant_id == "asst_1" assert res.vector_store_id == "vs_1" + + +@patch("enhance_topics.enhance_short_descriptions") +@patch("enhance_topics.enhance_metadata") +@patch("enhance_topics.get_openai_client") +def test_main_threads_accumulator_through_both_enhancements( + mock_get_client, + mock_metadata, + mock_short_descriptions, +): + """CLI entry point folds one EnhanceData through metadata then short description.""" + mock_get_client.return_value = MagicMock() + empty = create_enhance_data() + after_meta = EnhanceData( + results={"topic.rst": {"description": "d"}}, + updated_files={"topic.rst"}, + ) + after_short = EnhanceData( + results={"topic.rst": {"description": "d", "short-description": "s"}}, + updated_files={"topic.rst"}, + ) + + def metadata_side_effect(files, client, data): + assert data == empty + return after_meta + + def short_side_effect(files, client, data): + assert data == after_meta + return after_short + + mock_metadata.side_effect = metadata_side_effect + mock_short_descriptions.side_effect = short_side_effect + + with patch.object(sys, "argv", ["enhance_topics.py", "topic.rst"]): + main() + + mock_get_client.assert_called_once() + metrics = calculate_metrics(after_short) + assert metrics.files_with_results_count == 1 + assert metrics.updated_files_count == 1 From 2bfb3c439dbe6e9fd45041b560600388bb5259ec Mon Sep 17 00:00:00 2001 From: 3di-techx Date: Wed, 20 May 2026 12:32:21 +0000 Subject: [PATCH 32/59] url fix --- .github/workflows/test.yml | 2 +- plugins/pagefind_meta.py | 6 +++--- source/_templates/searchbox.html | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 
8b40cfe0cca..59de116356c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -82,7 +82,7 @@ jobs: - name: Setup Node.js (Pagefind) uses: actions/setup-node@v4 with: - node-version: '20' + node-version: '24' - name: Index HTML with Pagefind run: make pagefind diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py index e5fc1188949..e17815988de 100644 --- a/plugins/pagefind_meta.py +++ b/plugins/pagefind_meta.py @@ -151,7 +151,7 @@ def _search_results_href(app, pagename: str) -> str: """ builder = getattr(app, 'builder', None) if builder is None: - return 'search.html' + return 'search' try: current = builder.get_target_uri(pagename, typ='html') target = builder.get_target_uri('search', typ='html') @@ -160,7 +160,7 @@ def _search_results_href(app, pagename: str) -> str: return rel except (AttributeError, KeyError, ValueError): pass - return 'search.html' + return 'search' def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: @@ -211,7 +211,7 @@ def _html_page_context( 'pagefind_result_meta_order': list( getattr(app.config, 'pagefind_result_meta_order', []) or [] ), - 'pagefind_search_results_href': 'search.html', + 'pagefind_search_results_href': 'search', } context.update(empty) diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html index 4485c542d3c..9d60484e094 100644 --- a/source/_templates/searchbox.html +++ b/source/_templates/searchbox.html @@ -3,7 +3,7 @@