From 3d437e4deeffc7c42c993916c690e1f2d7beff01 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Fri, 1 May 2026 16:35:49 +0100 Subject: [PATCH 01/18] OPENR-89: Simple test for adding an enhance job and run script on changed files --- .github/scripts/enhance_topics.py | 4 ++ .github/workflows/test.yml | 83 +++++++++++++++++++------------ Makefile | 3 ++ 3 files changed, 57 insertions(+), 33 deletions(-) create mode 100644 .github/scripts/enhance_topics.py diff --git a/.github/scripts/enhance_topics.py b/.github/scripts/enhance_topics.py new file mode 100644 index 00000000000..70d137d09d3 --- /dev/null +++ b/.github/scripts/enhance_topics.py @@ -0,0 +1,4 @@ +import sys + +for arg in sys.argv[1:]: + print(f"Enhancing topic: {arg}") \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..e412c58b4c8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,6 +3,23 @@ name: Test on: pull_request jobs: + enhance: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Enhance topics + env: + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: make enhance-topics + test: runs-on: ubuntu-24.04 steps: @@ -114,36 +131,36 @@ jobs: 3. 
Open `html-artifacts-${{ github.event.pull_request.number }}/index.html` in your favorite browser edit-mode: replace - multi-build: - needs: [test, lint, spellcheck] - runs-on: ubuntu-24.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - - # Create a fake remote ref matching the target branch name - - name: Setup branch for multiversion - run: | - TARGET_BRANCH="${{ github.base_ref }}" - echo "PR target branch: $TARGET_BRANCH" - - # Create a remote ref that sphinx-multiversion will find - git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD - - # Verify the ref was created - echo "Created refs:" - git show-ref | grep "$TARGET_BRANCH" - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Setup Graphviz - uses: ts-graphviz/setup-graphviz@v2 - - - name: Install dependencies with pip - run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt - - - name: Build the docs - run: make multiversion + # multi-build: + # needs: [test, lint, spellcheck] + # runs-on: ubuntu-24.04 + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # + # # Create a fake remote ref matching the target branch name + # - name: Setup branch for multiversion + # run: | + # TARGET_BRANCH="${{ github.base_ref }}" + # echo "PR target branch: $TARGET_BRANCH" + # + # # Create a remote ref that sphinx-multiversion will find + # git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD + # + # # Verify the ref was created + # echo "Created refs:" + # git show-ref | grep "$TARGET_BRANCH" + # + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.12' + # + # - name: Setup Graphviz + # uses: ts-graphviz/setup-graphviz@v2 + # + # - name: Install dependencies with pip + # run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + # + # - name: Build the docs + # run: make multiversion diff --git a/Makefile b/Makefile index 
f2d90d3a3a8..5a85e9dfe64 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ multiversion: Makefile %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) +enhance-topics: + git diff --name-only "$(PR_BASE_SHA)" "$(PR_HEAD_SHA)" | xargs -r $(PYTHON) scripts/enhance_topics.py + lint: ./sphinx-lint-with-ros source From 73517017851a3cede7e65c021053ccb07fcd60ea Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:38:10 +0100 Subject: [PATCH 02/18] OPENR-89: Return the test workflow to original state --- .github/workflows/test.yml | 83 +++++++++++++++----------------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e412c58b4c8..5cea1c262d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,23 +3,6 @@ name: Test on: pull_request jobs: - enhance: - runs-on: ubuntu-24.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Enhance topics - env: - PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} - PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: make enhance-topics - test: runs-on: ubuntu-24.04 steps: @@ -131,36 +114,36 @@ jobs: 3. 
Open `html-artifacts-${{ github.event.pull_request.number }}/index.html` in your favorite browser edit-mode: replace - # multi-build: - # needs: [test, lint, spellcheck] - # runs-on: ubuntu-24.04 - # steps: - # - name: Checkout - # uses: actions/checkout@v4 - # - # # Create a fake remote ref matching the target branch name - # - name: Setup branch for multiversion - # run: | - # TARGET_BRANCH="${{ github.base_ref }}" - # echo "PR target branch: $TARGET_BRANCH" - # - # # Create a remote ref that sphinx-multiversion will find - # git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD - # - # # Verify the ref was created - # echo "Created refs:" - # git show-ref | grep "$TARGET_BRANCH" - # - # - name: Setup Python - # uses: actions/setup-python@v5 - # with: - # python-version: '3.12' - # - # - name: Setup Graphviz - # uses: ts-graphviz/setup-graphviz@v2 - # - # - name: Install dependencies with pip - # run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt - # - # - name: Build the docs - # run: make multiversion + multi-build: + needs: [test, lint, spellcheck] + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + # Create a fake remote ref matching the target branch name + - name: Setup branch for multiversion + run: | + TARGET_BRANCH="${{ github.base_ref }}" + echo "PR target branch: $TARGET_BRANCH" + + # Create a remote ref that sphinx-multiversion will find + git update-ref "refs/remotes/origin/$TARGET_BRANCH" HEAD + + # Verify the ref was created + echo "Created refs:" + git show-ref | grep "$TARGET_BRANCH" + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 + + - name: Install dependencies with pip + run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + + - name: Build the docs + run: make multiversion From 20efaf83513d6371ed414d673515d9410059bcde Mon Sep 17 
00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:40:06 +0100 Subject: [PATCH 03/18] OPENR-89: Use new workflow based on push. List out files which remain to be enhanced. --- .github/workflows/enhance.yml | 23 +++++++++++++++++++++++ Makefile | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/enhance.yml diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml new file mode 100644 index 00000000000..db354226907 --- /dev/null +++ b/.github/workflows/enhance.yml @@ -0,0 +1,23 @@ +name: Enhance content + +on: push + +jobs: + enhance: + # Runs only on forks when contributor pushes to their fork + if: github.event.repository.fork == true + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Enhance topics + env: + BASE_SHA: ${{ github.event.before }} + HEAD_SHA: ${{ github.event.after }} + run: make enhance-topics \ No newline at end of file diff --git a/Makefile b/Makefile index 5a85e9dfe64..f411c155a1e 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ multiversion: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) enhance-topics: - git diff --name-only "$(PR_BASE_SHA)" "$(PR_HEAD_SHA)" | xargs -r $(PYTHON) scripts/enhance_topics.py + git diff --name-only --diff-filter=d $(BASE_SHA) $(HEAD_SHA) | xargs -r $(PYTHON) scripts/enhance_topics.py lint: ./sphinx-lint-with-ros source From 737797225e679dab0a9f1467be48fb631aa8b095 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:53:58 +0100 Subject: [PATCH 04/18] OPENR-89: Fix to fetch history so that SHAs exist --- .github/workflows/enhance.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml index db354226907..e03ce90d395 100644 --- a/.github/workflows/enhance.yml +++ b/.github/workflows/enhance.yml @@ -9,10 +9,14 @@ 
jobs: runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v4 + # Using checkout v5, as v4 was warning that it will soon be deprecated (Node 20) + uses: actions/checkout@v5 + with: + fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v5 + # Using setup-python v6, as v5 has same warning as above + uses: actions/setup-python@v6 with: python-version: '3.12' From 39eb9ac74ded0cd83fbda038aea47cb734ea4a5d Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 12:55:52 +0100 Subject: [PATCH 05/18] OPENR-89: Test script in wrong location --- {.github/scripts => scripts}/enhance_topics.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {.github/scripts => scripts}/enhance_topics.py (100%) diff --git a/.github/scripts/enhance_topics.py b/scripts/enhance_topics.py similarity index 100% rename from .github/scripts/enhance_topics.py rename to scripts/enhance_topics.py From 292e4551a4b4e1e8310e86bf039ed9cd98a62bbb Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 18:00:14 +0100 Subject: [PATCH 06/18] OPENR-89: Creat RST-specific parsing and updates in new module, and update topic enhance script --- constraints.txt | 4 + requirements.txt | 4 + scripts/enhance_data.py | 213 +++++++++++++++++++++++++ scripts/enhance_topics.py | 323 +++++++++++++++++++++++++++++++++++++- scripts/rst_utils.py | 124 +++++++++++++++ 5 files changed, 666 insertions(+), 2 deletions(-) create mode 100644 scripts/enhance_data.py create mode 100644 scripts/rst_utils.py diff --git a/constraints.txt b/constraints.txt index 56ae59259be..2ba36b535b7 100644 --- a/constraints.txt +++ b/constraints.txt @@ -11,11 +11,13 @@ imagesize==1.4.1 iniconfig==2.1.0 Jinja2==3.1.6 MarkupSafe==3.0.3 +openai==2.33.0 packaging==25.0 pluggy==1.6.0 polib==1.2.0 Pygments==2.19.2 pytest==8.4.2 +python-dotenv==1.1.0 PyYAML==6.0.3 regex==2025.9.18 requests==2.32.5 @@ -39,4 +41,6 @@ sphinxcontrib-mermaid==1.0.0 sphinxcontrib-qthelp==2.0.0 
"""
Data structures and pure functions for tracking enhancement results and computing metrics.

This module provides a functional-programming-oriented core for managing analysis results
and deriving metrics. It is independent of the domain logic (e.g. RST file handling,
OpenAI integration) and can be reused in other contexts.

Every "mutating" helper returns a new ``EnhanceData`` instead of changing its argument,
and every accessor returns a fresh container so callers cannot alter tracked state.
"""

from typing import NamedTuple, Dict, Set, List, Optional


class EnhanceMetrics(NamedTuple):
    """
    Immutable summary of analysis metrics derived from enhancement results.

    Attributes:
        counts_by_analysis: Maps each analysis type to a count per normalised value.
            Example: {"content-type": {"task": 5, "concept": 3, "reference": 2}}
        files_with_results_count: Number of files that have at least one analysis result.
        updated_files_count: Number of files recorded as updated.
    """
    counts_by_analysis: Dict[str, Dict[str, int]]
    files_with_results_count: int
    updated_files_count: int

    def get_total_analysis_count(self) -> int:
        """
        Return the total number of analysis results across all analysis types.

        A file with several analysis types contributes once per type. For the number
        of distinct files, use ``files_with_results_count`` instead.

        Returns:
            Sum of every per-value count in ``counts_by_analysis``.
        """
        return sum(sum(counts.values()) for counts in self.counts_by_analysis.values())


class EnhanceData(NamedTuple):
    """
    Enhancement results, treated as immutable by the helpers in this module.

    Attributes:
        results: Maps a file key to its analysis results.
            Format: {file_key: {analysis_type: result_value}}
        updated_files: File keys recorded as successfully updated.
    """
    results: Dict[str, Dict[str, str]]
    updated_files: Set[str]


def create_enhance_data() -> EnhanceData:
    """
    Create an empty EnhanceData structure.

    Returns:
        EnhanceData with no results and no updated files.
    """
    return EnhanceData(results={}, updated_files=set())


def add_analysis_result(data: EnhanceData, filename: str, analysis_type: str, result: str) -> EnhanceData:
    """
    Return a copy of ``data`` with one analysis result added (or replaced).

    Args:
        data: Current enhancement data (left unmodified).
        filename: Key identifying the file.
        analysis_type: Type of analysis (e.g., "content-type").
        result: Analysis result value.

    Returns:
        New EnhanceData containing the added result.
    """
    # Copy only the per-file dict being changed; other entries are shared, never mutated.
    file_results = {**data.results.get(filename, {}), analysis_type: result}
    new_results = {**data.results, filename: file_results}
    return EnhanceData(results=new_results, updated_files=data.updated_files)


def mark_file_updated(data: EnhanceData, filename: str) -> EnhanceData:
    """
    Return a copy of ``data`` with ``filename`` recorded as updated.

    Args:
        data: Current enhancement data (left unmodified).
        filename: Key identifying the file that was updated.

    Returns:
        New EnhanceData whose ``updated_files`` includes ``filename``.
    """
    # Set union builds a new set, so the caller's set is not modified.
    return EnhanceData(results=data.results, updated_files=data.updated_files | {filename})


def calculate_metrics(data: EnhanceData) -> EnhanceMetrics:
    """
    Derive metrics from enhancement data.

    Result values are stripped and lower-cased before counting, so ``"Task"`` and
    ``" task "`` are aggregated under ``"task"``.

    Args:
        data: Current enhancement data.

    Returns:
        EnhanceMetrics with per-analysis value counts, the number of files that have
        at least one result, and the number of updated files.
    """
    counts_by_analysis: Dict[str, Dict[str, int]] = {}
    files_with_results_count = 0

    for file_results in data.results.values():
        if not file_results:
            continue  # An empty per-file dict is not counted as a file with results
        files_with_results_count += 1
        for analysis_type, result_value in file_results.items():
            clean_value = result_value.strip().lower()
            value_counts = counts_by_analysis.setdefault(analysis_type, {})
            value_counts[clean_value] = value_counts.get(clean_value, 0) + 1

    return EnhanceMetrics(
        counts_by_analysis=counts_by_analysis,
        files_with_results_count=files_with_results_count,
        updated_files_count=len(data.updated_files),
    )


def get_files_with_results(data: EnhanceData) -> List[str]:
    """
    Get the file keys that have at least one analysis result.

    Args:
        data: Current enhancement data.

    Returns:
        File keys with a non-empty result dict, in insertion order.
    """
    return [filename for filename, file_results in data.results.items() if file_results]


def get_updated_files(data: EnhanceData) -> List[str]:
    """
    Get the file keys recorded as updated.

    Args:
        data: Current enhancement data.

    Returns:
        Sorted list of updated file keys. Sorting keeps the output stable between
        runs; plain set iteration order is not.
    """
    return sorted(data.updated_files)


def is_file_updated(data: EnhanceData, filename: str) -> bool:
    """
    Check whether a file is recorded as updated.

    Args:
        data: Current enhancement data.
        filename: Key identifying the file to check.

    Returns:
        True if ``filename`` is in ``updated_files``, False otherwise.
    """
    return filename in data.updated_files


def get_analysis_types(data: EnhanceData) -> List[str]:
    """
    Get all analysis types present in the results.

    Args:
        data: Current enhancement data.

    Returns:
        Sorted list of the unique analysis types found across all files.
    """
    analysis_types: Set[str] = set()
    for file_results in data.results.values():
        analysis_types.update(file_results)
    return sorted(analysis_types)


def get_result_for_file(data: EnhanceData, filename: str, analysis_type: str) -> Optional[str]:
    """
    Get the analysis result for a specific file and analysis type.

    Args:
        data: Current enhancement data.
        filename: Key identifying the file.
        analysis_type: Type of analysis (e.g., "content-type").

    Returns:
        The stored result, or None if the file or analysis type is not present.
    """
    return data.results.get(filename, {}).get(analysis_type)


def get_results_for_file(data: EnhanceData, filename: str) -> Dict[str, str]:
    """
    Get all analysis results for a specific file.

    Args:
        data: Current enhancement data.
        filename: Key identifying the file.

    Returns:
        A new dict of analysis results for the file (empty if the file is unknown).
        A copy is returned so callers cannot mutate the tracked results.
    """
    return dict(data.results.get(filename, {}))
meta::`` field names diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 70d137d09d3..01572d0e6e6 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -1,4 +1,323 @@ +import logging import sys +import os +from typing import Optional -for arg in sys.argv[1:]: - print(f"Enhancing topic: {arg}") \ No newline at end of file +from dotenv import load_dotenv +from openai import OpenAI, RateLimitError, APIConnectionError, OpenAIError +from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type +from concurrent.futures import ThreadPoolExecutor + +from enhance_data import EnhanceData, create_enhance_data, add_analysis_result, calculate_metrics +from rst_utils import get_results_for_file, inject_metadata_to_content, mark_file_updated + +logger = logging.getLogger(__name__) + +# Define constants +GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls +# Maximum content length in characters for topic analysis , approximately 300k tokens (leaving 100k for instructions/output) +MAX_CONTENT_LENGTH = 1200000 +RST_EXTENSION = '.rst' # File extension for RST files + +# Define timeout and retry parameters for API calls +# - Individual API calls timeout after DEFAULT_TIMEOUT seconds +# - On rate limits/connection errors, retry up to MAX_RETRIES times +# - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) +DEFAULT_TIMEOUT = 60 # Default timeout in seconds for an individual API call +MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff +MIN_WAIT = 10 # Minimum wait time between retries in seconds +MAX_WAIT = 120 # Maximum wait time between retries in seconds + +# Content type classification prompt +CONTENT_TYPE_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied HTML documents. You can distinguish between three types of content: task, concept, and reference. 
+ +*Concept topics* +Concept topics explain or define ideas. These topics often include background information that users need to understand before they start working with a specific product. Concept topics help the users understand the product, its purpose and benefits, before using the product. Concept topics do the following: describe a system, product, or a solution, outline a process, introduce tools or features, explain features, components, characteristics, restrictions, or capabilities, define terms in more detail than a simple glossary. + +*Task topics* +Task topics help achieve a specific goal by presenting instructions as 'procedures'. The first paragraph of the topic usually provides an overview and the benefits or importance of the task. A task is usually a numbered list of individual steps that help users achieve the goal. + +*Reference topics* +Reference topics provide quick access to information that users need to perform a task effectively. For example, lists all necessary links. Information in the main body of a reference topic may also be presented in a list or table format, for quick access and easy readability. + +When analyzed content is a mixture of different content types, classify based on the majority of content. + +Finally, generate a single-word lowercase output which is the recognized content type, with no additional styling, characters, or formatting.""" + +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True +) +def analyze_content(client: OpenAI, content: str, prompt: str, timeout: int = DEFAULT_TIMEOUT) -> str: + """ + Analyse content using OpenAI's API with retry and timeout logic. + Uses ThreadPoolExecutor for cross-platform timeout handling and retries for transient API errors. + + Args: + client (OpenAI): OpenAI client instance. + content (str): Preprocessed content. 
+ prompt (str): Prompt for the AI model. + timeout (int): Maximum time to wait for response in seconds. + + Returns: + str: Analysis result from the AI model, or empty string if analysis fails. + + Raises: + TimeoutError: If the API call exceeds the specified timeout. + RateLimitError: If API rate limits are exceeded (will trigger retry). + APIConnectionError: If connection fails (will trigger retry). + """ + # Log the content length before potential truncation + logger.debug(f"Processing content of length: {len(content)} characters") + + # Truncate content if its too long + if len(content) > MAX_CONTENT_LENGTH: + logger.warning(f"Content truncated to {MAX_CONTENT_LENGTH} characters for analysis.") + content = content[:MAX_CONTENT_LENGTH] + + def _make_api_call() -> str: + """ + Inner function to handle the OpenAI API call. + Separated to allow for clean timeout handling via ThreadPoolExecutor. + + Returns: + str: The model's response content + + Raises: + RateLimitError, APIConnectionError: Propagated for retry handling + """ + try: + logger.debug("Sending request to OpenAI API...") + completion = client.chat.completions.create( + model=GPT_MODEL, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": f"Content:\n\n{content}"} + ] + ) + result = completion.choices[0].message.content + logger.debug("Successfully received response from OpenAI API") + return result if result is not None else "" + except (RateLimitError, APIConnectionError) as e: + logger.warning(f"Retryable error occurred: {str(e)}") + raise # Re-raise for retry decorator to handle + + # Use ThreadPoolExecutor for cross-platform timeout handling + with ThreadPoolExecutor() as executor: + try: + future = executor.submit(_make_api_call) + return future.result(timeout=timeout) + except TimeoutError: + logger.error(f"API call timed out after {timeout} seconds") + raise # Re-raise the original timeout error + +def analyze_files(files: list[str], client: OpenAI, prompts: 
dict[str, str], timeout: int = DEFAULT_TIMEOUT) -> EnhanceData: + """ + Process a list of files and analyse their content using each of the passed prompts. + + Args: + files (list[str]): List of paths to files. + client (OpenAI): OpenAI client instance. + prompts (dict[str, str]): Dictionary of prompts for the AI model. + timeout (int): Maximum time to wait for each API call in seconds. + + Returns: + EnhanceData: Enhancement data structure containing analysis results and update tracking. + """ + data = create_enhance_data() + + logger.debug("============================") + logger.debug("Performing content analysis:") + logger.debug("============================") + + for file_path in files: # Iterate through each file in the list + logger.debug(f"Analysing file: {file_path}") + + # Read the content of the file + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + except (OSError, PermissionError) as e: + logger.error("Error reading file %s: %s", file_path, e) + continue + except UnicodeDecodeError as e: + logger.error("Unicode decode error reading file %s: %s", file_path, e) + continue + + # Check if the content is not empty + if content.strip(): + filename = os.path.basename(file_path) # Extract filename from file path + for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary + logger.debug(f"Running analysis: {prompt_name}") + try: + # Analyse the content using API with timeout and retry logic + result = analyze_content( + client, + content, + prompt, + timeout=timeout + ) + if result: + # Add the analysis result to the data structure + data = add_analysis_result(data, filename, prompt_name, result) + else: + logger.warning(f"No result for {filename} with prompt name: {prompt_name}") + + except (RateLimitError, APIConnectionError) as e: + # Exhausted all retries due to rate limits or connection errors + logger.error(f"Failed to analyse {filename} with prompt {prompt_name} after {MAX_RETRIES} 
retries: {e}") + continue + except TimeoutError as e: + # Timeout error due to an individual API call timing out + logger.error(f"Analysis timed out for {filename} with prompt {prompt_name}: {e}") + continue + except (OpenAIError, ValueError) as e: + # Other API errors and value errors + logger.error(f"Failed to analyse {filename} with prompt {prompt_name}: {e}") + continue + else: + logger.info(f"No analysable content found for {file_path}") + + metrics = calculate_metrics(data) + logger.info(f"Analysed {metrics.files_with_results_count} out of {len(files)} files with the configured prompts.") + return data + + +def get_openai_client() -> OpenAI: + """ + Create an OpenAI client with proper authentication. + + The API key is sourced in the following order: + 1. Environment variable OPENAI_API_KEY + 2. .env file in the project root + + Returns: + OpenAI: Authenticated OpenAI client instance + + Raises: + AuthenticationError: If no valid API key is found + """ + # Load environment variables from .env file if present + load_dotenv() + + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + raise OpenAIError("OpenAI API key not found. Set OPENAI_API_KEY environment variable.") + + return OpenAI(api_key=api_key) + +def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: + """ + Enhance RST files with metadata based on content analysis. + + Args: + files (list[str]): Paths to files to enhance. + client (OpenAI, optional): OpenAI client instance. If None, creates new instance. + + Returns: + EnhanceData: Enhancement data structure containing analysis results and update tracking. + + Raises: + OpenAIError: If no valid API key is found when creating a new client. 
+ """ + try: + client = client or get_openai_client() + except OpenAIError as e: + logger.error(f"Failed to initialise OpenAI client: {e}") + return create_enhance_data() + + # TODO: Make this config-driven, so that we can easily add more prompts and analysis types + prompts: dict[str, str] = {"content-type": CONTENT_TYPE_PROMPT} + + data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model + data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` + + return data + +def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: + """ + Process a list of files and update them with passed metadata. + + Args: + files (list[str]): List of paths to files. + data (EnhanceData): Enhancement data structure containing metadata for files. + + Returns: + EnhanceData: Updated enhancement data with files marked as updated. + """ + + logger.debug("===========================") + logger.debug("Updating metadata in files:") + logger.debug("===========================") + + current_data = data # Thread results through ``mark_file_updated`` immutably + + for file_path in files: + logger.debug("Updating metadata in file: %s", file_path) + filename = os.path.basename(file_path) # Keys in ``EnhanceData.results`` are basenames only + metadata = get_results_for_file(current_data, filename) + + # Confirm the metadata is not empty for the file, else skip + if not metadata: + logger.info("Skipping %s as it has no results for enhancement", filename) + continue + + logger.debug("Metadata found for %s, proceeding with updates.", filename) + + try: + with open(file_path, encoding="utf-8") as file: + content = file.read() # Full document; helpers locate or synthesise ``.. 
meta::`` + except (OSError, PermissionError) as exc: + logger.error("Error reading RST file %s: %s", file_path, exc) + continue + except UnicodeDecodeError as exc: + logger.error("Unicode decode error reading RST file %s: %s", file_path, exc) + continue + + new_content, changed = inject_metadata_to_content(content, metadata) + + # Confirm that at least one metadata has been changed for the file, else skip + if not changed: + logger.debug("No metadata changes applied for %s", filename) + continue # All keys already present or no additions—do not touch the file + + try: + with open(file_path, "w", encoding="utf-8") as file: + file.write(new_content) # Full-document rewrite (same path as read) + except (OSError, PermissionError) as exc: + logger.error("Error writing RST file %s: %s", file_path, exc) + continue + except UnicodeEncodeError as exc: + logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) + continue + + current_data = mark_file_updated(current_data, filename) # Record success for metrics only after a clean write + logger.debug("Updated file with supplied metadata: %s", filename) + logger.debug("-" * 50) + + metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote + logger.info("Enhanced %s files' metadata out of %s files processed.", metrics.updated_files_count, len(files)) + return current_data + +def main() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s %(name)s: %(message)s", + ) + + # Collect filenames from command line arguments + rst_files = sys.argv[1:] + if not rst_files: + logger.error("No input files provided. 
Pass a list of RST files as arguments.") + sys.exit(1) + + # Enhance the metadata in the RST files and return the enhancement data with updated files + data = enhance_metadata(rst_files) + # Log the metrics for the enhancement data + metrics = calculate_metrics(data) + logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files with results.") + +if __name__ == "__main__": + main() diff --git a/scripts/rst_utils.py b/scripts/rst_utils.py new file mode 100644 index 00000000000..c40d3cd6022 --- /dev/null +++ b/scripts/rst_utils.py @@ -0,0 +1,124 @@ +""" +Utilities for editing reStructuredText source, in particular ``.. meta::`` directives. +""" + +import logging +import os +import re + +from enhance_data import ( + EnhanceData, + calculate_metrics, + get_results_for_file, + mark_file_updated, +) + +logger = logging.getLogger(__name__) + + +def _find_meta_block(content: str) -> tuple[int, int, int, str, str]: + """ + Locate the first ``.. meta::`` directive in RST source. + + The directive block consists of the explicit marker line followed by + contiguous indented lines; a blank line or a less-indented line ends the + block (per reStructuredText directive block rules). + + Returns: + Tuple of ``(start, marker_end, block_end, inner, indent)``. + If no directive is found, ``start``, ``marker_end``, and ``block_end`` + are ``-1``, ``inner`` is ``''``, and ``indent`` defaults to three spaces. + """ + # Explicit markup + directive name; block body starts on the following line only + match = re.search(r"^\.\.\s+meta::\s*\n", content, re.MULTILINE) + if not match: + return -1, -1, -1, "", " " + + start = match.start() # Byte index of ``.. 
meta::`` (for whole-directive splice) + marker_end = match.end() # First character after the marker line's newline + indent = " " # Default field indent when the block is empty or we prepend a new block + inner_parts: list[str] = [] + consumed = 0 # Length of directive body in ``content`` (may omit final ``\n`` on last line) + remainder = content[marker_end:] # Scan forward only inside this file slice + + for line in remainder.splitlines(keepends=True): + if line.strip() == "": + break # Blank line terminates the directive block + if not line.startswith((" ", "\t")): + break # Body element at column 0 ends the block + if not inner_parts: + ws_len = len(line) - len(line.lstrip(" \t")) + indent = line[:ws_len] # Reuse the author's indent for new ``:name:`` lines + inner_parts.append(line) + consumed += len(line) + + block_end = marker_end + consumed # Exclusive end of the directive in ``content`` + inner = "".join(inner_parts) + # EOF without ``\n`` yields a last ``splitlines`` element with no newline—append one before new fields + if inner and not inner.endswith("\n"): + inner += "\n" + return start, marker_end, block_end, inner, indent + + +def _get_existing_meta_names(meta_block_inner: str) -> set[str]: + """ + Collect field names from the body of a ``.. meta::`` directive. + + Each line of the form ``:name: value`` contributes ``name`` (Docutils also + allows forms such as ``:name attr=value:``; the captured segment matches + that usage). 
+ """ + names: set[str] = set() + # Field list lines only; group 1 is the name segment (includes ``attr=value`` forms before the final ``:``) + for field_match in re.finditer(r"^[ \t]+:([^:\n]+?):", meta_block_inner, re.MULTILINE): + names.add(field_match.group(1).strip()) + return names + + +def _normalise_meta_field_value(value: str) -> str: + """Collapse whitespace so the meta field body stays a single logical line.""" + return " ".join(value.split()) # Docutils treats the field body as one string; keep it one physical line + + +def inject_metadata_to_content(content: str, metadata: dict[str, str]) -> tuple[str, bool]: + """ + Insert or append ``.. meta::`` field entries for the given name/value pairs. + + Appends to an existing ``.. meta::`` block when present; otherwise prepends + a new block at the start of the document (leading whitespace is stripped so + the directive is the first element). Skips keys that already appear in the + block. + + Returns: + Updated source and whether any change was made. + """ + start, marker_end, block_end, inner, indent = _find_meta_block(content) + names = _get_existing_meta_names(inner) # Snapshot before we add keys from this same batch + additions: list[str] = [] + + for key, raw_value in metadata.items(): + if key in names: + logger.warning( + "Existing meta field %r in .. meta:: block; skipping", + key, + ) + continue + value = _normalise_meta_field_value(raw_value) + additions.append(f"{indent}:{key}: {value}\n") + names.add(key) # Prevent duplicate inserts if ``metadata`` repeats a key + + if not additions: + return content, False # Nothing new to write; leave the file untouched + + new_inner = inner + "".join(additions) # Existing fields unchanged, then appended lines + + if start >= 0: + # Replace only the directive body slice; ``marker_end``/``block_end`` bracket the original inner + new_content = content[:marker_end] + new_inner + content[block_end:] + else: + # No ``.. 
meta::`` yet: insert at document start; strip leading whitespace so the block is truly first + remainder = content.lstrip() + new_content = ".. meta::\n" + "".join(additions) + "\n" + remainder # Blank line after block separates it from the body + + return new_content, True + From 6fd04f8751792ed7e7d5e122f573bc265304a859 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Mon, 4 May 2026 18:07:25 +0100 Subject: [PATCH 07/18] OPENR-89: Fix bug using base filename for enhance results data --- scripts/enhance_topics.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 01572d0e6e6..45145e444df 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -148,7 +148,6 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): - filename = os.path.basename(file_path) # Extract filename from file path for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary logger.debug(f"Running analysis: {prompt_name}") try: @@ -161,21 +160,21 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim ) if result: # Add the analysis result to the data structure - data = add_analysis_result(data, filename, prompt_name, result) + data = add_analysis_result(data, file_path, prompt_name, result) else: - logger.warning(f"No result for {filename} with prompt name: {prompt_name}") + logger.warning(f"No result for {file_path} with prompt name: {prompt_name}") except (RateLimitError, APIConnectionError) as e: # Exhausted all retries due to rate limits or connection errors - logger.error(f"Failed to analyse {filename} with prompt {prompt_name} after {MAX_RETRIES} retries: {e}") + logger.error(f"Failed to analyse {file_path} with prompt {prompt_name} after {MAX_RETRIES} retries: {e}") continue except TimeoutError as e: # Timeout error due to an 
individual API call timing out - logger.error(f"Analysis timed out for {filename} with prompt {prompt_name}: {e}") + logger.error(f"Analysis timed out for {file_path} with prompt {prompt_name}: {e}") continue except (OpenAIError, ValueError) as e: # Other API errors and value errors - logger.error(f"Failed to analyse {filename} with prompt {prompt_name}: {e}") + logger.error(f"Failed to analyse {file_path} with prompt {prompt_name}: {e}") continue else: logger.info(f"No analysable content found for {file_path}") @@ -256,15 +255,14 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: for file_path in files: logger.debug("Updating metadata in file: %s", file_path) - filename = os.path.basename(file_path) # Keys in ``EnhanceData.results`` are basenames only - metadata = get_results_for_file(current_data, filename) + metadata = get_results_for_file(current_data, file_path) # Confirm the metadata is not empty for the file, else skip if not metadata: - logger.info("Skipping %s as it has no results for enhancement", filename) + logger.info("Skipping %s as it has no results for enhancement", file_path) continue - logger.debug("Metadata found for %s, proceeding with updates.", filename) + logger.debug("Metadata found for %s, proceeding with updates.", file_path) try: with open(file_path, encoding="utf-8") as file: @@ -280,7 +278,7 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: # Confirm that at least one metadata has been changed for the file, else skip if not changed: - logger.debug("No metadata changes applied for %s", filename) + logger.debug("No metadata changes applied for %s", file_path) continue # All keys already present or no additions—do not touch the file try: @@ -293,8 +291,8 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) continue - current_data = mark_file_updated(current_data, filename) # 
Record success for metrics only after a clean write - logger.debug("Updated file with supplied metadata: %s", filename) + current_data = mark_file_updated(current_data, file_path) # Record success for metrics only after a clean write + logger.debug("Updated file with supplied metadata: %s", file_path) logger.debug("-" * 50) metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote From 93734f450bff3a2b1f9e352243d2ac308ded5935 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 10:18:00 +0100 Subject: [PATCH 08/18] OPENR-89: Install python dependencies in workflow --- .github/workflows/enhance.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/enhance.yml b/.github/workflows/enhance.yml index e03ce90d395..d60fb72a88c 100644 --- a/.github/workflows/enhance.yml +++ b/.github/workflows/enhance.yml @@ -20,6 +20,9 @@ jobs: with: python-version: '3.12' + - name: Install dependencies + run: pip install --no-warn-script-location --user -r requirements.txt -c constraints.txt + - name: Enhance topics env: BASE_SHA: ${{ github.event.before }} From 88bf36a23b12d11af4e03e5d8abf503d9e1eefa6 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 11:54:30 +0100 Subject: [PATCH 09/18] OPENR-89: Prompt adjustments --- .gitignore | 1 + scripts/enhance_topics.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 652f1b03313..24c42db21a7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ _build/ __pycache__ ros2doc/ .DS_Store +.env diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 45145e444df..d2f39c6770f 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -28,21 +28,17 @@ MIN_WAIT = 10 # Minimum wait time between retries in seconds MAX_WAIT = 120 # Maximum wait time between retries in seconds -# Content type classification prompt -CONTENT_TYPE_PROMPT = """You are a content analyst, 
and your role is to analyze text content within supplied HTML documents. You can distinguish between three types of content: task, concept, and reference. +KEYWORDS_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. -*Concept topics* -Concept topics explain or define ideas. These topics often include background information that users need to understand before they start working with a specific product. Concept topics help the users understand the product, its purpose and benefits, before using the product. Concept topics do the following: describe a system, product, or a solution, outline a process, introduce tools or features, explain features, components, characteristics, restrictions, or capabilities, define terms in more detail than a simple glossary. +Your role is to extract 3 to 5 keywords from the content for use in metadata. The keywords should be single words that are the most important and relevant words to the content topic. -*Task topics* -Task topics help achieve a specific goal by presenting instructions as 'procedures'. The first paragraph of the topic usually provides an overview and the benefits or importance of the task. A task is usually a numbered list of individual steps that help users achieve the goal. +Finally, generate a comma-separated list of these keywords, in lowercase, with no additional styling, characters, or formatting.""" -*Reference topics* -Reference topics provide quick access to information that users need to perform a task effectively. For example, lists all necessary links. Information in the main body of a reference topic may also be presented in a list or table format, for quick access and easy readability. +DESCRIPTION_PROMPT = """You are a content analyst, and your role is to analyze text content within supplied documents. -When analyzed content is a mixture of different content types, classify based on the majority of content. 
+Your role is to create a concise description of the content for use in metadata. The description should be a single sentence (of a maximum of 130 characters) that captures the main idea of the content. -Finally, generate a single-word lowercase output which is the recognized content type, with no additional styling, characters, or formatting.""" +Finally, generate this description, with no additional styling, characters, or formatting.""" @retry( retry=retry_if_exception_type((RateLimitError, APIConnectionError)), @@ -228,7 +224,7 @@ def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> Enhan return create_enhance_data() # TODO: Make this config-driven, so that we can easily add more prompts and analysis types - prompts: dict[str, str] = {"content-type": CONTENT_TYPE_PROMPT} + prompts: dict[str, str] = {"description": DESCRIPTION_PROMPT, "keywords": KEYWORDS_PROMPT} data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` From 2b8021e363c99e873095896c8dc184244303a315 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 12:17:46 +0100 Subject: [PATCH 10/18] OPENR-89: Only make API calls for fields which do not exist --- scripts/enhance_topics.py | 12 ++++++++++-- scripts/rst_utils.py | 22 ++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index d2f39c6770f..2c17ce67ee4 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -8,8 +8,8 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type from concurrent.futures import ThreadPoolExecutor -from enhance_data import EnhanceData, create_enhance_data, add_analysis_result, calculate_metrics -from rst_utils import get_results_for_file, inject_metadata_to_content, mark_file_updated +from enhance_data 
import EnhanceData, add_analysis_result, calculate_metrics, create_enhance_data, get_results_for_file, mark_file_updated +from rst_utils import get_meta_names_from_content, inject_metadata_to_content logger = logging.getLogger(__name__) @@ -144,7 +144,15 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): + existing_meta_names = get_meta_names_from_content(content) for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary + if prompt_name in existing_meta_names: + logger.info( + "Skipping analysis for %s: meta field %r already present in .. meta::", + file_path, + prompt_name, + ) + continue logger.debug(f"Running analysis: {prompt_name}") try: # Analyse the content using API with timeout and retry logic diff --git a/scripts/rst_utils.py b/scripts/rst_utils.py index c40d3cd6022..6599f7a97d9 100644 --- a/scripts/rst_utils.py +++ b/scripts/rst_utils.py @@ -3,16 +3,8 @@ """ import logging -import os import re -from enhance_data import ( - EnhanceData, - calculate_metrics, - get_results_for_file, - mark_file_updated, -) - logger = logging.getLogger(__name__) @@ -60,7 +52,7 @@ def _find_meta_block(content: str) -> tuple[int, int, int, str, str]: return start, marker_end, block_end, inner, indent -def _get_existing_meta_names(meta_block_inner: str) -> set[str]: +def _extract_meta_names_from_block(meta_block_inner: str) -> set[str]: """ Collect field names from the body of a ``.. meta::`` directive. @@ -75,6 +67,16 @@ def _get_existing_meta_names(meta_block_inner: str) -> set[str]: return names +def get_meta_names_from_content(content: str) -> set[str]: + """ + Return the set of field names already present in the first ``.. meta::`` block. + + If no ``.. meta::`` directive exists, returns an empty set. 
+ """ + _start, _marker_end, _block_end, inner, _indent = _find_meta_block(content) + return _extract_meta_names_from_block(inner) + + def _normalise_meta_field_value(value: str) -> str: """Collapse whitespace so the meta field body stays a single logical line.""" return " ".join(value.split()) # Docutils treats the field body as one string; keep it one physical line @@ -93,7 +95,7 @@ def inject_metadata_to_content(content: str, metadata: dict[str, str]) -> tuple[ Updated source and whether any change was made. """ start, marker_end, block_end, inner, indent = _find_meta_block(content) - names = _get_existing_meta_names(inner) # Snapshot before we add keys from this same batch + names = _extract_meta_names_from_block(inner) # Snapshot before we add keys from this same batch additions: list[str] = [] for key, raw_value in metadata.items(): From 9badeb9cf194ab05da57bd572f07462f82ab5dc9 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 13:21:50 +0100 Subject: [PATCH 11/18] OPENR-89: Enhance only RST files --- scripts/enhance_data.py | 54 +++++++++++++++++++++------------------ scripts/enhance_topics.py | 40 +++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 35 deletions(-) diff --git a/scripts/enhance_data.py b/scripts/enhance_data.py index e7e7928c6b2..dea61b02600 100644 --- a/scripts/enhance_data.py +++ b/scripts/enhance_data.py @@ -22,18 +22,6 @@ class EnhanceMetrics(NamedTuple): counts_by_analysis: Dict[str, Dict[str, int]] files_with_results_count: int updated_files_count: int - - def get_total_analysis_count(self) -> int: - """ - Calculate the total number of analysis results across all analysis types. - - Note: Files with multiple analysis types contribute multiple counts. - For unique file count, use files_with_results_count instead. - - Returns: - Total count of all analysis results across all analysis types. 
- """ - return sum(sum(counts.values()) for counts in self.counts_by_analysis.values()) class EnhanceData(NamedTuple): @@ -41,14 +29,30 @@ class EnhanceData(NamedTuple): Immutable data structure representing enhancement results. Attributes: - results: Dictionary mapping filename to analysis results. - Format: {filename: {analysis_type: result_value}} - updated_files: Set of filenames that had metadata successfully updated. + results: Dictionary mapping file paths to analysis results. + Format: {file_path: {analysis_type: result_value}} + updated_files: Set of file paths that had metadata successfully updated. """ results: Dict[str, Dict[str, str]] updated_files: Set[str] +def get_total_analysis_count(metrics: EnhanceMetrics) -> int: + """ + Calculate the total number of analysis results across all analysis types. + + Note: Files with multiple analysis types contribute multiple counts. + For unique file count, use metrics.files_with_results_count instead. + + Args: + metrics: The metrics structure to analyse. + + Returns: + Total count of all analysis results across all analysis types. + """ + return sum(sum(counts.values()) for counts in metrics.counts_by_analysis.values()) + + def create_enhance_data() -> EnhanceData: """ Initialise an empty EnhanceData structure. @@ -67,7 +71,7 @@ def add_analysis_result(data: EnhanceData, filename: str, analysis_type: str, re Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). analysis_type: Type of analysis (e.g., "content-type"). result: Analysis result value. @@ -89,12 +93,12 @@ def mark_file_updated(data: EnhanceData, filename: str) -> EnhanceData: Args: data: Current enhancement data. - filename: Name of the file that was updated. + filename: Path to the file that was updated (relative to repository root). Returns: New EnhanceData with the file marked as updated. 
""" - return EnhanceData(results=data.results, updated_files=data.updated_files | {filename}) # Set union adds one basename + return EnhanceData(results=data.results, updated_files=data.updated_files | {filename}) # Set union adds one file path def calculate_metrics(data: EnhanceData) -> EnhanceMetrics: @@ -130,26 +134,26 @@ def calculate_metrics(data: EnhanceData) -> EnhanceMetrics: def get_files_with_results(data: EnhanceData) -> List[str]: """ - Get list of filenames that had analysis results. + Get list of file paths that had analysis results. Args: data: Current enhancement data. Returns: - List of filenames with at least one analysis result. + List of file paths with at least one analysis result. """ return [filename for filename, file_results in data.results.items() if file_results] def get_updated_files(data: EnhanceData) -> List[str]: """ - Get list of filenames that had metadata successfully updated. + Get list of file paths that had metadata successfully updated. Args: data: Current enhancement data. Returns: - List of filenames that were updated with metadata. + List of file paths that were updated with metadata. """ return list(data.updated_files) @@ -160,7 +164,7 @@ def is_file_updated(data: EnhanceData, filename: str) -> bool: Args: data: Current enhancement data. - filename: Name of the file to check. + filename: Path to the file to check (relative to repository root). Returns: True if the file was updated, False otherwise. @@ -190,7 +194,7 @@ def get_result_for_file(data: EnhanceData, filename: str, analysis_type: str) -> Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). analysis_type: Type of analysis (e.g., "content-type"). Returns: @@ -205,7 +209,7 @@ def get_results_for_file(data: EnhanceData, filename: str) -> Dict[str, str]: Args: data: Current enhancement data. - filename: Name of the file. + filename: Path to the file (relative to repository root). 
Returns: Dictionary of analysis results for the file, or empty dict if not found. diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 2c17ce67ee4..6b0cb1c079c 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -213,7 +213,7 @@ def get_openai_client() -> OpenAI: def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> EnhanceData: """ - Enhance RST files with metadata based on content analysis. + Enhance files with metadata based on content analysis. Args: files (list[str]): Paths to files to enhance. @@ -272,10 +272,10 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: with open(file_path, encoding="utf-8") as file: content = file.read() # Full document; helpers locate or synthesise ``.. meta::`` except (OSError, PermissionError) as exc: - logger.error("Error reading RST file %s: %s", file_path, exc) + logger.error("Error reading file %s: %s", file_path, exc) continue except UnicodeDecodeError as exc: - logger.error("Unicode decode error reading RST file %s: %s", file_path, exc) + logger.error("Unicode decode error reading file %s: %s", file_path, exc) continue new_content, changed = inject_metadata_to_content(content, metadata) @@ -289,10 +289,10 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: with open(file_path, "w", encoding="utf-8") as file: file.write(new_content) # Full-document rewrite (same path as read) except (OSError, PermissionError) as exc: - logger.error("Error writing RST file %s: %s", file_path, exc) + logger.error("Error writing file %s: %s", file_path, exc) continue except UnicodeEncodeError as exc: - logger.error("Unicode encode error while writing RST file %s: %s", file_path, exc) + logger.error("Unicode encode error while writing file %s: %s", file_path, exc) continue current_data = mark_file_updated(current_data, file_path) # Record success for metrics only after a clean write @@ -304,22 +304,42 @@ def 
update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: return current_data def main() -> None: + """ + Main entry point for the script. + + - Parses command-line arguments to collect input file paths. + - Filters the provided files to include only reStructuredText (.rst) files. + - Enhances the metadata of each RST file using AI-based analysis (keywords and description). + - Writes updated metadata back to files and logs processing metrics. + + Usage: + python enhance_topics.py ... + + Only files with the .rst extension will be processed. + Logs the number of files successfully enhanced. + """ + logging.basicConfig( level=logging.INFO, format="%(levelname)s %(name)s: %(message)s", ) - # Collect filenames from command line arguments - rst_files = sys.argv[1:] + # Collect filenames from command line arguments and filter for RST files + input_files = sys.argv[1:] + rst_files = [f for f in input_files if f.lower().endswith(RST_EXTENSION)] + if not rst_files: - logger.error("No input files provided. Pass a list of RST files as arguments.") - sys.exit(1) + if input_files: + logger.info("No RST files found among provided arguments. Skipping enhancement.") + else: + logger.error("No input files provided. 
Pass a list of RST files as arguments.") + sys.exit(0) # Enhance the metadata in the RST files and return the enhancement data with updated files data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files with results.") + logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files processed.") if __name__ == "__main__": main() From 0ededa3c3c81f622f857eabeb1e1343466a237a1 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Tue, 5 May 2026 15:16:34 +0100 Subject: [PATCH 12/18] OPENR-89: Small tweaks to logging and constants --- scripts/enhance_topics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 6b0cb1c079c..6c13dd29eb9 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -23,7 +23,7 @@ # - Individual API calls timeout after DEFAULT_TIMEOUT seconds # - On rate limits/connection errors, retry up to MAX_RETRIES times # - Wait between retries, increasing exponentially: MIN_WAIT → MAX_WAIT (capped) -DEFAULT_TIMEOUT = 60 # Default timeout in seconds for an individual API call +DEFAULT_TIMEOUT = 30 # Default timeout in seconds for an individual API call MAX_RETRIES = 10 # Maximum number of retry attempts for exponential backoff MIN_WAIT = 10 # Minimum wait time between retries in seconds MAX_WAIT = 120 # Maximum wait time between retries in seconds @@ -300,7 +300,7 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.debug("-" * 50) metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote - logger.info("Enhanced %s files' metadata out of %s files processed.", metrics.updated_files_count, len(files)) + logger.info("Updated metadata in %s files out of %s files processed.", 
metrics.updated_files_count, len(files)) return current_data def main() -> None: @@ -317,7 +317,7 @@ def main() -> None: Only files with the .rst extension will be processed. Logs the number of files successfully enhanced. - """ + """b logging.basicConfig( level=logging.INFO, @@ -339,7 +339,7 @@ def main() -> None: data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced {metrics.updated_files_count} RST files metadata out of {len(rst_files)} files processed.") + logger.info(f"Enhanced files: {metrics.files_with_results_count} with analysis results, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") if __name__ == "__main__": main() From e6389f8c664a985c1a7958dfe8cc470c9502f964 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Wed, 6 May 2026 18:28:52 +0100 Subject: [PATCH 13/18] OPENR-89: Adding some sanity check validation for generated values - moderation API and basic language check --- scripts/enhance_topics.py | 139 +++++++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 10 deletions(-) diff --git a/scripts/enhance_topics.py b/scripts/enhance_topics.py index 6c13dd29eb9..0a3b5745672 100644 --- a/scripts/enhance_topics.py +++ b/scripts/enhance_topics.py @@ -1,4 +1,5 @@ import logging +import re import sys import os from typing import Optional @@ -15,7 +16,7 @@ # Define constants GPT_MODEL = "gpt-5.4-nano" # GPT model to use for the API calls -# Maximum content length in characters for topic analysis , approximately 300k tokens (leaving 100k for instructions/output) +# Maximum content length in characters, approximately 300k tokens (leaving 100k for instructions/output) MAX_CONTENT_LENGTH = 1200000 RST_EXTENSION = '.rst' # File extension for RST files @@ -40,6 +41,10 @@ Finally, generate this description, with no additional styling, characters, or formatting.""" +ENGLISH_LANGUAGE_CHECK_PROMPT = """You are a validation assistant, 
and your role is to determine whether the following text is written entirely in English. Common technical terms, acronyms, and internationally recognised proper nouns are acceptable if they are normally used in English technical documentation. + +Answer ONLY with the single word yes or no in lowercase, with no punctuation, explanation, or additional text.""" + @retry( retry=retry_if_exception_type((RateLimitError, APIConnectionError)), stop=stop_after_attempt(MAX_RETRIES), @@ -109,6 +114,113 @@ def _make_api_call() -> str: logger.error(f"API call timed out after {timeout} seconds") raise # Re-raise the original timeout error +@retry( + retry=retry_if_exception_type((RateLimitError, APIConnectionError)), + stop=stop_after_attempt(MAX_RETRIES), + wait=wait_random_exponential(multiplier=MIN_WAIT, max=MAX_WAIT), + reraise=True +) +def validate_content(client: OpenAI, generated: str, timeout: int = DEFAULT_TIMEOUT) -> bool: + """ + Validate generated content using the moderation API and a separate English-language check. + + Intended for any model-generated text before it is persisted (metadata today; other content later). + Uses ThreadPoolExecutor for cross-platform timeout handling and retries for transient API errors. + + Args: + client (OpenAI): OpenAI client instance. + generated (str): Model-generated text to validate. + timeout (int): Maximum time to wait for the combined validation calls in seconds. + + Returns: + bool: True if content passes moderation and the language check; False otherwise. + + Raises: + TimeoutError: If the validation calls exceed the specified timeout. + RateLimitError: If API rate limits are exceeded (will trigger retry). + APIConnectionError: If connection fails (will trigger retry). 
+ """ + if not generated.strip(): + logger.debug("Validation skipped: empty generated content") + return False + + text = generated + if len(text) > MAX_CONTENT_LENGTH: + logger.warning( + "Generated text truncated to %s characters for validation.", + MAX_CONTENT_LENGTH, + ) + text = text[:MAX_CONTENT_LENGTH] + + def _run_validation() -> bool: + """ + Run moderation and English checks sequentially. + + Returns: + bool: True if both checks pass. + + Raises: + RateLimitError, APIConnectionError: Propagated for retry handling. + """ + try: + logger.debug("Sending generated text to moderation API...") + moderation = client.moderations.create(input=text) + except (RateLimitError, APIConnectionError) as e: + logger.warning("Retryable error during moderation: %s", e) + raise + + if not moderation.results: + logger.warning("Moderation API returned no results; treating as validation failure") + return False + + result0 = moderation.results[0] + if result0.flagged: + categories = [ + name + for name, flagged in result0.categories.model_dump().items() + if flagged + ] + logger.warning( + "Content failed moderation (flagged). 
Categories: %s", + ", ".join(categories) if categories else "unknown", + ) + return False + + try: + logger.debug("Sending generated text for English-language validation...") + completion = client.chat.completions.create( + model=GPT_MODEL, + messages=[ + {"role": "system", "content": ENGLISH_LANGUAGE_CHECK_PROMPT}, + {"role": "user", "content": f"Text:\n\n{text}"}, + ], + ) + except (RateLimitError, APIConnectionError) as e: + logger.warning("Retryable error during language validation: %s", e) + raise + + answer = completion.choices[0].message.content + raw = (answer or "").strip().lower() + # Accept a single leading yes/no token even if the model adds stray whitespace + match = re.match(r"^(yes|no)\b", raw) + if not match or match.group(1) != "yes": + logger.warning( + "Content failed English-language validation (model answer: %r)", + answer, + ) + return False + + logger.debug("Generated content passed moderation and English-language validation") + return True + + with ThreadPoolExecutor() as executor: + try: + future = executor.submit(_run_validation) + return future.result(timeout=timeout) + except TimeoutError: + logger.error("Validation timed out after %s seconds", timeout) + raise + def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], timeout: int = DEFAULT_TIMEOUT) -> EnhanceData: """ Process a list of files and analyse their content using each of the passed prompts. @@ -144,10 +256,11 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim # Check if the content is not empty if content.strip(): + # Check if the content has any meta fields already existing_meta_names = get_meta_names_from_content(content) for prompt_name, prompt in prompts.items(): # Iterate through each prompt in the dictionary if prompt_name in existing_meta_names: - logger.info( + logger.warning( "Skipping analysis for %s: meta field %r already present in .. 
meta::", file_path, prompt_name, @@ -163,8 +276,15 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim timeout=timeout ) if result: - # Add the analysis result to the data structure - data = add_analysis_result(data, file_path, prompt_name, result) + if validate_content(client, result, timeout=timeout): + # Add the analysis result to the data structure + data = add_analysis_result(data, file_path, prompt_name, result) + else: + logger.warning( + "Validation failed for generated %s in %s; result not stored", + prompt_name, + file_path, + ) else: logger.warning(f"No result for {file_path} with prompt name: {prompt_name}") @@ -183,8 +303,6 @@ def analyze_files(files: list[str], client: OpenAI, prompts: dict[str, str], tim else: logger.info(f"No analysable content found for {file_path}") - metrics = calculate_metrics(data) - logger.info(f"Analysed {metrics.files_with_results_count} out of {len(files)} files with the configured prompts.") return data @@ -234,7 +352,7 @@ def enhance_metadata(files: list[str], client: Optional[OpenAI] = None) -> Enhan # TODO: Make this config-driven, so that we can easily add more prompts and analysis types prompts: dict[str, str] = {"description": DESCRIPTION_PROMPT, "keywords": KEYWORDS_PROMPT} - data = analyze_files(files, client, prompts) # Populate ``EnhanceData.results`` from the model + data = analyze_files(files, client, prompts) # Populate and validate ``EnhanceData.results`` from the model data = update_meta_files(files, data) # Persist results as metadata fields and set ``updated_files`` return data @@ -299,7 +417,8 @@ def update_meta_files(files: list[str], data: EnhanceData) -> EnhanceData: logger.debug("Updated file with supplied metadata: %s", file_path) logger.debug("-" * 50) - metrics = calculate_metrics(current_data) # ``updated_files_count`` reflects files we rewrote + # ``files_with_results_count`` reflects files with at least one valid analysis result, and ``updated_files_count`` 
reflects files we rewrote + metrics = calculate_metrics(current_data) logger.info("Updated metadata in %s files out of %s files processed.", metrics.updated_files_count, len(files)) return current_data @@ -317,7 +436,7 @@ def main() -> None: Only files with the .rst extension will be processed. Logs the number of files successfully enhanced. - """b + """ logging.basicConfig( level=logging.INFO, @@ -339,7 +458,7 @@ def main() -> None: data = enhance_metadata(rst_files) # Log the metrics for the enhancement data metrics = calculate_metrics(data) - logger.info(f"Enhanced files: {metrics.files_with_results_count} with analysis results, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") + logger.info(f"Enhanced files: {metrics.files_with_results_count} with at least one valid analysis result, and {metrics.updated_files_count} files updated, out of {len(rst_files)} RST files.") if __name__ == "__main__": main() From b0513cde510edc837d3a5b4f1e4413e32221bdb3 Mon Sep 17 00:00:00 2001 From: Keith Kirkwood Date: Wed, 6 May 2026 18:29:31 +0100 Subject: [PATCH 14/18] OPENR-89: Add some unit tests for central enhance topics module --- scripts/test/__init__.py | 0 scripts/test/test_enhance_topics.py | 191 ++++++++++++++++++ .../test/test_enhance_topics_validation.py | 61 ++++++ 3 files changed, 252 insertions(+) create mode 100644 scripts/test/__init__.py create mode 100644 scripts/test/test_enhance_topics.py create mode 100644 scripts/test/test_enhance_topics_validation.py diff --git a/scripts/test/__init__.py b/scripts/test/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/test/test_enhance_topics.py b/scripts/test/test_enhance_topics.py new file mode 100644 index 00000000000..4b8bdbd64aa --- /dev/null +++ b/scripts/test/test_enhance_topics.py @@ -0,0 +1,191 @@ +import pytest +from unittest.mock import MagicMock, patch, mock_open +import sys +import os +from openai import OpenAIError + +# Add the scripts 
directory to sys.path to allow importing enhance_topics +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from enhance_topics import ( + analyze_content, + get_openai_client, + analyze_files, + update_meta_files, + enhance_metadata, + MAX_CONTENT_LENGTH +) +from enhance_data import EnhanceData + +@pytest.fixture +def mock_client(): + """Provides a mocked OpenAI client.""" + return MagicMock() + +# --- Tests for analyze_content --- + +def test_analyze_content_success(mock_client): + """Test successful content analysis.""" + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='Analysis result'))] + mock_client.chat.completions.create.return_value = mock_completion + + result = analyze_content(mock_client, "Some content", "Some prompt") + assert result == 'Analysis result' + mock_client.chat.completions.create.assert_called_once() + +def test_analyze_content_truncation(mock_client): + """Test that content is truncated if it exceeds MAX_CONTENT_LENGTH.""" + long_content = "a" * (MAX_CONTENT_LENGTH + 100) + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='Result'))] + mock_client.chat.completions.create.return_value = mock_completion + + analyze_content(mock_client, long_content, "Prompt") + + # Check the call arguments to ensure content was truncated + args, kwargs = mock_client.chat.completions.create.call_args + sent_content = kwargs['messages'][1]['content'] + assert len(sent_content) <= MAX_CONTENT_LENGTH + len("Content:\n\n") + +def test_analyze_content_empty_response(mock_client): + """Test handling of empty response from API.""" + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content=None))] + mock_client.chat.completions.create.return_value = mock_completion + + result = analyze_content(mock_client, "Content", "Prompt") + assert result == "" + +# --- Tests for get_openai_client --- + 
+@patch('enhance_topics.load_dotenv') +@patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}) +def test_get_openai_client_success(mock_load_dotenv): + """Test successful client initialisation.""" + client = get_openai_client() + assert client.api_key == "test-key" + +@patch('enhance_topics.load_dotenv') +@patch.dict(os.environ, {}, clear=True) +def test_get_openai_client_missing_key(mock_load_dotenv): + """Test error when API key is missing.""" + with pytest.raises(OpenAIError, match="OpenAI API key not found"): + get_openai_client() + +# --- Tests for analyze_files --- + +@patch('enhance_topics.get_meta_names_from_content') +@patch('enhance_topics.analyze_content') +@patch('enhance_topics.validate_content') +@patch('enhance_topics.add_analysis_result') +@patch('enhance_topics.create_enhance_data') +def test_analyze_files_basic_flow( + mock_create_data, + mock_add_result, + mock_validate, + mock_analyze, + mock_get_meta, + mock_client +): + """Test the basic flow of analyze_files.""" + mock_create_data.return_value = EnhanceData(results={}, updated_files=set()) + mock_get_meta.return_value = [] # No existing metadata + mock_analyze.return_value = "Generated result" + mock_validate.return_value = True + mock_add_result.return_value = EnhanceData( + results={"file1.rst": {"description": "res"}}, + updated_files=set() + ) + + files = ["file1.rst"] + prompts = {"description": "desc prompt"} + + with patch("builtins.open", mock_open(read_data="File content")): + analyze_files(files, mock_client, prompts) + + mock_analyze.assert_called_once() + mock_validate.assert_called_once() + mock_add_result.assert_called_once() + +@patch('enhance_topics.get_meta_names_from_content') +def test_analyze_files_skips_existing_meta(mock_get_meta, mock_client): + """Test that files with existing metadata are skipped.""" + mock_get_meta.return_value = ["description"] # Description already exists + + files = ["file1.rst"] + prompts = {"description": "desc prompt"} + + with 
patch("builtins.open", mock_open(read_data="File content")): + with patch('enhance_topics.analyze_content') as mock_analyze: + analyze_files(files, mock_client, prompts) + mock_analyze.assert_not_called() + +# --- Tests for update_meta_files --- + +@patch('enhance_topics.get_results_for_file') +@patch('enhance_topics.inject_metadata_to_content') +@patch('enhance_topics.mark_file_updated') +def test_update_meta_files_writes_on_change( + mock_mark_updated, + mock_inject, + mock_get_results, + mock_client +): + """Test that files are written only when metadata changes.""" + mock_get_results.return_value = {"description": "new desc"} + mock_inject.return_value = ("New content", True) # Changed is True + mock_mark_updated.return_value = EnhanceData( + results={}, + updated_files={"file1.rst"} + ) + + data = EnhanceData( + results={"file1.rst": {"description": "new desc"}}, + updated_files=set() + ) + + m_open = mock_open(read_data="Old content") + with patch("builtins.open", m_open): + update_meta_files(["file1.rst"], data) + + # Verify write was called + m_open().write.assert_called_once_with("New content") + mock_mark_updated.assert_called_once() + +@patch('enhance_topics.get_results_for_file') +@patch('enhance_topics.inject_metadata_to_content') +def test_update_meta_files_skips_no_change(mock_inject, mock_get_results): + """Test that files are NOT written when no metadata changes.""" + mock_get_results.return_value = {"description": "same desc"} + mock_inject.return_value = ("Old content", False) # Changed is False + + data = EnhanceData( + results={"file1.rst": {"description": "same desc"}}, + updated_files=set() + ) + + m_open = mock_open(read_data="Old content") + with patch("builtins.open", m_open): + update_meta_files(["file1.rst"], data) + + # Verify write was NOT called + m_open().write.assert_not_called() + +# --- Tests for enhance_metadata --- + +@patch('enhance_topics.get_openai_client') +@patch('enhance_topics.analyze_files') 
+@patch('enhance_topics.update_meta_files') +def test_enhance_metadata_orchestration(mock_update, mock_analyze, mock_get_client): + """Test the orchestration in enhance_metadata.""" + mock_get_client.return_value = MagicMock() + mock_analyze.return_value = EnhanceData(results={"f": {"d": "r"}}, updated_files=set()) + mock_update.return_value = EnhanceData(results={"f": {"d": "r"}}, updated_files={"f"}) + + result = enhance_metadata(["file1.rst"]) + + assert result.updated_files == {"f"} + mock_get_client.assert_called_once() + mock_analyze.assert_called_once() + mock_update.assert_called_once() diff --git a/scripts/test/test_enhance_topics_validation.py b/scripts/test/test_enhance_topics_validation.py new file mode 100644 index 00000000000..54309bbf340 --- /dev/null +++ b/scripts/test/test_enhance_topics_validation.py @@ -0,0 +1,61 @@ +import pytest +from unittest.mock import MagicMock +import sys +import os + +# Add the scripts directory to sys.path to allow importing enhance_topics +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from enhance_topics import validate_content + +@pytest.fixture +def mock_client(): + """Provides a mocked OpenAI client.""" + return MagicMock() + +def test_validate_content_success(mock_client): + """Test that valid English content passes both moderation and language checks.""" + # Mock Moderation: Not flagged + mock_result = MagicMock() + mock_result.flagged = False + mock_client.moderations.create.return_value.results = [mock_result] + + # Mock Chat: Returns 'yes' + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='yes'))] + mock_client.chat.completions.create.return_value = mock_completion + + assert validate_content(mock_client, "This is a valid English sentence.") is True + +def test_validate_content_moderation_fail(mock_client): + """Test that content flagged by moderation returns False.""" + # Mock Moderation: Flagged + mock_result = MagicMock() + 
mock_result.flagged = True + # Mock categories.model_dump() for the logger + mock_result.categories.model_dump.return_value = {"hate": True, "violence": False} + mock_client.moderations.create.return_value.results = [mock_result] + + assert validate_content(mock_client, "Some offensive content.") is False + # Verify chat.completions was NOT called (short-circuit) + mock_client.chat.completions.create.assert_not_called() + +def test_validate_content_language_fail(mock_client): + """Test that non-English content (as determined by the LLM) returns False.""" + # Mock Moderation: Not flagged + mock_result = MagicMock() + mock_result.flagged = False + mock_client.moderations.create.return_value.results = [mock_result] + + # Mock Chat: Returns 'no' + mock_completion = MagicMock() + mock_completion.choices = [MagicMock(message=MagicMock(content='no'))] + mock_client.chat.completions.create.return_value = mock_completion + + assert validate_content(mock_client, "Ceci n'est pas anglais.") is False + +def test_validate_content_empty_input(mock_client): + """Test that empty or whitespace-only input returns False immediately.""" + assert validate_content(mock_client, "") is False + assert validate_content(mock_client, " ") is False + mock_client.moderations.create.assert_not_called() From f506799b17d0e44b7e5fe186fd0c37917dcac260 Mon Sep 17 00:00:00 2001 From: GeorgeL Date: Mon, 18 May 2026 16:28:20 +0100 Subject: [PATCH 15/18] add pagefind from prototype --- .github/workflows/test.yml | 16 + Makefile | 6 + conf.py | 29 +- plugins/meta_util.py | 70 ++++ plugins/pagefind_meta.py | 222 +++++++++++++ plugins/showmeta.py | 120 +++++++ requirements.txt | 4 + source/About-ROS.rst | 14 +- source/_static/pagefind-docsearch.css | 219 ++++++++++++ source/_templates/layout.html | 9 + source/_templates/searchbox.html | 462 ++++++++++++++++++++++++++ source/search_results.rst | 21 ++ 12 files changed, 1190 insertions(+), 2 deletions(-) create mode 100644 plugins/meta_util.py create mode 
100644 plugins/pagefind_meta.py create mode 100644 plugins/showmeta.py create mode 100644 source/_static/pagefind-docsearch.css create mode 100644 source/_templates/layout.html create mode 100644 source/_templates/searchbox.html create mode 100644 source/search_results.rst diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..5a890815fcd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,6 +77,14 @@ jobs: - name: Build the docs run: make html + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind - name: Upload document artifacts uses: actions/upload-artifact@v4 @@ -147,3 +155,11 @@ jobs: - name: Build the docs run: make multiversion + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind diff --git a/Makefile b/Makefile index f411c155a1e..f5a18e791ba 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,12 @@ multiversion: Makefile sphinx-multiversion $(OPTS) "$(SOURCE)" build/html @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py + +# Pagefind static search index (requires Node.js / npx). Run after html or multiversion. +PAGEFIND_VERSION ?= 1.5.2 +pagefind: + npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" + %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) diff --git a/conf.py b/conf.py index 2a9def973fd..2955a31393a 100644 --- a/conf.py +++ b/conf.py @@ -89,8 +89,34 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'pagefind_meta', + 'showmeta', ] +# Pagefind mergeIndex: optional per-package API doc bundles. 
+# Enable only when upstream sites publish Pagefind at .../en/{distro}/p/{pkg}/pagefind +pagefind_merge_enabled = False +pagefind_merge_package_pkgs = [] +pagefind_merge_index_base = 'https://docs.ros.org' +pagefind_merge_index_overrides = {} +pagefind_merge_filter_per_pkg = None +pagefind_merge_index_weight_per_pkg = None + +# Optional display labels for Pagefind filter UI (key → label). Unlisted keys use title-case. +pagefind_filter_labels = { + 'contentType': 'Content Type', +} + +pagefind_result_meta_order = [ + 'product', + 'distro', + 'area', + 'capability', + 'contentType', + 'experience', +] + + # Intersphinx mapping intersphinx_mapping = { @@ -168,6 +194,7 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', + 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -181,7 +208,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css'] +html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py new file mode 100644 index 00000000000..32aef4d2f3b --- /dev/null +++ b/plugins/meta_util.py @@ -0,0 +1,70 @@ +# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind +""" +Collect every ``.. meta::`` field from the doctree, sanitize keys, and expand +``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). + +Sphinx / the HTML theme may also emit plain ```` tags for the same fields. +The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may +split comma-separated values into multiple tags for faceted search. +""" + +from __future__ import annotations + +import re +from typing import Dict, List, Optional + +from docutils import nodes + +# HTML ```` names should be conservative; allow common patterns. 
+_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') + + +def sanitize_meta_key(raw: str) -> Optional[str]: + s = str(raw).strip() + if not s or not _META_NAME_RE.match(s): + return None + return s + + +def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: + """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" + if doctree is None: + return {} + + out: Dict[str, str] = {} + for meta in doctree.findall(nodes.meta): + if meta.get('http-equiv'): + continue + content = meta.get('content') + if not content: + continue + key: Optional[str] = None + name = meta.get('name') + if name: + key = sanitize_meta_key(str(name)) + else: + prop = meta.get('property') + if prop: + key = sanitize_meta_key(str(prop)) + if not key: + continue + out[key] = str(content).strip() + return out + + +def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: + """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" + result = text + for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): + result = result.replace(f'{{{key}}}', value) + return result + + +def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: + """Apply ``expand_meta_macros`` to every meta value.""" + return {k: expand_meta_macros(v, macros) for k, v in meta.items()} + + +def split_meta_values(value: str) -> List[str]: + """Return comma-separated metadata values as individual Pagefind values.""" + return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py new file mode 100644 index 00000000000..690f6363d6b --- /dev/null +++ b/plugins/pagefind_meta.py @@ -0,0 +1,222 @@ +# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation +""" +Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` +from every ``.. meta::`` field on the page (passthrough, no whitelist). 
+ +Sphinx / the HTML theme typically also emits plain ```` tags for the same +``.. meta::`` fields. We intentionally emit an additional block with +``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting +works; crawlers may see duplicate name/content pairs for non-split fields. +""" + +from __future__ import annotations + +import html +import re +from typing import Any, Dict, List, Optional, Tuple + +from docutils import nodes + +from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values + + +def _macros_flat(app) -> Dict[str, str]: + macros = getattr(app.config, 'macros', {}) or {} + return {str(k): str(v) for k, v in macros.items()} + + +def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: + raw = all_doctree_meta(doctree) + return expand_all_meta_values(raw, _macros_flat(app)) + + +def _default_filter_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: + labels = getattr(app.config, 'pagefind_filter_labels', None) or {} + out: List[List[str]] = [] + for k in sorted_keys: + if isinstance(labels, dict) and labels.get(k): + lbl = str(labels[k]) + else: + lbl = _default_filter_label(k) + out.append([k, lbl]) + return out + + +def _pagefind_data_meta_attr(values: Dict[str, str]) -> str: + """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" + parts: List[str] = [] + for key in sorted(values.keys()): + for value in split_meta_values(values.get(key, '')): + parts.append(f'{key}:{value}') + inner = ', '.join(parts) + return html.escape(inner, quote=True) + + +def _seo_and_filter_metas(values: Dict[str, str]) -> str: + """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" + lines: List[str] = [] + for key in sorted(values.keys()): + esc_name = 
html.escape(key, quote=True) + for value in split_meta_values(values.get(key, '')): + esc_val = html.escape(value, quote=True) + lines.append( + f'' + ) + return '\n '.join(lines) + + +def _ensure_meta_keys_store(env) -> Dict[str, Any]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + env.pagefind_meta_keys_by_doc = {} + return env.pagefind_meta_keys_by_doc + + +def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + return + raw = all_doctree_meta(doctree) + store = _ensure_meta_keys_store(app.env) + store[docname] = set(raw.keys()) + + +def _purge_meta_keys(app, env, docname: str) -> None: + if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: + del env.pagefind_meta_keys_by_doc[docname] + + +def _merge_meta_keys(app, env, docnames, other) -> None: + """Merge per-document meta key sets from a parallel read worker environment.""" + if not hasattr(other, 'pagefind_meta_keys_by_doc'): + return + store = _ensure_meta_keys_store(env) + for docname, keys in other.pagefind_meta_keys_by_doc.items(): + store[docname] = set(keys) + + +def _union_meta_keys(env) -> List[str]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + return [] + union: set[str] = set() + for keys in env.pagefind_meta_keys_by_doc.values(): + union |= set(keys) + return sorted(union) + + +def _pagefind_bundle_prefix(pagename: str) -> str: + """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. + + Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. + ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. 
+ """ + depth = pagename.count('/') + if depth == 0: + return './pagefind/' + return ('../' * depth) + 'pagefind/' + + +def _pagefind_component_urls(pagename: str) -> Tuple[str, str]: + """(css_href, js_href) relative to current page.""" + prefix = _pagefind_bundle_prefix(pagename) + return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' + + +def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: + """Build mergeIndex list from conf (pinned docs.ros.org template).""" + pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) + if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): + return [] + base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') + overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} + out: List[Dict[str, Any]] = [] + for pkg in pkgs: + key = f'{distro}/{pkg}' + if key in overrides: + bundle = overrides[key] + else: + bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' + entry: Dict[str, Any] = {'bundlePath': bundle} + mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) + if isinstance(mf, dict) and pkg in mf: + entry['mergeFilter'] = mf[pkg] + iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) + if isinstance(iw, dict) and pkg in iw: + entry['indexWeight'] = iw[pkg] + out.append(entry) + return out + + +def _html_page_context( + app, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree, +) -> None: + sorted_keys = _union_meta_keys(app.env) + metadata_fields = _metadata_fields_for_keys(app, sorted_keys) + filter_csv = ','.join(sorted_keys) + + empty = { + 'pagefind_seo_filter_metas': '', + 'pagefind_data_meta_attr': '', + 'pagefind_bundle_prefix': './pagefind/', + 'pagefind_component_css': './pagefind/pagefind-component-ui.css', + 'pagefind_component_js': './pagefind/pagefind-component-ui.js', + 'pagefind_merge_index': [], + 'pagefind_filter_keys_csv': 
filter_csv, + 'pagefind_metadata_fields': metadata_fields, + 'pagefind_result_meta_order': list( + getattr(app.config, 'pagefind_result_meta_order', []) or [] + ), + } + context.update(empty) + + if app.builder.format != 'html' or templatename is None: + return + if not templatename.endswith('.html'): + return + + default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') + values = _resolved_page_meta(app, doctree) + + seo_filters = _seo_and_filter_metas(values) + data_attr = _pagefind_data_meta_attr(values) + css_href, js_href = _pagefind_component_urls(pagename) + bundle_prefix = _pagefind_bundle_prefix(pagename) + + merge_distro = values.get('distro') or str(default_distro) + merge = _merge_index_entries(app, merge_distro) + context['pagefind_seo_filter_metas'] = seo_filters + context['pagefind_data_meta_attr'] = data_attr + context['pagefind_bundle_prefix'] = bundle_prefix + context['pagefind_component_css'] = css_href + context['pagefind_component_js'] = js_href + context['pagefind_merge_index'] = merge + + +def setup(app) -> Dict[str, Any]: + app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') + app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') + app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') + app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') + app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') + app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') + + app.connect('html-page-context', _html_page_context) + app.connect('doctree-resolved', _collect_meta_keys) + app.connect('env-purge-doc', _purge_meta_keys) + app.connect('env-merge-info', _merge_meta_keys) + + return { + 
'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/plugins/showmeta.py b/plugins/showmeta.py new file mode 100644 index 00000000000..f11b140429c --- /dev/null +++ b/plugins/showmeta.py @@ -0,0 +1,120 @@ +# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary +""" +Render selected ``.. meta::`` fields in the document body with author-controlled +order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). +""" + +from __future__ import annotations + +import html as html_module +import re +from typing import List + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util.docutils import SphinxDirective + +from meta_util import all_doctree_meta, expand_all_meta_values + + +def _macros_flat(app) -> dict[str, str]: + return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} + + +def _default_showmeta_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +class showmeta_node(nodes.General, nodes.Element): + """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" + + +class ShowMetaDirective(SphinxDirective): + """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" + + has_content = False + option_spec = { + 'order': directives.unchanged, + 'labels': directives.unchanged, + } + + def run(self) -> List[nodes.Node]: + node = showmeta_node() + node['order'] = self.options.get('order', '') + node['labels'] = self.options.get('labels', '') + self.set_source_info(node) + return [node] + + +def visit_skip_showmeta(self, node: showmeta_node) -> None: + raise nodes.SkipNode + + +def depart_showmeta_noop(self, node: showmeta_node) -> None: + pass + + +def _parse_labels(raw: str) -> dict[str, str]: + out: dict[str, str] = {} + for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: + key, _, value = part.partition('=') + key, value = key.strip(), value.strip() + if key: + out[key] = value + return out + + +def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + for node in list(doctree.findall(showmeta_node)): + node.parent.remove(node) + return + + macros = _macros_flat(app) + meta = expand_all_meta_values(all_doctree_meta(doctree), macros) + + for node in list(doctree.findall(showmeta_node)): + order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] + labels_map = _parse_labels(node.get('labels', '')) + if not order: + node.parent.remove(node) + continue + + parts: List[str] = [] + for key in order: + val = meta.get(key, '').strip() + if not val: + continue + label_base = labels_map.get(key) or _default_showmeta_label(key) + label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' + parts.append( + f'{html_module.escape(label_display)} ' + f'{html_module.escape(val)}' + ) + + if not parts: + node.parent.remove(node) + else: + inner = ' | '.join(parts) + raw = nodes.raw( + '', + f'

{inner}

', + format='html', + ) + node.replace_self(raw) + + +def setup(app): + app.add_node( + showmeta_node, + html=(visit_skip_showmeta, depart_showmeta_noop), + latex=(visit_skip_showmeta, depart_showmeta_noop), + ) + app.add_directive('showmeta', ShowMetaDirective) + app.connect('doctree-resolved', replace_showmeta_nodes) + return { + 'version': '1.0.0', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/requirements.txt b/requirements.txt index 21c4c057505..71bd6e769fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,7 @@ +# Non-Python build dependency (install separately; used by `make pagefind`): +# Node.js 18+ with npx — https://nodejs.org/ +# Verify: node -v && npx -v + codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index 05fe7db14e9..e377fa738ef 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,10 +3,22 @@ About ROS ========= +.. meta:: + :contentType: about + :experience: beginner + :area: framework, tools, capabilities + :capability: simulation + :distro: {DISTRO} + :product: {PRODUCT} + + + ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** +.. showmeta:: + :order: product, area, capability, contentType, experience + :labels: product=Product, area=Area, capability=Capability, contentType=Content type, experience=Level .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css new file mode 100644 index 00000000000..5932d5ceec0 --- /dev/null +++ b/source/_static/pagefind-docsearch.css @@ -0,0 +1,219 @@ +/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ +.ros2-pagefind-search { + margin: 0.5rem 0 1rem; +} + +.ros2-pagefind-search pagefind-modal-trigger { + display: block; + width: 100%; +} + +/* Light styling for the trigger button (Pagefind exposes light DOM button) */ +.ros2-pagefind-search pagefind-modal-trigger::part(button), +.ros2-pagefind-search button { + align-items: center; + background: var(--wy-menu-vertical-background-color, #fcfcfc); + border: 1px solid #ccc; + border-radius: 40px; + color: var(--wy-menu-vertical-color, #404040); + cursor: pointer; + display: flex; + font-size: 0.85rem; + gap: 0.35rem; + justify-content: space-between; + min-height: 2.25rem; + padding: 0.35rem 0.6rem 0.35rem 0.75rem; + text-align: left; + width: 100%; +} + +.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, +.ros2-pagefind-search button:hover { + border-color: #999; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); +} + +/* Keyboard hint styling (Algolia DocSearch-like) */ +.ros2-pagefind-search .DocSearch-Button-Keys, +.ros2-pagefind-search pagefind-modal-trigger::part(keys) { + display: flex; + gap: 0.2rem; +} + +.ros2-pagefind-search kbd, +.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { + align-items: center; + background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); + border: 0; + border-radius: 3px; + box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); + color: #969faf; + display: flex; + font-size: 0.65rem; + font-weight: 600; + line-height: 1; + min-height: 1.25rem; + min-width: 1.25rem; + padding: 0 0.3rem; + justify-content: center; +} + +.wy-nav-side-scroll .ros2-pagefind-search { + padding-right: 0.5rem; +} + 
+.ros-page-meta-summary, +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin: -0.25rem 0 1rem !important; + padding: 0.45rem 0.75rem !important; + border-left: 4px solid #6c757d !important; + background: #f8f9fa !important; + color: #495057 !important; + font-size: 0.85rem !important; +} + +.ros2-pagefind-search dialog.pf-modal { + width: clamp(900px, 60vw, 1200px) !important; + max-width: 92vw !important; + min-width: min(900px, 92vw) !important; +} + +.ros2-pagefind-search .ros-search-two-col, +#ros-search-page .ros-search-two-col { + display: grid; + grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); + gap: 1rem; + min-height: 0; + width: 100%; +} + +.ros2-pagefind-search .ros-search-facets, +.ros2-pagefind-search .ros-search-results { + max-height: 62vh; + overflow: auto; + min-width: 0; +} + +#ros-search-page .ros-search-facets, +#ros-search-page .ros-search-results { + min-width: 0; +} + +.ros2-pagefind-search .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +#ros-search-page .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, +.ros2-pagefind-search .ros-search-results pagefind-summary, +.ros2-pagefind-search .ros-search-results pagefind-results, +#ros-search-page .ros-search-facets pagefind-filter-pane, +#ros-search-page .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-results { + display: block; +} + +.ros2-pagefind-search .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-summary { + margin-bottom: 0.75rem; +} + +.ros2-pagefind-search .pf-result-link, +#ros-search-page .pf-result-link { + font-size: 1rem; + font-weight: 700; + line-height: 1.25; +} + +.ros2-pagefind-search .pf-result-excerpt, +.ros2-pagefind-search .pf-result-preview, +#ros-search-page 
.pf-result-excerpt, +#ros-search-page .pf-result-preview { + font-size: 0.85rem; + line-height: 1.35; +} + +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin-top: 0.35rem !important; + margin-bottom: 0.45rem !important; + border-radius: 0 !important; + display: block !important; + line-height: 1.35 !important; +} + +.ros2-pagefind-search .pf-result-meta-block b, +#ros-search-page .pf-result-meta-block b, +dialog.pf-modal .pf-result-meta-block b { + color: #495057 !important; + font-weight: 600 !important; +} + +/* Full-page search results (search_results.rst) */ +.ros-search-page { + padding: 0 0 2rem; +} + +.ros-search-page-input-row { + margin-bottom: 1.5rem; +} + +.ros-search-page-input-row pagefind-input { + display: block; + width: 100%; +} + +.ros-search-page-two-col .ros-search-facets, +.ros-search-page-two-col .ros-search-results { + max-height: none; + overflow: visible; +} + +/* + Force Pagefind's per-result IntersectionObserver to use this + element as its root. The component walks up the DOM looking for an ancestor + whose computed overflow-y is not "visible" or "hidden"; without this, no + ancestor matches on a dedicated search page (everything renders with default + overflow), the observer never fires, and result cards remain skeletons. + + Setting overflow-y: auto with no max-height gives the observer a valid root + without producing any visible scrollbar - the element grows to fit content + naturally and the page itself remains the scroll context for the user. 
+*/ +#ros-search-page pagefind-results { + overflow-y: auto !important; +} + +@media (max-width: 980px) { + .ros2-pagefind-search .ros-search-two-col, + #ros-search-page .ros-search-two-col { + grid-template-columns: 1fr; + } + + .ros2-pagefind-search .ros-search-facets, + .ros2-pagefind-search .ros-search-results { + max-height: none; + } + + .ros2-pagefind-search .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } + + #ros-search-page .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } +} diff --git a/source/_templates/layout.html b/source/_templates/layout.html new file mode 100644 index 00000000000..94830854a69 --- /dev/null +++ b/source/_templates/layout.html @@ -0,0 +1,9 @@ +{% extends "!layout.html" %} +{% block extrahead %} + {{ super() }} + {% if pagefind_seo_filter_metas %} + + {{ pagefind_seo_filter_metas|safe }} + + {% endif %} +{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html new file mode 100644 index 00000000000..c63231694fa --- /dev/null +++ b/source/_templates/searchbox.html @@ -0,0 +1,462 @@ +{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} + + + + diff --git a/source/search_results.rst b/source/search_results.rst new file mode 100644 index 00000000000..ac2620b9d19 --- /dev/null +++ b/source/search_results.rst @@ -0,0 +1,21 @@ +:orphan: + +Search Results +============== + +.. raw:: html + +
+
+ +
+
+ +
+ + +
+
+
From 25a2cff7160b13b86b7215e5ec0194d3fd80b034 Mon Sep 17 00:00:00 2001 From: GeorgeL Date: Mon, 18 May 2026 16:54:52 +0100 Subject: [PATCH 16/18] Revert "add pagefind from prototype" This reverts commit f506799b17d0e44b7e5fe186fd0c37917dcac260. --- .github/workflows/test.yml | 16 - Makefile | 6 - conf.py | 29 +- plugins/meta_util.py | 70 ---- plugins/pagefind_meta.py | 222 ------------- plugins/showmeta.py | 120 ------- requirements.txt | 4 - source/About-ROS.rst | 14 +- source/_static/pagefind-docsearch.css | 219 ------------ source/_templates/layout.html | 9 - source/_templates/searchbox.html | 462 -------------------------- source/search_results.rst | 21 -- 12 files changed, 2 insertions(+), 1190 deletions(-) delete mode 100644 plugins/meta_util.py delete mode 100644 plugins/pagefind_meta.py delete mode 100644 plugins/showmeta.py delete mode 100644 source/_static/pagefind-docsearch.css delete mode 100644 source/_templates/layout.html delete mode 100644 source/_templates/searchbox.html delete mode 100644 source/search_results.rst diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5a890815fcd..5cea1c262d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,14 +77,6 @@ jobs: - name: Build the docs run: make html - - - name: Setup Node.js (Pagefind) - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Index HTML with Pagefind - run: make pagefind - name: Upload document artifacts uses: actions/upload-artifact@v4 @@ -155,11 +147,3 @@ jobs: - name: Build the docs run: make multiversion - - - name: Setup Node.js (Pagefind) - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Index HTML with Pagefind - run: make pagefind diff --git a/Makefile b/Makefile index f5a18e791ba..f411c155a1e 100644 --- a/Makefile +++ b/Makefile @@ -20,12 +20,6 @@ multiversion: Makefile sphinx-multiversion $(OPTS) "$(SOURCE)" build/html @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py - -# 
Pagefind static search index (requires Node.js / npx). Run after html or multiversion. -PAGEFIND_VERSION ?= 1.5.2 -pagefind: - npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" - %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) diff --git a/conf.py b/conf.py index 2955a31393a..2a9def973fd 100644 --- a/conf.py +++ b/conf.py @@ -89,34 +89,8 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', - 'pagefind_meta', - 'showmeta', ] -# Pagefind mergeIndex: optional per-package API doc bundles. -# Enable only when upstream sites publish Pagefind at .../en/{distro}/p/{pkg}/pagefind -pagefind_merge_enabled = False -pagefind_merge_package_pkgs = [] -pagefind_merge_index_base = 'https://docs.ros.org' -pagefind_merge_index_overrides = {} -pagefind_merge_filter_per_pkg = None -pagefind_merge_index_weight_per_pkg = None - -# Optional display labels for Pagefind filter UI (key → label). Unlisted keys use title-case. -pagefind_filter_labels = { - 'contentType': 'Content Type', -} - -pagefind_result_meta_order = [ - 'product', - 'distro', - 'area', - 'capability', - 'contentType', - 'experience', -] - - # Intersphinx mapping intersphinx_mapping = { @@ -194,7 +168,6 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', - 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -208,7 +181,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] +html_css_files = ['custom.css', 'adopters.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py deleted file mode 100644 index 32aef4d2f3b..00000000000 --- a/plugins/meta_util.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind -""" -Collect every ``.. 
meta::`` field from the doctree, sanitize keys, and expand -``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). - -Sphinx / the HTML theme may also emit plain ```` tags for the same fields. -The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may -split comma-separated values into multiple tags for faceted search. -""" - -from __future__ import annotations - -import re -from typing import Dict, List, Optional - -from docutils import nodes - -# HTML ```` names should be conservative; allow common patterns. -_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') - - -def sanitize_meta_key(raw: str) -> Optional[str]: - s = str(raw).strip() - if not s or not _META_NAME_RE.match(s): - return None - return s - - -def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: - """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" - if doctree is None: - return {} - - out: Dict[str, str] = {} - for meta in doctree.findall(nodes.meta): - if meta.get('http-equiv'): - continue - content = meta.get('content') - if not content: - continue - key: Optional[str] = None - name = meta.get('name') - if name: - key = sanitize_meta_key(str(name)) - else: - prop = meta.get('property') - if prop: - key = sanitize_meta_key(str(prop)) - if not key: - continue - out[key] = str(content).strip() - return out - - -def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: - """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" - result = text - for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): - result = result.replace(f'{{{key}}}', value) - return result - - -def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: - """Apply ``expand_meta_macros`` to every meta value.""" - return {k: expand_meta_macros(v, macros) for k, v in meta.items()} - - -def split_meta_values(value: str) -> 
List[str]: - """Return comma-separated metadata values as individual Pagefind values.""" - return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py deleted file mode 100644 index 690f6363d6b..00000000000 --- a/plugins/pagefind_meta.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation -""" -Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` -from every ``.. meta::`` field on the page (passthrough, no whitelist). - -Sphinx / the HTML theme typically also emits plain ```` tags for the same -``.. meta::`` fields. We intentionally emit an additional block with -``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting -works; crawlers may see duplicate name/content pairs for non-split fields. -""" - -from __future__ import annotations - -import html -import re -from typing import Any, Dict, List, Optional, Tuple - -from docutils import nodes - -from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values - - -def _macros_flat(app) -> Dict[str, str]: - macros = getattr(app.config, 'macros', {}) or {} - return {str(k): str(v) for k, v in macros.items()} - - -def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: - raw = all_doctree_meta(doctree) - return expand_all_meta_values(raw, _macros_flat(app)) - - -def _default_filter_label(key: str) -> str: - spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) - return spaced.replace('_', ' ').replace('-', ' ').strip().title() - - -def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: - labels = getattr(app.config, 'pagefind_filter_labels', None) or {} - out: List[List[str]] = [] - for k in sorted_keys: - if isinstance(labels, dict) and labels.get(k): - lbl = str(labels[k]) - else: - lbl = _default_filter_label(k) - out.append([k, lbl]) - return out - - -def 
_pagefind_data_meta_attr(values: Dict[str, str]) -> str: - """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" - parts: List[str] = [] - for key in sorted(values.keys()): - for value in split_meta_values(values.get(key, '')): - parts.append(f'{key}:{value}') - inner = ', '.join(parts) - return html.escape(inner, quote=True) - - -def _seo_and_filter_metas(values: Dict[str, str]) -> str: - """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" - lines: List[str] = [] - for key in sorted(values.keys()): - esc_name = html.escape(key, quote=True) - for value in split_meta_values(values.get(key, '')): - esc_val = html.escape(value, quote=True) - lines.append( - f'' - ) - return '\n '.join(lines) - - -def _ensure_meta_keys_store(env) -> Dict[str, Any]: - if not hasattr(env, 'pagefind_meta_keys_by_doc'): - env.pagefind_meta_keys_by_doc = {} - return env.pagefind_meta_keys_by_doc - - -def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: - if app.builder.format != 'html': - return - raw = all_doctree_meta(doctree) - store = _ensure_meta_keys_store(app.env) - store[docname] = set(raw.keys()) - - -def _purge_meta_keys(app, env, docname: str) -> None: - if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: - del env.pagefind_meta_keys_by_doc[docname] - - -def _merge_meta_keys(app, env, docnames, other) -> None: - """Merge per-document meta key sets from a parallel read worker environment.""" - if not hasattr(other, 'pagefind_meta_keys_by_doc'): - return - store = _ensure_meta_keys_store(env) - for docname, keys in other.pagefind_meta_keys_by_doc.items(): - store[docname] = set(keys) - - -def _union_meta_keys(env) -> List[str]: - if not hasattr(env, 'pagefind_meta_keys_by_doc'): - return [] - union: set[str] = set() - for keys in env.pagefind_meta_keys_by_doc.values(): - union |= set(keys) - return sorted(union) - - -def _pagefind_bundle_prefix(pagename: 
str) -> str: - """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. - - Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. - ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. - """ - depth = pagename.count('/') - if depth == 0: - return './pagefind/' - return ('../' * depth) + 'pagefind/' - - -def _pagefind_component_urls(pagename: str) -> Tuple[str, str]: - """(css_href, js_href) relative to current page.""" - prefix = _pagefind_bundle_prefix(pagename) - return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' - - -def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: - """Build mergeIndex list from conf (pinned docs.ros.org template).""" - pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) - if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): - return [] - base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') - overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} - out: List[Dict[str, Any]] = [] - for pkg in pkgs: - key = f'{distro}/{pkg}' - if key in overrides: - bundle = overrides[key] - else: - bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' - entry: Dict[str, Any] = {'bundlePath': bundle} - mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) - if isinstance(mf, dict) and pkg in mf: - entry['mergeFilter'] = mf[pkg] - iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) - if isinstance(iw, dict) and pkg in iw: - entry['indexWeight'] = iw[pkg] - out.append(entry) - return out - - -def _html_page_context( - app, - pagename: str, - templatename: str, - context: Dict[str, Any], - doctree, -) -> None: - sorted_keys = _union_meta_keys(app.env) - metadata_fields = _metadata_fields_for_keys(app, sorted_keys) - filter_csv = ','.join(sorted_keys) - - empty = { - 'pagefind_seo_filter_metas': '', - 
'pagefind_data_meta_attr': '', - 'pagefind_bundle_prefix': './pagefind/', - 'pagefind_component_css': './pagefind/pagefind-component-ui.css', - 'pagefind_component_js': './pagefind/pagefind-component-ui.js', - 'pagefind_merge_index': [], - 'pagefind_filter_keys_csv': filter_csv, - 'pagefind_metadata_fields': metadata_fields, - 'pagefind_result_meta_order': list( - getattr(app.config, 'pagefind_result_meta_order', []) or [] - ), - } - context.update(empty) - - if app.builder.format != 'html' or templatename is None: - return - if not templatename.endswith('.html'): - return - - default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') - values = _resolved_page_meta(app, doctree) - - seo_filters = _seo_and_filter_metas(values) - data_attr = _pagefind_data_meta_attr(values) - css_href, js_href = _pagefind_component_urls(pagename) - bundle_prefix = _pagefind_bundle_prefix(pagename) - - merge_distro = values.get('distro') or str(default_distro) - merge = _merge_index_entries(app, merge_distro) - context['pagefind_seo_filter_metas'] = seo_filters - context['pagefind_data_meta_attr'] = data_attr - context['pagefind_bundle_prefix'] = bundle_prefix - context['pagefind_component_css'] = css_href - context['pagefind_component_js'] = js_href - context['pagefind_merge_index'] = merge - - -def setup(app) -> Dict[str, Any]: - app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') - app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') - app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') - app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') - app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') - app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') - app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') - 
app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') - - app.connect('html-page-context', _html_page_context) - app.connect('doctree-resolved', _collect_meta_keys) - app.connect('env-purge-doc', _purge_meta_keys) - app.connect('env-merge-info', _merge_meta_keys) - - return { - 'parallel_read_safe': True, - 'parallel_write_safe': True, - 'version': '1.0.0', - } diff --git a/plugins/showmeta.py b/plugins/showmeta.py deleted file mode 100644 index f11b140429c..00000000000 --- a/plugins/showmeta.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary -""" -Render selected ``.. meta::`` fields in the document body with author-controlled -order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). -""" - -from __future__ import annotations - -import html as html_module -import re -from typing import List - -from docutils import nodes -from docutils.parsers.rst import directives -from sphinx.util.docutils import SphinxDirective - -from meta_util import all_doctree_meta, expand_all_meta_values - - -def _macros_flat(app) -> dict[str, str]: - return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} - - -def _default_showmeta_label(key: str) -> str: - spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) - return spaced.replace('_', ' ').replace('-', ' ').strip().title() - - -class showmeta_node(nodes.General, nodes.Element): - """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" - - -class ShowMetaDirective(SphinxDirective): - """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" - - has_content = False - option_spec = { - 'order': directives.unchanged, - 'labels': directives.unchanged, - } - - def run(self) -> List[nodes.Node]: - node = showmeta_node() - node['order'] = self.options.get('order', '') - node['labels'] = self.options.get('labels', '') - self.set_source_info(node) - return [node] - - -def visit_skip_showmeta(self, node: showmeta_node) -> None: - raise nodes.SkipNode - - -def depart_showmeta_noop(self, node: showmeta_node) -> None: - pass - - -def _parse_labels(raw: str) -> dict[str, str]: - out: dict[str, str] = {} - for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: - key, _, value = part.partition('=') - key, value = key.strip(), value.strip() - if key: - out[key] = value - return out - - -def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: - if app.builder.format != 'html': - for node in list(doctree.findall(showmeta_node)): - node.parent.remove(node) - return - - macros = _macros_flat(app) - meta = expand_all_meta_values(all_doctree_meta(doctree), macros) - - for node in list(doctree.findall(showmeta_node)): - order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] - labels_map = _parse_labels(node.get('labels', '')) - if not order: - node.parent.remove(node) - continue - - parts: List[str] = [] - for key in order: - val = meta.get(key, '').strip() - if not val: - continue - label_base = labels_map.get(key) or _default_showmeta_label(key) - label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' - parts.append( - f'{html_module.escape(label_display)} ' - f'{html_module.escape(val)}' - ) - - if not parts: - node.parent.remove(node) - else: - inner = ' | '.join(parts) - raw = nodes.raw( - '', - f'

{inner}

', - format='html', - ) - node.replace_self(raw) - - -def setup(app): - app.add_node( - showmeta_node, - html=(visit_skip_showmeta, depart_showmeta_noop), - latex=(visit_skip_showmeta, depart_showmeta_noop), - ) - app.add_directive('showmeta', ShowMetaDirective) - app.connect('doctree-resolved', replace_showmeta_nodes) - return { - 'version': '1.0.0', - 'parallel_read_safe': True, - 'parallel_write_safe': True, - } diff --git a/requirements.txt b/requirements.txt index 71bd6e769fe..21c4c057505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,3 @@ -# Non-Python build dependency (install separately; used by `make pagefind`): -# Node.js 18+ with npx — https://nodejs.org/ -# Verify: node -v && npx -v - codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index e377fa738ef..05fe7db14e9 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,22 +3,10 @@ About ROS ========= -.. meta:: - :contentType: about - :experience: beginner - :area: framework, tools, capabilities - :capability: simulation - :distro: {DISTRO} - :product: {PRODUCT} - - - ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -.. showmeta:: - :order: product, area, capability, contentType, experience - :labels: product=Product, area=Area, capability=Capability, contentType=Content type, experience=Level +**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css deleted file mode 100644 index 5932d5ceec0..00000000000 --- a/source/_static/pagefind-docsearch.css +++ /dev/null @@ -1,219 +0,0 @@ -/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ -.ros2-pagefind-search { - margin: 0.5rem 0 1rem; -} - -.ros2-pagefind-search pagefind-modal-trigger { - display: block; - width: 100%; -} - -/* Light styling for the trigger button (Pagefind exposes light DOM button) */ -.ros2-pagefind-search pagefind-modal-trigger::part(button), -.ros2-pagefind-search button { - align-items: center; - background: var(--wy-menu-vertical-background-color, #fcfcfc); - border: 1px solid #ccc; - border-radius: 40px; - color: var(--wy-menu-vertical-color, #404040); - cursor: pointer; - display: flex; - font-size: 0.85rem; - gap: 0.35rem; - justify-content: space-between; - min-height: 2.25rem; - padding: 0.35rem 0.6rem 0.35rem 0.75rem; - text-align: left; - width: 100%; -} - -.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, -.ros2-pagefind-search button:hover { - border-color: #999; - box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); -} - -/* Keyboard hint styling (Algolia DocSearch-like) */ -.ros2-pagefind-search .DocSearch-Button-Keys, -.ros2-pagefind-search pagefind-modal-trigger::part(keys) { - display: flex; - gap: 0.2rem; -} - -.ros2-pagefind-search kbd, -.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { - align-items: center; - background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); - border: 0; - border-radius: 3px; - box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); - color: #969faf; - display: flex; - font-size: 0.65rem; - font-weight: 600; - line-height: 1; - min-height: 1.25rem; - min-width: 1.25rem; - padding: 0 0.3rem; - justify-content: center; -} - -.wy-nav-side-scroll .ros2-pagefind-search { - padding-right: 0.5rem; -} - 
-.ros-page-meta-summary, -.ros2-pagefind-search .pf-result-meta-block, -#ros-search-page .pf-result-meta-block, -dialog.pf-modal .pf-result-meta-block { - margin: -0.25rem 0 1rem !important; - padding: 0.45rem 0.75rem !important; - border-left: 4px solid #6c757d !important; - background: #f8f9fa !important; - color: #495057 !important; - font-size: 0.85rem !important; -} - -.ros2-pagefind-search dialog.pf-modal { - width: clamp(900px, 60vw, 1200px) !important; - max-width: 92vw !important; - min-width: min(900px, 92vw) !important; -} - -.ros2-pagefind-search .ros-search-two-col, -#ros-search-page .ros-search-two-col { - display: grid; - grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); - gap: 1rem; - min-height: 0; - width: 100%; -} - -.ros2-pagefind-search .ros-search-facets, -.ros2-pagefind-search .ros-search-results { - max-height: 62vh; - overflow: auto; - min-width: 0; -} - -#ros-search-page .ros-search-facets, -#ros-search-page .ros-search-results { - min-width: 0; -} - -.ros2-pagefind-search .ros-search-facets { - border-right: 1px solid #e9ecef; - padding-right: 0.75rem; -} - -#ros-search-page .ros-search-facets { - border-right: 1px solid #e9ecef; - padding-right: 0.75rem; -} - -.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, -.ros2-pagefind-search .ros-search-results pagefind-summary, -.ros2-pagefind-search .ros-search-results pagefind-results, -#ros-search-page .ros-search-facets pagefind-filter-pane, -#ros-search-page .ros-search-results pagefind-summary, -#ros-search-page .ros-search-results pagefind-results { - display: block; -} - -.ros2-pagefind-search .ros-search-results pagefind-summary, -#ros-search-page .ros-search-results pagefind-summary { - margin-bottom: 0.75rem; -} - -.ros2-pagefind-search .pf-result-link, -#ros-search-page .pf-result-link { - font-size: 1rem; - font-weight: 700; - line-height: 1.25; -} - -.ros2-pagefind-search .pf-result-excerpt, -.ros2-pagefind-search .pf-result-preview, -#ros-search-page 
.pf-result-excerpt, -#ros-search-page .pf-result-preview { - font-size: 0.85rem; - line-height: 1.35; -} - -.ros2-pagefind-search .pf-result-meta-block, -#ros-search-page .pf-result-meta-block, -dialog.pf-modal .pf-result-meta-block { - margin-top: 0.35rem !important; - margin-bottom: 0.45rem !important; - border-radius: 0 !important; - display: block !important; - line-height: 1.35 !important; -} - -.ros2-pagefind-search .pf-result-meta-block b, -#ros-search-page .pf-result-meta-block b, -dialog.pf-modal .pf-result-meta-block b { - color: #495057 !important; - font-weight: 600 !important; -} - -/* Full-page search results (search_results.rst) */ -.ros-search-page { - padding: 0 0 2rem; -} - -.ros-search-page-input-row { - margin-bottom: 1.5rem; -} - -.ros-search-page-input-row pagefind-input { - display: block; - width: 100%; -} - -.ros-search-page-two-col .ros-search-facets, -.ros-search-page-two-col .ros-search-results { - max-height: none; - overflow: visible; -} - -/* - Force Pagefind's per-result IntersectionObserver to use this - element as its root. The component walks up the DOM looking for an ancestor - whose computed overflow-y is not "visible" or "hidden"; without this, no - ancestor matches on a dedicated search page (everything renders with default - overflow), the observer never fires, and result cards remain skeletons. - - Setting overflow-y: auto with no max-height gives the observer a valid root - without producing any visible scrollbar - the element grows to fit content - naturally and the page itself remains the scroll context for the user. 
-*/ -#ros-search-page pagefind-results { - overflow-y: auto !important; -} - -@media (max-width: 980px) { - .ros2-pagefind-search .ros-search-two-col, - #ros-search-page .ros-search-two-col { - grid-template-columns: 1fr; - } - - .ros2-pagefind-search .ros-search-facets, - .ros2-pagefind-search .ros-search-results { - max-height: none; - } - - .ros2-pagefind-search .ros-search-facets { - border-right: 0; - border-bottom: 1px solid #e9ecef; - margin-bottom: 0.75rem; - padding: 0 0 0.75rem; - } - - #ros-search-page .ros-search-facets { - border-right: 0; - border-bottom: 1px solid #e9ecef; - margin-bottom: 0.75rem; - padding: 0 0 0.75rem; - } -} diff --git a/source/_templates/layout.html b/source/_templates/layout.html deleted file mode 100644 index 94830854a69..00000000000 --- a/source/_templates/layout.html +++ /dev/null @@ -1,9 +0,0 @@ -{% extends "!layout.html" %} -{% block extrahead %} - {{ super() }} - {% if pagefind_seo_filter_metas %} - - {{ pagefind_seo_filter_metas|safe }} - - {% endif %} -{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html deleted file mode 100644 index c63231694fa..00000000000 --- a/source/_templates/searchbox.html +++ /dev/null @@ -1,462 +0,0 @@ -{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} - - - - diff --git a/source/search_results.rst b/source/search_results.rst deleted file mode 100644 index ac2620b9d19..00000000000 --- a/source/search_results.rst +++ /dev/null @@ -1,21 +0,0 @@ -:orphan: - -Search Results -============== - -.. raw:: html - -
-
- -
-
- -
- - -
-
-
From 795f1a00a95d987e4a1bf522dc2a62038ba2fa83 Mon Sep 17 00:00:00 2001 From: 3di-techx Date: Tue, 19 May 2026 07:40:30 +0000 Subject: [PATCH 17/18] update RT Corporation url to fix build --- source/The-ROS2-Project/Adopters/adopters.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/The-ROS2-Project/Adopters/adopters.yaml b/source/The-ROS2-Project/Adopters/adopters.yaml index 30fd1772086..a064c11f8f1 100644 --- a/source/The-ROS2-Project/Adopters/adopters.yaml +++ b/source/The-ROS2-Project/Adopters/adopters.yaml @@ -95,7 +95,7 @@ adopters: description: "Development platform for autonomous mobile robots across logistics, construction, and retail." - organization: "RT Corporation" - organization_url: "https://rt-net.jp" + organization_url: "https://en.rt-net.jp" project: "CRANE-X7" project_url: "https://github.com/rt-net/crane_x7_ros" domain: From 5886634ad55f6672d7935ad37f146d18427db4b7 Mon Sep 17 00:00:00 2001 From: 3di-techx Date: Wed, 20 May 2026 10:36:48 +0000 Subject: [PATCH 18/18] add page find --- .github/workflows/test.yml | 18 + Makefile | 16 +- README.md | 35 ++ conf.py | 26 +- plugins/meta_util.py | 70 ++++ plugins/pagefind_meta.py | 261 ++++++++++++ plugins/showmeta.py | 120 ++++++ requirements.txt | 4 + source/About-ROS.rst | 13 +- source/_static/pagefind-docsearch.css | 219 ++++++++++ source/_templates/layout.html | 9 + source/_templates/search.html | 37 ++ source/_templates/searchbox.html | 573 ++++++++++++++++++++++++++ 13 files changed, 1398 insertions(+), 3 deletions(-) create mode 100644 plugins/meta_util.py create mode 100644 plugins/pagefind_meta.py create mode 100644 plugins/showmeta.py create mode 100644 source/_static/pagefind-docsearch.css create mode 100644 source/_templates/layout.html create mode 100644 source/_templates/search.html create mode 100644 source/_templates/searchbox.html diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cea1c262d8..8b40cfe0cca 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -78,6 +78,15 @@ jobs: - name: Build the docs run: make html + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind + - name: Upload document artifacts uses: actions/upload-artifact@v4 id: artifact-upload-step @@ -147,3 +156,12 @@ jobs: - name: Build the docs run: make multiversion + + + - name: Setup Node.js (Pagefind) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Index HTML with Pagefind + run: make pagefind diff --git a/Makefile b/Makefile index f411c155a1e..8f76cc64c69 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,20 @@ multiversion: Makefile @echo "" > build/html/index.html $(PYTHON) make_sitemapindex.py +# Pagefind static search index (requires Node.js / npx). Run after html or multiversion. +PAGEFIND_VERSION ?= 1.5.2 +pagefind: + npx -y pagefind@$(PAGEFIND_VERSION) --site "$(OUT)/html" + + +# Convenience: Sphinx build + Pagefind index (does not replace plain html / multiversion). +html-search: + $(MAKE) html + $(MAKE) pagefind + +multiversion-search: multiversion + $(MAKE) pagefind + %: Makefile @$(BUILD) -M $@ "$(SOURCE)" "$(OUT)" $(OPTS) @@ -64,4 +78,4 @@ linkcheck: @echo @echo "Check finished. Report is in $(LINKCHECKDIR)." -.PHONY: help Makefile multiversion test test-tools linkcheck lint spellcheck check-dictionaries sort-dictionaries +.PHONY: help Makefile multiversion pagefind test test-tools linkcheck lint spellcheck check-dictionaries sort-dictionaries diff --git a/README.md b/README.md index 7c613ccbf1f..11f8010e39c 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,41 @@ To test building the multisite version deployed to the website use: **NB:** This will ignore local workspace changes and build from the branches. 
+### Pagefind search index + +After `make html` or `make multiversion`, run [Pagefind](https://pagefind.app/) so the built HTML under `build/html` is indexed and `build/html/pagefind/` is written (search bundle and Component UI assets). From the repo root: + +`make pagefind` + +Or use convenience targets that run Sphinx and Pagefind in one step: + +- `make html-search` — `make html` then `make pagefind` +- `make multiversion-search` — `make multiversion` then `make pagefind` + +Plain `make html` and `make multiversion` do **not** run Pagefind (Node.js is only required when you index search). + +This requires **Node.js** (for `npx`). Pin the CLI with `PAGEFIND_VERSION` in the Makefile if needed. + +To preview search locally, serve the site over HTTP (Pagefind may not load from `file://`), for example from the repo root: + +`python -m http.server 8000 --directory build/html` + +Then open `http://localhost:8000/` in a browser. + +#### Search results page verification + +After `make html` and `make pagefind`, serve `build/html` over HTTP and check: + +1. **Direct URL** — Open `http://localhost:8000/search.html?q=tutorial` (or the same path under a distro prefix for multiversion builds). The input should show the query and results should load (not stay empty or skeleton-only). +2. **Modal redirect** — From a nested page (e.g. a tutorial), open the sidebar search modal (Ctrl/Cmd+K), type a term, press Enter. You should land on the search page with `?q=` set and matching results visible. +3. **Empty query** — Open `search.html` with no `q` parameter. The page should load without errors; no search is run until you type in the input. +4. **Result metadata** — Search for `Ubuntu deb` and open a result card. Metadata labels (e.g. Area, Content Type, Experience) should match that page’s `<head>` `<meta>` tags from its `.. meta::` block (e.g. `area: installation` on the Ubuntu deb install page), not URL-path guesses. 
+ +In DevTools Network, confirm `pagefind/` bundle requests return 200 (not 404). + +The production [Jenkins doc job](https://build.ros.org/job/doc_ros2doc) should run the same `pagefind` step on `build/html` after Sphinx so deployed pages include the search bundle. + + ### Note for Windows (WSL) Users When building the documentation on windows using WSL, it is recommended to clone and work with this repository inside the Linux filesystem (for example, under `/home//`) rather than under `/mnt/c`. diff --git a/conf.py b/conf.py index 2a9def973fd..f172f9d4674 100644 --- a/conf.py +++ b/conf.py @@ -89,6 +89,29 @@ 'sphinx_adopters', 'sphinxcontrib.googleanalytics', 'sphinxcontrib.mermaid', + 'pagefind_meta', + 'showmeta', +] + +pagefind_merge_enabled = False +pagefind_merge_package_pkgs = [] +pagefind_merge_index_base = 'https://docs.ros.org' +pagefind_merge_index_overrides = {} +pagefind_merge_filter_per_pkg = None +pagefind_merge_index_weight_per_pkg = None + +pagefind_filter_labels = { + 'contentType': 'Content type', +} + +pagefind_result_meta_order = [ + 'product', + 'distro', + 'area', + 'capability', + 'contentType', + 'experience', + ] # Intersphinx mapping @@ -168,6 +191,7 @@ 'DISTRO_TITLE': 'Rolling', 'DISTRO_TITLE_FULL': 'Rolling Ridley', 'REPOS_FILE_BRANCH': 'rolling', + 'PRODUCT': 'ROS 2', } html_favicon = 'favicon.ico' @@ -181,7 +205,7 @@ html_sourcelink_suffix = '' # Relative to html_static_path -html_css_files = ['custom.css', 'adopters.css'] +html_css_files = ['custom.css', 'adopters.css', 'pagefind-docsearch.css'] html_js_files = ['adopters.js'] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/plugins/meta_util.py b/plugins/meta_util.py new file mode 100644 index 00000000000..32aef4d2f3b --- /dev/null +++ b/plugins/meta_util.py @@ -0,0 +1,70 @@ +# Copyright 2026 Open Robotics — shared helpers for ``.. meta::`` / Pagefind +""" +Collect every ``.. 
meta::`` field from the doctree, sanitize keys, and expand +``{MACRO}`` placeholders using the Sphinx ``macros`` config (longest keys first). + +Sphinx / the HTML theme may also emit plain ```` tags for the same fields. +The Pagefind extension emits additional tags with ``data-pagefind-filter`` and may +split comma-separated values into multiple tags for faceted search. +""" + +from __future__ import annotations + +import re +from typing import Dict, List, Optional + +from docutils import nodes + +# HTML ```` names should be conservative; allow common patterns. +_META_NAME_RE = re.compile(r'^[A-Za-z0-9_.:-]+$') + + +def sanitize_meta_key(raw: str) -> Optional[str]: + s = str(raw).strip() + if not s or not _META_NAME_RE.match(s): + return None + return s + + +def all_doctree_meta(doctree: Optional[nodes.document]) -> Dict[str, str]: + """Return last-wins mapping of every ``nodes.meta`` ``name``/``property`` → ``content``.""" + if doctree is None: + return {} + + out: Dict[str, str] = {} + for meta in doctree.findall(nodes.meta): + if meta.get('http-equiv'): + continue + content = meta.get('content') + if not content: + continue + key: Optional[str] = None + name = meta.get('name') + if name: + key = sanitize_meta_key(str(name)) + else: + prop = meta.get('property') + if prop: + key = sanitize_meta_key(str(prop)) + if not key: + continue + out[key] = str(content).strip() + return out + + +def expand_meta_macros(text: str, macros: Dict[str, str]) -> str: + """Expand ``{KEY}`` placeholders; longer macro names first to avoid partial matches.""" + result = text + for key, value in sorted(macros.items(), key=lambda kv: len(kv[0]), reverse=True): + result = result.replace(f'{{{key}}}', value) + return result + + +def expand_all_meta_values(meta: Dict[str, str], macros: Dict[str, str]) -> Dict[str, str]: + """Apply ``expand_meta_macros`` to every meta value.""" + return {k: expand_meta_macros(v, macros) for k, v in meta.items()} + + +def split_meta_values(value: str) -> 
List[str]: + """Return comma-separated metadata values as individual Pagefind values.""" + return [part.strip() for part in value.split(',') if part.strip()] diff --git a/plugins/pagefind_meta.py b/plugins/pagefind_meta.py new file mode 100644 index 00000000000..e5fc1188949 --- /dev/null +++ b/plugins/pagefind_meta.py @@ -0,0 +1,261 @@ +# Copyright 2026 Open Robotics — Pagefind metadata for ROS 2 documentation +""" +Emit SEO tags, Pagefind ``data-pagefind-meta``, and ``data-pagefind-filter`` +from every ``.. meta::`` field on the page (passthrough, no whitelist). + +Sphinx / the HTML theme typically also emits plain ```` tags for the same +``.. meta::`` fields. We intentionally emit an additional block with +``data-pagefind-filter`` (and split comma-separated values) so Pagefind faceting +works; crawlers may see duplicate name/content pairs for non-split fields. +""" + +from __future__ import annotations + +import html +import re +from pathlib import PurePosixPath +from typing import Any, Dict, List, Optional, Tuple + +from docutils import nodes + +from meta_util import all_doctree_meta, expand_all_meta_values, split_meta_values + + +def _macros_flat(app) -> Dict[str, str]: + macros = getattr(app.config, 'macros', {}) or {} + return {str(k): str(v) for k, v in macros.items()} + + +def _resolved_page_meta(app, doctree: Optional[nodes.document]) -> Dict[str, str]: + raw = all_doctree_meta(doctree) + return expand_all_meta_values(raw, _macros_flat(app)) + + +def _default_filter_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +def _metadata_fields_for_keys(app, sorted_keys: List[str]) -> List[List[str]]: + labels = getattr(app.config, 'pagefind_filter_labels', None) or {} + out: List[List[str]] = [] + for k in sorted_keys: + if isinstance(labels, dict) and labels.get(k): + lbl = str(labels[k]) + else: + lbl = _default_filter_label(k) + out.append([k, lbl]) + return out + 
+ +def _pagefind_data_meta_attr(values: Dict[str, str]) -> str: + """Single data-pagefind-meta attribute value with repeated keys for multi-values.""" + parts: List[str] = [] + for key in sorted(values.keys()): + for value in split_meta_values(values.get(key, '')): + parts.append(f'{key}:{value}') + inner = ', '.join(parts) + return html.escape(inner, quote=True) + + +def _seo_and_filter_metas(values: Dict[str, str]) -> str: + """One per value: SEO name/content + data-pagefind-filter (Pagefind filtering docs).""" + lines: List[str] = [] + for key in sorted(values.keys()): + esc_name = html.escape(key, quote=True) + for value in split_meta_values(values.get(key, '')): + esc_val = html.escape(value, quote=True) + lines.append( + f'' + ) + return '\n '.join(lines) + + +def _ensure_meta_keys_store(env) -> Dict[str, Any]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + env.pagefind_meta_keys_by_doc = {} + return env.pagefind_meta_keys_by_doc + + +def _collect_meta_keys(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + return + raw = all_doctree_meta(doctree) + store = _ensure_meta_keys_store(app.env) + store[docname] = set(raw.keys()) + + +def _purge_meta_keys(app, env, docname: str) -> None: + if hasattr(env, 'pagefind_meta_keys_by_doc') and docname in env.pagefind_meta_keys_by_doc: + del env.pagefind_meta_keys_by_doc[docname] + + +def _merge_meta_keys(app, env, docnames, other) -> None: + """Merge per-document meta key sets from a parallel read worker environment.""" + if not hasattr(other, 'pagefind_meta_keys_by_doc'): + return + store = _ensure_meta_keys_store(env) + for docname, keys in other.pagefind_meta_keys_by_doc.items(): + store[docname] = set(keys) + + +def _union_meta_keys(env) -> List[str]: + if not hasattr(env, 'pagefind_meta_keys_by_doc'): + return [] + union: set[str] = set() + for keys in env.pagefind_meta_keys_by_doc.values(): + union |= set(keys) + return sorted(union) + + +def 
_pagefind_bundle_prefix(app, pagename: str) -> str: + """Relative URL prefix from current HTML page to the site root ``pagefind/`` directory. + + Must start with ``./`` or ``../`` so the browser resolves dynamic imports (e.g. + ``import(bundlePath + 'pagefind.js')``) as URLs, not bare module specifiers. + + For ``sphinx-multiversion``, each distro is built with ``pagename`` relative to that + distro tree (e.g. ``index``), but HTML is served under ``/{smv_current_version}/``. + The Pagefind bundle lives at the site root (``build/html/pagefind/``), so add one + ``../`` when ``smv_current_version`` is set. + """ + builder = getattr(app, 'builder', None) + if builder is not None: + target_uri = builder.get_target_uri(pagename, typ='html') + depth = len(PurePosixPath(target_uri).parent.parts) + else: + depth = pagename.count('/') + + version = getattr(app.config, 'smv_current_version', '') or '' + if version: + depth += 1 + + if depth == 0: + return './pagefind/' + return ('../' * depth) + 'pagefind/' + + +def _pagefind_component_urls(app, pagename: str) -> Tuple[str, str]: + """(css_href, js_href) relative to current page.""" + prefix = _pagefind_bundle_prefix(app, pagename) + return prefix + 'pagefind-component-ui.css', prefix + 'pagefind-component-ui.js' + + +def _search_results_href(app, pagename: str) -> str: + """Relative URL from the current page to Sphinx's ``search.html``. + + Uses the HTML builder's relative URI helper so multiversion pages under + ``/{distro}/`` link to ``/{distro}/search.html``, not site-root + ``/search.html`` (which may be wrong after ``make multiversion``). 
+ """ + builder = getattr(app, 'builder', None) + if builder is None: + return 'search.html' + try: + current = builder.get_target_uri(pagename, typ='html') + target = builder.get_target_uri('search', typ='html') + rel = builder.get_relative_uri(current, target) + if rel: + return rel + except (AttributeError, KeyError, ValueError): + pass + return 'search.html' + + +def _merge_index_entries(app, distro: str) -> List[Dict[str, Any]]: + """Build mergeIndex list from conf (pinned docs.ros.org template).""" + pkgs: List[str] = list(getattr(app.config, 'pagefind_merge_package_pkgs', []) or []) + if not pkgs or not getattr(app.config, 'pagefind_merge_enabled', False): + return [] + base = getattr(app.config, 'pagefind_merge_index_base', 'https://docs.ros.org').rstrip('/') + overrides = getattr(app.config, 'pagefind_merge_index_overrides', {}) or {} + out: List[Dict[str, Any]] = [] + for pkg in pkgs: + key = f'{distro}/{pkg}' + if key in overrides: + bundle = overrides[key] + else: + bundle = f'{base}/en/{distro}/p/{pkg}/pagefind' + entry: Dict[str, Any] = {'bundlePath': bundle} + mf = getattr(app.config, 'pagefind_merge_filter_per_pkg', None) + if isinstance(mf, dict) and pkg in mf: + entry['mergeFilter'] = mf[pkg] + iw = getattr(app.config, 'pagefind_merge_index_weight_per_pkg', None) + if isinstance(iw, dict) and pkg in iw: + entry['indexWeight'] = iw[pkg] + out.append(entry) + return out + + +def _html_page_context( + app, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree, +) -> None: + sorted_keys = _union_meta_keys(app.env) + metadata_fields = _metadata_fields_for_keys(app, sorted_keys) + filter_csv = ','.join(sorted_keys) + + empty = { + 'pagefind_seo_filter_metas': '', + 'pagefind_data_meta_attr': '', + 'pagefind_bundle_prefix': './pagefind/', + 'pagefind_component_css': './pagefind/pagefind-component-ui.css', + 'pagefind_component_js': './pagefind/pagefind-component-ui.js', + 'pagefind_merge_index': [], + 'pagefind_filter_keys_csv': 
filter_csv, + 'pagefind_metadata_fields': metadata_fields, + 'pagefind_result_meta_order': list( + getattr(app.config, 'pagefind_result_meta_order', []) or [] + ), + 'pagefind_search_results_href': 'search.html', + } + context.update(empty) + + if app.builder.format != 'html' or templatename is None: + return + if not templatename.endswith('.html'): + return + + default_distro = (getattr(app.config, 'macros', {}) or {}).get('DISTRO', 'rolling') + values = _resolved_page_meta(app, doctree) + + seo_filters = _seo_and_filter_metas(values) + data_attr = _pagefind_data_meta_attr(values) + css_href, js_href = _pagefind_component_urls(app, pagename) + bundle_prefix = _pagefind_bundle_prefix(app, pagename) + + merge_distro = values.get('distro') or str(default_distro) + merge = _merge_index_entries(app, merge_distro) + context['pagefind_seo_filter_metas'] = seo_filters + context['pagefind_data_meta_attr'] = data_attr + context['pagefind_bundle_prefix'] = bundle_prefix + context['pagefind_component_css'] = css_href + context['pagefind_component_js'] = js_href + context['pagefind_merge_index'] = merge + context['pagefind_search_results_href'] = _search_results_href(app, pagename) + + +def setup(app) -> Dict[str, Any]: + app.add_config_value('pagefind_merge_enabled', default=False, rebuild='html') + app.add_config_value('pagefind_merge_package_pkgs', default=[], rebuild='html') + app.add_config_value('pagefind_merge_index_base', default='https://docs.ros.org', rebuild='html') + app.add_config_value('pagefind_merge_index_overrides', default={}, rebuild='html') + app.add_config_value('pagefind_merge_filter_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_merge_index_weight_per_pkg', default=None, rebuild='html') + app.add_config_value('pagefind_filter_labels', default={}, rebuild='html') + app.add_config_value('pagefind_result_meta_order', default=[], rebuild='html') + + app.connect('html-page-context', _html_page_context) + 
app.connect('doctree-resolved', _collect_meta_keys) + app.connect('env-purge-doc', _purge_meta_keys) + app.connect('env-merge-info', _merge_meta_keys) + + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + 'version': '1.0.0', + } diff --git a/plugins/showmeta.py b/plugins/showmeta.py new file mode 100644 index 00000000000..f11b140429c --- /dev/null +++ b/plugins/showmeta.py @@ -0,0 +1,120 @@ +# Copyright 2026 Open Robotics — explicit in-body ``.. showmeta::`` summary +""" +Render selected ``.. meta::`` fields in the document body with author-controlled +order and labels. Place ``.. showmeta::`` where the summary should appear (HTML only). +""" + +from __future__ import annotations + +import html as html_module +import re +from typing import List + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.util.docutils import SphinxDirective + +from meta_util import all_doctree_meta, expand_all_meta_values + + +def _macros_flat(app) -> dict[str, str]: + return {str(k): str(v) for k, v in (getattr(app.config, 'macros', {}) or {}).items()} + + +def _default_showmeta_label(key: str) -> str: + spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', key) + return spaced.replace('_', ' ').replace('-', ' ').strip().title() + + +class showmeta_node(nodes.General, nodes.Element): + """Placeholder replaced on ``doctree-resolved`` (HTML builds only).""" + + +class ShowMetaDirective(SphinxDirective): + """Insert a visible metadata line built from ``.. 
meta::`` on this page.""" + + has_content = False + option_spec = { + 'order': directives.unchanged, + 'labels': directives.unchanged, + } + + def run(self) -> List[nodes.Node]: + node = showmeta_node() + node['order'] = self.options.get('order', '') + node['labels'] = self.options.get('labels', '') + self.set_source_info(node) + return [node] + + +def visit_skip_showmeta(self, node: showmeta_node) -> None: + raise nodes.SkipNode + + +def depart_showmeta_noop(self, node: showmeta_node) -> None: + pass + + +def _parse_labels(raw: str) -> dict[str, str]: + out: dict[str, str] = {} + for part in [p.strip() for p in raw.split(',') if p.strip() and '=' in p]: + key, _, value = part.partition('=') + key, value = key.strip(), value.strip() + if key: + out[key] = value + return out + + +def replace_showmeta_nodes(app, doctree: nodes.document, docname: str) -> None: + if app.builder.format != 'html': + for node in list(doctree.findall(showmeta_node)): + node.parent.remove(node) + return + + macros = _macros_flat(app) + meta = expand_all_meta_values(all_doctree_meta(doctree), macros) + + for node in list(doctree.findall(showmeta_node)): + order = [x.strip() for x in node.get('order', '').split(',') if x.strip()] + labels_map = _parse_labels(node.get('labels', '')) + if not order: + node.parent.remove(node) + continue + + parts: List[str] = [] + for key in order: + val = meta.get(key, '').strip() + if not val: + continue + label_base = labels_map.get(key) or _default_showmeta_label(key) + label_display = label_base if label_base.rstrip().endswith(':') else f'{label_base}:' + parts.append( + f'{html_module.escape(label_display)} ' + f'{html_module.escape(val)}' + ) + + if not parts: + node.parent.remove(node) + else: + inner = ' | '.join(parts) + raw = nodes.raw( + '', + f'

{inner}

', + format='html', + ) + node.replace_self(raw) + + +def setup(app): + app.add_node( + showmeta_node, + html=(visit_skip_showmeta, depart_showmeta_noop), + latex=(visit_skip_showmeta, depart_showmeta_noop), + ) + app.add_directive('showmeta', ShowMetaDirective) + app.connect('doctree-resolved', replace_showmeta_nodes) + return { + 'version': '1.0.0', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/requirements.txt b/requirements.txt index 21c4c057505..71bd6e769fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,7 @@ +# Non-Python build dependency (install separately; used by `make pagefind`): +# Node.js 18+ with npx — https://nodejs.org/ +# Verify: node -v && npx -v + codespell doc8 docutils diff --git a/source/About-ROS.rst b/source/About-ROS.rst index 05fe7db14e9..9f8afece373 100644 --- a/source/About-ROS.rst +++ b/source/About-ROS.rst @@ -3,10 +3,21 @@ About ROS ========= +.. meta:: + :contentType: about + :experience: beginner + :area: framework, tools, capabilities + :capability: simulation + :distro: {DISTRO} + :product: {PRODUCT} + + ROS (Robot Operating System) is an open-source ecosystem that provides the framework, tools, and libraries for building, deploying, running, and maintaining robotic applications. This article introduces the main areas of the ecosystem and outlines their intended use. -**Area: ROS-framework, ROS-tools, ROS-capabilities | Content-type: about | Experience: beginner** +.. showmeta:: + :order: product, distro, area, capability, contentType, experience + :labels: product=Product, distro=Distribution, area=Area, capability=Capability, contentType=Content type, experience=Level .. 
contents:: Table of Contents :depth: 2 diff --git a/source/_static/pagefind-docsearch.css b/source/_static/pagefind-docsearch.css new file mode 100644 index 00000000000..1f507fc202d --- /dev/null +++ b/source/_static/pagefind-docsearch.css @@ -0,0 +1,219 @@ +/* DocSearch-like sidebar trigger for Pagefind modal (plan §3) */ +.ros2-pagefind-search { + margin: 0.5rem 0 1rem; +} + +.ros2-pagefind-search pagefind-modal-trigger { + display: block; + width: 100%; +} + +/* Light styling for the trigger button (Pagefind exposes light DOM button) */ +.ros2-pagefind-search pagefind-modal-trigger::part(button), +.ros2-pagefind-search button { + align-items: center; + background: var(--wy-menu-vertical-background-color, #fcfcfc); + border: 1px solid #ccc; + border-radius: 40px; + color: var(--wy-menu-vertical-color, #404040); + cursor: pointer; + display: flex; + font-size: 0.85rem; + gap: 0.35rem; + justify-content: space-between; + min-height: 2.25rem; + padding: 0.35rem 0.6rem 0.35rem 0.75rem; + text-align: left; + width: 100%; +} + +.ros2-pagefind-search pagefind-modal-trigger::part(button):hover, +.ros2-pagefind-search button:hover { + border-color: #999; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.06); +} + +/* Keyboard hint styling (Algolia DocSearch-like) */ +.ros2-pagefind-search .DocSearch-Button-Keys, +.ros2-pagefind-search pagefind-modal-trigger::part(keys) { + display: flex; + gap: 0.2rem; +} + +.ros2-pagefind-search kbd, +.ros2-pagefind-search pagefind-modal-trigger::part(kbd) { + align-items: center; + background: linear-gradient(-225deg, #d5dbe4, #f8f8f8); + border: 0; + border-radius: 3px; + box-shadow: inset 0 -2px 0 0 #cdcde6, inset 0 0 1px 1px #fff, 0 1px 2px 1px rgba(30, 35, 90, 0.2); + color: #969faf; + display: flex; + font-size: 0.65rem; + font-weight: 600; + line-height: 1; + min-height: 1.25rem; + min-width: 1.25rem; + padding: 0 0.3rem; + justify-content: center; +} + +.wy-nav-side-scroll .ros2-pagefind-search { + padding-right: 0.5rem; +} + 
+.ros-page-meta-summary, +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin: -0.25rem 0 1rem !important; + padding: 0.45rem 0.75rem !important; + border-left: 4px solid #6c757d !important; + background: #f8f9fa !important; + color: #495057 !important; + font-size: 0.85rem !important; +} + +.ros2-pagefind-search dialog.pf-modal { + width: clamp(900px, 60vw, 1200px) !important; + max-width: 92vw !important; + min-width: min(900px, 92vw) !important; +} + +.ros2-pagefind-search .ros-search-two-col, +#ros-search-page .ros-search-two-col { + display: grid; + grid-template-columns: minmax(220px, 260px) minmax(0, 1fr); + gap: 1rem; + min-height: 0; + width: 100%; +} + +.ros2-pagefind-search .ros-search-facets, +.ros2-pagefind-search .ros-search-results { + max-height: 62vh; + overflow: auto; + min-width: 0; +} + +#ros-search-page .ros-search-facets, +#ros-search-page .ros-search-results { + min-width: 0; +} + +.ros2-pagefind-search .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +#ros-search-page .ros-search-facets { + border-right: 1px solid #e9ecef; + padding-right: 0.75rem; +} + +.ros2-pagefind-search .ros-search-facets pagefind-filter-pane, +.ros2-pagefind-search .ros-search-results pagefind-summary, +.ros2-pagefind-search .ros-search-results pagefind-results, +#ros-search-page .ros-search-facets pagefind-filter-pane, +#ros-search-page .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-results { + display: block; +} + +.ros2-pagefind-search .ros-search-results pagefind-summary, +#ros-search-page .ros-search-results pagefind-summary { + margin-bottom: 0.75rem; +} + +.ros2-pagefind-search .pf-result-link, +#ros-search-page .pf-result-link { + font-size: 1rem; + font-weight: 700; + line-height: 1.25; +} + +.ros2-pagefind-search .pf-result-excerpt, +.ros2-pagefind-search .pf-result-preview, +#ros-search-page 
.pf-result-excerpt, +#ros-search-page .pf-result-preview { + font-size: 0.85rem; + line-height: 1.35; +} + +.ros2-pagefind-search .pf-result-meta-block, +#ros-search-page .pf-result-meta-block, +dialog.pf-modal .pf-result-meta-block { + margin-top: 0.35rem !important; + margin-bottom: 0.45rem !important; + border-radius: 0 !important; + display: block !important; + line-height: 1.35 !important; +} + +.ros2-pagefind-search .pf-result-meta-block b, +#ros-search-page .pf-result-meta-block b, +dialog.pf-modal .pf-result-meta-block b { + color: #495057 !important; + font-weight: 600 !important; +} + +/* Full-page search results (search.html) */ +.ros-search-page { + padding: 0 0 2rem; +} + +.ros-search-page-input-row { + margin-bottom: 1.5rem; +} + +.ros-search-page-input-row pagefind-input { + display: block; + width: 100%; +} + +.ros-search-page-two-col .ros-search-facets, +.ros-search-page-two-col .ros-search-results { + max-height: none; + overflow: visible; +} + +/* + Force Pagefind's per-result IntersectionObserver to use this + element as its root. The component walks up the DOM looking for an ancestor + whose computed overflow-y is not "visible" or "hidden"; without this, no + ancestor matches on a dedicated search page (everything renders with default + overflow), the observer never fires, and result cards remain skeletons. + + Setting overflow-y: auto with no max-height gives the observer a valid root + without producing any visible scrollbar - the element grows to fit content + naturally and the page itself remains the scroll context for the user. 
+*/ +#ros-search-page pagefind-results { + overflow-y: auto !important; +} + +@media (max-width: 980px) { + .ros2-pagefind-search .ros-search-two-col, + #ros-search-page .ros-search-two-col { + grid-template-columns: 1fr; + } + + .ros2-pagefind-search .ros-search-facets, + .ros2-pagefind-search .ros-search-results { + max-height: none; + } + + .ros2-pagefind-search .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } + + #ros-search-page .ros-search-facets { + border-right: 0; + border-bottom: 1px solid #e9ecef; + margin-bottom: 0.75rem; + padding: 0 0 0.75rem; + } +} diff --git a/source/_templates/layout.html b/source/_templates/layout.html new file mode 100644 index 00000000000..94830854a69 --- /dev/null +++ b/source/_templates/layout.html @@ -0,0 +1,9 @@ +{% extends "!layout.html" %} +{% block extrahead %} + {{ super() }} + {% if pagefind_seo_filter_metas %} + + {{ pagefind_seo_filter_metas|safe }} + + {% endif %} +{% endblock %} diff --git a/source/_templates/search.html b/source/_templates/search.html new file mode 100644 index 00000000000..520bfb9b715 --- /dev/null +++ b/source/_templates/search.html @@ -0,0 +1,37 @@ +{# + Override RTD/Sphinx search page: Pagefind full-page UI instead of searchtools.js. +#} +{%- extends "layout.html" %} +{% set title = _('Search') %} +{% set display_vcs_links = False %} +{%- block scripts %} + {{ super() }} +{%- endblock %} +{% block footer %} + {{ super() }} +{% endblock %} +{% block body %} + + +
+
+ +
+
+ +
+ + +
+
+
+{% endblock %} diff --git a/source/_templates/searchbox.html b/source/_templates/searchbox.html new file mode 100644 index 00000000000..4485c542d3c --- /dev/null +++ b/source/_templates/searchbox.html @@ -0,0 +1,573 @@ +{# Pagefind Component UI (plan §3) + DocSearch-inspired styling via pagefind-docsearch.css #} + + + +