From b0fb84e0aaee98cc3a1d9f45d307c564e01aca8c Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 10:13:10 -0500 Subject: [PATCH 01/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 43 +++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 1b97bf1a..37fe2c4b 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -48,7 +48,34 @@ jobs: - name: Combine all tags.yml files id: get_tags - run: find . -name "tags.yml" -not -path "./.github/*" -exec cat {} + > .github/tags.yml + run: | + echo "{}" > .github/tags.yml + + for f in $(find . -name "tags.yml" -not -path "./.github/*"); do + # Determine feature type from path + if [[ "$f" == *"/modules/"* ]]; then + PREFIX="modules" + elif [[ "$f" == *"/subworkflows/"* ]]; then + PREFIX="subworkflows" + else + echo "Unknown feature type for $f" + exit 1 + fi + + # Rewrite keys to be namespaced + yq eval " + with_entries( + .key = \"${PREFIX}/\" + .key + ) + " "$f" > /tmp/tags.prefixed.yml + + # Merge into final tags.yml + yq eval-all '. as \$item ireduce ({}; . *+ \$item)' \ + .github/tags.yml /tmp/tags.prefixed.yml > /tmp/tags.merged.yml + + mv /tmp/tags.merged.yml .github/tags.yml + done + - name: debug run: cat .github/tags.yml @@ -85,6 +112,8 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" # install the python version needed + - name: Install yq + run: sudo apt-get update && sudo apt-get install -y yq - uses: actions/checkout@v4 with: ref: docs @@ -156,18 +185,18 @@ jobs: run: | echo ${{ matrix.tags }} python ${{ github.workspace }}/yaml_to_md.py all --yaml-file ${{ github.workspace }}/temp.yml --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json ${{ env.SUBWORKFLOW }} - - name: Check file existence for modules - id: check_files - uses: andstor/file-existence-action@v1 - with: - branch: docs - files: ${{ env.FEATURE_TYPE}}/${{ env.MD_NAME }}.md - name: Add to SUMMARY for new features run: | curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md + - name: Check for changes + run: | + if git diff --quiet; then + echo "NO_CHANGES=true" >> $GITHUB_ENV + fi - uses: EndBug/add-and-commit@v9 + if: env.NO_CHANGES != 'true' with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" From c3c6b0dec177731bdaa7558db8dce426c4317534 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 10:20:47 -0500 Subject: [PATCH 02/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 37fe2c4b..cc951468 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -47,12 +47,10 @@ jobs: fetch-depth: 2 # To retrieve the preceding commit. - name: Combine all tags.yml files - id: get_tags run: | echo "{}" > .github/tags.yml for f in $(find . -name "tags.yml" -not -path "./.github/*"); do - # Determine feature type from path if [[ "$f" == *"/modules/"* ]]; then PREFIX="modules" elif [[ "$f" == *"/subworkflows/"* ]]; then @@ -62,15 +60,13 @@ jobs: exit 1 fi - # Rewrite keys to be namespaced yq eval " with_entries( .key = \"${PREFIX}/\" + .key ) " "$f" > /tmp/tags.prefixed.yml - # Merge into final tags.yml - yq eval-all '. as \$item ireduce ({}; . *+ \$item)' \ + yq eval-all '. as $item ireduce ({}; . *+ $item)' \ .github/tags.yml /tmp/tags.prefixed.yml > /tmp/tags.merged.yml mv /tmp/tags.merged.yml .github/tags.yml From 862efb5e11d033537340cb407700f024d6a31b53 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:15:40 -0500 Subject: [PATCH 03/32] yml formatting --- modules/msk/calculatenoise/environment.yml | 2 +- modules/msk/calculatenoise/meta.yml | 4 ++-- modules/msk/fgbio/collectduplexseqmetrics/meta.yml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/msk/calculatenoise/environment.yml b/modules/msk/calculatenoise/environment.yml index 4b3c9d37..4c59b932 100644 --- a/modules/msk/calculatenoise/environment.yml +++ b/modules/msk/calculatenoise/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "YOUR-TOOL-HERE" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/calculatenoise/meta.yml b/modules/msk/calculatenoise/meta.yml index 54568d25..a2a51b94 100644 --- a/modules/msk/calculatenoise/meta.yml +++ b/modules/msk/calculatenoise/meta.yml @@ -12,7 +12,7 @@ tools: tool_dev_url: "https://github.com/msk-access/sequence_qc" doi: "" licence: ["MIT"] - identifier: null + identifier: "" input: - - meta: @@ -141,4 +141,4 @@ output: authors: - "@mikefeixu" maintainers: - - "@mikefeixu" \ No newline at end of file + - "@mikefeixu" diff --git a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml index 5219fc6f..60c76972 100644 --- a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml +++ b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml @@ -8,9 +8,9 @@ keywords: tools: - "fgbio": description: "A set of tools for working with genomic and high throughput sequencing data, including UMIs" - homepage: "None" - documentation: "None" - tool_dev_url: "None" + homepage: "https://github.com/fulcrumgenomics/fgbio" + documentation: "https://github.com/fulcrumgenomics/fgbio" + tool_dev_url: "https://github.com/fulcrumgenomics/fgbio" doi: "" licence: ["MIT"] identifier: biotools:fgbio From 99ac7ea0ea662e91635797d9f83b3d57810e6c9f Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:20:50 -0500 Subject: [PATCH 04/32] yml formatting --- modules/msk/calculatenoise/meta.yml | 2 +- modules/msk/fgbio/collectduplexseqmetrics/meta.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/msk/calculatenoise/meta.yml b/modules/msk/calculatenoise/meta.yml index a2a51b94..714dbaf7 100644 --- a/modules/msk/calculatenoise/meta.yml +++ b/modules/msk/calculatenoise/meta.yml @@ -10,7 +10,7 @@ tools: homepage: "https://github.com/msk-access/sequence_qc" documentation: "https://github.com/msk-access/sequence_qc" tool_dev_url: "https://github.com/msk-access/sequence_qc" - doi: "" + doi: "no DOI available" licence: ["MIT"] identifier: "" diff --git a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml index 60c76972..97c772bb 100644 --- a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml +++ b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml @@ -11,7 +11,7 @@ tools: homepage: "https://github.com/fulcrumgenomics/fgbio" documentation: "https://github.com/fulcrumgenomics/fgbio" tool_dev_url: "https://github.com/fulcrumgenomics/fgbio" - doi: "" + doi: "no DOI available" licence: ["MIT"] identifier: biotools:fgbio From 78662a54695e2b99c4d033f11d435ba38146d5a4 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:24:49 -0500 Subject: [PATCH 05/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index cc951468..ec4e7829 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -198,3 +198,6 @@ jobs: message: "add doc for ${{ matrix.tags }}" add: '["*/*.md --force", "SUMMARY.md --force"]' cwd: "./" + pull: "--rebase" + push: true + From a9967df53bb53b369e78b5c21476e5c903b60a4b Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:31:36 -0500 Subject: [PATCH 06/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index ec4e7829..d45c5c34 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -191,6 +191,8 @@ jobs: if git diff --quiet; then echo "NO_CHANGES=true" >> $GITHUB_ENV fi + - name: Stage all changes + run: git add -A - uses: EndBug/add-and-commit@v9 if: env.NO_CHANGES != 'true' with: @@ -200,4 +202,3 @@ jobs: cwd: "./" pull: "--rebase" push: true - From f8de3d1a02bc49a675f2198929b93f006afd9d60 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:34:03 -0500 Subject: [PATCH 07/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index d45c5c34..727ca5ba 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -191,8 +191,6 @@ jobs: if git diff --quiet; then echo "NO_CHANGES=true" >> $GITHUB_ENV fi - - name: Stage all changes - run: git add -A - uses: EndBug/add-and-commit@v9 if: env.NO_CHANGES != 'true' with: @@ -200,5 +198,4 @@ jobs: message: "add doc for ${{ matrix.tags }}" add: '["*/*.md --force", "SUMMARY.md --force"]' cwd: "./" - pull: "--rebase" - push: true + push: --force From f190c26e24fdabd146b83ef5f3d5ac273668ef87 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 11:59:30 -0500 Subject: [PATCH 08/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 727ca5ba..d8d0f3e9 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -123,9 +123,8 @@ jobs: id: replace_slash run: | FRAGMENT="${{ env.SW_NAME }}" - MD_NAME=$(echo $FRAGMENT | sed 's/\//_/g') - echo "MD_NAME=$MD_NAME" >> $GITHUB_ENV - echo "${MD_NAME}" + MD_PATH="${FRAGMENT}" # Keep subfolders + echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV - uses: EndBug/add-and-commit@v9 with: default_author: github_actions @@ -180,11 +179,12 @@ jobs: - name: Run convertor to generate md file for new module run: | echo ${{ matrix.tags }} - python ${{ github.workspace }}/yaml_to_md.py all --yaml-file ${{ github.workspace }}/temp.yml --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json ${{ env.SUBWORKFLOW }} + mkdir -p ./${{ env.FEATURE_TYPE }}/$(dirname $MD_PATH) + python ${{ github.workspace }}/yaml_to_md.py all --yaml-file ${{ github.workspace }}/temp.yml --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json ${{ env.SUBWORKFLOW }} - name: Add to SUMMARY for new features run: | curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py - python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md + python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_PATH }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes run: | @@ -196,6 +196,6 @@ jobs: with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" - add: '["*/*.md --force", "SUMMARY.md --force"]' + add: '["**/*.md --force", "SUMMARY.md --force"]' cwd: "./" push: --force From 82ae92055b8481992868eb14f8f22b49ca39cf6e Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 12:16:35 -0500 Subject: [PATCH 09/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index d8d0f3e9..436f542a 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -123,8 +123,10 @@ jobs: id: replace_slash run: | FRAGMENT="${{ env.SW_NAME }}" - MD_PATH="${FRAGMENT}" # Keep subfolders + MD_PATH="${FRAGMENT}" # keep subfolders for output file + MD_NAME="${MD_PATH}" # <-- key change: preserve nested folder for display text echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV + echo "MD_NAME=$MD_NAME" >> $GITHUB_ENV - uses: EndBug/add-and-commit@v9 with: default_author: github_actions @@ -176,15 +178,20 @@ jobs: echo "FEATURE_TYPE=$FEATURE_TYPE" >> $GITHUB_ENV echo "SUMMARY_TYPE=$SUMMARY_TYPE" >> $GITHUB_ENV echo "SUBWORKFLOW=$SUBWORKFLOW" >> $GITHUB_ENV + - name: Run convertor to generate md file for new module run: | echo ${{ matrix.tags }} mkdir -p ./${{ env.FEATURE_TYPE }}/$(dirname $MD_PATH) - python ${{ github.workspace }}/yaml_to_md.py all --yaml-file ${{ github.workspace }}/temp.yml --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json ${{ env.SUBWORKFLOW }} + python ${{ github.workspace }}/yaml_to_md.py all \ + --yaml-file ${{ github.workspace }}/temp.yml \ + --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md \ + --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json \ + ${{ env.SUBWORKFLOW }} - name: Add to SUMMARY for new features run: | curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py - python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_PATH }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md + python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes run: | @@ -196,6 +203,6 @@ jobs: with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" - add: '["**/*.md --force", "SUMMARY.md --force"]' + add: '["**/*.md", "SUMMARY.md"]' cwd: "./" push: --force From ecec9801b212ae03f67ee8a8b5f1a5768b8437a6 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 13:38:30 -0500 Subject: [PATCH 10/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 436f542a..add43467 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -198,11 +198,14 @@ jobs: if git diff --quiet; then echo "NO_CHANGES=true" >> $GITHUB_ENV fi + - name: Stage new files + run: | + git add "./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md" + git add SUMMARY.md - uses: EndBug/add-and-commit@v9 - if: env.NO_CHANGES != 'true' with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" - add: '["**/*.md", "SUMMARY.md"]' + add: '["*/*.md --force", "SUMMARY.md --force"]' cwd: "./" push: --force From 2ee566faa3ba57b6e4288f9ec07f1aa199c6f55f Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 13:52:39 -0500 Subject: [PATCH 11/32] Update update_summary.py --- .github/workflows/update_summary.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 8dc679cd..131321a5 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -35,24 +35,26 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): new_list = [] found = False if feature_type == "module": - # Check if the new feature is already in current summary file if new_feature.replace("\\", "") in [line.strip().replace("\\", "") for line in sections["Modules"]]: return sections - # Get module name from [] and remove the _ part. - # Check if the first part of the module name exists - new_feature_category = new_feature.split("]")[0].split("_")[0].replace("* [", "").replace("\\", "").strip() + + # Use folder as category (first part before /) + if "/" in new_feature: + new_feature_category = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() + else: + new_feature_category = new_feature.split("]")[0].split("_")[0].replace("* [", "").strip() + for line in sections["Modules"]: new_list.append(line) if f"[{new_feature_category}]" in line: found = True - new_list.append(f" {new_feature}\n") + new_list.append(f" {new_feature}\n") # two spaces indent for submodules if not found: - # If not found, put the new feature to the last + # If no parent exists, add at the end new_list.append(new_feature + "\n\n") sections["Modules"] = new_list if feature_type == "subworkflow" and new_feature not in sections["Subworkflows"]: - # There is no subset of subworkflow, so put the new feature to the last sections["Subworkflows"].append(new_feature + "\n") return sections From 223e3de2828a55f414505e5476f935f8dbef13ce Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 14:00:43 -0500 Subject: [PATCH 12/32] Update update_summary.py --- .github/workflows/update_summary.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 131321a5..7fe90198 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -49,9 +49,15 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): if f"[{new_feature_category}]" in line: found = True new_list.append(f" {new_feature}\n") # two spaces indent for submodules - if not found: - # If no parent exists, add at the end - new_list.append(new_feature + "\n\n") + if not found: + # If no parent exists, add parent + new feature at the end + if "/" in new_feature: + parent_name = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() + parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" + new_list.append(parent_line + "\n") + new_list.append(f" {new_feature}\n") + else: + new_list.append(new_feature + "\n\n") sections["Modules"] = new_list if feature_type == "subworkflow" and new_feature not in sections["Subworkflows"]: From 63ef113085963093236f537f5965fbcb2d9ad974 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 14:07:42 -0500 Subject: [PATCH 13/32] Update update_summary.py --- .github/workflows/update_summary.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 7fe90198..64472f92 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -49,15 +49,16 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): if f"[{new_feature_category}]" in line: found = True new_list.append(f" {new_feature}\n") # two spaces indent for submodules - if not found: - # If no parent exists, add parent + new feature at the end - if "/" in new_feature: - parent_name = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() - parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" - new_list.append(parent_line + "\n") - new_list.append(f" {new_feature}\n") - else: - new_list.append(new_feature + "\n\n") + + if not found: + if "/" in new_feature: + parent_name = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() + parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" + new_list.append(parent_line + "\n") + new_list.append(f" {new_feature}\n") + else: + new_list.append(new_feature + "\n\n") + sections["Modules"] = new_list if feature_type == "subworkflow" and new_feature not in sections["Subworkflows"]: From a4559b90d80df94614d14f2070f9b5b5a482fa5e Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 14:45:58 -0500 Subject: [PATCH 14/32] Update update_summary.py --- .github/workflows/update_summary.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 64472f92..efc8061c 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -40,7 +40,10 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): # Use folder as category (first part before /) if "/" in new_feature: - new_feature_category = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() + new_feature_category = (new_feature.split("]")[0] + .replace("* [", "") + .rsplit("/", 1)[0] + .strip()) else: new_feature_category = new_feature.split("]")[0].split("_")[0].replace("* [", "").strip() From 0ce574ba799abec28ca75bfcb1b8eff4e16cd9e8 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 15:21:23 -0500 Subject: [PATCH 15/32] Update update_summary.py --- .github/workflows/update_summary.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index efc8061c..db5fed76 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -38,14 +38,13 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): if new_feature.replace("\\", "") in [line.strip().replace("\\", "") for line in sections["Modules"]]: return sections - # Use folder as category (first part before /) - if "/" in new_feature: - new_feature_category = (new_feature.split("]")[0] - .replace("* [", "") - .rsplit("/", 1)[0] - .strip()) + full_name = new_feature.split("]")[0].replace("* [", "").strip() + + if "/" in full_name: + new_feature_category, display_name = full_name.split("/", 1) + new_feature = new_feature.replace(full_name, display_name) else: - new_feature_category = new_feature.split("]")[0].split("_")[0].replace("* [", "").strip() + new_feature_category = full_name for line in sections["Modules"]: new_list.append(line) @@ -53,14 +52,11 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): found = True new_list.append(f" {new_feature}\n") # two spaces indent for submodules - if not found: - if "/" in new_feature: - parent_name = new_feature.split("]")[0].split("/")[0].replace("* [", "").strip() - parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" - new_list.append(parent_line + "\n") - new_list.append(f" {new_feature}\n") - else: - new_list.append(new_feature + "\n\n") + if not found and "/" in full_name: + parent_name = new_feature_category + parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" + new_list.append(parent_line + "\n") + new_list.append(f" {new_feature}\n") sections["Modules"] = new_list From 68aa0e8312cc0d58f530a195ec2a64206335be5b Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 15:35:25 -0500 Subject: [PATCH 16/32] fixing update summary --- .github/workflows/gitbook-sync.yml | 10 +++-- .github/workflows/update_summary.py | 60 +++++++++++++++++++---------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index add43467..693043f8 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -123,8 +123,12 @@ jobs: id: replace_slash run: | FRAGMENT="${{ env.SW_NAME }}" - MD_PATH="${FRAGMENT}" # keep subfolders for output file - MD_NAME="${MD_PATH}" # <-- key change: preserve nested folder for display text + DIR="$(dirname "$FRAGMENT")" + BASE="$(basename "$FRAGMENT")" + + MD_PATH="${DIR}/${DIR}_${BASE}" + MD_NAME="${FRAGMENT}" + echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV echo "MD_NAME=$MD_NAME" >> $GITHUB_ENV - uses: EndBug/add-and-commit@v9 @@ -191,7 +195,7 @@ jobs: - name: Add to SUMMARY for new features run: | curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py - python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md + python ${{ github.workspace }}/update_summary.py SUMMARY.md "[${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes run: | diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index db5fed76..58824d8e 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -1,6 +1,23 @@ import sys from collections import defaultdict +def build_module_entry(module_id: str): + """ + module_id: fgbio/collectduplexseqmetrics + """ + if "/" in module_id: + namespace, name = module_id.split("/", 1) + path = f"modules/{namespace}/{namespace}_{name}.md" + display = name + parent = namespace + else: + path = f"modules/{module_id}.md" + display = module_id + parent = None + + entry = f"* [{display}]({path})" + return entry, parent + def update_summary_old(origin: str, new_feature: str, feature_type: str): out_summary = "" @@ -32,36 +49,39 @@ def load_summary_file(origin: str): def add_new_feature(sections: dict, new_feature: str, feature_type: str): - new_list = [] - found = False if feature_type == "module": - if new_feature.replace("\\", "") in [line.strip().replace("\\", "") for line in sections["Modules"]]: - return sections + entry, parent = build_module_entry(new_feature) - full_name = new_feature.split("]")[0].replace("* [", "").strip() + # Avoid duplicates (exact match) + existing = [line.strip() for line in sections["Modules"]] + if entry in existing or f" {entry}" in existing: + return sections - if "/" in full_name: - new_feature_category, display_name = full_name.split("/", 1) - new_feature = new_feature.replace(full_name, display_name) - else: - new_feature_category = full_name + new_list = [] + inserted = False for line in sections["Modules"]: new_list.append(line) - if f"[{new_feature_category}]" in line: - found = True - new_list.append(f" {new_feature}\n") # two spaces indent for submodules - if not found and "/" in full_name: - parent_name = new_feature_category - parent_line = f"* [{parent_name}](modules/{parent_name}/README.md)" - new_list.append(parent_line + "\n") - new_list.append(f" {new_feature}\n") + if parent and not inserted and line.startswith(f"* [{parent}]("): + new_list.append(f" {entry}\n") + inserted = True + + # Parent section doesn't exist yet + if parent and not inserted: + new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") + new_list.append(f" {entry}\n") + + # Top-level module (no namespace) + if not parent: + new_list.append(entry + "\n") sections["Modules"] = new_list - if feature_type == "subworkflow" and new_feature not in sections["Subworkflows"]: - sections["Subworkflows"].append(new_feature + "\n") + elif feature_type == "subworkflow": + if new_feature + "\n" not in sections["Subworkflows"]: + sections["Subworkflows"].append(new_feature + "\n") + return sections From 2e8428b7dcf2be5c0b0155bcf8b784a9974eadf9 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 15:47:34 -0500 Subject: [PATCH 17/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 693043f8..b2bc2ea9 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -195,7 +195,7 @@ jobs: - name: Add to SUMMARY for new features run: | curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py - python ${{ github.workspace }}/update_summary.py SUMMARY.md "[${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md + python ${{ github.workspace }}/update_summary.py SUMMARY.md "${{ env.SW_NAME }}" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes run: | From 80956132bfd5d83b1c05491398afc8650c42d866 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 16:24:24 -0500 Subject: [PATCH 18/32] fixing summary --- .github/workflows/gitbook-sync.yml | 6 ++++++ .github/workflows/update_summary.py | 7 ++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index b2bc2ea9..7ec18b22 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -137,6 +137,12 @@ jobs: message: "pull changes before adding doc for ${{ matrix.tags }}" pull: "--rebase" cwd: "./" + - name: Ensure output folder exists and debug + run: | + echo "MD_PATH=${MD_PATH}" + echo "Creating folder: ./modules/$(dirname ${MD_PATH})" + mkdir -p ./modules/$(dirname ${MD_PATH}) + ls -la ./modules - name: Download convertor from yml to md run: | curl -o ${{ github.workspace }}/yaml_to_md.py https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/yaml_to_md.py diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 58824d8e..ba5c6a5c 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -2,17 +2,14 @@ from collections import defaultdict def build_module_entry(module_id: str): - """ - module_id: fgbio/collectduplexseqmetrics - """ if "/" in module_id: namespace, name = module_id.split("/", 1) + display = f"{namespace}_{name}" path = f"modules/{namespace}/{namespace}_{name}.md" - display = name parent = namespace else: - path = f"modules/{module_id}.md" display = module_id + path = f"modules/{module_id}.md" parent = None entry = f"* [{display}]({path})" From 45c1795b82ccebca9f82b266f7ff42d9eae3c7c5 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 16:39:43 -0500 Subject: [PATCH 19/32] generate READMEs --- .github/workflows/gitbook-sync.yml | 6 ++++++ .github/workflows/update_summary.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 7ec18b22..e0f89390 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -143,6 +143,12 @@ jobs: echo "Creating folder: ./modules/$(dirname ${MD_PATH})" mkdir -p ./modules/$(dirname ${MD_PATH}) ls -la ./modules + - name: Ensure parent README exists + run: | + DIR="$(dirname "$MD_PATH")" + if [ ! -f "./modules/$DIR/README.md" ]; then + echo "# $DIR" > "./modules/$DIR/README.md" + fi - name: Download convertor from yml to md run: | curl -o ${{ github.workspace }}/yaml_to_md.py https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/yaml_to_md.py diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index ba5c6a5c..a85677a7 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -60,9 +60,9 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): for line in sections["Modules"]: new_list.append(line) - if parent and not inserted and line.startswith(f"* [{parent}]("): + if parent and not inserted: + new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") new_list.append(f" {entry}\n") - inserted = True # Parent section doesn't exist yet if parent and not inserted: From 8b19b9c0b3a842024c80def56c2f92b99880df6d Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 16:52:25 -0500 Subject: [PATCH 20/32] update summary --- .github/workflows/gitbook-sync.yml | 2 +- .github/workflows/update_summary.py | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index e0f89390..9e1a51a3 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -117,7 +117,7 @@ jobs: run: | MATRIX_FRAGMENT="${{ matrix.tags }}" TEMP_NAME=$(echo $MATRIX_FRAGMENT | sed 's/subworkflows\///g') - SW_NAME=$(echo $TEMP_NAME | sed 's/modules\///g') + SW_NAME=$(echo $TEMP_NAME | sed 's/modules\///g' | sed 's/\//\//') echo "SW_NAME=$SW_NAME" >> $GITHUB_ENV - name: Rename md file id: replace_slash diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index a85677a7..ba6b4cf6 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -15,7 +15,6 @@ def build_module_entry(module_id: str): entry = f"* [{display}]({path})" return entry, parent - def update_summary_old(origin: str, new_feature: str, feature_type: str): out_summary = "" with open(origin, 'r') as f: @@ -48,8 +47,6 @@ def load_summary_file(origin: str): def add_new_feature(sections: dict, new_feature: str, feature_type: str): if feature_type == "module": entry, parent = build_module_entry(new_feature) - - # Avoid duplicates (exact match) existing = [line.strip() for line in sections["Modules"]] if entry in existing or f" {entry}" in existing: return sections @@ -59,17 +56,14 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): for line in sections["Modules"]: new_list.append(line) - - if parent and not inserted: - new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") + if parent and not inserted and line.startswith(f"* [{parent}]("): new_list.append(f" {entry}\n") + inserted = True - # Parent section doesn't exist yet if parent and not inserted: new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") new_list.append(f" {entry}\n") - # Top-level module (no namespace) if not parent: new_list.append(entry + "\n") @@ -81,7 +75,6 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): return sections - def rebuild_summary(origin: str, new_feature: str, feature_type: str): # Load current summary file to dictionary sections = load_summary_file(origin=origin) From b2ac3a4222d66569d9f9574f1d21527e64434539 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 16:57:11 -0500 Subject: [PATCH 21/32] Update update_summary.py --- .github/workflows/update_summary.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index ba6b4cf6..4ed70faf 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -48,19 +48,21 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): if feature_type == "module": entry, parent = build_module_entry(new_feature) existing = [line.strip() for line in sections["Modules"]] + if entry in existing or f" {entry}" in existing: return sections new_list = [] + parent_exists = any(line.startswith(f"* [{parent}](") for line in existing) inserted = False for line in sections["Modules"]: new_list.append(line) - if parent and not inserted and line.startswith(f"* [{parent}]("): + if parent and parent_exists and not inserted and line.startswith(f"* [{parent}]("): new_list.append(f" {entry}\n") inserted = True - if parent and not inserted: + if parent and not parent_exists: new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") new_list.append(f" {entry}\n") @@ -75,6 +77,7 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): return sections + def rebuild_summary(origin: str, new_feature: str, feature_type: str): # Load current summary file to dictionary sections = load_summary_file(origin=origin) From 0eff8d2e6de0d0e7fd132143954d2a6c55ee4f25 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 17:03:59 -0500 Subject: [PATCH 22/32] Update update_summary.py --- .github/workflows/update_summary.py | 88 ++++++++++++----------------- 1 file changed, 35 insertions(+), 53 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 4ed70faf..c7586f61 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -15,87 +15,69 @@ def build_module_entry(module_id: str): entry = f"* [{display}]({path})" return entry, parent -def update_summary_old(origin: str, new_feature: str, feature_type: str): - out_summary = "" - with open(origin, 'r') as f: - for line in f.readlines(): - if feature_type == "module" and line.startswith("## Subworkflows") and new_feature not in out_summary: - out_summary += new_feature + '\n\n' - if line.strip(): - out_summary += line - if feature_type == "subworkflow" and new_feature not in out_summary: - out_summary += new_feature - return out_summary - - def load_summary_file(origin: str): - # Read the summary file to dict - # keys, "Table of contents", "Modules", "Subworkflows" - # values are list of lines sections = defaultdict(list) current_section = None - with open(origin, "r") as file_read: - for row in file_read: + with open(origin, "r") as f: + for row in f: if row.startswith("#"): current_section = row.replace("#", "").strip() continue if row.strip(): - sections[current_section].append(row) + sections[current_section].append(row.rstrip("\n")) return sections - def add_new_feature(sections: dict, new_feature: str, feature_type: str): if feature_type == "module": entry, parent = build_module_entry(new_feature) - existing = [line.strip() for line in sections["Modules"]] + existing = sections["Modules"] - if entry in existing or f" {entry}" in existing: - return sections - - new_list = [] - parent_exists = any(line.startswith(f"* [{parent}](") for line in existing) - inserted = False + parent_header = f"* [{parent}](modules/{parent}/README.md)" if parent else None + child_entry = f" {entry}" if parent else entry - for line in sections["Modules"]: - new_list.append(line) - if parent and parent_exists and not inserted and line.startswith(f"* [{parent}]("): - new_list.append(f" {entry}\n") - inserted = True + parent_exists = parent and any(line.strip() == parent_header for line in existing) + child_exists = any(line.strip() == child_entry.strip() for line in existing) - if parent and not parent_exists: - new_list.append(f"* [{parent}](modules/{parent}/README.md)\n") - new_list.append(f" {entry}\n") + if child_exists: + return sections - if not parent: - new_list.append(entry + "\n") + new_list = [] + if parent: + inserted = False + for line in existing: + new_list.append(line) + if not inserted and line.strip() == parent_header: + new_list.append(child_entry) + inserted = True + if not parent_exists: + new_list.append(parent_header) + new_list.append(child_entry) + else: + new_list = existing + [entry] sections["Modules"] = new_list elif feature_type == "subworkflow": - if new_feature + "\n" not in sections["Subworkflows"]: - sections["Subworkflows"].append(new_feature + "\n") + if new_feature not in sections["Subworkflows"]: + sections["Subworkflows"].append(new_feature) return sections - def rebuild_summary(origin: str, new_feature: str, feature_type: str): - # Load current summary file to dictionary - sections = load_summary_file(origin=origin) - # Add the new feature to summary file dict - updated_sections = add_new_feature( - sections=sections, new_feature=new_feature, feature_type=feature_type) - # Output the updated summary file to string + sections = load_summary_file(origin) + sections = add_new_feature(sections, new_feature, feature_type) + out_summary = "# Table of contents\n\n" - for line in updated_sections["Table of contents"]: - out_summary += line + for line in sections["Table of contents"]: + out_summary += f"{line}\n" out_summary += "\n## Modules\n\n" - for line in updated_sections["Modules"]: - out_summary += line + for line in sections["Modules"]: + out_summary += f"{line}\n" out_summary += "\n## Subworkflows\n\n" - for line in updated_sections["Subworkflows"]: - out_summary += line - return out_summary + for line in sections["Subworkflows"]: + out_summary += f"{line}\n" + return out_summary if __name__ == "__main__": origin_summary = sys.argv[1] From fcf4fe93979318723fc32a4a15c198859add068f Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 17:11:21 -0500 Subject: [PATCH 23/32] Update update_summary.py --- .github/workflows/update_summary.py | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index c7586f61..e0a01524 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -30,38 +30,38 @@ def load_summary_file(origin: str): def add_new_feature(sections: dict, new_feature: str, feature_type: str): if feature_type == "module": entry, parent = build_module_entry(new_feature) - existing = sections["Modules"] + module_lines = sections["Modules"] + existing_lines = [line.rstrip("\n") for line in module_lines] - parent_header = f"* [{parent}](modules/{parent}/README.md)" if parent else None - child_entry = f" {entry}" if parent else entry - - parent_exists = parent and any(line.strip() == parent_header for line in existing) - child_exists = any(line.strip() == child_entry.strip() for line in existing) - - if child_exists: + if entry in existing_lines or f" {entry}" in existing_lines: return sections - new_list = [] if parent: + parent_line = f"* [{parent}](modules/{parent}/README.md)" + parent_present = any(line.strip() == parent_line for line in existing_lines) + new_module_lines = [] inserted = False - for line in existing: - new_list.append(line) - if not inserted and line.strip() == parent_header: - new_list.append(child_entry) + + for line in module_lines: + new_module_lines.append(line) + if parent_present and not inserted and line.strip() == parent_line: + new_module_lines.append(f" {entry}\n") inserted = True - if not parent_exists: - new_list.append(parent_header) - new_list.append(child_entry) - else: - new_list = existing + [entry] - sections["Modules"] = new_list + if not parent_present: + new_module_lines.append(f"{parent_line}\n") + new_module_lines.append(f" {entry}\n") + + sections["Modules"] = new_module_lines + + else: + module_lines.append(f"{entry}\n") + sections["Modules"] = module_lines elif feature_type == "subworkflow": - if new_feature not in sections["Subworkflows"]: - sections["Subworkflows"].append(new_feature) + if new_feature + "\n" not in sections["Subworkflows"]: + sections["Subworkflows"].append(new_feature + "\n") - return sections def rebuild_summary(origin: str, new_feature: str, feature_type: str): sections = load_summary_file(origin) From b8c64426e09e854f37034136a6965b0b3afada09 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 17:17:46 -0500 Subject: [PATCH 24/32] Update update_summary.py --- .github/workflows/update_summary.py | 44 +++++++++++++---------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index e0a01524..4ed08673 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -11,7 +11,6 @@ def build_module_entry(module_id: str): display = module_id path = f"modules/{module_id}.md" parent = None - entry = f"* [{display}]({path})" return entry, parent @@ -31,37 +30,33 @@ def add_new_feature(sections: dict, new_feature: str, feature_type: str): if feature_type == "module": entry, parent = build_module_entry(new_feature) module_lines = sections["Modules"] - existing_lines = [line.rstrip("\n") for line in module_lines] - if entry in existing_lines or f" {entry}" in existing_lines: + # Check if entry already exists + existing_entries = [line.strip() for line in module_lines] + if entry in existing_entries or f" {entry}" in existing_entries: return sections if parent: + # Check if parent already exists parent_line = f"* [{parent}](modules/{parent}/README.md)" - parent_present = any(line.strip() == parent_line for line in existing_lines) - new_module_lines = [] - inserted = False - - for line in module_lines: - new_module_lines.append(line) - if parent_present and not inserted and line.strip() == parent_line: - new_module_lines.append(f" {entry}\n") - inserted = True - - if not parent_present: - new_module_lines.append(f"{parent_line}\n") - new_module_lines.append(f" {entry}\n") - - sections["Modules"] = new_module_lines - + parent_indices = [i for i, line in enumerate(module_lines) if line.strip() == parent_line] + if parent_indices: + # Insert under existing parent + index = parent_indices[-1] + 1 + module_lines.insert(index, f" {entry}") + else: + # Add parent at end if missing, then child + module_lines.append(parent_line) + module_lines.append(f" {entry}") else: - module_lines.append(f"{entry}\n") - sections["Modules"] = module_lines + module_lines.append(entry) - elif feature_type == "subworkflow": - if new_feature + "\n" not in sections["Subworkflows"]: - sections["Subworkflows"].append(new_feature + "\n") + sections["Modules"] = module_lines + elif feature_type == "subworkflow": + if new_feature not in sections["Subworkflows"]: + sections["Subworkflows"].append(new_feature) + return sections def rebuild_summary(origin: str, new_feature: str, feature_type: str): sections = load_summary_file(origin) @@ -76,7 +71,6 @@ def rebuild_summary(origin: str, new_feature: str, feature_type: str): out_summary += "\n## Subworkflows\n\n" for line in sections["Subworkflows"]: out_summary += f"{line}\n" - return out_summary if __name__ == "__main__": From 7c5e35cd0517428c3377289d09a21f8ed6ac854c Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 17:31:07 -0500 Subject: [PATCH 25/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 9e1a51a3..f59ae7d8 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -206,7 +206,7 @@ jobs: ${{ env.SUBWORKFLOW }} - name: Add to SUMMARY for new features run: | - curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py + cp .github/workflows/update_summary.py /home/runner/work/modules/modules/update_summary.py python ${{ github.workspace }}/update_summary.py SUMMARY.md "${{ env.SW_NAME }}" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes From 3a20429d7c0e2caa3b686dceb43fa025fe833001 Mon Sep 17 00:00:00 2001 From: buehlere Date: Tue, 20 Jan 2026 17:39:07 -0500 Subject: [PATCH 26/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index f59ae7d8..a08434d9 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -206,7 +206,7 @@ jobs: ${{ env.SUBWORKFLOW }} - name: Add to SUMMARY for new features run: | - cp .github/workflows/update_summary.py /home/runner/work/modules/modules/update_summary.py + curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/release/0.2.4/.github/workflows/update_summary.py python ${{ github.workspace }}/update_summary.py SUMMARY.md "${{ env.SW_NAME }}" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md - name: Check for changes From 9b9742425ba3066baa903fa43dcafd4b1b5a9a99 Mon Sep 17 00:00:00 2001 From: buehlere Date: Wed, 21 Jan 2026 10:07:00 -0500 Subject: [PATCH 27/32] sub-tools vs no sub-tools --- .github/workflows/gitbook-sync.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index a08434d9..726a8990 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -123,10 +123,15 @@ jobs: id: replace_slash run: | FRAGMENT="${{ env.SW_NAME }}" - DIR="$(dirname "$FRAGMENT")" - BASE="$(basename "$FRAGMENT")" + if [[ "$FRAGMENT" == *"/"* ]]; then + DIR="$(dirname "$FRAGMENT")" + BASE="$(basename "$FRAGMENT")" + MD_PATH="${DIR}/${DIR}_${BASE}" + else + MD_PATH="$FRAGMENT" + fi - MD_PATH="${DIR}/${DIR}_${BASE}" + echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV MD_NAME="${FRAGMENT}" echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV From 384c73224c26ac3c226d8bdf4db78708ed9e8174 Mon Sep 17 00:00:00 2001 From: buehlere Date: Wed, 21 Jan 2026 10:38:46 -0500 Subject: [PATCH 28/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 726a8990..08faec64 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -227,6 +227,5 @@ jobs: with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" - add: '["*/*.md --force", "SUMMARY.md --force"]' cwd: "./" push: --force From 7e3492115f933c0fa77eb1b3b1b8cff4b341f581 Mon Sep 17 00:00:00 2001 From: buehlere Date: Wed, 21 Jan 2026 10:56:59 -0500 Subject: [PATCH 29/32] Update gitbook-sync.yml --- .github/workflows/gitbook-sync.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 08faec64..6825c2e3 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -90,6 +90,7 @@ jobs: needs: [pytest-changes, nf-test-changes] strategy: fail-fast: false + max-parallel: 1 matrix: tags: [ From 83e2dc87cf4d5975920c98c6dc8971d03c0dd6b7 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:12:06 -0500 Subject: [PATCH 30/32] Fix FASTA header to preserve mutation identity through netMHCpan Prepend identifier_key (e.g. 2CAE_MC) to MUT and WT FASTA headers so that netMHCpan Identity column retains the encoded mutation type needed by generate_input.py for downstream neoantigen input generation. Co-Authored-By: Claude Opus 4.6 --- .../1.2/resources/usr/bin/generateMutFasta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py index ed576e38..6d309efe 100755 --- a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py @@ -131,9 +131,9 @@ def main(): + " Alt:" + str(mut.maf_row["Tumor_Seq_Allele2"]) ) - out_fa.write(">" + id_string + "\n") + out_fa.write(">" + mut.identifier_key + " " + id_string + "\n") out_fa.write(mut.mt_altered_aa + "\n") - out_WT_fa.write(">" + id_string + "\n") + out_WT_fa.write(">" + mut.identifier_key + " " + id_string + "\n") out_WT_fa.write(mut.wt_altered_aa + "\n") ### write out WT/MT CDS + AA for debugging purposes From 81acf713656392b60b394c9e549389c3fa622f6b Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:24:55 -0500 Subject: [PATCH 31/32] Update generatemutfasta/1.2 test snapshots for identifier_key header Co-Authored-By: Claude Opus 4.6 --- .../generatemutfasta/1.2/tests/main.nf.test.snap | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap index 60c19112..ee0f19e9 100644 --- a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap +++ b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap @@ -10,7 +10,7 @@ "id": "test", "single_end": false }, - "test.MUT.sequences.fa:md5,dff338ec438ac75aa674b64ea8e26544" + "test.MUT.sequences.fa:md5,5c86389b9b56eaa6f9aeb9391c7dea9d" ] ], [ @@ -19,16 +19,16 @@ "id": "test", "single_end": false }, - "test.WT.sequences.fa:md5,51415a40a725a16eaa8f5c51fa43799e" + "test.WT.sequences.fa:md5,02e4c68d88d856416b40cef391049211" ] ], "test_generate_mut_fasta.log" ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.2" + "nextflow": "24.10.5" }, - "timestamp": "2026-01-28T15:30:02.572804474" + "timestamp": "2026-02-06T16:24:15.454193" }, "generatemutfasta_1.2 - maf - fasta - stub": { "content": [ @@ -96,9 +96,9 @@ } ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2025-12-18T16:08:27.448965" + "timestamp": "2026-02-06T16:24:30.641252" } } \ No newline at end of file From 0e81776d00ecd79b6cc5edd8f6b607d1a6b21c91 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:25:32 -0500 Subject: [PATCH 32/32] Use identifier_key with _M/_W suffix as sole FASTA header netMHCpan concatenates the entire FASTA header (replacing spaces with underscores) and truncates to 15 characters. The previous commit included transcript and variant info after identifier_key, producing truncated Identity values that didn't match generate_input.py's mutation_dict keys. Restore the original format of >identifier_key_M and >identifier_key_W to keep identities under 15 chars. Co-Authored-By: Claude Opus 4.6 --- .../1.2/resources/usr/bin/generateMutFasta.py | 17 ++--------------- .../1.2/tests/main.nf.test.snap | 8 ++++---- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py index 6d309efe..1afc95ce 100755 --- a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py @@ -118,22 +118,9 @@ def main(): n_missing_tx_id += 1 if len(mut.mt_altered_aa) > 5: - id_string = ( - str(mut.maf_row["Transcript_ID"]) - + " Variant " - + str(mut.maf_row["Chromosome"]) - + ":" - + str(mut.maf_row["Start_Position"]) - + "-" - + str(mut.maf_row["End_Position"]) - + " Ref:" - + str(mut.maf_row["Reference_Allele"]) - + " Alt:" - + str(mut.maf_row["Tumor_Seq_Allele2"]) - ) - out_fa.write(">" + mut.identifier_key + " " + id_string + "\n") + out_fa.write(">" + mut.identifier_key + "_M\n") out_fa.write(mut.mt_altered_aa + "\n") - out_WT_fa.write(">" + mut.identifier_key + " " + id_string + "\n") + out_WT_fa.write(">" + mut.identifier_key + "_W\n") out_WT_fa.write(mut.wt_altered_aa + "\n") ### write out WT/MT CDS + AA for debugging purposes diff --git a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap index ee0f19e9..cc9060bc 100644 --- a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap +++ b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap @@ -10,7 +10,7 @@ "id": "test", "single_end": false }, - "test.MUT.sequences.fa:md5,5c86389b9b56eaa6f9aeb9391c7dea9d" + "test.MUT.sequences.fa:md5,3d2ff66590a4329f9a24e03bdf84e0ab" ] ], [ @@ -19,7 +19,7 @@ "id": "test", "single_end": false }, - "test.WT.sequences.fa:md5,02e4c68d88d856416b40cef391049211" + "test.WT.sequences.fa:md5,4bfcfc4d29d01ddc4108f39350936228" ] ], "test_generate_mut_fasta.log" @@ -28,7 +28,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2026-02-06T16:24:15.454193" + "timestamp": "2026-02-09T15:03:37.884362" }, "generatemutfasta_1.2 - maf - fasta - stub": { "content": [ @@ -99,6 +99,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2026-02-06T16:24:30.641252" + "timestamp": "2026-02-09T15:03:51.569621" } } \ No newline at end of file