diff --git a/.github/workflows/gitbook-sync.yml b/.github/workflows/gitbook-sync.yml index 1b97bf1a..6825c2e3 100644 --- a/.github/workflows/gitbook-sync.yml +++ b/.github/workflows/gitbook-sync.yml @@ -47,8 +47,31 @@ jobs: fetch-depth: 2 # To retrieve the preceding commit. - name: Combine all tags.yml files - id: get_tags - run: find . -name "tags.yml" -not -path "./.github/*" -exec cat {} + > .github/tags.yml + run: | + echo "{}" > .github/tags.yml + + for f in $(find . -name "tags.yml" -not -path "./.github/*"); do + if [[ "$f" == *"/modules/"* ]]; then + PREFIX="modules" + elif [[ "$f" == *"/subworkflows/"* ]]; then + PREFIX="subworkflows" + else + echo "Unknown feature type for $f" + exit 1 + fi + + yq eval " + with_entries( + .key = \"${PREFIX}/\" + .key + ) + " "$f" > /tmp/tags.prefixed.yml + + yq eval-all '. as $item ireduce ({}; . *+ $item)' \ + .github/tags.yml /tmp/tags.prefixed.yml > /tmp/tags.merged.yml + + mv /tmp/tags.merged.yml .github/tags.yml + done + - name: debug run: cat .github/tags.yml @@ -67,6 +90,7 @@ jobs: needs: [pytest-changes, nf-test-changes] strategy: fail-fast: false + max-parallel: 1 matrix: tags: [ @@ -85,6 +109,8 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" # install the python version needed + - name: Install yq + run: sudo apt-get update && sudo apt-get install -y yq - uses: actions/checkout@v4 with: ref: docs @@ -92,21 +118,43 @@ jobs: run: | MATRIX_FRAGMENT="${{ matrix.tags }}" TEMP_NAME=$(echo $MATRIX_FRAGMENT | sed 's/subworkflows\///g') - SW_NAME=$(echo $TEMP_NAME | sed 's/modules\///g') + SW_NAME=$(echo $TEMP_NAME | sed 's/modules\///g' | sed 's/\//\//') echo "SW_NAME=$SW_NAME" >> $GITHUB_ENV - name: Rename md file id: replace_slash run: | FRAGMENT="${{ env.SW_NAME }}" - MD_NAME=$(echo $FRAGMENT | sed 's/\//_/g') + if [[ "$FRAGMENT" == *"/"* ]]; then + DIR="$(dirname "$FRAGMENT")" + BASE="$(basename "$FRAGMENT")" + MD_PATH="${DIR}/${DIR}_${BASE}" + else + MD_PATH="$FRAGMENT" + fi + + echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV + MD_NAME="${FRAGMENT}" + + echo "MD_PATH=$MD_PATH" >> $GITHUB_ENV echo "MD_NAME=$MD_NAME" >> $GITHUB_ENV - echo "${MD_NAME}" - uses: EndBug/add-and-commit@v9 with: default_author: github_actions message: "pull changes before adding doc for ${{ matrix.tags }}" pull: "--rebase" cwd: "./" + - name: Ensure output folder exists and debug + run: | + echo "MD_PATH=${MD_PATH}" + echo "Creating folder: ./modules/$(dirname ${MD_PATH})" + mkdir -p ./modules/$(dirname ${MD_PATH}) + ls -la ./modules + - name: Ensure parent README exists + run: | + DIR="$(dirname "$MD_PATH")" + if [ ! -f "./modules/$DIR/README.md" ]; then + echo "# $DIR" > "./modules/$DIR/README.md" + fi - name: Download convertor from yml to md run: | curl -o ${{ github.workspace }}/yaml_to_md.py https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/yaml_to_md.py @@ -152,24 +200,33 @@ jobs: echo "FEATURE_TYPE=$FEATURE_TYPE" >> $GITHUB_ENV echo "SUMMARY_TYPE=$SUMMARY_TYPE" >> $GITHUB_ENV echo "SUBWORKFLOW=$SUBWORKFLOW" >> $GITHUB_ENV + - name: Run convertor to generate md file for new module run: | echo ${{ matrix.tags }} - python ${{ github.workspace }}/yaml_to_md.py all --yaml-file ${{ github.workspace }}/temp.yml --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json ${{ env.SUBWORKFLOW }} - - name: Check file existence for modules - id: check_files - uses: andstor/file-existence-action@v1 - with: - branch: docs - files: ${{ env.FEATURE_TYPE}}/${{ env.MD_NAME }}.md + mkdir -p ./${{ env.FEATURE_TYPE }}/$(dirname $MD_PATH) + python ${{ github.workspace }}/yaml_to_md.py all \ + --yaml-file ${{ github.workspace }}/temp.yml \ + --output-file ./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md \ + --schema-url https://raw.githubusercontent.com/mskcc-omics-workflows/yaml_to_md/0.0.3/nextflow_schema/${{ env.FEATURE_TYPE }}/meta-schema.json \ + ${{ env.SUBWORKFLOW }} - name: Add to SUMMARY for new features run: | - curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/develop/.github/workflows/update_summary.py - python ${{ github.workspace }}/update_summary.py SUMMARY.md "* [${{ env.MD_NAME }}](${{ env.FEATURE_TYPE }}/${{ env.MD_NAME }}.md)" ${{ env.SUMMARY_TYPE }} > tmp_summary.md + curl -o ${{ github.workspace }}/update_summary.py https://raw.githubusercontent.com/mskcc-omics-workflows/modules/release/0.2.4/.github/workflows/update_summary.py + python ${{ github.workspace }}/update_summary.py SUMMARY.md "${{ env.SW_NAME }}" ${{ env.SUMMARY_TYPE }} > tmp_summary.md mv tmp_summary.md SUMMARY.md + - name: Check for changes + run: | + if git diff --quiet; then + echo "NO_CHANGES=true" >> $GITHUB_ENV + fi + - name: Stage new files + run: | + git add "./${{ env.FEATURE_TYPE }}/${{ env.MD_PATH }}.md" + git add SUMMARY.md - uses: EndBug/add-and-commit@v9 with: default_author: github_actions message: "add doc for ${{ matrix.tags }}" - add: '["*/*.md --force", "SUMMARY.md --force"]' cwd: "./" + push: --force diff --git a/.github/workflows/update_summary.py b/.github/workflows/update_summary.py index 8dc679cd..4ed08673 100644 --- a/.github/workflows/update_summary.py +++ b/.github/workflows/update_summary.py @@ -1,81 +1,78 @@ import sys from collections import defaultdict - -def update_summary_old(origin: str, new_feature: str, feature_type: str): - out_summary = "" - with open(origin, 'r') as f: - for line in f.readlines(): - if feature_type == "module" and line.startswith("## Subworkflows") and new_feature not in out_summary: - out_summary += new_feature + '\n\n' - if line.strip(): - out_summary += line - if feature_type == "subworkflow" and new_feature not in out_summary: - out_summary += new_feature - return out_summary - +def build_module_entry(module_id: str): + if "/" in module_id: + namespace, name = module_id.split("/", 1) + display = f"{namespace}_{name}" + path = f"modules/{namespace}/{namespace}_{name}.md" + parent = namespace + else: + display = module_id + path = f"modules/{module_id}.md" + parent = None + entry = f"* [{display}]({path})" + return entry, parent def load_summary_file(origin: str): - # Read the summary file to dict - # keys, "Table of contents", "Modules", "Subworkflows" - # values are list of lines sections = defaultdict(list) current_section = None - with open(origin, "r") as file_read: - for row in file_read: + with open(origin, "r") as f: + for row in f: if row.startswith("#"): current_section = row.replace("#", "").strip() continue if row.strip(): - sections[current_section].append(row) + sections[current_section].append(row.rstrip("\n")) return sections - def add_new_feature(sections: dict, new_feature: str, feature_type: str): - new_list = [] - found = False if feature_type == "module": - # Check if the new feature is already in current summary file - if new_feature.replace("\\", "") in [line.strip().replace("\\", "") for line in sections["Modules"]]: + entry, parent = build_module_entry(new_feature) + module_lines = sections["Modules"] + + # Check if entry already exists + existing_entries = [line.strip() for line in module_lines] + if entry in existing_entries or f" {entry}" in existing_entries: return sections - # Get module name from [] and remove the _ part. - # Check if the first part of the module name exists - new_feature_category = new_feature.split("]")[0].split("_")[0].replace("* [", "").replace("\\", "").strip() - for line in sections["Modules"]: - new_list.append(line) - if f"[{new_feature_category}]" in line: - found = True - new_list.append(f" {new_feature}\n") - if not found: - # If not found, put the new feature to the last - new_list.append(new_feature + "\n\n") - sections["Modules"] = new_list - if feature_type == "subworkflow" and new_feature not in sections["Subworkflows"]: - # There is no subset of subworkflow, so put the new feature to the last - sections["Subworkflows"].append(new_feature + "\n") - return sections + if parent: + # Check if parent already exists + parent_line = f"* [{parent}](modules/{parent}/README.md)" + parent_indices = [i for i, line in enumerate(module_lines) if line.strip() == parent_line] + if parent_indices: + # Insert under existing parent + index = parent_indices[-1] + 1 + module_lines.insert(index, f" {entry}") + else: + # Add parent at end if missing, then child + module_lines.append(parent_line) + module_lines.append(f" {entry}") + else: + module_lines.append(entry) + + sections["Modules"] = module_lines + elif feature_type == "subworkflow": + if new_feature not in sections["Subworkflows"]: + sections["Subworkflows"].append(new_feature) + return sections def rebuild_summary(origin: str, new_feature: str, feature_type: str): - # Load current summary file to dictionary - sections = load_summary_file(origin=origin) - # Add the new feature to summary file dict - updated_sections = add_new_feature( - sections=sections, new_feature=new_feature, feature_type=feature_type) - # Output the updated summary file to string + sections = load_summary_file(origin) + sections = add_new_feature(sections, new_feature, feature_type) + out_summary = "# Table of contents\n\n" - for line in updated_sections["Table of contents"]: - out_summary += line + for line in sections["Table of contents"]: + out_summary += f"{line}\n" out_summary += "\n## Modules\n\n" - for line in updated_sections["Modules"]: - out_summary += line + for line in sections["Modules"]: + out_summary += f"{line}\n" out_summary += "\n## Subworkflows\n\n" - for line in updated_sections["Subworkflows"]: - out_summary += line + for line in sections["Subworkflows"]: + out_summary += f"{line}\n" return out_summary - if __name__ == "__main__": origin_summary = sys.argv[1] new_feature = sys.argv[2] diff --git a/modules/msk/calculatenoise/environment.yml b/modules/msk/calculatenoise/environment.yml index 4b3c9d37..4c59b932 100644 --- a/modules/msk/calculatenoise/environment.yml +++ b/modules/msk/calculatenoise/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "YOUR-TOOL-HERE" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/calculatenoise/meta.yml b/modules/msk/calculatenoise/meta.yml index 54568d25..714dbaf7 100644 --- a/modules/msk/calculatenoise/meta.yml +++ b/modules/msk/calculatenoise/meta.yml @@ -10,9 +10,9 @@ tools: homepage: "https://github.com/msk-access/sequence_qc" documentation: "https://github.com/msk-access/sequence_qc" tool_dev_url: "https://github.com/msk-access/sequence_qc" - doi: "" + doi: "no DOI available" licence: ["MIT"] - identifier: null + identifier: "" input: - - meta: @@ -141,4 +141,4 @@ output: authors: - "@mikefeixu" maintainers: - - "@mikefeixu" \ No newline at end of file + - "@mikefeixu" diff --git a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml index 5219fc6f..97c772bb 100644 --- a/modules/msk/fgbio/collectduplexseqmetrics/meta.yml +++ b/modules/msk/fgbio/collectduplexseqmetrics/meta.yml @@ -8,10 +8,10 @@ keywords: tools: - "fgbio": description: "A set of tools for working with genomic and high throughput sequencing data, including UMIs" - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + homepage: "https://github.com/fulcrumgenomics/fgbio" + documentation: "https://github.com/fulcrumgenomics/fgbio" + tool_dev_url: "https://github.com/fulcrumgenomics/fgbio" + doi: "no DOI available" licence: ["MIT"] identifier: biotools:fgbio diff --git a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py index ed576e38..1afc95ce 100755 --- a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py @@ -118,22 +118,9 @@ def main(): n_missing_tx_id += 1 if len(mut.mt_altered_aa) > 5: - id_string = ( - str(mut.maf_row["Transcript_ID"]) - + " Variant " - + str(mut.maf_row["Chromosome"]) - + ":" - + str(mut.maf_row["Start_Position"]) - + "-" - + str(mut.maf_row["End_Position"]) - + " Ref:" - + str(mut.maf_row["Reference_Allele"]) - + " Alt:" - + str(mut.maf_row["Tumor_Seq_Allele2"]) - ) - out_fa.write(">" + id_string + "\n") + out_fa.write(">" + mut.identifier_key + "_M\n") out_fa.write(mut.mt_altered_aa + "\n") - out_WT_fa.write(">" + id_string + "\n") + out_WT_fa.write(">" + mut.identifier_key + "_W\n") out_WT_fa.write(mut.wt_altered_aa + "\n") ### write out WT/MT CDS + AA for debugging purposes diff --git a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap index 60c19112..cc9060bc 100644 --- a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap +++ b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap @@ -10,7 +10,7 @@ "id": "test", "single_end": false }, - "test.MUT.sequences.fa:md5,dff338ec438ac75aa674b64ea8e26544" + "test.MUT.sequences.fa:md5,3d2ff66590a4329f9a24e03bdf84e0ab" ] ], [ @@ -19,16 +19,16 @@ "id": "test", "single_end": false }, - "test.WT.sequences.fa:md5,51415a40a725a16eaa8f5c51fa43799e" + "test.WT.sequences.fa:md5,4bfcfc4d29d01ddc4108f39350936228" ] ], "test_generate_mut_fasta.log" ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.2" + "nextflow": "24.10.5" }, - "timestamp": "2026-01-28T15:30:02.572804474" + "timestamp": "2026-02-09T15:03:37.884362" }, "generatemutfasta_1.2 - maf - fasta - stub": { "content": [ @@ -96,9 +96,9 @@ } ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2025-12-18T16:08:27.448965" + "timestamp": "2026-02-09T15:03:51.569621" } } \ No newline at end of file