From 59af31c87c51adcf67ef11dda8797b0ec154a1a3 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 19:29:22 +1000 Subject: [PATCH 1/8] added bam output --- src/hostile/aligner.py | 20 ++++++++++++++++++++ src/hostile/cli.py | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index 439d89c..07241a2 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -146,6 +146,14 @@ def gen_clean_cmd( fastq_out_path = output / f"{fastq_stem}.clean.fastq.gz" count_before_path = output / f"{fastq_stem}.reads_in.txt" count_after_path = output / f"{fastq_stem}.reads_out.txt" + mapped_bam_path = output / f"{fastq_stem}.mapped.bam" + + bam_cmd = ( + if output_bam: + bam_cmd = f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + else: + bam_cmd = "" + ) if not stdout and not force and fastq_out_path.exists(): raise FileExistsError( @@ -196,6 +204,8 @@ def gen_clean_cmd( cmd = ( # Align, stream reads to stdout in SAM format f"{alignment_cmd}" + # optional - output bam + f"{bam_cmd}" # Count reads in stream before filtering (2048 + 256 = 2304) f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')" # Discard mapped reads (or inverse) @@ -237,6 +247,14 @@ def gen_paired_clean_cmd( fastq2_out_path = output / f"{fastq2_stem}.clean_2.fastq.gz" count_before_path = output / f"{fastq1_stem}.reads_in.txt" count_after_path = output / f"{fastq1_stem}.reads_out.txt" + mapped_bam_path = output / f"{fastq1_stem.removesuffix('_R1_paired')}.mapped.bam" + + bam_cmd = ( + if output_bam: + bam_cmd = f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + else: + bam_cmd = "" + ) if ( not stdout @@ -320,6 +338,8 @@ def gen_paired_clean_cmd( ) cmd = ( f"{alignment_cmd}" + # optional - output bam + f"{bam_cmd}" f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')" f"{filter_cmd}" f" | tee >(samtools view -F 2304 -c - > '{count_after_path}')" diff --git a/src/hostile/cli.py b/src/hostile/cli.py index 6cf3079..a50d6cc 100644 --- a/src/hostile/cli.py +++ b/src/hostile/cli.py @@ -34,6 +34,7 @@ def clean( force: bool = False, airplane: bool = False, debug: bool = False, + output_bam: bool = False, ) -> None: """ Remove reads aligning to an index from fastq[.gz] input files or stdin. @@ -53,6 +54,7 @@ def clean( :arg force: overwrite existing output files :arg airplane: disable automatic index download (offline mode) :arg debug: show debug messages + :arg output_bam: save mapped reads to BAM for downstream analysis """ if debug: @@ -81,6 +83,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) else: stats = lib.clean_fastqs( @@ -96,6 +99,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) print( json.dumps(stats, indent=4), From 87b9b2f2cc67d2c0f12993ecf71b3d5ad515c7e4 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 22:40:24 +1000 Subject: [PATCH 2/8] fixed output bam --- src/hostile/aligner.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index 07241a2..21da712 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -149,10 +149,9 @@ def gen_clean_cmd( mapped_bam_path = output / f"{fastq_stem}.mapped.bam" bam_cmd = ( - if output_bam: - bam_cmd = f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" - else: - bam_cmd = "" + f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + if output_bam + else "" ) if not stdout and not force and fastq_out_path.exists(): @@ -250,10 +249,9 @@ def gen_paired_clean_cmd( mapped_bam_path = output / f"{fastq1_stem.removesuffix('_R1_paired')}.mapped.bam" bam_cmd = ( - if output_bam: - bam_cmd = f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" - else: - bam_cmd = "" + f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + if output_bam + else "" ) if ( From e29757cd477595a29dbfa6caf15d0608af686c71 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 22:46:35 +1000 Subject: [PATCH 3/8] passing output bool --- src/hostile/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hostile/cli.py b/src/hostile/cli.py index a50d6cc..6ed3e6a 100644 --- a/src/hostile/cli.py +++ b/src/hostile/cli.py @@ -83,7 +83,6 @@ def clean( threads=threads, force=force, airplane=airplane, - output_bam=output_bam, ) else: stats = lib.clean_fastqs( @@ -99,7 +98,6 @@ def clean( threads=threads, force=force, airplane=airplane, - output_bam=output_bam, ) print( json.dumps(stats, indent=4), From c5243886ef1117bcc1fb7fe2b42c673a74e01854 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 22:50:43 +1000 Subject: [PATCH 4/8] gg --- src/hostile/lib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hostile/lib.py b/src/hostile/lib.py index 20052b8..8ecd3ce 100644 --- a/src/hostile/lib.py +++ b/src/hostile/lib.py @@ -174,6 +174,7 @@ def clean_fastqs( threads: int = util.CPU_COUNT, force: bool = False, airplane: bool = False, + output_bam: bool = False, ): stdin = str(fastqs[0]) == "-" stdout = str(output) == "-" @@ -212,6 +213,7 @@ def clean_fastqs( aligner_threads=aligner_threads, compression_threads=compression_threads, force=force, + output_bam=output_bam, ) for fastq in fastqs ] @@ -252,6 +254,7 @@ def clean_paired_fastqs( threads: int = util.CPU_COUNT, force: bool = False, airplane: bool = False, + output_bam: bool = False, ): stdin = str(fastqs[0][0]) == "-" stdout = str(output) == "-" @@ -292,6 +295,7 @@ def clean_paired_fastqs( aligner_threads=aligner_threads, compression_threads=compression_threads, force=force, + output_bam=output_bam, ) for fastq_pair in fastqs ] From 3ff49ef0ef43a07a3857688e3f8655657c90a345 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 22:53:10 +1000 Subject: [PATCH 5/8] ff --- src/hostile/aligner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index 21da712..ad78572 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -139,6 +139,7 @@ def gen_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, + output_bam: bool = False, ) -> str: fastq, output = Path(fastq), Path(output) output.mkdir(exist_ok=True, parents=True) @@ -237,6 +238,7 @@ def gen_paired_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, + output_bam: bool = False, ) -> str: fastq1, fastq2, output = Path(fastq1), Path(fastq2), Path(output) output.mkdir(exist_ok=True, parents=True) From 1481a4bb5abe28b8b82b604b366b96e4389ad9a2 Mon Sep 17 00:00:00 2001 From: Calum Walsh Date: Mon, 4 May 2026 10:19:57 +1000 Subject: [PATCH 6/8] fixing bam bool --- src/hostile/aligner.py | 4 ++-- src/hostile/cli.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index ad78572..b7a3efc 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -139,7 +139,7 @@ def gen_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, - output_bam: bool = False, + output_bam: bool, ) -> str: fastq, output = Path(fastq), Path(output) output.mkdir(exist_ok=True, parents=True) @@ -238,7 +238,7 @@ def gen_paired_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, - output_bam: bool = False, + output_bam: bool, ) -> str: fastq1, fastq2, output = Path(fastq1), Path(fastq2), Path(output) output.mkdir(exist_ok=True, parents=True) diff --git a/src/hostile/cli.py b/src/hostile/cli.py index 6ed3e6a..a50d6cc 100644 --- a/src/hostile/cli.py +++ b/src/hostile/cli.py @@ -83,6 +83,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) else: stats = lib.clean_fastqs( @@ -98,6 +99,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) print( json.dumps(stats, indent=4), From 27c6dc6631828eb5406bd5d7de0a0fc0f3634c7b Mon Sep 17 00:00:00 2001 From: Calum Walsh Date: Mon, 4 May 2026 10:37:06 +1000 Subject: [PATCH 7/8] updated readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6bbc774..e847415 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,8 @@ options: -t, --threads THREADS number of alignment threads. A sensible default is chosen automatically (default: 10) + --output-bam save mapped reads to BAM for downstream analysis + (default: False) --force overwrite existing output files (default: False) --airplane disable automatic index download (offline mode) From 7cf9337f978697fdfef6d5a1b076df17c40cf7a2 Mon Sep 17 00:00:00 2001 From: cazzlewazzle89 Date: Sun, 3 May 2026 19:29:22 +1000 Subject: [PATCH 8/8] added bam output option --- README.md | 2 ++ src/hostile/aligner.py | 20 ++++++++++++++++++++ src/hostile/cli.py | 4 ++++ src/hostile/lib.py | 4 ++++ 4 files changed, 30 insertions(+) diff --git a/README.md b/README.md index 6bbc774..e847415 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,8 @@ options: -t, --threads THREADS number of alignment threads. A sensible default is chosen automatically (default: 10) + --output-bam save mapped reads to BAM for downstream analysis + (default: False) --force overwrite existing output files (default: False) --airplane disable automatic index download (offline mode) diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index 439d89c..b7a3efc 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -139,6 +139,7 @@ def gen_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, + output_bam: bool, ) -> str: fastq, output = Path(fastq), Path(output) output.mkdir(exist_ok=True, parents=True) @@ -146,6 +147,13 @@ def gen_clean_cmd( fastq_out_path = output / f"{fastq_stem}.clean.fastq.gz" count_before_path = output / f"{fastq_stem}.reads_in.txt" count_after_path = output / f"{fastq_stem}.reads_out.txt" + mapped_bam_path = output / f"{fastq_stem}.mapped.bam" + + bam_cmd = ( + f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + if output_bam + else "" + ) if not stdout and not force and fastq_out_path.exists(): raise FileExistsError( @@ -196,6 +204,8 @@ def gen_clean_cmd( cmd = ( # Align, stream reads to stdout in SAM format f"{alignment_cmd}" + # optional - output bam + f"{bam_cmd}" # Count reads in stream before filtering (2048 + 256 = 2304) f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')" # Discard mapped reads (or inverse) @@ -228,6 +238,7 @@ def gen_paired_clean_cmd( aligner_threads: int, compression_threads: int, force: bool, + output_bam: bool, ) -> str: fastq1, fastq2, output = Path(fastq1), Path(fastq2), Path(output) output.mkdir(exist_ok=True, parents=True) @@ -237,6 +248,13 @@ def gen_paired_clean_cmd( fastq2_out_path = output / f"{fastq2_stem}.clean_2.fastq.gz" count_before_path = output / f"{fastq1_stem}.reads_in.txt" count_after_path = output / f"{fastq1_stem}.reads_out.txt" + mapped_bam_path = output / f"{fastq1_stem.removesuffix('_R1_paired')}.mapped.bam" + + bam_cmd = ( + f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')" + if output_bam + else "" + ) if ( not stdout @@ -320,6 +338,8 @@ def gen_paired_clean_cmd( ) cmd = ( f"{alignment_cmd}" + # optional - output bam + f"{bam_cmd}" f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')" f"{filter_cmd}" f" | tee >(samtools view -F 2304 -c - > '{count_after_path}')" diff --git a/src/hostile/cli.py b/src/hostile/cli.py index 6cf3079..a50d6cc 100644 --- a/src/hostile/cli.py +++ b/src/hostile/cli.py @@ -34,6 +34,7 @@ def clean( force: bool = False, airplane: bool = False, debug: bool = False, + output_bam: bool = False, ) -> None: """ Remove reads aligning to an index from fastq[.gz] input files or stdin. @@ -53,6 +54,7 @@ def clean( :arg force: overwrite existing output files :arg airplane: disable automatic index download (offline mode) :arg debug: show debug messages + :arg output_bam: save mapped reads to BAM for downstream analysis """ if debug: @@ -81,6 +83,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) else: stats = lib.clean_fastqs( @@ -96,6 +99,7 @@ def clean( threads=threads, force=force, airplane=airplane, + output_bam=output_bam, ) print( json.dumps(stats, indent=4), diff --git a/src/hostile/lib.py b/src/hostile/lib.py index 20052b8..8ecd3ce 100644 --- a/src/hostile/lib.py +++ b/src/hostile/lib.py @@ -174,6 +174,7 @@ def clean_fastqs( threads: int = util.CPU_COUNT, force: bool = False, airplane: bool = False, + output_bam: bool = False, ): stdin = str(fastqs[0]) == "-" stdout = str(output) == "-" @@ -212,6 +213,7 @@ def clean_fastqs( aligner_threads=aligner_threads, compression_threads=compression_threads, force=force, + output_bam=output_bam, ) for fastq in fastqs ] @@ -252,6 +254,7 @@ def clean_paired_fastqs( threads: int = util.CPU_COUNT, force: bool = False, airplane: bool = False, + output_bam: bool = False, ): stdin = str(fastqs[0][0]) == "-" stdout = str(output) == "-" @@ -292,6 +295,7 @@ def clean_paired_fastqs( aligner_threads=aligner_threads, compression_threads=compression_threads, force=force, + output_bam=output_bam, ) for fastq_pair in fastqs ]