Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ options:
-t, --threads THREADS
number of alignment threads. A sensible default is chosen automatically
(default: 10)
--output-bam save mapped reads to BAM for downstream analysis
(default: False)
--force overwrite existing output files
(default: False)
--airplane disable automatic index download (offline mode)
Expand Down
20 changes: 20 additions & 0 deletions src/hostile/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,21 @@ def gen_clean_cmd(
aligner_threads: int,
compression_threads: int,
force: bool,
output_bam: bool,
) -> str:
fastq, output = Path(fastq), Path(output)
output.mkdir(exist_ok=True, parents=True)
fastq_stem = util.fastq_path_to_stem(fastq)
fastq_out_path = output / f"{fastq_stem}.clean.fastq.gz"
count_before_path = output / f"{fastq_stem}.reads_in.txt"
count_after_path = output / f"{fastq_stem}.reads_out.txt"
mapped_bam_path = output / f"{fastq_stem}.mapped.bam"

bam_cmd = (
f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')"
if output_bam
else ""
)

if not stdout and not force and fastq_out_path.exists():
raise FileExistsError(
Expand Down Expand Up @@ -196,6 +204,8 @@ def gen_clean_cmd(
cmd = (
# Align, stream reads to stdout in SAM format
f"{alignment_cmd}"
# optional - output bam
f"{bam_cmd}"
# Count reads in stream before filtering (2048 + 256 = 2304)
f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')"
# Discard mapped reads (or inverse)
Expand Down Expand Up @@ -228,6 +238,7 @@ def gen_paired_clean_cmd(
aligner_threads: int,
compression_threads: int,
force: bool,
output_bam: bool,
) -> str:
fastq1, fastq2, output = Path(fastq1), Path(fastq2), Path(output)
output.mkdir(exist_ok=True, parents=True)
Expand All @@ -237,6 +248,13 @@ def gen_paired_clean_cmd(
fastq2_out_path = output / f"{fastq2_stem}.clean_2.fastq.gz"
count_before_path = output / f"{fastq1_stem}.reads_in.txt"
count_after_path = output / f"{fastq1_stem}.reads_out.txt"
mapped_bam_path = output / f"{fastq1_stem.removesuffix('_R1_paired')}.mapped.bam"

bam_cmd = (
f" | tee >(samtools view -F 2304 -b - > '{mapped_bam_path}')"
if output_bam
else ""
)

if (
not stdout
Expand Down Expand Up @@ -320,6 +338,8 @@ def gen_paired_clean_cmd(
)
cmd = (
f"{alignment_cmd}"
# optional - output bam
f"{bam_cmd}"
f" | tee >(samtools view -F 2304 -c - > '{count_before_path}')"
f"{filter_cmd}"
f" | tee >(samtools view -F 2304 -c - > '{count_after_path}')"
Expand Down
4 changes: 4 additions & 0 deletions src/hostile/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def clean(
force: bool = False,
airplane: bool = False,
debug: bool = False,
output_bam: bool = False,
) -> None:
"""
Remove reads aligning to an index from fastq[.gz] input files or stdin.
Expand All @@ -53,6 +54,7 @@ def clean(
:arg force: overwrite existing output files
:arg airplane: disable automatic index download (offline mode)
:arg debug: show debug messages
:arg output_bam: save mapped reads to BAM for downstream analysis
"""

if debug:
Expand Down Expand Up @@ -81,6 +83,7 @@ def clean(
threads=threads,
force=force,
airplane=airplane,
output_bam=output_bam,
)
else:
stats = lib.clean_fastqs(
Expand All @@ -96,6 +99,7 @@ def clean(
threads=threads,
force=force,
airplane=airplane,
output_bam=output_bam,
)
print(
json.dumps(stats, indent=4),
Expand Down
4 changes: 4 additions & 0 deletions src/hostile/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def clean_fastqs(
threads: int = util.CPU_COUNT,
force: bool = False,
airplane: bool = False,
output_bam: bool = False,
):
stdin = str(fastqs[0]) == "-"
stdout = str(output) == "-"
Expand Down Expand Up @@ -212,6 +213,7 @@ def clean_fastqs(
aligner_threads=aligner_threads,
compression_threads=compression_threads,
force=force,
output_bam=output_bam,
)
for fastq in fastqs
]
Expand Down Expand Up @@ -252,6 +254,7 @@ def clean_paired_fastqs(
threads: int = util.CPU_COUNT,
force: bool = False,
airplane: bool = False,
output_bam: bool = False,
):
stdin = str(fastqs[0][0]) == "-"
stdout = str(output) == "-"
Expand Down Expand Up @@ -292,6 +295,7 @@ def clean_paired_fastqs(
aligner_threads=aligner_threads,
compression_threads=compression_threads,
force=force,
output_bam=output_bam,
)
for fastq_pair in fastqs
]
Expand Down