Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions snakemake_wrapper_utils/bcftools.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,63 @@ def get_bcftools_opts(
)

return bcftools_opts


def read_write_variants(snakemake, variant_key_name="call"):
"""Obtain command lines to read gzipped VCF or BCF files and path to named pipes"""
if isinstance(variant_key_name, int):
in_call = snakemake.input[0]
else:
in_call = snakemake.input.get(variant_key_name)
Comment on lines +151 to +154
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

variant_key_name integer path is ignored (always index 0).

Line 152 and Line 182 always read index 0, so passing variant_key_name=1 (or any other int) silently selects the wrong input/output.

🔧 Proposed fix
-    if isinstance(variant_key_name, int):
-        in_call = snakemake.input[0]
+    if isinstance(variant_key_name, int):
+        in_call = snakemake.input[variant_key_name]
@@
-    if isinstance(variant_key_name, int):
-        out_call = snakemake.output[0]
+    if isinstance(variant_key_name, int):
+        out_call = snakemake.output[variant_key_name]

Also applies to: 181-184

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@snakemake_wrapper_utils/bcftools.py` around lines 151 - 154, The code
incorrectly treats any integer variant_key_name as selecting snakemake.input[0];
update the logic in the block that sets in_call (and the analogous block that
sets out_call) to index snakemake.input and snakemake.output using the provided
integer variant_key_name (e.g., snakemake.input[variant_key_name] and
snakemake.output[variant_key_name]) instead of hardcoding 0, and add a simple
bounds check to raise a clear error if the provided integer is out of range;
refer to the variables variant_key_name, in_call, out_call, snakemake.input, and
snakemake.output to locate the two affected sections.

min_threads = 1
if not in_call:
raise KeyError(
f"Could not find {variant_key_name} within available snakemake input keys"
)

command_lines = ""
input_file_name = in_call
if str(in_call).endswith((".gz", ".bcf")):
input_file_name = f"{in_call}.snakemake_wrapper_utils_read_variants.vcf"
min_threads += 1

# Case there is a comparison:
bcftools_opts = get_bcftools_opts(
snakemake,
parse_threads=False, # Since no additional threads are required for reading
parse_output=False, # Since we do not write to any file
parse_output_format=False,
)

# Create named pipe
command_lines += str(
f"mkfifo {input_file_name} ; "
f"bcftools view {bcftools_opts} {in_call} > {input_file_name} & "
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment on lines +176 to +179
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

{prefix} tool_using_fifos ... ; wait

Repository: snakemake/snakemake-wrapper-utils

Length of output: 123


🏁 Script executed:

#!/bin/bash
rg -n -C4 '\bread_write_variants\s*\('
rg -n -C4 '\bwait\b'

Repository: snakemake/snakemake-wrapper-utils

Length of output: 702


🏁 Script executed:

# Get the file structure and lines in question
head -220 snakemake_wrapper_utils/bcftools.py | tail -80

Repository: snakemake/snakemake-wrapper-utils

Length of output: 2867


🏁 Script executed:

# Find all callers of read_write_variants
rg -n 'read_write_variants' --type py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 177


🏁 Script executed:

# Search in snakemake wrappers repo for usage patterns
git log --oneline --all | head -5

Repository: snakemake/snakemake-wrapper-utils

Length of output: 310


🏁 Script executed:

# Search for all references to read_write_variants across the repo
rg 'read_write_variants' -A 10 -B 2

Repository: snakemake/snakemake-wrapper-utils

Length of output: 1001


🏁 Script executed:

# Check if this is exported in __init__.py or similar
find . -name "*.py" -type f | xargs grep -l "read_write_variants"

Repository: snakemake/snakemake-wrapper-utils

Length of output: 114


🏁 Script executed:

# Check documentation or examples
find . -name "README*" -o -name "*.md" | xargs grep -l "read_write_variants" 2>/dev/null || echo "No markdown found"

Repository: snakemake/snakemake-wrapper-utils

Length of output: 94


🏁 Script executed:

# Check if this is published/used in snakemake-wrappers
curl -s "https://raw.githubusercontent.com/snakemake/snakemake-wrappers/master/bio/bcftools/index/wrapper.smk" | head -50 || echo "Could not fetch"

Repository: snakemake/snakemake-wrapper-utils

Length of output: 91


🏁 Script executed:

# Look at the complete function to understand the intended usage pattern
sed -n '149,210p' snakemake_wrapper_utils/bcftools.py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 2221


🏁 Script executed:

# Check if there's any documentation on how command_lines should be used
cat snakemake_wrapper_utils/bcftools.py | head -30

Repository: snakemake/snakemake-wrapper-utils

Length of output: 849


🏁 Script executed:

# Look for test files or examples
find . -type f -name "*test*" -o -name "*example*" | head -20

Repository: snakemake/snakemake-wrapper-utils

Length of output: 146


🏁 Script executed:

# Check git history for usage context
git log --all --oneline -- snakemake_wrapper_utils/bcftools.py | head -10

Repository: snakemake/snakemake-wrapper-utils

Length of output: 812


🏁 Script executed:

# Search in the entire git history for references to read_write_variants
git log -p --all -S "read_write_variants" | head -200

Repository: snakemake/snakemake-wrapper-utils

Length of output: 59


🏁 Script executed:

# Check __init__.py to see what's exported
cat snakemake_wrapper_utils/__init__.py 2>/dev/null || echo "No __init__.py or empty"

Repository: snakemake/snakemake-wrapper-utils

Length of output: 98


🏁 Script executed:

# Check the test file for usage patterns
cat tests/test_snakemake_wrapper_utils.py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 180


🏁 Script executed:

# Check if there are any examples in comments or docstrings
rg -A 20 "read_write_variants" snakemake_wrapper_utils/bcftools.py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 843


🏁 Script executed:

# Look at the git commit that introduced read_write_variants
git show c6f0fa7 -- snakemake_wrapper_utils/bcftools.py | head -100

Repository: snakemake/snakemake-wrapper-utils

Length of output: 2991


🏁 Script executed:

# Check recent commits to understand context better
git log --oneline -20

Repository: snakemake/snakemake-wrapper-utils

Length of output: 103


🏁 Script executed:

# See if there's any documentation or README
cat README.md 2>/dev/null || echo "No README"

Repository: snakemake/snakemake-wrapper-utils

Length of output: 367


🏁 Script executed:

# Look at the full context of how command_lines is assembled
sed -n '149,220p' snakemake_wrapper_utils/bcftools.py | cat -n

Repository: snakemake/snakemake-wrapper-utils

Length of output: 2627


🏁 Script executed:

# Check the complete docstring and surrounding context
sed -n '149,160p' snakemake_wrapper_utils/bcftools.py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 532


🏁 Script executed:

# Search for any usage documentation or patterns in comments
rg -B 5 -A 30 "def read_write_variants" snakemake_wrapper_utils/bcftools.py

Repository: snakemake/snakemake-wrapper-utils

Length of output: 1278


Background bcftools processes need explicit wait to propagate failures.

The function returns a command string with backgrounded bcftools processes (&) but doesn't enforce or document that callers must append wait to detect conversion failures. If a caller's wrapper script exits before waiting for background processes, failed conversions will be masked while the foreground command succeeds, risking silent data corruption.

Either:

  1. Return a separate teardown string (e.g., "; wait") that must be appended to the command, or
  2. Expand the docstring to explicitly require that callers append wait to the returned prefix

This applies to lines 176-179, 197-199, and the return statement at line 206.


if isinstance(variant_key_name, int):
out_call = snakemake.output[variant_key_name]
else:
out_call = snakemake.output.get(variant_key_name)
output_file_name = out_call
if not out_call:
raise KeyError(
f"Could not find {variant_key_name} within available snakemake output keys"
)

if str(out_call).endswith((".gz", ".bcf")):
output_file_name = f"{out_call}.snakemake_wrapper_utils_write_variants.vcf"
min_threads += 1

# This time, we include threading and output formats
bcftools_opts = get_bcftools_opts(snakemake)
command_lines += str(
f"mkfifo {output_file_name} ; "
f"bcftools view {bcftools_opts} {output_file_name} & "
)
Comment thread
tdayris marked this conversation as resolved.

if snakemake.threads < min_threads:
raise ValueError(
f"At least {min_threads} threads required, got {snakemake.threads}"
)
return command_lines, input_file_name, output_file_name
6 changes: 3 additions & 3 deletions snakemake_wrapper_utils/gatk.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ def get_gatk_opts(
if parse_arg_file:
if is_arg("--arguments_file", extra):
sys.exit(
"You have specified an argument file (`--argument_file`) in `params.extra`; this is automatically inferred from `input.arg_file`."
"You have specified an argument file (`--arguments_file`) in `params.extra`; this is automatically inferred from `input.arg_file`."
)

# Multiple argument files can be provided. Order matters.
arg_file = snakemake.input.get("arg_file", "")
if arg_file:
if isinstance(arg_file, list):
arg_file = " --argument_file ".join(arg_file)
arg_file = " --arguments_file ".join(arg_file)

gatk_opts += f" --argument_file {arg_file}"
gatk_opts += f" --arguments_file {arg_file}"

######################
### Reference file ###
Expand Down
Loading