diff --git a/snakemake_wrapper_utils/bcftools.py b/snakemake_wrapper_utils/bcftools.py index 31a5e4b..477dc10 100644 --- a/snakemake_wrapper_utils/bcftools.py +++ b/snakemake_wrapper_utils/bcftools.py @@ -144,3 +144,63 @@ def get_bcftools_opts( ) return bcftools_opts + + +def read_write_variants(snakemake, variant_key_name="call"): + """Obtain command lines to read gzipped VCF or BCF files and path to named pipes""" + if isinstance(variant_key_name, int): + in_call = snakemake.input[0] + else: + in_call = snakemake.input.get(variant_key_name) + min_threads = 1 + if not in_call: + raise KeyError( + f"Could not find {variant_key_name} within available snakemake input keys" + ) + + command_lines = "" + input_file_name = in_call + if str(in_call).endswith((".gz", ".bcf")): + input_file_name = f"{in_call}.snakemake_wrapper_utils_read_variants.vcf" + min_threads += 1 + + # Case there is a comparison: + bcftools_opts = get_bcftools_opts( + snakemake, + parse_threads=False, # Since no additional threads are required for reading + parse_output=False, # Since we do not write to any file + parse_output_format=False, + ) + + # Create named pipe + command_lines += str( + f"mkfifo {input_file_name} ; " + f"bcftools view {bcftools_opts} {in_call} > {input_file_name} & " + ) + + if isinstance(variant_key_name, int): + out_call = snakemake.output[variant_key_name] + else: + out_call = snakemake.output.get(variant_key_name) + output_file_name = out_call + if not out_call: + raise KeyError( + f"Could not find {variant_key_name} within available snakemake output keys" + ) + + if str(out_call).endswith((".gz", ".bcf")): + output_file_name = f"{out_call}.snakemake_wrapper_utils_write_variants.vcf" + min_threads += 1 + + # This time, we include threading and output formats + bcftools_opts = get_bcftools_opts(snakemake) + command_lines += str( + f"mkfifo {output_file_name} ; " + f"bcftools view {bcftools_opts} {output_file_name} & " + ) + + if snakemake.threads < min_threads: + raise ValueError( + f"At least {min_threads} threads required, got {snakemake.threads}" + ) + return command_lines, input_file_name, output_file_name diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index 7a5dc3e..f3ef237 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -25,16 +25,16 @@ def get_gatk_opts( if parse_arg_file: if is_arg("--arguments_file", extra): sys.exit( - "You have specified an argument file (`--argument_file`) in `params.extra`; this is automatically inferred from `input.arg_file`." + "You have specified an argument file (`--arguments_file`) in `params.extra`; this is automatically inferred from `input.arg_file`." ) # Multiple argument files can be provided. Order matters. arg_file = snakemake.input.get("arg_file", "") if arg_file: if isinstance(arg_file, list): - arg_file = " --argument_file ".join(arg_file) + arg_file = " --arguments_file ".join(arg_file) - gatk_opts += f" --argument_file {arg_file}" + gatk_opts += f" --arguments_file {arg_file}" ###################### ### Reference file ###