Conversation
Add `--minlen 1` to the `funannotate sort` command because otherwise it throws error
|
hi so code has already been fixed in the sort routine to default to minlen=0 -- what version are you using of funannotate? |
|
there's no harm in adding this to the tutorial though. |
|
I am using |
|
you can check if which has |
|
Hi, Despite the fact that it has Here's the output of my time funannotate sort -i 1318_nanopore_r10_flye.genome.cleaned.fasta -b scaffold -o 1318_nanopore_r10_flye.genome.cleaned.sorted.fasta
48 contigs records loaded
Sorting and renaming contig headers
Traceback (most recent call last):
File "/home/intelliyeast/micromamba/envs/funannotate/bin/funannotate", line 10, in <module>
sys.exit(main())
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/funannotate.py", line 717, in main
mod.main(arguments)
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py", line 80, in main
SortRenameHeaders(
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py", line 37, in SortRenameHeaders
if minlen > 0:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
real 0m0.197s
user 0m0.569s
sys 0m1.162sAnd here's the cat /home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import sys
import argparse
from Bio.SeqIO.FastaIO import SimpleFastaParser
from funannotate.library import countfasta, softwrap
def SortRenameHeaders(input, basename, output, minlen=0, simplify=False):
Seqs = []
with open(input, "r") as infile:
for header, sequence in SimpleFastaParser(infile):
Seqs.append((header, len(sequence), sequence))
# sort by length
sortedSeqs = sorted(Seqs, key=lambda x: x[1], reverse=True)
# loop through and return contigs and keepers
counter = 1
with open(output, "w") as outfile:
for name, length, seq in sortedSeqs:
if simplify: # try to just split at first space
if " " in name:
newName = name.split(" ")[0]
else:
newName = name
else:
newName = f"{basename}_{counter}"
if len(newName) > 16:
print(
f"Error. {newName} fasta header too long.",
"Choose a different --base name.",
"NCBI/GenBank max is 16 characters.",
)
raise SystemExit(1)
if minlen > 0:
if length >= minlen:
# ony write if length
outfile.write(">{:}\n{:}\n".format(newName, softwrap(seq)))
else:
# always write if we aren't filtering by length
outfile.write(">{:}\n{:}\n".format(newName, softwrap(seq)))
counter += 1
def main(args):
# setup menu with argparse
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
def __init__(self, prog):
super(MyFormatter, self).__init__(prog, max_help_position=48)
parser = argparse.ArgumentParser(
prog="sort_rename.py",
usage="%(prog)s [options] -i genome.fa -o sorted.fa",
description="Script that sorts input by length and then renames contig headers.",
epilog="""Written by Jon Palmer (2016) nextgenusfs@gmail.com""",
formatter_class=MyFormatter,
)
parser.add_argument("-i", "--input", required=True, help="Multi-fasta genome file")
parser.add_argument("-o", "--out", required=True, help="Cleaned output (FASTA)")
parser.add_argument(
"-b", "--base", default="scaffold", help="Basename of contig header"
)
parser.add_argument(
"-s",
"--simplify",
action="store_true",
help="Try to simplify headers, split at first space",
)
parser.add_argument(
"-m", "--minlen", type=int, help="Contigs shorter than threshold are discarded"
)
args = parser.parse_args(args)
print(("{:,} contigs records loaded".format(countfasta(args.input))))
print("Sorting and renaming contig headers")
if args.minlen:
print(("Removing contigs less than {:} bp".format(args.minlen)))
SortRenameHeaders(
args.input, args.base, args.out, minlen=args.minlen, simplify=args.simplify
)
print(("{:,} contigs saved to file".format(countfasta(args.out))))
if __name__ == "__main__":
main(sys.argv[1:]) |
|
just to follow this code has the minlen value with a default in the argparse step which is needed so this is fixed on the master branch funannotate/funannotate/sort.py Line 72 in cbf5071 |
The current instructions for
funannotate sortin the tutorial throw the following error:This is because the minlen variable is not being set currently. Therefore, the current version of the code necessitates
--minlen 1argument.