From 7bf20c7b0e420505681c738eac68f5edf3ba2023 Mon Sep 17 00:00:00 2001 From: Markus Haak Date: Thu, 11 Apr 2019 17:59:17 +0200 Subject: [PATCH] Fix to issue #82 refering to failed barcode determination error Fix to the error message "Error: Porechop could not determine barcode orientation". I analysed the error and found out that the orientation was determined the following way: Each read in the processed fastq file is aligned against every adapter set, where there exist two seperate sets (forward and reverse) for each barcode. For each barcode set, only the best score among all alignments is stored. To determine the orientation, Porechop compares the sum of max scores of all forward sets to the sum of max scores of all reverse sets. The problem occurs because this strategy, considering only the max scores, is highly dependent on both the read quality and the number of reads per fastq file. By analysing the cases were the error occured I discovered that both orientations of a set, in my case forward and reverse of BC11, had a max score of 100.0, meaning at least one perfectly aligning read. As a solution to this problem, I changed the determination of orientation to comparing the sum of all scores instead of only the maximum score. This should be much more robust. --- porechop/adapters.py | 1 + porechop/nanopore_read.py | 2 ++ porechop/porechop.py | 12 ++++++------ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/porechop/adapters.py b/porechop/adapters.py index dce7a4f..06ff0e5 100644 --- a/porechop/adapters.py +++ b/porechop/adapters.py @@ -25,6 +25,7 @@ def __init__(self, name, start_sequence=None, end_sequence=None, both_ends_seque self.start_sequence = both_ends_sequence self.end_sequence = both_ends_sequence self.best_start_score, self.best_end_score = 0.0, 0.0 + self.sum_start_scores, self.sum_end_scores = 0.0, 0.0 def best_start_or_end_score(self): return max(self.best_start_score, self.best_end_score) diff --git a/porechop/nanopore_read.py b/porechop/nanopore_read.py index 8a03776..80f9642 100755 --- a/porechop/nanopore_read.py +++ b/porechop/nanopore_read.py @@ -157,11 +157,13 @@ def align_adapter_set(self, adapter_set, end_size, scoring_scheme_vals): score, _, _, _ = align_adapter(read_seq_start, adapter_set.start_sequence[1], scoring_scheme_vals) adapter_set.best_start_score = max(adapter_set.best_start_score, score) + adapter_set.sum_start_scores += score if adapter_set.end_sequence: read_seq_end = self.seq[-end_size:] score, _, _, _ = align_adapter(read_seq_end, adapter_set.end_sequence[1], scoring_scheme_vals) adapter_set.best_end_score = max(adapter_set.best_end_score, score) + adapter_set.sum_end_scores += score def find_start_trim(self, adapters, end_size, extra_trim_size, end_threshold, scoring_scheme_vals, min_trim_size, check_barcodes, forward_or_reverse): diff --git a/porechop/porechop.py b/porechop/porechop.py index 25febe6..8d6d2c6 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -338,13 +338,13 @@ def choose_barcoding_kit(adapter_sets, verbosity, print_dest): for adapter_set in adapter_sets: if 'barcode' in adapter_set.name.lower(): if '(forward)' in adapter_set.name.lower(): - forward_start_or_end += adapter_set.best_start_or_end_score() - forward_start_and_end += adapter_set.best_start_score - forward_start_and_end += adapter_set.best_end_score + forward_start_or_end += max(adapter_set.sum_start_scores, adapter_set.sum_end_scores) + forward_start_and_end += adapter_set.sum_start_scores + forward_start_and_end += adapter_set.sum_end_scores elif '(reverse)' in adapter_set.name.lower(): - reverse_start_or_end += adapter_set.best_start_or_end_score() - reverse_start_and_end += adapter_set.best_start_score - reverse_start_and_end += adapter_set.best_end_score + reverse_start_or_end += max(adapter_set.sum_start_scores, adapter_set.sum_end_scores) + reverse_start_and_end += adapter_set.sum_start_scores + reverse_start_and_end += adapter_set.sum_end_scores if forward_start_or_end == 0 and reverse_start_or_end == 0: sys.exit('Error: no barcodes were found, so Porechop cannot perform barcode demultiplexing')