diff --git a/src/nerpa_pipeline/NRPSPredictor_utils/json_handler.py b/src/nerpa_pipeline/NRPSPredictor_utils/json_handler.py index 806ab90..7dd2081 100644 --- a/src/nerpa_pipeline/NRPSPredictor_utils/json_handler.py +++ b/src/nerpa_pipeline/NRPSPredictor_utils/json_handler.py @@ -68,7 +68,7 @@ def __init__(self, domain_prediction): for stachelhaus_match in prediction_data['stachelhaus_matches']], default=0) self.uncertain = stachelhaus_match_count < 7 # not so sure about this - else: # older version of antismash + elif 'NRPSPredictor2' in domain_prediction: # older version of antismash prediction_data = domain_prediction['NRPSPredictor2'] self.angstrom_code = prediction_data['angstrom_code'] @@ -78,6 +78,8 @@ def __init__(self, domain_prediction): self.small_cluster_pred = prediction_data['small_cluster_pred'] self.single_amino_pred = prediction_data['single_amino_pred'] self.uncertain = prediction_data['uncertain'] + else: + raise RuntimeError('Neither "nrpys" nor "NRPSPredictor2" in domain prediction.') def __str__(self): return '\t'.join([self.angstrom_code, diff --git a/src/nerpa_pipeline/NRPSPredictor_utils/main.py b/src/nerpa_pipeline/NRPSPredictor_utils/main.py index d2a5673..fbd93ad 100755 --- a/src/nerpa_pipeline/NRPSPredictor_utils/main.py +++ b/src/nerpa_pipeline/NRPSPredictor_utils/main.py @@ -99,9 +99,16 @@ def main(args): is_root_outdir = True if (args.output_dir is not None and len(args.inputs) > 1) else False processed_output_dirs = [] for input_path in args.inputs: - processed_output_dirs.append(json_handler.handle_single_input( - Path(input_path), args.output_dir, is_root_outdir, args.naming_style, - known_codes, scoring_mode=args.mode, verbose=args.verbose)) + try: + processed_output_dirs.append(json_handler.handle_single_input( + Path(input_path), args.output_dir, is_root_outdir, args.naming_style, + known_codes, scoring_mode=args.mode, verbose=args.verbose)) + except KeyboardInterrupt as e: + raise e + except RuntimeError as e: + info(f'ERROR: Unable to parse the input at "{input_path}": {e}') + except Exception as e: + info(f'ERROR: Unmanaged Exception while parsing the input at "{input_path}": {e}') return processed_output_dirs diff --git a/src/nerpa_pipeline/predictions_preprocessor.py b/src/nerpa_pipeline/predictions_preprocessor.py index 08ee132..6898749 100644 --- a/src/nerpa_pipeline/predictions_preprocessor.py +++ b/src/nerpa_pipeline/predictions_preprocessor.py @@ -77,45 +77,54 @@ def create_predictions_by_antiSMASH_out(antiSMASH_outs, outdir, log): predictions_info_file = os.path.join(outdir, "predictions.info") predictions_info_list = [] for dirname in antiSMASH_outs: - if dirname[-1] == '\n': - dirname = dirname[:-1] - - orf_pos = handle_helper.get_orf_position(dirname) - orf_ori = handle_helper.get_orf_orientation(dirname) - orf_domains = handle_helper.get_orf_domain_list(dirname) - - print("====PARTS BEFORE: ") - parts = handle_helper.get_parts(dirname) - handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) - - #print("====SPLIT BY DIST:") - parts = splitter.split_by_dist(parts, orf_pos) - #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) - - #print("====SPLIT BY SINGLE ORF WITH Starter-TE") - parts = splitter.split_by_one_orf_Starter_TE(parts, orf_ori, orf_domains) - #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) - - #print("====REMOVE SINGLE DOMAINs ORFS") - parts = splitter.split_by_single_domain_orf(parts, orf_ori, orf_domains) - #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) - - print("====SPLIT AND REORDER") - parts = splitter.split_and_reorder(parts, orf_ori, orf_pos, orf_domains) - handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) - - - nrpspred_dir = os.path.join(dirname, "nrpspks_predictions_txt") - if os.path.isdir(nrpspred_dir): - for filename in os.listdir(nrpspred_dir): - if filename.endswith('nrpspredictor2_codes.txt'): - base_antiSMASHout_name = os.path.basename(dirname) - base_pred_name = os.path.basename(filename) - #predictions_info_list.append(os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)) - #shutil.copyfile(os.path.join(nrpspred_dir, filename), os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)) - gen_predictions(parts, os.path.join(nrpspred_dir, filename), - os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)[:-4], - 0, predictions_info_list, dirname) + try: + if dirname[-1] == '\n': + dirname = dirname[:-1] + + orf_pos = handle_helper.get_orf_position(dirname) + orf_ori = handle_helper.get_orf_orientation(dirname) + orf_domains = handle_helper.get_orf_domain_list(dirname) + + print("====PARTS BEFORE: ") + parts = handle_helper.get_parts(dirname) + handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) + + #print("====SPLIT BY DIST:") + parts = splitter.split_by_dist(parts, orf_pos) + #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) + + #print("====SPLIT BY SINGLE ORF WITH Starter-TE") + parts = splitter.split_by_one_orf_Starter_TE(parts, orf_ori, orf_domains) + #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) + + #print("====REMOVE SINGLE DOMAINs ORFS") + parts = splitter.split_by_single_domain_orf(parts, orf_ori, orf_domains) + #handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) + + print("====SPLIT AND REORDER") + parts = splitter.split_and_reorder(parts, orf_ori, orf_pos, orf_domains) + handle_helper.debug_print_parts(dirname, parts, orf_domains, orf_ori, orf_pos) + + if len(parts) > 100: + raise RuntimeError(f'Too many parts: {len(parts)}') + + + nrpspred_dir = os.path.join(dirname, "nrpspks_predictions_txt") + if os.path.isdir(nrpspred_dir): + for filename in os.listdir(nrpspred_dir): + if filename.endswith('nrpspredictor2_codes.txt'): + base_antiSMASHout_name = os.path.basename(dirname) + base_pred_name = os.path.basename(filename) + #predictions_info_list.append(os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)) + #shutil.copyfile(os.path.join(nrpspred_dir, filename), os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)) + gen_predictions(parts, os.path.join(nrpspred_dir, filename), + os.path.join(dir_for_predictions, base_antiSMASHout_name + "_" + base_pred_name)[:-4], + 0, predictions_info_list, dirname) + except KeyboardInterrupt as e: + raise e + except Exception as e: + print(f'Error: {type(e).__name__}: {e}') + print(f'Skipping {dirname}') f = open(predictions_info_file, 'w') for line in predictions_info_list: