From 57ffcda96b772daf6802b9f2961db14592437e0e Mon Sep 17 00:00:00 2001 From: kathryn1995 Date: Mon, 17 Nov 2025 14:25:22 -0800 Subject: [PATCH 1/3] Add function to merge panels --- .../pmo_builder/panel_information_to_pmo.py | 90 ++++++++- .../test_panel_information_to_pmo.py | 177 ++++++++++++++++++ 2 files changed, 266 insertions(+), 1 deletion(-) diff --git a/src/pmotools/pmo_builder/panel_information_to_pmo.py b/src/pmotools/pmo_builder/panel_information_to_pmo.py index 36d381e..45251ea 100644 --- a/src/pmotools/pmo_builder/panel_information_to_pmo.py +++ b/src/pmotools/pmo_builder/panel_information_to_pmo.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 -import pandas as pd +import copy +import json + import numpy as np +import pandas as pd import warnings from ..pmo_builder.json_convert_utils import check_additional_columns_exist @@ -383,3 +386,88 @@ def check_genome_info(genome_info): raise TypeError( f"genome_info must be a dict, but got {type(genome_info).__name__}" ) + + +def merge_panel_info_dicts(panel_info_dicts: list[dict]) -> dict: + """ + Merge multiple panel_info dictionaries produced by panel_info_table_to_pmo. + + Target lists are concatenated (deduplicated by target_name) and all + genome references are collapsed so that genome identifiers remain valid + across the merged structure. + """ + if not panel_info_dicts: + raise ValueError("panel_info_dicts must contain at least one entry.") + + merged_targets: list[dict] = [] + target_name_to_index: dict[str, int] = {} + merged_panels: list[dict] = [] + + merged_genomes: list[dict] = [] + genome_signature_to_index: dict[str, int] = {} + + def canonicalise_genome(genome: dict) -> str: + return json.dumps(genome, sort_keys=True) + + def remap_genome_ids(target_entry: dict, mapping: dict[int, int]) -> None: + insert_loc = target_entry.get("insert_location") + if insert_loc and "genome_id" in insert_loc: + old_id = insert_loc["genome_id"] + if old_id in mapping: + insert_loc["genome_id"] = mapping[old_id] + + for primer_key in ("forward_primer", "reverse_primer"): + primer = target_entry.get(primer_key) + if primer and isinstance(primer, dict): + primer_loc = primer.get("location") + if primer_loc and "genome_id" in primer_loc: + old_id = primer_loc["genome_id"] + if old_id in mapping: + primer_loc["genome_id"] = mapping[old_id] + + for panel_dict in panel_info_dicts: + if "targeted_genomes" not in panel_dict: + raise ValueError("panel_info_dict missing 'targeted_genomes'.") + + genome_mapping: dict[int, int] = {} + for idx, genome in enumerate(panel_dict["targeted_genomes"]): + signature = canonicalise_genome(genome) + if signature not in genome_signature_to_index: + genome_signature_to_index[signature] = len(merged_genomes) + merged_genomes.append(genome) + genome_mapping[idx] = genome_signature_to_index[signature] + + if "target_info" not in panel_dict: + raise ValueError("panel_info_dict missing 'target_info'.") + + for target in panel_dict["target_info"]: + target_name = target.get("target_name") + if target_name is None: + raise ValueError("Each target_info entry must include a 'target_name'.") + + if target_name not in target_name_to_index: + target_copy = copy.deepcopy(target) + remap_genome_ids(target_copy, genome_mapping) + target_name_to_index[target_name] = len(merged_targets) + merged_targets.append(target_copy) + + for panel in panel_dict.get("panel_info", []): + remapped_panel = {"panel_name": panel["panel_name"], "reactions": []} + for reaction in panel.get("reactions", []): + remapped_targets = [] + for target_idx in reaction["panel_targets"]: + target_name = panel_dict["target_info"][target_idx]["target_name"] + remapped_targets.append(target_name_to_index[target_name]) + remapped_panel["reactions"].append( + { + "reaction_name": reaction["reaction_name"], + "panel_targets": remapped_targets, + } + ) + merged_panels.append(remapped_panel) + + return { + "panel_info": merged_panels, + "target_info": merged_targets, + "targeted_genomes": merged_genomes, + } diff --git a/tests/test_pmo_builder/test_panel_information_to_pmo.py b/tests/test_pmo_builder/test_panel_information_to_pmo.py index 8c87fbd..34fef08 100644 --- a/tests/test_pmo_builder/test_panel_information_to_pmo.py +++ b/tests/test_pmo_builder/test_panel_information_to_pmo.py @@ -5,6 +5,7 @@ from pmotools.pmo_builder.panel_information_to_pmo import ( PMOPanelBuilder, check_genome_info, + merge_panel_info_dicts, ) @@ -185,6 +186,182 @@ def test_build_panel_info(self): } self.assertEqual(panel_info, expected_panel_info) + def test_merge_panel_info_dicts_no_overlap(self): + target_info_b = [ + { + "target_name": "target4", + "forward_primer": {"seq": "CTA"}, + "reverse_primer": {"seq": "TGG"}, + }, + { + "target_name": "target5", + "forward_primer": {"seq": "TTG"}, + "reverse_primer": {"seq": "ATT"}, + }, + ] + + panel_info_dict_a = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json, + } + panel_info_dict_b = { + "panel_info": [ + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": target_info_b, + } + + merged = merge_panel_info_dicts([panel_info_dict_a, panel_info_dict_b]) + + expected_merged = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + }, + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [3, 4]}], + }, + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json + target_info_b, + } + + self.assertEqual(merged, expected_merged) + + def test_merge_panel_info_dicts_with_overlap_no_reaction(self): + target_info_b = [ + { + "target_name": "target2", + "forward_primer": {"seq": "CTA"}, + "reverse_primer": {"seq": "TGG"}, + }, + { + "target_name": "target5", + "forward_primer": {"seq": "TTG"}, + "reverse_primer": {"seq": "ATT"}, + }, + ] + + panel_info_dict_a = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json, + } + panel_info_dict_b = { + "panel_info": [ + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": target_info_b, + } + + merged = merge_panel_info_dicts([panel_info_dict_a, panel_info_dict_b]) + + expected_merged = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + }, + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [1, 3]}], + }, + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json + + [ + { + "target_name": "target5", + "forward_primer": {"seq": "TTG"}, + "reverse_primer": {"seq": "ATT"}, + }, + ], + } + + self.assertEqual(merged, expected_merged) + + def test_merge_panel_info_dicts_with_overlap(self): + target_info_b = [ + { + "target_name": "target2", + "forward_primer": {"seq": "CTA"}, + "reverse_primer": {"seq": "TGG"}, + }, + { + "target_name": "target5", + "forward_primer": {"seq": "TTG"}, + "reverse_primer": {"seq": "ATT"}, + }, + ] + + panel_info_dict_a = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json, + } + panel_info_dict_b = { + "panel_info": [ + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1]}], + } + ], + "targeted_genomes": [self.genome_info], + "target_info": target_info_b, + } + + merged = merge_panel_info_dicts([panel_info_dict_a, panel_info_dict_b]) + + expected_merged = { + "panel_info": [ + { + "panel_name": "test_panel1", + "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], + }, + { + "panel_name": "test_panel2", + "reactions": [{"reaction_name": "1", "panel_targets": [1, 3]}], + }, + ], + "targeted_genomes": [self.genome_info], + "target_info": self.min_target_json + + [ + { + "target_name": "target5", + "forward_primer": {"seq": "TTG"}, + "reverse_primer": {"seq": "ATT"}, + }, + ], + } + + self.assertEqual(merged, expected_merged) + def test_build_panel_info_multi_reaction(self): target_table_with_reactions = self.min_target_table target_table_with_reactions["reaction"] = [ From c69f0324b7fa7c02179669f7d6faa2b0433bc842 Mon Sep 17 00:00:00 2001 From: kathryn1995 Date: Tue, 18 Nov 2025 17:34:42 -0800 Subject: [PATCH 2/3] Remove duplicate test --- .../test_panel_information_to_pmo.py | 62 +------------------ 1 file changed, 1 insertion(+), 61 deletions(-) diff --git a/tests/test_pmo_builder/test_panel_information_to_pmo.py b/tests/test_pmo_builder/test_panel_information_to_pmo.py index 34fef08..9db3c02 100644 --- a/tests/test_pmo_builder/test_panel_information_to_pmo.py +++ b/tests/test_pmo_builder/test_panel_information_to_pmo.py @@ -210,6 +210,7 @@ def test_merge_panel_info_dicts_no_overlap(self): "targeted_genomes": [self.genome_info], "target_info": self.min_target_json, } + panel_info_dict_b = { "panel_info": [ { @@ -240,67 +241,6 @@ def test_merge_panel_info_dicts_no_overlap(self): self.assertEqual(merged, expected_merged) - def test_merge_panel_info_dicts_with_overlap_no_reaction(self): - target_info_b = [ - { - "target_name": "target2", - "forward_primer": {"seq": "CTA"}, - "reverse_primer": {"seq": "TGG"}, - }, - { - "target_name": "target5", - "forward_primer": {"seq": "TTG"}, - "reverse_primer": {"seq": "ATT"}, - }, - ] - - panel_info_dict_a = { - "panel_info": [ - { - "panel_name": "test_panel1", - "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], - } - ], - "targeted_genomes": [self.genome_info], - "target_info": self.min_target_json, - } - panel_info_dict_b = { - "panel_info": [ - { - "panel_name": "test_panel2", - "reactions": [{"reaction_name": "1", "panel_targets": [0, 1]}], - } - ], - "targeted_genomes": [self.genome_info], - "target_info": target_info_b, - } - - merged = merge_panel_info_dicts([panel_info_dict_a, panel_info_dict_b]) - - expected_merged = { - "panel_info": [ - { - "panel_name": "test_panel1", - "reactions": [{"reaction_name": "1", "panel_targets": [0, 1, 2]}], - }, - { - "panel_name": "test_panel2", - "reactions": [{"reaction_name": "1", "panel_targets": [1, 3]}], - }, - ], - "targeted_genomes": [self.genome_info], - "target_info": self.min_target_json - + [ - { - "target_name": "target5", - "forward_primer": {"seq": "TTG"}, - "reverse_primer": {"seq": "ATT"}, - }, - ], - } - - self.assertEqual(merged, expected_merged) - def test_merge_panel_info_dicts_with_overlap(self): target_info_b = [ { From bf2370170d7ed4cdf1a955838fc9784277f92c3d Mon Sep 17 00:00:00 2001 From: Nicholas Hathaway Date: Wed, 19 Nov 2025 15:21:29 -0500 Subject: [PATCH 3/3] update run_accession; --- tests/test_pmo_builder/test_schema_validation_integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pmo_builder/test_schema_validation_integration.py b/tests/test_pmo_builder/test_schema_validation_integration.py index df00da3..3ac2627 100644 --- a/tests/test_pmo_builder/test_schema_validation_integration.py +++ b/tests/test_pmo_builder/test_schema_validation_integration.py @@ -97,7 +97,7 @@ def test_toy_pmo_validates_against_schema(): ) library_sample_info = library_sample_info_table_to_pmo( library_df, - accession_col="accession", + run_accession_col="accession", library_prep_plate_name_col="prep_plate_name", library_prep_plate_row_col="prep_plate_row", library_prep_plate_col_col="prep_plate_col", @@ -312,7 +312,7 @@ def test_toy_pmo_validates_against_schema(): assert sequencing_entry["library_kit"] == "MiSeq Reagent Kit v3" library_entry = pmo["library_sample_info"][0] - assert library_entry["accession"] == "ACC123" + assert library_entry["run_accession"] == "ACC123" assert library_entry["library_note"] == "High quality" genome_entry = pmo["targeted_genomes"][0]