Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 89 additions & 1 deletion src/pmotools/pmo_builder/panel_information_to_pmo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/env python3
import pandas as pd
import copy
import json

import numpy as np
import pandas as pd
import warnings

from ..pmo_builder.json_convert_utils import check_additional_columns_exist
Expand Down Expand Up @@ -416,3 +419,88 @@ def check_genome_info(genome_info):
raise TypeError(
f"genome_info must be a dict or list, but got {type(genome_info).__name__}"
)


def merge_panel_info_dicts(panel_info_dicts: list[dict]) -> dict:
    """
    Merge multiple panel_info dictionaries produced by panel_info_table_to_pmo.

    Targets are concatenated and deduplicated by ``target_name``: the first
    definition seen for a name is kept and later entries with the same name
    are ignored. Genomes are deduplicated by content (key order does not
    matter), and every ``genome_id`` found in a kept target's
    ``insert_location`` or in a primer's ``location`` is rewritten to the
    genome's index in the merged genome list. Each reaction's
    ``panel_targets`` indices are rewritten to point into the merged target
    list.

    The input dictionaries are not modified. Targets and genomes are
    deep-copied, so editing the merged result never leaks back into the
    inputs.

    Args:
        panel_info_dicts: Dictionaries that each contain ``targeted_genomes``
            and ``target_info`` and, optionally, ``panel_info``.

    Returns:
        A dict with the keys ``panel_info``, ``target_info`` and
        ``targeted_genomes``.

    Raises:
        ValueError: If ``panel_info_dicts`` is empty, if an entry lacks
            ``targeted_genomes`` or ``target_info``, or if a target has no
            ``target_name``.
    """
    if not panel_info_dicts:
        raise ValueError("panel_info_dicts must contain at least one entry.")

    merged_targets: list[dict] = []
    target_name_to_index: dict[str, int] = {}
    merged_panels: list[dict] = []

    merged_genomes: list[dict] = []
    genome_signature_to_index: dict[str, int] = {}

    def canonicalise_genome(genome: dict) -> str:
        # Sorted keys make the signature independent of dict insertion order.
        return json.dumps(genome, sort_keys=True)

    def remap_genome_ids(target_entry: dict, mapping: dict[int, int]) -> None:
        # Collect every location that may carry a genome_id: the insert
        # location first, then each primer's location.
        locations = [target_entry.get("insert_location")]
        for primer_key in ("forward_primer", "reverse_primer"):
            primer = target_entry.get(primer_key)
            if primer and isinstance(primer, dict):
                locations.append(primer.get("location"))

        for location in locations:
            if location and "genome_id" in location:
                old_id = location["genome_id"]
                # Ids that do not index this dict's genome list are left
                # untouched rather than guessed at.
                if old_id in mapping:
                    location["genome_id"] = mapping[old_id]

    for panel_dict in panel_info_dicts:
        if "targeted_genomes" not in panel_dict:
            raise ValueError("panel_info_dict missing 'targeted_genomes'.")

        # Maps this dict's local genome index -> index in merged_genomes.
        genome_mapping: dict[int, int] = {}
        for idx, genome in enumerate(panel_dict["targeted_genomes"]):
            signature = canonicalise_genome(genome)
            if signature not in genome_signature_to_index:
                genome_signature_to_index[signature] = len(merged_genomes)
                # Copy so the merged output does not alias the caller's
                # genome dicts (targets below are copied the same way).
                merged_genomes.append(copy.deepcopy(genome))
            genome_mapping[idx] = genome_signature_to_index[signature]

        if "target_info" not in panel_dict:
            raise ValueError("panel_info_dict missing 'target_info'.")

        for target in panel_dict["target_info"]:
            target_name = target.get("target_name")
            if target_name is None:
                raise ValueError("Each target_info entry must include a 'target_name'.")

            # First definition of a target name wins; later ones are skipped.
            if target_name not in target_name_to_index:
                target_copy = copy.deepcopy(target)
                remap_genome_ids(target_copy, genome_mapping)
                target_name_to_index[target_name] = len(merged_targets)
                merged_targets.append(target_copy)

        for panel in panel_dict.get("panel_info", []):
            remapped_panel = {"panel_name": panel["panel_name"], "reactions": []}
            for reaction in panel.get("reactions", []):
                # Translate local target indices to merged indices by going
                # through the target name, which is the dedup key.
                remapped_targets = [
                    target_name_to_index[
                        panel_dict["target_info"][target_idx]["target_name"]
                    ]
                    for target_idx in reaction["panel_targets"]
                ]
                remapped_panel["reactions"].append(
                    {
                        "reaction_name": reaction["reaction_name"],
                        "panel_targets": remapped_targets,
                    }
                )
            merged_panels.append(remapped_panel)

    return {
        "panel_info": merged_panels,
        "target_info": merged_targets,
        "targeted_genomes": merged_genomes,
    }
117 changes: 117 additions & 0 deletions tests/test_pmo_builder/test_panel_information_to_pmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pmotools.pmo_builder.panel_information_to_pmo import (
PMOPanelBuilder,
check_genome_info,
merge_panel_info_dicts,
panel_info_table_to_pmo,
)

Expand Down Expand Up @@ -224,6 +225,122 @@ def test_build_panel_info(self):
}
self.assertEqual(panel_info, expected_panel_info)

def test_merge_panel_info_dicts_no_overlap(self):
    """Merging dicts with distinct target names appends the second set.

    The second panel's targets are expected after the first dict's targets,
    with its ``panel_targets`` renumbered to ``[3, 4]``; the shared genome
    is expected exactly once.
    """

    def make_target(name, forward_seq, reverse_seq):
        # Fresh dicts on every call so inputs and expectations never alias.
        return {
            "target_name": name,
            "forward_primer": {"seq": forward_seq},
            "reverse_primer": {"seq": reverse_seq},
        }

    def make_panel(name, target_indices):
        return {
            "panel_name": name,
            "reactions": [{"reaction_name": "1", "panel_targets": target_indices}],
        }

    second_targets = [
        make_target("target4", "CTA", "TGG"),
        make_target("target5", "TTG", "ATT"),
    ]

    first_input = {
        "panel_info": [make_panel("test_panel1", [0, 1, 2])],
        "targeted_genomes": [self.genome_info],
        "target_info": self.min_target_json,
    }
    second_input = {
        "panel_info": [make_panel("test_panel2", [0, 1])],
        "targeted_genomes": [self.genome_info],
        "target_info": second_targets,
    }

    result = merge_panel_info_dicts([first_input, second_input])

    expected = {
        "panel_info": [
            make_panel("test_panel1", [0, 1, 2]),
            make_panel("test_panel2", [3, 4]),
        ],
        "targeted_genomes": [self.genome_info],
        "target_info": self.min_target_json + second_targets,
    }

    self.assertEqual(result, expected)

def test_merge_panel_info_dicts_with_overlap(self):
    """Merging dicts that share a target name keeps a single entry for it.

    The second dict repeats ``target2``; the expectation is that only
    ``target5`` is appended and that the second panel's ``panel_targets``
    become ``[1, 3]``, i.e. the repeated name resolves to the entry that
    was already merged.
    """

    def make_target(name, forward_seq, reverse_seq):
        # Fresh dicts on every call so inputs and expectations never alias.
        return {
            "target_name": name,
            "forward_primer": {"seq": forward_seq},
            "reverse_primer": {"seq": reverse_seq},
        }

    def make_panel(name, target_indices):
        return {
            "panel_name": name,
            "reactions": [{"reaction_name": "1", "panel_targets": target_indices}],
        }

    second_targets = [
        make_target("target2", "CTA", "TGG"),
        make_target("target5", "TTG", "ATT"),
    ]

    first_input = {
        "panel_info": [make_panel("test_panel1", [0, 1, 2])],
        "targeted_genomes": [self.genome_info],
        "target_info": self.min_target_json,
    }
    second_input = {
        "panel_info": [make_panel("test_panel2", [0, 1])],
        "targeted_genomes": [self.genome_info],
        "target_info": second_targets,
    }

    result = merge_panel_info_dicts([first_input, second_input])

    expected = {
        "panel_info": [
            make_panel("test_panel1", [0, 1, 2]),
            make_panel("test_panel2", [1, 3]),
        ],
        "targeted_genomes": [self.genome_info],
        "target_info": self.min_target_json
        + [make_target("target5", "TTG", "ATT")],
    }

    self.assertEqual(result, expected)

def test_build_panel_info_multi_reaction(self):
target_table_with_reactions = self.min_target_table
target_table_with_reactions["reaction"] = [
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pmo_builder/test_schema_validation_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_toy_pmo_validates_against_schema():
)
library_sample_info = library_sample_info_table_to_pmo(
library_df,
accession_col="accession",
run_accession_col="accession",
library_prep_plate_name_col="prep_plate_name",
library_prep_plate_row_col="prep_plate_row",
library_prep_plate_col_col="prep_plate_col",
Expand Down Expand Up @@ -312,7 +312,7 @@ def test_toy_pmo_validates_against_schema():
assert sequencing_entry["library_kit"] == "MiSeq Reagent Kit v3"

library_entry = pmo["library_sample_info"][0]
assert library_entry["accession"] == "ACC123"
assert library_entry["run_accession"] == "ACC123"
assert library_entry["library_note"] == "High quality"

genome_entry = pmo["targeted_genomes"][0]
Expand Down