Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions mavisp/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,15 +907,21 @@ def ingest(self, mutations):

# Compare with mutations:
result = []


efoldmine_parsed = efoldmine_parsed.copy()
efoldmine_parsed['mavisp_residue_index'] = efoldmine_parsed['residue_index'] + 1
residue_counts = efoldmine_parsed['mavisp_residue_index'].value_counts().to_dict()
efoldmine_by_residue = {row.mavisp_residue_index: row for row in efoldmine_parsed.itertuples(index=False)}

for mut in mutations:
mut_resn = int(mut[1:-1])
row = efoldmine_parsed[efoldmine_parsed['residue_index']+1 == mut_resn]
if len(row) != 1:
this_error = f"Expected exactly one row for residue index {mut_resn}, but found {len(row)} rows."
row_count = residue_counts.get(mut_resn, 0)
if row_count != 1:
this_error = f"Expected exactly one row for residue index {mut_resn}, but found {row_count} rows."
raise MAVISpMultipleError(warning=warnings, critical=[MAVISpCriticalError(this_error)])
is_early_folding = row['is_early_folding'].iloc[0]
efoldmine_score = row['earlyFolding'].iloc[0]
row = efoldmine_by_residue[mut_resn]
is_early_folding = row.is_early_folding
efoldmine_score = row.earlyFolding
result.append((is_early_folding, efoldmine_score))

# Create DataFrame:
Expand Down Expand Up @@ -2625,13 +2631,15 @@ def ingest(self, mutations):
# Dictionary of muts + res_numbers
mutation_residues = {mut: int(mut[1:-1]) for mut in mutations}

pfam_intervals = [(row.start, row.end, f"{row.pfam_domain} ({row.accession})")
for row in pfam.itertuples(index=False)]

# Map res_numbers to PFAM domains
pfam_annotations = {}
for mutation, resn in mutation_residues.items():
matching_domains = pfam[(pfam['start'] <= resn) & (pfam['end'] >= resn)]
pfam_annotations[mutation] = ", ".join(
f"{row['pfam_domain']} ({row['accession']})" for _, row in matching_domains.iterrows()) if not matching_domains.empty else None

matching_domains = [label for start, end, label in pfam_intervals if start <= resn <= end]
pfam_annotations[mutation] = ", ".join(matching_domains) if matching_domains else None

# Add new column to data
self.data = pd.DataFrame.from_dict(pfam_annotations, orient='index', columns=['Pfam domain classification'])

Expand Down Expand Up @@ -2688,21 +2696,21 @@ def ingest(self, mutations):
raise MAVISpMultipleError(warning=warnings,
critical=[MAVISpCriticalError(this_error)])

ted_expanded = pd.DataFrame(multi_boundaries)
ted_intervals = [(row['start'], row['end'], row['CATH_label']) for row in multi_boundaries]

# Dictionary of muts + res_numbers
mutation_residues = {mut: int(mut[1:-1]) for mut in mutations}

# Map res_numbers to TED domains
ted_annotations = {}
for mutation, resn in mutation_residues.items():
matching = ted_expanded[(ted_expanded['start'] <= resn) & (ted_expanded['end'] >= resn)]
if matching.empty:
continue
else:
labels = matching['CATH_label'].dropna().astype(str)
labels = labels[labels.str.strip() != ""]
if labels.empty:
continue
labels = []
for start, end, cath_label in ted_intervals:
if start <= resn <= end and not pd.isna(cath_label):
label = str(cath_label)
if label.strip() != "":
labels.append(label)
if labels:
ted_annotations[mutation] = " | ".join(labels)

# Add new column to data
Expand Down