Skip to content

Commit af0629d

Browse files
committed
Amended GEXF export
1 parent 0fbd035 commit af0629d

File tree

2 files changed

+16757
-5105
lines changed

2 files changed

+16757
-5105
lines changed

GEXF-export.py

Lines changed: 75 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22
import json
33
import networkx as nx
44
import matplotlib.pyplot as plt
5+
from collections import Counter, defaultdict
56

67
# --- CONFIG ---
7-
url = "https://raw.githubusercontent.com/SingularityNET-Archive/SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/Meeting-Summaries/2025/meeting-summaries-array.json" # Replace with your URL
8+
url = "https://raw.githubusercontent.com/SingularityNET-Archive/SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/Meeting-Summaries/2025/meeting-summaries-array.json"
89
output_gexf = "all_workgroups_graph_sanitized.gexf"
910

1011
# --- 1. Fetch remote JSON safely ---
@@ -22,13 +23,17 @@
2223
else:
2324
raise Exception("Unexpected JSON structure; expected dict or list")
2425

25-
# --- NEW: Count top-level values ---
26+
# Debug: top-level count
2627
top_level_count = len(workgroups)
2728
print(f"🔹 Number of top-level workgroup entries: {top_level_count}")
2829

30+
# Quick check for repeated workgroup_id values (diagnostic)
31+
ids = [ (wg.get("workgroup_id") or "") for wg in workgroups ]
32+
c = Counter(ids)
33+
print("Top repeated explicit workgroup_id values (empty string means missing):", c.most_common(10))
34+
2935
# --- 2. Helper functions ---
3036
def safe_get(d, keys, default=None):
31-
"""Safely walk nested dict keys."""
3237
for key in keys:
3338
if isinstance(d, dict) and key in d:
3439
d = d[key]
@@ -37,25 +42,16 @@ def safe_get(d, keys, default=None):
3742
return d
3843

3944
def sanitize_value(v):
40-
"""
41-
Return a GEXF-safe representation of v:
42-
- keep str/int/float/bool
43-
- convert lists/dicts/other -> json string
44-
- drop None -> return None
45-
"""
4645
if v is None:
4746
return None
4847
if isinstance(v, (str, int, float, bool)):
4948
return v
5049
try:
51-
# Prefer JSON representation for lists/dicts
5250
return json.dumps(v, ensure_ascii=False)
5351
except Exception:
54-
# Fallback to string
5552
return str(v)
5653

5754
def find_invalid_attrs(G):
58-
"""Return list of (node_or_edge, attr_name, value_type) that are invalid for GEXF."""
5955
bad = []
6056
for n, attrs in G.nodes(data=True):
6157
for k, v in attrs.items():
@@ -70,79 +66,107 @@ def find_invalid_attrs(G):
7066
# --- 3. Build the directed graph for all workgroups ---
7167
G = nx.DiGraph()
7268

73-
for wg_data in workgroups:
74-
workgroup = safe_get(wg_data, ["workgroup"], "Unknown Workgroup")
75-
meeting_id = safe_get(wg_data, ["workgroup_id"], f"MeetingID_{workgroup}")
69+
# Track seen meeting_ids to detect duplicates
70+
meeting_id_counts = Counter()
71+
for idx, wg_data in enumerate(workgroups, start=1):
72+
# Prefer explicit workgroup_id; fall back to unique generated id using index
73+
explicit_meeting_id = safe_get(wg_data, ["workgroup_id"], None)
74+
workgroup_name = safe_get(wg_data, ["workgroup"], "Unknown Workgroup")
75+
76+
# ensure meeting_node_id is unique and string:
77+
if explicit_meeting_id:
78+
meeting_node_id = str(explicit_meeting_id)
79+
# Append index if this explicit id appears more than once
80+
if meeting_id_counts[meeting_node_id] > 0:
81+
meeting_node_id = f"{meeting_node_id}__{idx}"
82+
else:
83+
# No explicit meeting id -> generate one using workgroup + index to guarantee uniqueness
84+
meeting_node_id = f"Meeting_{workgroup_name}_{idx}"
85+
86+
meeting_id_counts[meeting_node_id] += 1
87+
7688
meeting_info = safe_get(wg_data, ["meetingInfo"], {})
7789

78-
# Workgroup & Meeting
79-
G.add_node(workgroup, type="Workgroup")
80-
# Provide attributes using sanitize_value where appropriate (we'll sanitize later too)
81-
G.add_node(meeting_id, type="Meeting",
90+
# Make sure node ids are strings
91+
workgroup_node_id = str(workgroup_name)
92+
meeting_node_id = str(meeting_node_id)
93+
94+
# Workgroup & Meeting nodes
95+
G.add_node(workgroup_node_id, type="Workgroup", label=workgroup_node_id)
96+
G.add_node(meeting_node_id, type="Meeting",
8297
date=meeting_info.get("date", "") or "",
83-
typeOfMeeting=meeting_info.get("typeOfMeeting", "") or "")
84-
G.add_edge(workgroup, meeting_id, relation="has_meeting")
98+
typeOfMeeting=meeting_info.get("typeOfMeeting", "") or "",
99+
label=meeting_node_id)
100+
G.add_edge(workgroup_node_id, meeting_node_id, relation="has_meeting")
85101

86102
# Host & Documenter
87103
host = meeting_info.get("host", "Unknown Host")
88104
documenter = meeting_info.get("documenter", "Unknown Documenter")
89105
for person in [host, documenter]:
90-
G.add_node(person, type="Person")
91-
G.add_edge(meeting_id, host, relation="hosted_by")
92-
G.add_edge(meeting_id, documenter, relation="documented_by")
106+
if person:
107+
G.add_node(str(person), type="Person", label=str(person))
108+
G.add_edge(meeting_node_id, str(host), relation="hosted_by")
109+
G.add_edge(meeting_node_id, str(documenter), relation="documented_by")
93110

94111
# Attendees
95112
people_present = meeting_info.get("peoplePresent", "")
96113
for person in [p.strip() for p in people_present.split(",") if p.strip()]:
97-
G.add_node(person, type="Person")
98-
G.add_edge(meeting_id, person, relation="attended_by")
114+
G.add_node(str(person), type="Person", label=str(person))
115+
G.add_edge(meeting_node_id, str(person), relation="attended_by")
99116

100117
# Working Docs
101118
for doc in meeting_info.get("workingDocs", []):
102119
title = doc.get("title", "Untitled Document")
103120
link = doc.get("link", "")
104-
G.add_node(title, type="Document", link=link or "")
105-
G.add_edge(meeting_id, title, relation="references_doc")
121+
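# make the document node id unique per meeting by combining the title with the meeting index (idx); documents sharing a title within one meeting still map to the same node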
122+
doc_node_id = f"Doc_{title}_{idx}"
123+
G.add_node(str(doc_node_id), type="Document", link=link or "", label=title)
124+
G.add_edge(meeting_node_id, str(doc_node_id), relation="references_doc")
106125

107126
# Agenda Items -> ActionItems & DecisionItems
108-
for agenda in wg_data.get("agendaItems", []):
127+
for aindex, agenda in enumerate(wg_data.get("agendaItems", []), start=1):
109128
agenda_status = agenda.get("status", "unknown")
110-
agenda_id = f"Agenda_{agenda_status}_{meeting_id}"
111-
G.add_node(agenda_id, type="AgendaItem", status=agenda_status)
112-
G.add_edge(meeting_id, agenda_id, relation="has_agenda")
129+
agenda_id = f"Agenda_{agenda_status}_{idx}_{aindex}"
130+
G.add_node(agenda_id, type="AgendaItem", status=agenda_status, label=agenda_id)
131+
G.add_edge(meeting_node_id, agenda_id, relation="has_agenda")
113132

114133
# ActionItems
115-
for action in agenda.get("actionItems", []):
134+
for action_index, action in enumerate(agenda.get("actionItems", []), start=1):
116135
action_text = action.get("text", "Unnamed Action")
117-
action_id = action_text[:40] + "..."
118-
G.add_node(action_id, type="ActionItem", dueDate=action.get("dueDate", "") or "")
136+
action_id = f"Action_{idx}_{aindex}_{action_index}"
137+
G.add_node(action_id, type="ActionItem", dueDate=action.get("dueDate", "") or "", label=action_text[:60])
119138
G.add_edge(agenda_id, action_id, relation="has_actionItem")
120139
assignee = action.get("assignee")
121140
if assignee:
122-
G.add_node(assignee, type="Person")
123-
G.add_edge(action_id, assignee, relation="assigned_to")
141+
G.add_node(str(assignee), type="Person", label=str(assignee))
142+
G.add_edge(action_id, str(assignee), relation="assigned_to")
124143

125144
# DecisionItems
126-
for decision in agenda.get("decisionItems", []):
145+
for decision_index, decision in enumerate(agenda.get("decisionItems", []), start=1):
127146
dec_text = decision.get("decision", "Unnamed Decision")
128-
dec_id = dec_text[:40] + "..."
147+
dec_id = f"Decision_{idx}_{aindex}_{decision_index}"
129148
G.add_node(dec_id, type="DecisionItem",
130149
effect=decision.get("effect"),
131-
rationale=decision.get("rationale"))
150+
rationale=decision.get("rationale"),
151+
label=dec_text[:60])
132152
G.add_edge(agenda_id, dec_id, relation="has_decisionItem")
133153

134154
# Tags & Emotions
135155
tags = safe_get(wg_data, ["tags"], {})
136156
for topic in tags.get("topicsCovered", "").split(","):
137157
topic = topic.strip()
138158
if topic:
139-
G.add_node(topic, type="Tag")
140-
G.add_edge(meeting_id, topic, relation="tagged_with")
159+
G.add_node(str(topic), type="Tag", label=str(topic))
160+
G.add_edge(meeting_node_id, str(topic), relation="tagged_with")
141161
for emotion in tags.get("emotions", "").split(","):
142162
emotion = emotion.strip()
143163
if emotion:
144-
G.add_node(emotion, type="Emotion")
145-
G.add_edge(meeting_id, emotion, relation="tagged_with")
164+
G.add_node(str(emotion), type="Emotion", label=str(emotion))
165+
G.add_edge(meeting_node_id, str(emotion), relation="tagged_with")
166+
167+
# Debug: counts BEFORE sanitization
168+
print("DEBUG BEFORE SANITIZATION -> node count:", len(G.nodes()), "edge count:", len(G.edges()))
169+
print("Sample nodes (first 50):", list(G.nodes())[:50])
146170

147171
# --- 4. (Optional) Inspect current invalid attributes BEFORE sanitization ---
148172
bad_before = find_invalid_attrs(G)
@@ -160,7 +184,7 @@ def find_invalid_attrs(G):
160184
san = sanitize_value(v)
161185
if san is not None:
162186
new_attrs[k] = san
163-
# Replace attributes atomically
187+
# Replace the node's attributes with their sanitized copies; a string label attr (if present) passes through unchanged, which keeps nodes readable in Gephi
164188
G.nodes[n].clear()
165189
G.nodes[n].update(new_attrs)
166190

@@ -185,10 +209,14 @@ def find_invalid_attrs(G):
185209
nx.write_gexf(G, output_gexf)
186210
print(f"✅ Graph exported to {output_gexf}")
187211

212+
# Final debug counts
213+
print("DEBUG AFTER SANITIZATION -> node count:", len(G.nodes()), "edge count:", len(G.edges()))
214+
print("Sample nodes (first 50):", list(G.nodes())[:50])
215+
188216
# --- 8. Optional: visualize quickly in Python ---
189217
plt.figure(figsize=(18, 12))
190218
pos = nx.spring_layout(G, seed=42)
191-
nx.draw(G, pos, with_labels=True, node_color="lightblue", node_size=1500, font_size=8, arrows=True)
219+
nx.draw(G, pos, with_labels=True, node_size=400, font_size=7, arrows=True)
192220
edge_labels = nx.get_edge_attributes(G, "relation")
193-
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=7)
221+
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6)
194222
plt.show()

0 commit comments

Comments
 (0)