Skip to content

Commit cdca598

Browse files
committed
GEXF-export script
1 parent d014f9d commit cdca598

File tree

2 files changed

+10246
-0
lines changed

2 files changed

+10246
-0
lines changed

GEXF-export.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
import requests
2+
import json
3+
import networkx as nx
4+
import matplotlib.pyplot as plt
5+
6+
# --- CONFIG ---
7+
# Source JSON: the 2025 meeting-summaries array published in the
# SingularityNET-Archive repository. Point this at a different raw JSON URL
# to export another data set.
url = (
    "https://raw.githubusercontent.com/SingularityNET-Archive/"
    "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
    "Meeting-Summaries/2025/meeting-summaries-array.json"
)
# Path of the GEXF file written at the end of the script.
output_gexf = "all_workgroups_graph_sanitized.gexf"
9+
10+
# --- 1. Fetch remote JSON safely ---
11+
# Download the JSON document. An explicit timeout is passed because requests
# otherwise waits indefinitely on a stalled connection; on expiry it raises
# a requests timeout exception instead of hanging the script.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to fetch JSON. Status code: {response.status_code}")

# Decode the response body as JSON.
data = response.json()

# Normalize to a list of workgroup records: a single top-level object is
# wrapped in a one-element list, a top-level array is used as-is, and any
# other top-level JSON value is rejected.
if isinstance(data, dict):
    workgroups = [data]
elif isinstance(data, list):
    workgroups = data
else:
    raise Exception("Unexpected JSON structure; expected dict or list")
24+
25+
# --- 2. Helper functions ---
26+
def safe_get(d, keys, default=None):
    """Follow a path of keys through nested dicts.

    Starting from ``d``, each entry of ``keys`` is looked up in turn. If the
    value reached so far is not a dict, or does not contain the next key,
    ``default`` is returned. Otherwise the value found at the end of the
    path is returned unchanged (this includes a stored ``None``). An empty
    ``keys`` sequence returns ``d`` itself.
    """
    current = d
    for key in keys:
        # Bail out as soon as the path cannot be followed any further.
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current
34+
35+
def sanitize_value(v):
    """Convert ``v`` into a value restricted to str/int/float/bool, or None.

    - ``None`` is returned as ``None`` (callers use this to drop the value).
    - ``str``, ``int``, ``float`` and ``bool`` values are returned unchanged.
    - Anything else is serialized with ``json.dumps`` (non-ASCII characters
      kept as-is); if serialization fails, ``str(v)`` is returned instead.
    """
    passthrough_types = (str, int, float, bool)
    if v is None or isinstance(v, passthrough_types):
        return v
    try:
        # JSON is the preferred text form for lists, dicts and similar values.
        encoded = json.dumps(v, ensure_ascii=False)
    except Exception:
        # Not JSON-serializable: fall back to the plain string form.
        encoded = str(v)
    return encoded
52+
53+
def find_invalid_attrs(G):
    """List node and edge attributes whose value is not a str/int/float/bool.

    ``G`` must provide ``nodes(data=True)`` yielding ``(node, attrs)`` pairs
    and ``edges(data=True)`` yielding ``(u, v, attrs)`` triples.

    Returns a list of 4-tuples, node findings first and edge findings after:
    ``("node", node, attr_name, type_name)`` or
    ``("edge", (u, v), attr_name, type_name)``, where ``type_name`` is the
    name of the offending value's type (``"NoneType"`` for ``None``).
    """
    allowed_types = (str, int, float, bool)
    node_findings = [
        ("node", node, name, type(value).__name__)
        for node, node_attrs in G.nodes(data=True)
        for name, value in node_attrs.items()
        if not isinstance(value, allowed_types)  # None fails this check too
    ]
    edge_findings = [
        ("edge", (src, dst), name, type(value).__name__)
        for src, dst, edge_attrs in G.edges(data=True)
        for name, value in edge_attrs.items()
        if not isinstance(value, allowed_types)
    ]
    return node_findings + edge_findings
65+
66+
# --- 3. Build the directed graph for all workgroups ---
67+
def _split_csv(value):
    """Return the stripped, non-empty items of a comma-separated string.

    Anything that is not a string (including None) yields an empty list.
    """
    if not isinstance(value, str):
        return []
    return [item.strip() for item in value.split(",") if item.strip()]


def _dict_entries(value):
    """Return the dict elements of ``value`` if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _short_label(text, limit=40):
    """Return the first ``limit`` characters of ``text`` followed by '...'."""
    return str(text)[:limit] + "..."


# Directed graph that accumulates the records of every workgroup entry.
# Node identifiers are the raw names/titles/texts, so equal strings coming
# from different records resolve to the same node.
G = nx.DiGraph()

for wg_data in workgroups:
    if not isinstance(wg_data, dict):
        # A non-object entry has none of the fields read below; skip it.
        continue

    # `or` fallbacks are used throughout: safe_get/.get hand back a stored
    # JSON null as None, and networkx rejects None as a node identifier.
    workgroup = safe_get(wg_data, ["workgroup"]) or "Unknown Workgroup"
    # NOTE(review): the meeting node is keyed by `workgroup_id`, so records
    # sharing that id share one node and later records overwrite its
    # date/typeOfMeeting - confirm whether the feed holds several meetings
    # per workgroup.
    meeting_id = safe_get(wg_data, ["workgroup_id"]) or f"MeetingID_{workgroup}"
    meeting_info = safe_get(wg_data, ["meetingInfo"])
    if not isinstance(meeting_info, dict):
        meeting_info = {}

    # Workgroup & Meeting
    G.add_node(workgroup, type="Workgroup")
    G.add_node(
        meeting_id,
        type="Meeting",
        date=meeting_info.get("date") or "",
        typeOfMeeting=meeting_info.get("typeOfMeeting") or "",
    )
    G.add_edge(workgroup, meeting_id, relation="has_meeting")

    # Host & Documenter
    host = meeting_info.get("host") or "Unknown Host"
    documenter = meeting_info.get("documenter") or "Unknown Documenter"
    for person in (host, documenter):
        G.add_node(person, type="Person")
    G.add_edge(meeting_id, host, relation="hosted_by")
    G.add_edge(meeting_id, documenter, relation="documented_by")

    # Attendees: `peoplePresent` is read as a comma-separated string.
    for person in _split_csv(meeting_info.get("peoplePresent")):
        G.add_node(person, type="Person")
        G.add_edge(meeting_id, person, relation="attended_by")

    # Working Docs
    for doc in _dict_entries(meeting_info.get("workingDocs")):
        title = doc.get("title") or "Untitled Document"
        G.add_node(title, type="Document", link=doc.get("link") or "")
        G.add_edge(meeting_id, title, relation="references_doc")

    # Agenda Items -> ActionItems & DecisionItems
    for agenda in _dict_entries(wg_data.get("agendaItems")):
        agenda_status = agenda.get("status") or "unknown"
        # The id is built from status + meeting id only, so agenda items of
        # one meeting that have the same status resolve to a single node.
        agenda_id = f"Agenda_{agenda_status}_{meeting_id}"
        G.add_node(agenda_id, type="AgendaItem", status=agenda_status)
        G.add_edge(meeting_id, agenda_id, relation="has_agenda")

        # ActionItems: identified by the first 40 characters of their text.
        for action in _dict_entries(agenda.get("actionItems")):
            action_id = _short_label(action.get("text") or "Unnamed Action")
            G.add_node(action_id, type="ActionItem", dueDate=action.get("dueDate") or "")
            G.add_edge(agenda_id, action_id, relation="has_actionItem")
            assignee = action.get("assignee")
            if assignee:
                G.add_node(assignee, type="Person")
                G.add_edge(action_id, assignee, relation="assigned_to")

        # DecisionItems: identified by the first 40 characters of their text.
        for decision in _dict_entries(agenda.get("decisionItems")):
            dec_id = _short_label(decision.get("decision") or "Unnamed Decision")
            # effect/rationale may be None here; the attribute sanitization
            # pass later in this script drops None-valued attributes.
            G.add_node(
                dec_id,
                type="DecisionItem",
                effect=decision.get("effect"),
                rationale=decision.get("rationale"),
            )
            G.add_edge(agenda_id, dec_id, relation="has_decisionItem")

    # Tags & Emotions: both are read as comma-separated strings and both use
    # the "tagged_with" relation.
    tags = safe_get(wg_data, ["tags"])
    if not isinstance(tags, dict):
        tags = {}
    for topic in _split_csv(tags.get("topicsCovered")):
        G.add_node(topic, type="Tag")
        G.add_edge(meeting_id, topic, relation="tagged_with")
    for emotion in _split_csv(tags.get("emotions")):
        G.add_node(emotion, type="Emotion")
        G.add_edge(meeting_id, emotion, relation="tagged_with")
142+
143+
# --- 4. (Optional) Inspect current invalid attributes BEFORE sanitization ---
144+
# Report attributes that are not plain str/int/float/bool values before any
# cleanup is applied; at most the first 20 findings are printed.
bad_before = find_invalid_attrs(G)
if not bad_before:
    print("No obviously invalid attributes detected before sanitization.")
else:
    print("Found attributes with potentially invalid types before sanitization (showing up to 20):")
    for finding in bad_before[:20]:
        print(finding)
151+
152+
# --- 5. Sanitize node attributes ---
153+
# Rewrite every node's attributes with their sanitized values; attributes
# that sanitize to None are removed.
for node, raw_attrs in list(G.nodes(data=True)):
    sanitized = {name: sanitize_value(value) for name, value in raw_attrs.items()}
    kept = {name: value for name, value in sanitized.items() if value is not None}
    # The replacement dict is fully built before the live dict is emptied.
    live_attrs = G.nodes[node]
    live_attrs.clear()
    live_attrs.update(kept)
162+
163+
# --- 6. Sanitize edge attributes ---
164+
# Rewrite every edge's attributes with their sanitized values; attributes
# that sanitize to None are removed.
for src, dst, raw_attrs in list(G.edges(data=True)):
    sanitized = {name: sanitize_value(value) for name, value in raw_attrs.items()}
    kept = {name: value for name, value in sanitized.items() if value is not None}
    # The replacement dict is fully built before the live dict is emptied.
    live_attrs = G[src][dst]
    live_attrs.clear()
    live_attrs.update(kept)
172+
173+
# --- 7. Final check and write GEXF ---
174+
# Re-run the attribute check after sanitization. Any remaining finding is
# printed and aborts the script; otherwise the graph is written as GEXF.
bad_after = find_invalid_attrs(G)
if bad_after:
    print("⚠ After sanitization, still some problematic attributes (should not happen):")
    for leftover in bad_after:
        print(leftover)
    raise Exception("Graph still contains invalid attribute types for GEXF export.")

nx.write_gexf(G, output_gexf)
print(f"✅ Graph exported to {output_gexf}")
183+
184+
# --- 8. Optional: visualize quickly in Python ---
185+
# Quick preview: draw the graph with a seeded spring layout, label every
# edge with its "relation" attribute, and open the matplotlib window.
plt.figure(figsize=(18, 12))
pos = nx.spring_layout(G, seed=42)
node_style = {
    "with_labels": True,
    "node_color": "lightblue",
    "node_size": 1500,
    "font_size": 8,
    "arrows": True,
}
nx.draw(G, pos, **node_style)
edge_labels = nx.get_edge_attributes(G, "relation")
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=7)
plt.show()

0 commit comments

Comments
 (0)