|
| 1 | +import requests |
| 2 | +import json |
| 3 | +import networkx as nx |
| 4 | +import matplotlib.pyplot as plt |
| 5 | + |
# --- CONFIG ---
# Remote JSON document holding the meeting summaries; replace with your own URL.
url = "https://raw.githubusercontent.com/SingularityNET-Archive/SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/Meeting-Summaries/2025/meeting-summaries-array.json"
# Path of the GEXF file written once the graph has been sanitized.
output_gexf = "all_workgroups_graph_sanitized.gexf"
| 9 | + |
# --- 1. Fetch remote JSON safely ---
# requests waits indefinitely by default; an explicit timeout keeps the script
# from hanging forever if the server stops responding.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to fetch JSON. Status code: {response.status_code}")

data = response.json()

# Normalize to list of workgroups: a single object is wrapped in a list so the
# rest of the script can always iterate.
if isinstance(data, dict):
    workgroups = [data]
elif isinstance(data, list):
    workgroups = data
else:
    raise Exception("Unexpected JSON structure; expected dict or list")
| 24 | + |
| 25 | +# --- 2. Helper functions --- |
def safe_get(d, keys, default=None):
    """Follow `keys` one level at a time through nested dicts.

    Returns the value reached after the last key. If any step lands on a
    non-dict or on a missing key, `default` is returned instead. A key that
    is present with a None value yields None, not `default`.
    """
    current = d
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current
| 34 | + |
def sanitize_value(v):
    """Convert `v` into something that can be stored as a GEXF attribute.

    - None is returned as None (callers drop such attributes).
    - str / int / float / bool pass through unchanged.
    - Anything else is serialized with json.dumps (non-ASCII kept as-is);
      if that fails, str(v) is used.
    """
    if v is None or isinstance(v, (str, int, float, bool)):
        return v
    try:
        # Lists and dicts get a JSON representation.
        encoded = json.dumps(v, ensure_ascii=False)
    except Exception:
        # Not JSON-serializable: fall back to the plain string form.
        encoded = str(v)
    return encoded
| 52 | + |
def find_invalid_attrs(G):
    """List attributes whose values are not str, int, float or bool.

    Node problems come first, then edge problems. Each entry is a 4-tuple:
    ("node", node, attr_name, type_name) or
    ("edge", (u, v), attr_name, type_name).
    None values are reported with type name "NoneType".
    """
    allowed = (str, int, float, bool)
    problems = [
        ("node", node, name, type(value).__name__)
        for node, node_attrs in G.nodes(data=True)
        for name, value in node_attrs.items()
        if not isinstance(value, allowed)
    ]
    problems.extend(
        ("edge", (src, dst), name, type(value).__name__)
        for src, dst, edge_attrs in G.edges(data=True)
        for name, value in edge_attrs.items()
        if not isinstance(value, allowed)
    )
    return problems
| 65 | + |
# --- 3. Build the directed graph for all workgroups ---
def _split_csv(value):
    """Split a comma-separated string into stripped, non-empty parts.

    A list/tuple is treated as already split (None items skipped, the rest
    stringified). Any other input, including None, yields an empty list.
    """
    if isinstance(value, str):
        parts = value.split(",")
    elif isinstance(value, (list, tuple)):
        parts = [str(item) for item in value if item is not None]
    else:
        return []
    return [part.strip() for part in parts if part.strip()]


def _short_label(text, limit=40):
    """Return the node id for a free-text item: first `limit` chars plus "..."."""
    return str(text)[:limit] + "..."


G = nx.DiGraph()

for wg_data in workgroups:
    if not isinstance(wg_data, dict):
        # A malformed entry cannot be queried with .get(); skip it.
        continue

    # `x or default` is used throughout because dict.get(key, default) does not
    # apply the default when the key exists with a JSON null (or empty) value,
    # and networkx refuses None as a node.
    workgroup = safe_get(wg_data, ["workgroup"]) or "Unknown Workgroup"
    # NOTE(review): the meeting node id comes from "workgroup_id"; entries that
    # share a workgroup_id collapse into one node and later date/type values
    # overwrite earlier ones. Confirm this is intended.
    meeting_id = safe_get(wg_data, ["workgroup_id"]) or f"MeetingID_{workgroup}"
    meeting_info = safe_get(wg_data, ["meetingInfo"])
    if not isinstance(meeting_info, dict):
        meeting_info = {}

    # Workgroup & Meeting
    G.add_node(workgroup, type="Workgroup")
    G.add_node(
        meeting_id,
        type="Meeting",
        date=meeting_info.get("date") or "",
        typeOfMeeting=meeting_info.get("typeOfMeeting") or "",
    )
    G.add_edge(workgroup, meeting_id, relation="has_meeting")

    # Host & Documenter
    host = meeting_info.get("host") or "Unknown Host"
    documenter = meeting_info.get("documenter") or "Unknown Documenter"
    for person in (host, documenter):
        G.add_node(person, type="Person")
    G.add_edge(meeting_id, host, relation="hosted_by")
    G.add_edge(meeting_id, documenter, relation="documented_by")

    # Attendees
    for person in _split_csv(meeting_info.get("peoplePresent")):
        G.add_node(person, type="Person")
        G.add_edge(meeting_id, person, relation="attended_by")

    # Working Docs
    for doc in meeting_info.get("workingDocs") or []:
        if not isinstance(doc, dict):
            continue
        title = doc.get("title") or "Untitled Document"
        G.add_node(title, type="Document", link=doc.get("link") or "")
        G.add_edge(meeting_id, title, relation="references_doc")

    # Agenda Items -> ActionItems & DecisionItems
    for agenda in wg_data.get("agendaItems") or []:
        if not isinstance(agenda, dict):
            continue
        agenda_status = agenda.get("status") or "unknown"
        # NOTE(review): agenda items with the same status in the same meeting
        # share this id and therefore merge into a single node.
        agenda_id = f"Agenda_{agenda_status}_{meeting_id}"
        G.add_node(agenda_id, type="AgendaItem", status=agenda_status)
        G.add_edge(meeting_id, agenda_id, relation="has_agenda")

        # ActionItems
        for action in agenda.get("actionItems") or []:
            if not isinstance(action, dict):
                continue
            # Ids are truncated text, so items with the same first 40
            # characters end up on the same node.
            action_id = _short_label(action.get("text") or "Unnamed Action")
            G.add_node(action_id, type="ActionItem", dueDate=action.get("dueDate") or "")
            G.add_edge(agenda_id, action_id, relation="has_actionItem")
            assignee = action.get("assignee")
            if assignee:
                G.add_node(assignee, type="Person")
                G.add_edge(action_id, assignee, relation="assigned_to")

        # DecisionItems
        for decision in agenda.get("decisionItems") or []:
            if not isinstance(decision, dict):
                continue
            dec_id = _short_label(decision.get("decision") or "Unnamed Decision")
            # effect/rationale may be None here; None-valued attributes are
            # expected to be removed by the sanitization pass before export.
            G.add_node(
                dec_id,
                type="DecisionItem",
                effect=decision.get("effect"),
                rationale=decision.get("rationale"),
            )
            G.add_edge(agenda_id, dec_id, relation="has_decisionItem")

    # Tags & Emotions (both are linked with the same "tagged_with" relation)
    tags = safe_get(wg_data, ["tags"])
    if not isinstance(tags, dict):
        tags = {}
    for topic in _split_csv(tags.get("topicsCovered")):
        G.add_node(topic, type="Tag")
        G.add_edge(meeting_id, topic, relation="tagged_with")
    for emotion in _split_csv(tags.get("emotions")):
        G.add_node(emotion, type="Emotion")
        G.add_edge(meeting_id, emotion, relation="tagged_with")
| 142 | + |
# --- 4. (Optional) Inspect current invalid attributes BEFORE sanitization ---
# Purely informational: reports what the sanitization step is about to fix.
bad_before = find_invalid_attrs(G)
if not bad_before:
    print("No obviously invalid attributes detected before sanitization.")
else:
    print("Found attributes with potentially invalid types before sanitization (showing up to 20):")
    for entry in bad_before[:20]:
        print(entry)
| 151 | + |
# --- 5. Sanitize node attributes ---
def _sanitized_attrs(attrs):
    """Return a new dict of sanitized values, omitting those that sanitize to None."""
    cleaned = {}
    for name, raw in attrs.items():
        value = sanitize_value(raw)
        if value is not None:
            cleaned[name] = value
    return cleaned


for node, node_attrs in list(G.nodes(data=True)):
    replacement = _sanitized_attrs(node_attrs)
    # Build the replacement first, then swap the contents of the live dict.
    G.nodes[node].clear()
    G.nodes[node].update(replacement)

# --- 6. Sanitize edge attributes ---
for src, dst, edge_attrs in list(G.edges(data=True)):
    replacement = _sanitized_attrs(edge_attrs)
    G[src][dst].clear()
    G[src][dst].update(replacement)
| 172 | + |
# --- 7. Final check and write GEXF ---
# Abort before writing if anything non-exportable survived sanitization.
bad_after = find_invalid_attrs(G)
if bad_after:
    print("⚠ After sanitization, still some problematic attributes (should not happen):")
    for leftover in bad_after:
        print(leftover)
    raise Exception("Graph still contains invalid attribute types for GEXF export.")

nx.write_gexf(G, output_gexf)
print(f"✅ Graph exported to {output_gexf}")
| 183 | + |
# --- 8. Optional: visualize quickly in Python ---
plt.figure(figsize=(18, 12))
# Fixed seed so the spring layout is the same on every run.
layout = nx.spring_layout(G, seed=42)
draw_options = {
    "with_labels": True,
    "node_color": "lightblue",
    "node_size": 1500,
    "font_size": 8,
    "arrows": True,
}
nx.draw(G, layout, **draw_options)
# Label each edge with its "relation" attribute.
relation_labels = nx.get_edge_attributes(G, "relation")
nx.draw_networkx_edge_labels(G, layout, edge_labels=relation_labels, font_size=7)
plt.show()
0 commit comments