import json
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
import networkx as nx
56
# --- CONFIG ---
# Remote JSON document holding the 2025 meeting summaries that are fetched
# and turned into a graph.
url = "https://raw.githubusercontent.com/SingularityNET-Archive/SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/Meeting-Summaries/2025/meeting-summaries-array.json"
# File the sanitized graph is written to via nx.write_gexf.
output_gexf = "all_workgroups_graph_sanitized.gexf"
910
1011# --- 1. Fetch remote JSON safely ---
2223else :
2324 raise Exception ("Unexpected JSON structure; expected dict or list" )
2425
# Debug: top-level count
top_level_count = len(workgroups)
print(f"🔹 Number of top-level workgroup entries: {top_level_count}")

# Quick check for repeated workgroup_id values (diagnostic).
# Non-dict entries and missing ids are counted under the empty string; ids are
# stringified so an unexpected list/dict id cannot break Counter (unhashable).
ids = [
    str(wg.get("workgroup_id") or "") if isinstance(wg, dict) else ""
    for wg in workgroups
]
c = Counter(ids)
print("Top repeated explicit workgroup_id values (empty string means missing):", c.most_common(10))
34+
2935# --- 2. Helper functions ---
3036def safe_get (d , keys , default = None ):
31- """Safely walk nested dict keys."""
3237 for key in keys :
3338 if isinstance (d , dict ) and key in d :
3439 d = d [key ]
@@ -37,25 +42,16 @@ def safe_get(d, keys, default=None):
3742 return d
3843
def sanitize_value(v):
    """Return a GEXF-safe representation of *v*.

    - ``None`` is returned as ``None`` so the caller can drop the attribute.
    - ``str`` / ``int`` / ``float`` / ``bool`` values are returned unchanged.
    - Anything else (lists, dicts, other objects) is converted to a JSON
      string, falling back to ``str(v)`` when JSON serialization fails.
    """
    if v is None:
        return None
    if isinstance(v, (str, int, float, bool)):
        return v
    try:
        # Prefer a JSON representation for lists/dicts; keep non-ASCII text
        # readable instead of escaping it.
        return json.dumps(v, ensure_ascii=False)
    except Exception:
        # Deliberate best-effort: any value that cannot be serialized is
        # exported as its plain string form rather than aborting the export.
        return str(v)
5653
5754def find_invalid_attrs (G ):
58- """Return list of (node_or_edge, attr_name, value_type) that are invalid for GEXF."""
5955 bad = []
6056 for n , attrs in G .nodes (data = True ):
6157 for k , v in attrs .items ():
@@ -70,79 +66,107 @@ def find_invalid_attrs(G):
# --- 3. Build the directed graph for all workgroups ---
def _split_csv(value):
    """Return the stripped, non-empty parts of a comma-separated field.

    Strings are split on commas; lists/tuples are used element-wise. Any
    other value (including ``None``) yields an empty list, so a null field
    in the JSON cannot crash the graph build.
    """
    if isinstance(value, str):
        parts = value.split(",")
    elif isinstance(value, (list, tuple)):
        parts = [str(part) for part in value]
    else:
        return []
    return [part.strip() for part in parts if part.strip()]


def _add_person(graph, source_id, person, relation):
    """Add a Person node for *person* plus a *relation* edge from *source_id*.

    Empty/None names are skipped entirely (node AND edge), so no untyped
    "None" or "" node is created implicitly by ``add_edge``.
    """
    if not person:
        return
    person_id = str(person)
    graph.add_node(person_id, type="Person", label=person_id)
    graph.add_edge(source_id, person_id, relation=relation)


G = nx.DiGraph()

# Track seen meeting ids to detect duplicates
meeting_id_counts = Counter()
for idx, wg_data in enumerate(workgroups, start=1):
    # Prefer explicit workgroup_id; fall back to a generated id using the index
    explicit_meeting_id = safe_get(wg_data, ["workgroup_id"], None)
    workgroup_name = safe_get(wg_data, ["workgroup"], "Unknown Workgroup") or "Unknown Workgroup"

    # Ensure meeting_node_id is a unique string
    if explicit_meeting_id:
        meeting_node_id = str(explicit_meeting_id)
        # Append the (unique) entry index if this explicit id was already used
        if meeting_id_counts[meeting_node_id] > 0:
            meeting_node_id = f"{meeting_node_id}__{idx}"
    else:
        # No explicit id -> workgroup name + index guarantees uniqueness
        meeting_node_id = f"Meeting_{workgroup_name}_{idx}"

    meeting_id_counts[meeting_node_id] += 1

    # A key that is present but null would otherwise come back as None
    meeting_info = safe_get(wg_data, ["meetingInfo"], {})
    if not isinstance(meeting_info, dict):
        meeting_info = {}

    # Make sure node ids are strings
    workgroup_node_id = str(workgroup_name)

    # Workgroup & Meeting nodes
    G.add_node(workgroup_node_id, type="Workgroup", label=workgroup_node_id)
    G.add_node(
        meeting_node_id,
        type="Meeting",
        date=meeting_info.get("date", "") or "",
        typeOfMeeting=meeting_info.get("typeOfMeeting", "") or "",
        label=meeting_node_id,
    )
    G.add_edge(workgroup_node_id, meeting_node_id, relation="has_meeting")

    # Host & Documenter (placeholder only when the key is missing altogether)
    _add_person(G, meeting_node_id, meeting_info.get("host", "Unknown Host"), "hosted_by")
    _add_person(G, meeting_node_id, meeting_info.get("documenter", "Unknown Documenter"), "documented_by")

    # Attendees
    for person in _split_csv(meeting_info.get("peoplePresent")):
        _add_person(G, meeting_node_id, person, "attended_by")

    # Working Docs
    for doc in meeting_info.get("workingDocs") or []:
        if not isinstance(doc, dict):
            continue
        title = doc.get("title") or "Untitled Document"
        link = doc.get("link") or ""
        # Document node id is unique-ish: title combined with the entry index
        doc_node_id = f"Doc_{title}_{idx}"
        G.add_node(doc_node_id, type="Document", link=link, label=str(title))
        G.add_edge(meeting_node_id, doc_node_id, relation="references_doc")

    # Agenda Items -> ActionItems & DecisionItems
    agenda_items = safe_get(wg_data, ["agendaItems"], []) or []
    for aindex, agenda in enumerate(agenda_items, start=1):
        if not isinstance(agenda, dict):
            continue
        agenda_status = agenda.get("status") or "unknown"
        agenda_id = f"Agenda_{agenda_status}_{idx}_{aindex}"
        G.add_node(agenda_id, type="AgendaItem", status=agenda_status, label=agenda_id)
        G.add_edge(meeting_node_id, agenda_id, relation="has_agenda")

        # ActionItems
        for action_index, action in enumerate(agenda.get("actionItems") or [], start=1):
            if not isinstance(action, dict):
                continue
            action_text = action.get("text") or "Unnamed Action"
            action_id = f"Action_{idx}_{aindex}_{action_index}"
            G.add_node(
                action_id,
                type="ActionItem",
                dueDate=action.get("dueDate", "") or "",
                label=str(action_text)[:60],
            )
            G.add_edge(agenda_id, action_id, relation="has_actionItem")
            _add_person(G, action_id, action.get("assignee"), "assigned_to")

        # DecisionItems
        for decision_index, decision in enumerate(agenda.get("decisionItems") or [], start=1):
            if not isinstance(decision, dict):
                continue
            dec_text = decision.get("decision") or "Unnamed Decision"
            dec_id = f"Decision_{idx}_{aindex}_{decision_index}"
            # effect/rationale may be None here; the later sanitization pass
            # is expected to drop None attributes before export.
            G.add_node(
                dec_id,
                type="DecisionItem",
                effect=decision.get("effect"),
                rationale=decision.get("rationale"),
                label=str(dec_text)[:60],
            )
            G.add_edge(agenda_id, dec_id, relation="has_decisionItem")

    # Tags & Emotions
    tags = safe_get(wg_data, ["tags"], {})
    if not isinstance(tags, dict):
        tags = {}
    for topic in _split_csv(tags.get("topicsCovered")):
        G.add_node(topic, type="Tag", label=topic)
        G.add_edge(meeting_node_id, topic, relation="tagged_with")
    for emotion in _split_csv(tags.get("emotions")):
        G.add_node(emotion, type="Emotion", label=emotion)
        G.add_edge(meeting_node_id, emotion, relation="tagged_with")

# Debug: counts BEFORE sanitization
print("DEBUG BEFORE SANITIZATION -> node count:", len(G.nodes()), "edge count:", len(G.edges()))
print("Sample nodes (first 50):", list(G.nodes())[:50])
146170
147171# --- 4. (Optional) Inspect current invalid attributes BEFORE sanitization ---
148172bad_before = find_invalid_attrs (G )
@@ -160,7 +184,7 @@ def find_invalid_attrs(G):
160184 san = sanitize_value (v )
161185 if san is not None :
162186 new_attrs [k ] = san
163- # Replace attributes atomically
187+ # Keep label attr if present to increase clarity in Gephi
164188 G .nodes [n ].clear ()
165189 G .nodes [n ].update (new_attrs )
166190
@@ -185,10 +209,14 @@ def find_invalid_attrs(G):
185209 nx .write_gexf (G , output_gexf )
186210 print (f"✅ Graph exported to { output_gexf } " )
187211
# Final debug counts
print("DEBUG AFTER SANITIZATION -> node count:", G.number_of_nodes(), "edge count:", G.number_of_edges())
print("Sample nodes (first 50):", list(G.nodes())[:50])

# --- 8. Optional: visualize quickly in Python ---
plt.figure(figsize=(18, 12))
# Fixed seed keeps the spring layout reproducible between runs
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, with_labels=True, node_size=400, font_size=7, arrows=True)
# Annotate every edge with its "relation" attribute
edge_labels = nx.get_edge_attributes(G, "relation")
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6)
plt.show()
0 commit comments