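"""Generate component- and clique-size data for attribute network formation simulations.

Sweeps Schelling-style theta values and homophily/heterophily edge attribute utilities,
runs the formation simulation several times per setting, and writes the per-iteration
size distributions (averaged over runs) to a JSON file.
"""
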
from collections import defaultdict, OrderedDict
import math
import json

import networkx as nx
import numpy as np

import sim_lib.util as util
import sim_lib.attr_lib.util as alu
from sim_lib.attr_lib.formation import *

# Overall parameters
save_to = 'data/simplify_comparisons.json'  # Output path for collected results
_N = 36          # Number of vertices per network
iter_count = 16  # Simulation iterations per run
num_runs = 20    # Independent runs averaged per parameter setting

params = {
    'context_count' : 2, # Needed for simple utility
    'k' : 1, # Needed for simple attribute utility
    'edge_selection' : alu.seq_projection_edge_edit,
    'seed_type' : 'grid', # Type of seed network
    'max_clique_size' : 5,
    'revelation_proposals' : alu.resistance_distance_revelation,
    'util_agg' : lambda a, s, c: a + s, # How to aggregate utility values
    'vtx_types' : {
        'type1' : { 'likelihood' : 0.5,
                    'struct_util' : None,
                    'init_attrs' : alu.init_cont_heterophily, # context 1
                    'edge_attr_util' : None,
                    'total_attr_util' : None,
                    'color' : 'blue' },
        'type0' : { 'likelihood' : 0.5,
                    'struct_util' : None,
                    'init_attrs' : alu.init_cont_homophily, # context 0
                    'edge_attr_util' : None,
                    'total_attr_util' : None,
                    'color' : 'red' }
    }
}
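
# struct_util, edge_attr_util, and total_attr_util are placeholders here;
# they are filled in per parameter setting in the sweep below.

# Assign vertex types by likelihood, shuffled over the _N vertices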
type1_count = math.floor(_N * params['vtx_types']['type1']['likelihood'])
vtx_types_list = ['type1'] * type1_count + ['type0'] * (_N - type1_count)
np.random.shuffle(vtx_types_list)
params['type_assignment'] = { i : vtx_types_list[i] for i in range(_N) }
assert sum([ t['likelihood'] for t in params['vtx_types'].values() ]) == 1.0

# Data collection functions
def get_component_sizes(G_comps):
    """Return a dict mapping component size to the number of components of that size."""
    comp_sizes = {}
    for gc in G_comps:
        gc_size = gc.number_of_nodes()
        if gc_size in comp_sizes:
            comp_sizes[gc_size] += 1
        else:
            comp_sizes[gc_size] = 1
    return comp_sizes

def get_clique_sizes(G_comps):
    """Return a dict mapping size to the number of components that are complete graphs (cliques)."""
    clique_sizes = {}
    for gc in G_comps:
        gc_size = gc.number_of_nodes()
        is_clique = gc.number_of_edges() == (gc_size * (gc_size - 1) / 2)
        if not is_clique:
            continue
        if gc_size in clique_sizes:
            clique_sizes[gc_size] += 1
        else:
            clique_sizes[gc_size] = 1
    return clique_sizes
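
# Example: a graph whose components are two triangles and one isolated vertex gives
#   get_component_sizes -> {3: 2, 1: 1} and get_clique_sizes -> {3: 2, 1: 1},
# since triangles and single vertices are both complete graphs.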

# Run simulation

# Parameters
similarity_funcs = list(alu.gen_similarity_funcs())
attr_func_named = list(zip(similarity_funcs, ['homophily', 'heterophily']))
theta_values = [0.0, 0.25, 0.5, 0.75, 1.0][::-1]
struct_funcs = [alu.average_neighborhood_overlap]
struct_func_named = list(zip(struct_funcs, ['embedded']))
seed_types = ['trivial']
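
# The sweep below iterates over theta x attribute utility x structural utility x seed type,
# running num_runs independent simulations per setting and recording per-iteration
# component and clique size distributions averaged over runs.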

# Prepare json data
sim_results = {}

for theta in theta_values:
    sim_results[theta] = {}
    for idx, (sim_func, af_name) in enumerate(attr_func_named):
        sim_results[theta][af_name] = {}
        attr_func = alu.gen_schelling_seg_funcs(theta, 'satisfice')[idx]
        for struct_func, sf_name in struct_func_named:
            sim_results[theta][af_name][sf_name] = {}

            # Install this setting's utility functions on both vertex types
            for vtype in ['type0', 'type1']:
                params['vtx_types'][vtype]['struct_util'] = struct_func
                params['vtx_types'][vtype]['edge_attr_util'] = sim_func
                params['vtx_types'][vtype]['total_attr_util'] = attr_func

            for seed in seed_types:
                params['seed_type'] = seed
                sim_results[theta][af_name][sf_name][seed] = {}
                cur_setting_dict = sim_results[theta][af_name][sf_name][seed]
                cur_setting_dict['component'] = []
                cur_setting_dict['clique'] = []

                run_comp_sizes = []
                run_clique_sizes = []
                for _ in range(num_runs):
                    comp_counts = []
                    clique_counts = []
                    G = attribute_network(_N, params)
                    for it in range(iter_count):
                        # Iterate simulation
                        iteration_metadata = calc_edges(G)

                        # Record values
                        G_nx = alu.graph_to_nx(G)
                        G_nx_comp_nodes = nx.algorithms.components.connected_components(G_nx)
                        G_nx_comps = [ G_nx.subgraph(c).copy() for c in G_nx_comp_nodes ]
                        comp_counts.append(get_component_sizes(G_nx_comps))
                        clique_counts.append(get_clique_sizes(G_nx_comps))
                    run_comp_sizes.append(comp_counts)
                    run_clique_sizes.append(clique_counts)

                # Get averages across different runs per iteration
                for it in range(iter_count):
                    total_comp_counts = defaultdict(lambda : 0)
                    total_clique_counts = defaultdict(lambda : 0)
                    for r in range(num_runs):
                        for size, count in run_comp_sizes[r][it].items():
                            total_comp_counts[size] += count
                        for size, count in run_clique_sizes[r][it].items():
                            total_clique_counts[size] += count
                    comp_avgs = { size : count / num_runs for size, count in total_comp_counts.items() }
                    clique_avgs = { size : count / num_runs for size, count in total_clique_counts.items() }
                    cur_setting_dict['component'].append(comp_avgs)
                    cur_setting_dict['clique'].append(clique_avgs)

with open(save_to, 'w+') as sf:
    sf.write(json.dumps(sim_results))
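
# Note: json.dumps coerces the numeric theta keys to strings in the output file.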