From a84ed6d10eaedfeba377e64a481953768fd5a314 Mon Sep 17 00:00:00 2001 From: lukezhang01 Date: Wed, 11 Feb 2026 00:51:07 -0500 Subject: [PATCH 1/6] add requirements and fixed compilation in data_gen --- code/data_gen.cpp | 44 +++++++++++++++++++++++++++++--------------- requirements.txt | 16 ++++++++++++++++ 2 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 requirements.txt diff --git a/code/data_gen.cpp b/code/data_gen.cpp index 0d00122..b775e1a 100644 --- a/code/data_gen.cpp +++ b/code/data_gen.cpp @@ -361,7 +361,7 @@ namespace node_relations{ if (edges[node_stack[i+1]][node_stack[i]]==1) left_length+=1; else break; - for (int i = node_stack_pointer; i > 1; i++) + for (int i = node_stack_pointer; i > 1; i--) if (edges[node_stack[i-1]][node_stack[i]]==1) right_length+=1; else break; @@ -450,14 +450,16 @@ namespace json_output{ int len = x.size(); putchar('['); for (int i = 0;i > &x){ int len = x.size(); putchar('['); for (int i = 0;i1) putchar(','); + write(shuffleDag[flag_generated_graph[graph.reconstruct_graph[i]]].order_list[j]); + } + putchar(']'); + }printf("},"); printf("\"MEC_graph\":");jump_single_vector(graph.MEC_graph_idx); printf("\"pair_relations\":["); for (int i = 1; i<=n; i++){ + if (i>1) putchar(','); putchar('['); - for (int j=1;j<=n;j++)printf("%d,",graph.pair_relations[i][j]); - putchar(']');putchar(','); - }putchar(']'); - putchar('}');puts(""); + for (int j=1;j<=n;j++){ + if (j>1) putchar(','); + printf("%d",graph.pair_relations[i][j]); + } + putchar(']'); + }printf("],"); printf("\"pair_relations_count\":["); for (int i = 1; i<=n; i++){ + if (i>1) putchar(','); putchar('['); for (int j=1;j<=n;j++){ + if (j>1) putchar(','); putchar('['); - for (int k=0;k<7;k++) printf("%d,",graph.pair_relation_count[i][j][k]); - putchar(']');putchar(','); + for (int k=0;k<7;k++){ + if (k>0) putchar(','); + printf("%d",graph.pair_relation_count[i][j][k]); + } + putchar(']'); } - putchar(']');putchar(','); + putchar(']'); }putchar(']'); putchar('}');puts(""); } void jump_final_answer(){ - freopen("/Users/lvzhiheng/causal_relation_new_n=6.jsonl", "w", stdout); + freopen("causal_relation_new_n=5.jsonl", "w", stdout); for (int i = 1; i <= unique_DAG_num; i++){ jump_single_graph(uniqueDag[i]); } diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..08501dd --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +numpy +pandas +torch +transformers +scikit-learn +tqdm +torchmetrics +matplotlib +seaborn +nltk +openai +huggingface_hub +efficiency +shap +scipy +pytorch_transformers From ffd8bd6ddedbf86f006d6f9243a3f20c5b551293 Mon Sep 17 00:00:00 2001 From: lukezhang01 Date: Thu, 26 Mar 2026 23:47:54 -0400 Subject: [PATCH 2/6] data gen fix --- code/data_gen.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/code/data_gen.cpp b/code/data_gen.cpp index b775e1a..6f3e4c4 100644 --- a/code/data_gen.cpp +++ b/code/data_gen.cpp @@ -8,7 +8,7 @@ //#include "json.hpp" using namespace std; //using json = nlohmann::json; -const int n = 5; +const int n = 3; const int nn = n*(n-1)/2; const int max_n = 8; //const int total_graph_number = ; @@ -381,13 +381,19 @@ namespace node_relations{ } int count_relation(unique_DAG& graph_1, unique_DAG& graph_2, int* order_list){ + int inv_order[max_n + 1]; + for (int i = 1; i <= n; i++) + inv_order[order_list[i]] = i; + for (int i = 1; i <= n; i++) for (int j = 1; j <= n; j++) if (i != j) { - int new_x = order_list[i], new_y = order_list[j]; + int new_x = inv_order[i], new_y = inv_order[j]; for (int k = 0; k < 7; k++) { - if ((graph_2.pair_relations[new_x][new_y]>>k)&1) + if ((graph_2.pair_relations[new_x][new_y]>>k)&1){ //is the kth bit of graph_2.pair_relations[new_x][new_y] set to 1 graph_1.pair_relation_count[i][j][k]++; + } + } } } @@ -526,7 +532,8 @@ namespace json_output{ putchar('}');puts(""); } void jump_final_answer(){ - freopen("causal_relation_new_n=5.jsonl", "w", stdout); + std::string filename = "causal_relation_new_n=" + std::to_string(n) + ".jsonl"; + freopen(filename.c_str(), "w", stdout); for (int i = 1; i <= unique_DAG_num; i++){ jump_single_graph(uniqueDag[i]); } From 7c2711e34c77d97ec375b6da5089d476ef57dde9 Mon Sep 17 00:00:00 2001 From: lukezhang01 Date: Fri, 3 Apr 2026 11:15:56 -0400 Subject: [PATCH 3/6] easier to read changes --- code/data_gen.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/code/data_gen.cpp b/code/data_gen.cpp index 6f3e4c4..45a0e75 100644 --- a/code/data_gen.cpp +++ b/code/data_gen.cpp @@ -8,7 +8,7 @@ //#include "json.hpp" using namespace std; //using json = nlohmann::json; -const int n = 3; +const int n = 4; const int nn = n*(n-1)/2; const int max_n = 8; //const int total_graph_number = ; @@ -381,17 +381,13 @@ namespace node_relations{ } int count_relation(unique_DAG& graph_1, unique_DAG& graph_2, int* order_list){ - int inv_order[max_n + 1]; - for (int i = 1; i <= n; i++) - inv_order[order_list[i]] = i; - for (int i = 1; i <= n; i++) for (int j = 1; j <= n; j++) if (i != j) { - int new_x = inv_order[i], new_y = inv_order[j]; + int new_x = order_list[i], new_y = order_list[j]; for (int k = 0; k < 7; k++) { - if ((graph_2.pair_relations[new_x][new_y]>>k)&1){ //is the kth bit of graph_2.pair_relations[new_x][new_y] set to 1 - graph_1.pair_relation_count[i][j][k]++; + if ((graph_2.pair_relations[i][j]>>k)&1){ //is the kth bit of graph_2.pair_relations[new_x][new_y] set to 1 + graph_1.pair_relation_count[new_x][new_y][k]++; } } From 1700ca0ed3d46fcb98bb06bf96d6dc7a9540076f Mon Sep 17 00:00:00 2001 From: lukezhang01 Date: Tue, 7 Apr 2026 11:18:36 -0400 Subject: [PATCH 4/6] oob memory fix for n=6 --- code/data_gen.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/code/data_gen.cpp b/code/data_gen.cpp index 45a0e75..1f7ebd7 100644 --- a/code/data_gen.cpp +++ b/code/data_gen.cpp @@ -8,13 +8,13 @@ //#include "json.hpp" using namespace std; //using json = nlohmann::json; -const int n = 4; +const int n = 6; const int nn = n*(n-1)/2; const int max_n = 8; //const int total_graph_number = ; //const int total_order_number = ; -int power_3[10]; +int power_3[max_n * (max_n - 1) / 2]; int flag[1000010]; bool f[1000010]; int mp[100][100]; @@ -36,8 +36,8 @@ int unique_DAG_num = 0; struct Shuffle_DAG{ int *order_list; int graph_idx;//是下表 -}shuffleDag[1000010]; -int flag_generated_graph[1000010] = {}, number_shuffleDag = 0;// If the graph is already generated, then skip it. +}shuffleDag[15000010]; +int flag_generated_graph[15000010] = {}, number_shuffleDag = 0;// If the graph is already generated, then skip it. //using namespace std; namespace d_seperate{ inline ll read(){ @@ -537,7 +537,7 @@ namespace json_output{ } int main() { power_3[0]=1; - for (int i=1;i<=9;i++)power_3[i]=power_3[i-1]*3; + for (int i=1;i Date: Thu, 16 Apr 2026 18:47:53 -0400 Subject: [PATCH 5/6] removed misunderstandings and useless changes --- code/data_gen.cpp | 64 +++++++++++++++++------------------------- code/data_verbalize.py | 2 +- 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/code/data_gen.cpp b/code/data_gen.cpp index 1f7ebd7..d9f821d 100644 --- a/code/data_gen.cpp +++ b/code/data_gen.cpp @@ -5,16 +5,17 @@ #include #include #include +#include //#include "json.hpp" using namespace std; //using json = nlohmann::json; -const int n = 6; -const int nn = n*(n-1)/2; +int n = 5; +int nn; const int max_n = 8; //const int total_graph_number = ; //const int total_order_number = ; -int power_3[max_n * (max_n - 1) / 2]; +int power_3[max_n*(max_n-1)/2]; int flag[1000010]; bool f[1000010]; int mp[100][100]; @@ -361,7 +362,7 @@ namespace node_relations{ if (edges[node_stack[i+1]][node_stack[i]]==1) left_length+=1; else break; - for (int i = node_stack_pointer; i > 1; i--) + for (int i = node_stack_pointer; i > 1; i++) if (edges[node_stack[i-1]][node_stack[i]]==1) right_length+=1; else break; @@ -386,10 +387,8 @@ namespace node_relations{ if (i != j) { int new_x = order_list[i], new_y = order_list[j]; for (int k = 0; k < 7; k++) { - if ((graph_2.pair_relations[i][j]>>k)&1){ //is the kth bit of graph_2.pair_relations[new_x][new_y] set to 1 + if ((graph_2.pair_relations[i][j]>>k)&1) graph_1.pair_relation_count[new_x][new_y][k]++; - } - } } } @@ -452,16 +451,14 @@ namespace json_output{ int len = x.size(); putchar('['); for (int i = 0;i > &x){ int len = x.size(); putchar('['); for (int i = 0;i1) putchar(','); - write(shuffleDag[flag_generated_graph[graph.reconstruct_graph[i]]].order_list[j]); - } - putchar(']'); - }printf("},"); + printf("%d:[", graph.reconstruct_graph[i]); + for (int j = 1; j<=n; j++) write(shuffleDag[flag_generated_graph[graph.reconstruct_graph[i]]].order_list[j]), putchar(','); + printf("],"); + }putchar('}'); printf("\"MEC_graph\":");jump_single_vector(graph.MEC_graph_idx); printf("\"pair_relations\":["); for (int i = 1; i<=n; i++){ - if (i>1) putchar(','); putchar('['); - for (int j=1;j<=n;j++){ - if (j>1) putchar(','); - printf("%d",graph.pair_relations[i][j]); - } - putchar(']'); - }printf("],"); + for (int j=1;j<=n;j++)printf("%d,",graph.pair_relations[i][j]); + putchar(']');putchar(','); + }putchar(']'); + putchar('}');puts(""); printf("\"pair_relations_count\":["); for (int i = 1; i<=n; i++){ - if (i>1) putchar(','); putchar('['); for (int j=1;j<=n;j++){ - if (j>1) putchar(','); putchar('['); - for (int k=0;k<7;k++){ - if (k>0) putchar(','); - printf("%d",graph.pair_relation_count[i][j][k]); - } - putchar(']'); + for (int k=0;k<7;k++) printf("%d,",graph.pair_relation_count[i][j][k]); + putchar(']');putchar(','); } - putchar(']'); + putchar(']');putchar(','); }putchar(']'); putchar('}');puts(""); } void jump_final_answer(){ - std::string filename = "causal_relation_new_n=" + std::to_string(n) + ".jsonl"; - freopen(filename.c_str(), "w", stdout); + char filename[256]; + sprintf(filename, "../data/causal_relation_new_n=%d.jsonl", n); + freopen(filename, "w", stdout); for (int i = 1; i <= unique_DAG_num; i++){ jump_single_graph(uniqueDag[i]); } } } -int main() { +int main(int argc, char* argv[]) { + if (argc > 1) n = atoi(argv[1]); + nn = n*(n-1)/2; power_3[0]=1; - for (int i=1;i Date: Thu, 16 Apr 2026 18:50:54 -0400 Subject: [PATCH 6/6] Reset code/data_verbalize.py to previous version --- code/data_verbalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/data_verbalize.py b/code/data_verbalize.py index a3bacb3..32571ca 100644 --- a/code/data_verbalize.py +++ b/code/data_verbalize.py @@ -9,7 +9,7 @@ class Constants: file_causal_relation = root + '/causal_relation_new_n={num_nodes}.jsonl' file_out_template_var1_var2 = root + '/causalnli_{num_nodes}nodes_var1_var2.json' - variable_refactor = False + variable_refactor = True data_folder_suffix = '_from_Z' if variable_refactor else '' data_folder = f'{root}/data_3class{data_folder_suffix}/'