forked from CamillaCzapla/Two_Sample_MR
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_TwoSampleMR_once.R
More file actions
476 lines (438 loc) · 26.6 KB
/
run_TwoSampleMR_once.R
File metadata and controls
476 lines (438 loc) · 26.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
##running TwoSampleMR for one phenotype of interest
setwd('/home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics')
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(data.table))
#suppressMessages(library(argparse))
suppressMessages(library(TwoSampleMR))
suppressMessages(library(ieugwasr))
'%&%' = function(a,b) paste (a,b,sep='')
#dictionary of study accession number to microbiome phenotype
microbiome_phenotypes <- c(
"GCST90016908" = "Gut microbiota abundance (class Actinobacteria id.419)",
"GCST90016909" = "Gut microbiota abundance (class Alphaproteobacteria id.2379)",
"GCST90016910" = "Gut microbiota abundance (class Bacilli id.1673)",
"GCST90016911" = "Gut microbiota abundance (class Bacteroidia id.912)",
"GCST90016912" = "Gut microbiota abundance (class Betaproteobacteria id.2867)",
"GCST90016913" = "Gut microbiota abundance (class Clostridia id.1859)",
"GCST90016914" = "Gut microbiota abundance (class Coriobacteriia id.809)",
"GCST90016915" = "Gut microbiota abundance (class Deltaproteobacteria id.3087)",
"GCST90016916" = "Gut microbiota abundance (class Erysipelotrichia id.2147)",
"GCST90016917" = "Gut microbiota abundance (class Gammaproteobacteria id.3303)",
"GCST90016918" = "Gut microbiota abundance (class Lentisphaeria id.2250)",
"GCST90016919" = "Gut microbiota abundance (class Melainabacteria id.1589)",
"GCST90016920" = "Gut microbiota abundance (class Methanobacteria id.119)",
"GCST90016921" = "Gut microbiota abundance (class Mollicutes id.3920)",
"GCST90016922" = "Gut microbiota abundance (class Negativicutes id.2164)",
"GCST90016923" = "Gut microbiota abundance (class Verrucomicrobiae id.4029)",
"GCST90016924" = "Gut microbiota abundance (family Acidaminococcaceae id.2166)",
"GCST90016925" = "Gut microbiota abundance (family Actinomycetaceae id.421)",
"GCST90016926" = "Gut microbiota abundance (family Alcaligenaceae id.2875)",
"GCST90016927" = "Gut microbiota abundance (family Bacteroidaceae id.917)",
"GCST90016928" = "Gut microbiota abundance (family Bacteroidales S24 7group id.11173)",
"GCST90016929" = "Gut microbiota abundance (family Bifidobacteriaceae id.433)",
"GCST90016930" = "Gut microbiota abundance (family Christensenellaceae id.1866)",
"GCST90016931" = "Gut microbiota abundance (family Clostridiaceae1 id.1869)",
"GCST90016932" = "Gut microbiota abundance (family Clostridiales vadin BB60 group id.11286)",
"GCST90016933" = "Gut microbiota abundance (family Coriobacteriaceae id.811)",
"GCST90016934" = "Gut microbiota abundance (family Defluviitaleaceae id.1924)",
"GCST90016935" = "Gut microbiota abundance (family Desulfovibrionaceae id.3169)",
"GCST90016936" = "Gut microbiota abundance (family Enterobacteriaceae id.3469)",
"GCST90016937" = "Gut microbiota abundance (family Erysipelotrichaceae id.2149)",
"GCST90016938"= "Gut microbiota abundance (family Family XI id.1936)",
"GCST90016939" = "Gut microbiota abundance (family Family XIII id.1957)",
"GCST90016940" = "Gut microbiota abundance (family Lachnospiraceae id.1987)",
"GCST90016941" = "Gut microbiota abundance (family Lactobacillaceae id.1836)",
"GCST90016942" = "Gut microbiota abundance (family Methanobacteriaceae id.121)",
"GCST90016943" = "Gut microbiota abundance (family Oxalobacteraceae id.2966)",
"GCST90016944" = "Gut microbiota abundance (family Pasteurellaceae id.3689)",
"GCST90016945" = "Gut microbiota abundance (family Peptococcaceae id.2024)",
"GCST90016946" = "Gut microbiota abundance (family Peptostreptococcaceae id.2042)",
"GCST90016947" = "Gut microbiota abundance (family Porphyromonadaceae id.943)",
"GCST90016948" = "Gut microbiota abundance (family Prevotellaceae id.960)",
"GCST90016949" = "Gut microbiota abundance (family Rhodospirillaceae id.2717)",
"GCST90016950" = "Gut microbiota abundance (family Rikenellaceae id.967)",
"GCST90016951" = "Gut microbiota abundance (family Ruminococcaceae id.2050)",
"GCST90016952" = "Gut microbiota abundance (family Streptococcaceae id.1850)",
"GCST90017058" = "Gut microbiota abundance (genus Ruminococcaceae UCG010 id.11367)",
"GCST90017059" = "Gut microbiota abundance (genus Ruminococcaceae UCG011 id.11368)",
"GCST90017060" = "Gut microbiota abundance (genus Ruminococcaceae UCG013 id.11370)",
"GCST90017061" = "Gut microbiota abundance (genus Ruminococcaceae UCG014 id.11371)",
"GCST90017062" = "Gut microbiota abundance (genus Ruminococcus1 id.11373)",
"GCST90017063" = "Gut microbiota abundance (genus Ruminococcus2 id.11374)",
"GCST90017064" = "Gut microbiota abundance (genus Ruminococcus gauvreauii group id.11342)",
"GCST90017065" = "Gut microbiota abundance (genus Ruminococcus gnavus group id.14376)",
"GCST90017066" = "Gut microbiota abundance (genus Ruminococcus torques group id.14377)",
"GCST90017067" = "Gut microbiota abundance (genus Sellimonas id.14369)",
"GCST90017068" = "Gut microbiota abundance (genus Senegalimassilia id.11160)",
"GCST90017069" = "Gut microbiota abundance (genus Slackia id.825)",
"GCST90017070" = "Gut microbiota abundance (genus Streptococcus id.1853)",
"GCST90017071" = "Gut microbiota abundance (genus Subdoligranulum id.2070)",
"GCST90017072" = "Gut microbiota abundance (genus Sutterella id.2896)",
"GCST90017073" = "Gut microbiota abundance (genus Terrisporobacter id.11348)",
"GCST90017074" = "Gut microbiota abundance (genus Turicibacter id.2162)",
"GCST90017075" = "Gut microbiota abundance (genus Tyzzerella3 id.11335)",
"GCST90017076" = "Gut microbiota abundance (unknown genus id.1000000073)",
"GCST90017077" = "Gut microbiota abundance (unknown genus id.1000001215)",
"GCST90017078" = "Gut microbiota abundance (unknown genus id.1000005472)",
"GCST90017079" = "Gut microbiota abundance (unknown genus id.1000005479)",
"GCST90017080" = "Gut microbiota abundance (unknown genus id.1000006162)",
"GCST90017081" = "Gut microbiota abundance (unknown genus id.1868)",
"GCST90017082" = "Gut microbiota abundance (unknown genus id.2001)",
"GCST90017083" = "Gut microbiota abundance (unknown genus id.2041)",
"GCST90017084" = "Gut microbiota abundance (unknown genus id.2071)",
"GCST90017085" = "Gut microbiota abundance (unknown genus id.2755)",
"GCST90017086" = "Gut microbiota abundance (unknown genus id.826)",
"GCST90017087" = "Gut microbiota abundance (unknown genus id.959)",
"GCST90017033" = "Gut microbiota abundance (genus Methanobrevibacter id.123)",
"GCST90017034" = "Gut microbiota abundance (genus Odoribacter id.952)",
"GCST90017035" = "Gut microbiota abundance (genus Olsenella id.822)",
"GCST90017036" = "Gut microbiota abundance (genus Oscillibacter id.2063)",
"GCST90017037" = "Gut microbiota abundance (genus Oscillospira id.2064)",
"GCST90017038" = "Gut microbiota abundance (genus Oxalobacter id.2978)",
"GCST90017039" = "Gut microbiota abundance (genus Parabacteroides id.954)",
"GCST90017040" = "Gut microbiota abundance (genus Paraprevotella id.962)",
"GCST90017041" = "Gut microbiota abundance (genus Parasutterella id.2892)",
"GCST90017042" = "Gut microbiota abundance (genus Peptococcus id.2037)",
"GCST90017043" = "Gut microbiota abundance (genus Phascolarctobacterium id.2168)",
"GCST90017044" = "Gut microbiota abundance (genus Prevotella7 id.11182)",
"GCST90017045" = "Gut microbiota abundance (genus Prevotella9 id.11183)",
"GCST90017046" = "Gut microbiota abundance (genus Rikenellaceae RC9 gut group id.11191)",
"GCST90017047" = "Gut microbiota abundance (genus Romboutsia id.11347)",
"GCST90017048" = "Gut microbiota abundance (genus Roseburia id.2012)",
"GCST90017049" = "Gut microbiota abundance (genus Ruminiclostridium5 id.11355)",
"GCST90017050" = "Gut microbiota abundance (genus Ruminiclostridium6 id.11356)",
"GCST90017051" = "Gut microbiota abundance (genus Ruminiclostridium9 id.11357)",
"GCST90017052" = "Gut microbiota abundance (genus Ruminococcaceae NK4A214 group id.11358)",
"GCST90017053" = "Gut microbiota abundance (genus Ruminococcaceae UCG002 id.11360)",
"GCST90017054" = "Gut microbiota abundance (genus Ruminococcaceae UCG003 id.11361)",
"GCST90017055" = "Gut microbiota abundance (genus Ruminococcaceae UCG004 id.11362)",
"GCST90017056" = "Gut microbiota abundance (genus Ruminococcaceae UCG005 id.11363)",
"GCST90017057" = "Gut microbiota abundance (genus Ruminococcaceae UCG009 id.11366)",
"GCST90017008" = "Gut microbiota abundance (genus Family XIII AD3011 group id.11293)",
"GCST90017009" = "Gut microbiota abundance (genus Family XIII UCG001 id.11294)",
"GCST90017010" = "Gut microbiota abundance (genus Flavonifractor id.2059)",
"GCST90017011" = "Gut microbiota abundance (genus Fusicatenibacter id.11305)",
"GCST90017012" = "Gut microbiota abundance (genus Gordonibacter id.821)",
"GCST90017013" = "Gut microbiota abundance (genus Haemophilus id.3698)",
"GCST90017014" = "Gut microbiota abundance (genus Holdemanella id.11393)",
"GCST90017015" = "Gut microbiota abundance (genus Holdemania id.2157)",
"GCST90017016" = "Gut microbiota abundance (genus Howardella id.2000)",
"GCST90017017" = "Gut microbiota abundance (genus Hungatella id.11306)",
"GCST90017018" = "Gut microbiota abundance (genus Intestinibacter id.11345)",
"GCST90017019" = "Gut microbiota abundance (genus Intestinimonas id.2062)",
"GCST90017020" = "Gut microbiota abundance (genus Lachnoclostridium id.11308)",
"GCST90017021" = "Gut microbiota abundance (genus Lachnospiraceae FCS020 group id.11314)",
"GCST90017022" = "Gut microbiota abundance (genus Lachnospiraceae NC2004 group id.11316)",
"GCST90017023" = "Gut microbiota abundance (genus Lachnospiraceae ND3007 group id.11317)",
"GCST90017024" = "Gut microbiota abundance (genus Lachnospiraceae NK4A136 group id.11319)",
"GCST90017025" = "Gut microbiota abundance (genus Lachnospiraceae UCG001 id.11321)",
"GCST90017026" = "Gut microbiota abundance (genus Lachnospiraceae UCG004 id.11324)",
"GCST90017027" = "Gut microbiota abundance (genus Lachnospiraceae UCG008 id.11328)",
"GCST90017028" = "Gut microbiota abundance (genus Lachnospiraceae UCG010 id.11330)",
"GCST90017029" = "Gut microbiota abundance (genus Lachnospira id.2004)",
"GCST90017030" = "Gut microbiota abundance (genus Lactobacillus id.1837)",
"GCST90017031" = "Gut microbiota abundance (genus Lactococcus id.1851)",
"GCST90017032" = "Gut microbiota abundance (genus Marvinbryantia id.2005)",
"GCST90017127" = "Gut microbiota presence (genus Anaerostipes id.1991)",
"GCST90017128" = "Gut microbiota presence (family Bacteroidales S24 7group id.11173)",
"GCST90017129" = "Gut microbiota presence (unknown genus id.1000005479)",
"GCST90017130" = "Gut microbiota alpha diversity (Inverse Simpson index)",
"GCST90017131" = "Gut microbiota alpha diversity (Shannon index)",
"GCST90017132" = "Gut microbiota alpha diversity (Simpson index)",
"GCST90016964" = "Gut microbiota abundance (genus Alloprevotella id.961)",
"GCST90016965" = "Gut microbiota abundance (genus Anaerofilum id.2053)",
"GCST90016966" = "Gut microbiota abundance (genus Anaerostipes id.1991)",
"GCST90016967" = "Gut microbiota abundance (genus Anaerotruncus id.2054)",
"GCST90016968" = "Gut microbiota abundance (genus Bacteroides id.918)",
"GCST90016969" = "Gut microbiota abundance (genus Barnesiella id.944)",
"GCST90016970" = "Gut microbiota abundance (genus Bifidobacterium id.436)",
"GCST90016971" = "Gut microbiota abundance (genus Bilophila id.3170)",
"GCST90016972" = "Gut microbiota abundance (genus Blautia id.1992)",
"GCST90016973" = "Gut microbiota abundance (genus Butyricicoccus id.2055)",
"GCST90016974" = "Gut microbiota abundance (genus Butyricimonas id.945)",
"GCST90016975" = "Gut microbiota abundance (genus Butyrivibrio id.1993)",
"GCST90016976" = "Gut microbiota abundance (genus Candidatus Soleaferrea id.11350)",
"GCST90016977" = "Gut microbiota abundance (genus Catenibacterium id.2153)",
"GCST90016978" = "Gut microbiota abundance (genus Christensenellaceae R 7group id.11283)",
"GCST90016979" = "Gut microbiota abundance (genus Clostridium innocuum group id.14397)",
"GCST90016980" = "Gut microbiota abundance (genus Clostridium sensustricto1 id.1873)",
"GCST90016981" = "Gut microbiota abundance (genus Collinsella id.815)",
"GCST90016982" = "Gut microbiota abundance (genus Coprobacter id.949)",
"GCST90016983" = "Gut microbiota abundance (genus Coprococcus1 id.11301)",
"GCST90016984" = "Gut microbiota abundance (genus Coprococcus2 id.11302)",
"GCST90016985" = "Gut microbiota abundance (genus Coprococcus3 id.11303)",
"GCST90016986" = "Gut microbiota abundance (genus Defluviitaleaceae UCG011 id.11287)",
"GCST90016987" = "Gut microbiota abundance (genus Desulfovibrio id.3173)",
"GCST90016988" = "Gut microbiota abundance (genus Dialister id.2183)",
"GCST90016989" = "Gut microbiota abundance (genus Dorea id.1997)",
"GCST90016990" = "Gut microbiota abundance (genus Eggerthella id.819)",
"GCST90016991" = "Gut microbiota abundance (genus Eisenbergiella id.11304)",
"GCST90016992" = "Gut microbiota abundance (genus Enterorhabdus id.820)",
"GCST90016993" = "Gut microbiota abundance (genus Erysipelatoclostridium id.11381)",
"GCST90016994" = "Gut microbiota abundance (genus Erysipelotrichaceae UCG003 id.11384)",
"GCST90016995" = "Gut microbiota abundance (genus Escherichia Shigella id.3504)",
"GCST90016996" = "Gut microbiota abundance (genus Eubacterium brachy group id.11296)",
"GCST90016997" = "Gut microbiota abundance (genus Eubacterium coprostanoligenes group id.11375)",
"GCST90016998" = "Gut microbiota abundance (genus Eubacterium eligens group id.14372)",
"GCST90016999" = "Gut microbiota abundance (genus Eubacterium fissicatena group id.14373)",
"GCST90017000" = "Gut microbiota abundance (genus Eubacterium hallii group id.11338)",
"GCST90017001" = "Gut microbiota abundance (genus Eubacterium nodatum group id.11297)",
"GCST90017002" = "Gut microbiota abundance (genus Eubacterium oxidoreducens group id.11339)",
"GCST90017003" = "Gut microbiota abundance (genus Eubacterium rectale group id.14374)",
"GCST90017004" = "Gut microbiota abundance (genus Eubacterium ruminantium group id.11340)",
"GCST90017005" = "Gut microbiota abundance (genus Eubacterium ventriosum group id.11341)",
"GCST90017006" = "Gut microbiota abundance (genus Eubacterium xylanophilum group id.14375)",
"GCST90017007" = "Gut microbiota abundance (genus Faecalibacterium id.2057)",
"GCST90017088" = "Gut microbiota abundance (genus Veillonella id.2198)",
"GCST90017089" = "Gut microbiota abundance (genus Victivallis id.2256)",
"GCST90017090" = "Gut microbiota abundance (order Actinomycetales id.420)",
"GCST90017091" = "Gut microbiota abundance (order Bacillales id.1674)",
"GCST90017092" = "Gut microbiota abundance (order Bacteroidales id.913)",
"GCST90017093" = "Gut microbiota abundance (order Bifidobacteriales id.432)",
"GCST90017094" = "Gut microbiota abundance (order Burkholderiales id.2874)",
"GCST90017095" = "Gut microbiota abundance (order Clostridiales id.1863)",
"GCST90017096" = "Gut microbiota abundance (order Coriobacteriales id.810)",
"GCST90017097" = "Gut microbiota abundance (order Desulfovibrionales id.3156)",
"GCST90017098" = "Gut microbiota abundance (order Enterobacteriales id.3468)",
"GCST90017099" = "Gut microbiota abundance (order Erysipelotrichales id.2148)",
"GCST90017100" = "Gut microbiota abundance (order Gastranaerophilales id.1591)",
"GCST90016953" = "Gut microbiota abundance (unknown family id.1000001214)",
"GCST90016954" = "Gut microbiota abundance (unknown family id.1000005471)",
"GCST90016955" = "Gut microbiota abundance (unknown family id.1000006161)",
"GCST90016956" = "Gut microbiota abundance (family Veillonellaceae id.2172)",
"GCST90016957" = "Gut microbiota abundance (family Verrucomicrobiaceae id.4036)",
"GCST90016958" = "Gut microbiota abundance (family Victivallaceae id.2255)",
"GCST90016959" = "Gut microbiota abundance (genus Actinomyces id.423)",
"GCST90016960" = "Gut microbiota abundance (genus Adlercreutzia id.812)",
"GCST90016961" = "Gut microbiota abundance (genus Akkermansia id.4037)",
"GCST90016962" = "Gut microbiota abundance (genus Alistipes id.968)",
"GCST90016963" = "Gut microbiota abundance (genus Allisonella id.2174)",
"GCST90017101" = "Gut microbiota abundance (order Lactobacillales id.1800)",
"GCST90017102" = "Gut microbiota abundance (order Methanobacteriales id.120)",
"GCST90017103" = "Gut microbiota abundance (order Mollicutes RF9 id.11579)",
"GCST90017104" = "Gut microbiota abundance (order NB1n id.3953)",
"GCST90017105" = "Gut microbiota abundance (order Pasteurellales id.3688)",
"GCST90017106" = "Gut microbiota abundance (order Rhodospirillales id.2667)",
"GCST90017107" = "Gut microbiota abundance (order Selenomonadales id.2165)",
"GCST90017108" = "Gut microbiota abundance (order Verrucomicrobiales id.4030)",
"GCST90017109" = "Gut microbiota abundance (order Victivallales id.2254)",
"GCST90017110" = "Gut microbiota abundance (phylum Actinobacteria id.400)",
"GCST90017111" = "Gut microbiota abundance (phylum Bacteroidetes id.905)",
"GCST90017112" = "Gut microbiota abundance (phylum Cyanobacteria id.1500)",
"GCST90017113" = "Gut microbiota abundance (phylum Euryarchaeota id.55)",
"GCST90017114" = "Gut microbiota abundance (phylum Firmicutes id.1672)",
"GCST90017115" = "Gut microbiota abundance (phylum Lentisphaerae id.2238)",
"GCST90017116" = "Gut microbiota abundance (phylum Proteobacteria id.2375)",
"GCST90017117" = "Gut microbiota abundance (phylum Tenericutes id.3919)",
"GCST90017118" = "Gut microbiota abundance (phylum Verrucomicrobia id.3982)",
"GCST90017119" = "Gut microbiota presence (genus Turicibacter id.2162)",
"GCST90017120" = "Gut microbiota presence (genus Ruminiclostridium6 id.11356)",
"GCST90017121" = "Gut microbiota presence (genus Odoribacter id.952)",
"GCST90017122" = "Gut microbiota presence (family Peptococcaceae id.2024)",
"GCST90017123" = "Gut microbiota presence (genus Enterorhabdus id.820)",
"GCST90017124" = "Gut microbiota presence (genus Coprococcus1 id.11301)",
"GCST90017125" = "Gut microbiota presence (family Pseudomonadaceae id.3718)",
"GCST90017126" = "Gut microbiota presence (genus Lachnospiraceae UCG 010 id.11330)"
)
File.Name = "33462485-GCST90016913-EFO_0007874-Build37.f.tsv.gz"
##need unique file name from dictionary key/phenotype variable for next function
uniquename <- substr(File.Name, 10, 21)
from_dict <- microbiome_phenotypes[uniquename]
##reading in exposure data
microbiome_exp_dat <- read_exposure_data(
filename = File.Name,
#clump = TRUE,
sep = "\t",
snp_col = "variant_id",
beta_col = "beta",
se_col = "standard_error",
effect_allele_col = "effect_allele",
other_allele_col = "other_allele",
pval_col = "p_value",
#min_pval = 1e-04,
)
##renaming exposure column to phenotype name to identify the genera for each test
microbiome_exp_dat$exposure <- from_dict
microbiome_exp_dat <- filter(microbiome_exp_dat, pval.exposure <= 1e-04)
my_command_1 = "python3 /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/00_mibiogen_panukbb_4_twosampleMR.py --micro /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/" %&% File.Name %&%" --panukb /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/categorical-20002-both_sexes-1075.tsv.bgz --output /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/intersection"
#call python script with system() in order to have a smaller file with necessary info for read_outcome_data()
#otherwise, a Cstack usage error
system(my_command_1)
#read in outcome locally
chd_out_dat <- read_outcome_data(
#snps = microbiome_exp_dat$SNP,
filename = "intersection.txt.gz",
#filename = "intersection.tsv",
#snps = microbiome_exp_dat$SNP,
sep = "\t",
#phenotype_col = "Phenotype",
snp_col = "SNP",
beta_col = "panukb_beta",
se_col = "panukb_se",
#eaf_col = "eaf",
effect_allele_col = "panukb_effect",
other_allele_col = "panukb_other",
pval_col = "panukb_p",
#units_col = "units",
#ncase_col = "ncase",
#ncontrol_col = "ncontrol",
#samplesize_col = "samplesize",
#gene_col = "gene",
#id_col = "id",
#min_pval = 5e-08,
#log_pval = FALSE,
#chr_col = "chr",
#pos_col = "pos"
)
chd_out_dat$outcome <- "Myocardial Infarction"
##HARMONISE DATA (Intersection/CREATES NEW COMBINED DATA FRAME WITH EXPOSURE AND OUTCOME DATA)
dat_for <- harmonise_data(microbiome_exp_dat, chd_out_dat)
if (dim(dat_for)[1] != 0) {
dat_for <- dplyr::rename(dat_for, rsid = SNP)
dat_for <- dplyr::rename(dat_for, pval = pval.exposure)
#hacked ld_clump_local function with --allow-extra-chr flag
#skip if not necessary?
ld_clump_local_hacked <- function(dat, clump_kb, clump_r2, clump_p, bfile, plink_bin, allow_extra_chr)
{
# Make textfile
shell <- ifelse(Sys.info()['sysname'] == "Windows", "cmd", "sh")
fn <- tempfile()
write.table(data.frame(SNP=dat[["rsid"]], P=dat[["pval"]]), file=fn, row.names=F, col.names=T, quote=F)
fun2 <- paste0(
shQuote(plink_bin, type=shell),
" --bfile ", shQuote(bfile, type=shell),
" --keep /home/wheelerlab3/bioi397/homeworks/HW2/1000g_plink2/EUR_subset",
" --clump ", shQuote(fn, type=shell),
" --clump-p1 ", clump_p,
" --clump-r2 ", clump_r2,
" --clump-kb ", clump_kb,
" --allow-extra-chr", #allow_extra_chr,
" --out ", shQuote(fn, type=shell)
)
system(fun2)
res <- read.table(paste(fn, ".clumped", sep=""), header=T)
unlink(paste(fn, "*", sep=""))
y <- subset(dat, !dat[["rsid"]] %in% res[["SNP"]])
if(nrow(y) > 0)
{
message("Removing ", length(y[["rsid"]]), " of ", nrow(dat), " variants due to LD with other variants or absence from LD reference panel")
}
return(subset(dat, dat[["rsid"]] %in% res[["SNP"]]))
}
##LD clumping locally
##Error HERE
dat_for <- ld_clump_local_hacked(dat_for, clump_kb = 10000, clump_r2 = 0.001, clump_p = 1, bfile = "/home/camilla/all_phase3.rsid.maf001.dups2.removed",
plink_bin = "/usr/local/bin/plink")
#change column names back
dat_for <- dplyr::rename(dat_for, SNP = rsid)
dat_for <- dplyr::rename(dat_for, pval.exposure = pval)
}
##Perform MR
res_for <- mr(dat_for)
##Sensitivity analyses
if (dim(dat_for)[1] != 0) {
het_for<- mr_heterogeneity(dat_for)
plt_for<- mr_pleiotropy_test(dat_for)
sin_for<- mr_singlesnp(dat_for)
}
##saving files one at a time
if (dim(dat_for)[1] != 0) {
fwrite(dat_for, file = "phecode-411-both_sexes_vs_" %&% from_dict %&% "_MR_data.csv", quote = FALSE, na = "NA")
}
if (dim(res_for)[1] != 0) {
fwrite(res_for, file = "phecode-411-both_sexes_vs_" %&% from_dict %&% "_MR_results.csv", quote = FALSE, na = "NA")
}
exist_check_het_for <- exists("het_for")
if (exist_check_het_for == TRUE && dim(het_for)[1] != 0) {
fwrite(het_for, file = "phecode-411-both_sexes_vs_" %&% from_dict %&% "_MR_heterogeneity.csv", quote = FALSE, na = "NA")
}
exist_check_plt_for <- exists("plt_for")
if (exist_check_plt_for == TRUE && dim(plt_for)[1] != 0) {
fwrite(plt_for, file = "phecode-411-both_sexes_vs_" %&% from_dict %&% "_MR_pleiotropy.csv", quote = FALSE, na = "NA")
}
exist_check_sin_for <- exists("sin_for")
if (exist_check_sin_for == TRUE && dim(sin_for)[1] != 0) {
fwrite(sin_for, file = "phecode-411-both_sexes_vs_" %&% from_dict %&% "_MR_singlesnp.csv", quote = FALSE, na = "NA")
}
my_command_2 = "python3 /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/00_mibiogen_panukbb_4_twosampleMR.py --panukb /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/phecode-411-both_sexes.tsv.bgz --micro /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/" %&% File.Name %&% " --output /home/camilla/TwoSampleMR/MiBioGen_Summary_Statistics/" %&% args$mbfile %&% "_intersection"
#call python script with system() in order to have a smaller file for read_outcome_data()
#otherwise, a Cstack usage error
system(my_command_2)
##reading in chd as exposure data
chd_exp_dat <- read_exposure_data(
filename = filename_string,
#clump = TRUE,
sep = "\t",
snp_col = "SNP",
beta_col = "panukb_beta",
se_col = "panukb_se",
#eaf_col = "eaf",
effect_allele_col = "panukb_effect",
other_allele_col = "panukb_other",
pval_col = "panukb_p",
#min_pval = 5e-08,
)
chd_exp_dat$exposure <- "Ischemic Heart Disease"
chd_exp_dat <- filter(chd_exp_dat, pval.exposure <= 5e-08)
#chd_out_dat <- filter(chd_out_dat, pval.exposure < 5e-08)
microbiome_out_dat <- read_outcome_data(
filename = filename_string, ##output file name from python script
#clump = TRUE,
sep = "\t",
snp_col = "SNP",
beta_col = "micro_beta",
se_col = "micro_se",
effect_allele_col = "micro_effect",
other_allele_col = "micro_other",
pval_col = "micro_p",
#min_pval = 1e-04,
)
microbiome_out_dat$outcome <- from_dict
#microbiome_out_dat <- filter(microbiome_out_dat, pval.outcome < 1e-04)
##HARMONISE REVERSED DATA (CREATES NEW COMBINED DATA FRAME WITH EXPOSURE AND OUTCOME DATA)
dat_rev <- harmonise_data(chd_exp_dat, microbiome_out_dat)
if (dim(dat_rev)[1] != 0) {
dat_rev <- dplyr::rename(dat_rev, rsid = SNP)
dat_rev <- dplyr::rename(dat_rev, pval = pval.exposure)
dat_rev <- ld_clump_local_hacked(dat_rev, clump_kb = 10000, clump_r2 = 0.001, clump_p = 1, bfile = "/home/camilla/all_phase3.rsid.maf001.dups2.removed",
plink_bin = "/usr/local/bin/plink")
#change column names back
dat_rev <- dplyr::rename(dat_rev, SNP = rsid)
dat_rev <- dplyr::rename(dat_rev, pval.exposure = pval)
}
##Perform MR
res_rev <- mr(dat_rev)
##Sensitivity analyses
if (dim(dat_rev)[1] != 0) {
het_rev <- mr_heterogeneity(dat_rev)
plt_rev <- mr_pleiotropy_test(dat_rev)
sin_rev <- mr_singlesnp(dat_rev)
}
##saving files one at a time
if (dim(dat_rev)[1] != 0) {
fwrite(dat_rev, file = from_dict %&% "_vs_phecode-411-both_sexes_MR_data.csv", quote = FALSE, na = "NA")
}
if (dim(res_rev)[1] != 0) {
fwrite(res_rev, file = from_dict %&% "_vs_phecode-411-both_sexes_MR_results.csv", quote = FALSE, na = "NA")
}
exist_check_het_rev <- exists("het_rev")
if (exist_check_het_rev == TRUE && dim(het_rev)[1] != 0) {
fwrite(het_rev, file = from_dict %&% "_vs_phecode-411-both_sexes_MR_heterogeneity.csv", quote = FALSE, na = "NA")
}
exist_check_plt_rev <- exists("plt_rev")
if (exist_check_plt_rev == TRUE && dim(plt_rev)[1] != 0) {
fwrite(plt_rev, file = from_dict %&% "_vs_phecode-411-both_sexes_MR_pleiotropy.csv", quote = FALSE, na = "NA")
}
exist_check_sin_rev <- exists("sin_rev")
if (exist_check_sin_rev == TRUE && dim(sin_rev)[1] != 0) {
fwrite(sin_rev, file = from_dict %&% "_vs_phecode-411-both_sexes_MR_singlesnp.csv", quote = FALSE, na = "NA")
}
p1 <- mr_scatter_plot(res_for, dat_for)
p1[[1]]
#p1[[2]]
##saving scatter plot as .png file
png(file="/home/camilla/TwoSampleMR/Myocardial_Infarction_vs_class_Clostridia_id_1859_scatterplot.png",
width=600, height=350)
p1[[1]]
dev.off()