-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathutils.py
More file actions
67 lines (49 loc) · 1.87 KB
/
utils.py
File metadata and controls
67 lines (49 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
import pandas as pd
# Rpy
import rpy2.robjects as rpy
from rpy2.robjects import numpy2ri
rpy.r('suppressMessages(library(selectiveInference)); suppressMessages(library(knockoff))') # R libraries we will use
def gaussian_setup(X, Y, run_CV=True):
"""
Some calculations that can be reused by methods:
lambda.min, lambda.1se, lambda.theory and Reid et al. estimate of noise
"""
n, p = X.shape
Xn = X / np.sqrt((X**2).sum(0))[None, :]
l_theory = np.fabs(Xn.T.dot(np.random.standard_normal((n, 500)))).max(1).mean() * np.ones(p) * np.std(Y)
if run_CV:
numpy2ri.activate()
rpy.r.assign('X', X)
rpy.r.assign('Y', Y)
rpy.r('X=as.matrix(X)')
rpy.r('Y=as.numeric(Y)')
rpy.r('G = cv.glmnet(X, Y, intercept=FALSE, standardize=FALSE)')
rpy.r('sigma_reid = selectiveInference:::estimate_sigma(X, Y, coef(G, s="lambda.min")[-1]) # sigma via Reid et al.')
rpy.r("L = G[['lambda.min']]")
rpy.r("L1 = G[['lambda.1se']]")
L = rpy.r('L')
L1 = rpy.r('L1')
sigma_reid = rpy.r('sigma_reid')[0]
numpy2ri.deactivate()
return L * np.sqrt(X.shape[0]), L1 * np.sqrt(X.shape[0]), l_theory, sigma_reid
else:
return None, None, l_theory, None
def BHfilter(pval, q=0.2):
numpy2ri.activate()
rpy.r.assign('pval', pval)
rpy.r.assign('q', q)
rpy.r('Pval = p.adjust(pval, method="BH")')
rpy.r('S = which((Pval < q)) - 1')
S = rpy.r('S')
numpy2ri.deactivate()
return np.asarray(S, np.int)
def summarize(groupings,
results_df,
summary):
grouped = results_df.groupby(groupings, as_index=False)
summaries = []
summaries = [(n, summary(g)) for n, g in grouped]
summary_df = pd.concat([s for _, s in summaries])
summary_df.index = [n for n, _ in summaries]
return summary_df