Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 165 additions & 7 deletions R_BSFG/BSFG_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
Lambda = matrix(0,nr = p,nc = k)

for(j in 1:p) {
means = c()
Qlam = c()
FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
means = FUDi %*% UtY[,j]
Qlam = FUDi %*% t(FtU) + diag(Plam[j,])
means = means + FUDi %*% UtY[,j]
Qlam = Qlam + FUDi %*% t(FtU)
Qlam = Qlam + diag(Plam[j,])

# recover()
Llam = t(chol(Qlam))
Expand All @@ -60,6 +63,59 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
return(Lambda)
}

sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) {

#Sample factor loadings Lambda while marginalizing over residual
#genetic effects: Y - Z_2W = F*Lambda' + E, vec(E)~N(0,kron(Psi_E,In) + kron(Psi_U, ZAZ^T))
#note: conditioning on F, but marginalizing over E_a.
#sampling is done separately by trait because each column of Lambda is
#independent in the conditional posterior
#note: invert_aI_bZAZ has parameters that diagonalize aI + bZAZ for fast
#inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U'

p = length(resid_Y_prec[[1]])
k = ncol(F[[1]])

Zlams = matrix(rnorm(k*p),nr = k, nc = p)
Lambda = matrix(0,nr = p,nc = k)


for(j in 1:p) {
#means = matrix(0,nr=p,nc=1)
means = matrix(0,nr=k,nc=1)
Qlam = matrix(0,k,k)
Y_tilde = list();
for(pop in pops){
#conditioning on W, B, F, marginalizing over E _a
Y_tilde[[pop]] = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_2[[pop]] %*% W[[pop]]
Y_tilde[[pop]] = as.matrix(Y_tilde[[pop]])

U = invert_aI_bZAZ[[pop]]$U
s = invert_aI_bZAZ[[pop]]$s

FtU = t(F[[pop]]) %*% U
UtY = t(U) %*% Y_tilde[[pop]]

FUDi = E_a_prec[[pop]][j] * sweep(FtU,2,(s + E_a_prec[[pop]][j]/resid_Y_prec[[pop]][j]),'/')
means = means + FUDi %*% UtY[,j]
Qlam = Qlam + FUDi %*% t(FtU)
#recover()
}
Qlam = Qlam + diag(Plam[j,])

#recover()
Llam = t(chol(Qlam))
vlam = forwardsolve(Llam,means)
mlam = backsolve(t(Llam),vlam)
ylam = backsolve(t(Llam),Zlams[,j])

Lambda[j,] = ylam + mlam

}

return(Lambda)
}

sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) {
#sample factor heritibilties conditional on a given residual precision
#(res_precision)
Expand Down Expand Up @@ -201,6 +257,7 @@ sample_means = function( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDes
return(location_sample)
}


sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
#samples genetic effects on factors (F_a) conditional on the factor scores F:
# F_i = F_{a_i} + E_i, E_i~N(0,s2*(1-h2)*I) for each latent trait i
Expand Down Expand Up @@ -241,6 +298,8 @@ sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
return(F_a)
}



sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
#Sample factor scores given factor loadings (F_a), factor heritabilities (F_h2) and
#phenotype residuals
Expand Down Expand Up @@ -269,6 +328,7 @@ sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
}



sample_delta = function( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) {
#sample delta and tauh parameters that control the magnitudes of higher
#index factor loadings.
Expand Down Expand Up @@ -336,10 +396,12 @@ update_k = function( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_W,Lambda_df
tauh = cumprod(delta)
Plam = sweep(Lambda_prec,2,tauh,'*')
Lambda = cbind(Lambda,rnorm(p,0,sqrt(1/Plam[,k])))

F_h2[k] = runif(1)
F_a = cbind(F_a,rnorm(r,0,sqrt(F_h2[k])))
F = cbind(F,rnorm(n,Z_W %*% F_a[,k],sqrt(1-F_h2[k])))
} else if(num > 0) { # drop redundant columns

} else if(num > 0) { # drop redundant columns
nonred = which(vec == 0) # non-redundant loadings columns
Lambda = Lambda[,nonred]
Lambda_prec = Lambda_prec[,nonred]
Expand Down Expand Up @@ -403,10 +465,58 @@ save_posterior_samples = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F
Posterior$B = (Posterior$B*(sp_num-1) + B)/sp_num
Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
Posterior$W = (Posterior$W*(sp_num-1) + W)/sp_num

return(Posterior)
}


save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec) {
# save posteriors. Full traces are kept of the more interesting parameters.
# Only the posterior means are kept of less interesting parameters. These
# should be correctly calculated over several re-starts of the sampler.

# first unlist F, F_a, F_h2, Y, B, E_a
nl = length(F_h2)
F = unlist(do.call("rbind", F)) # n*k
F_a = unlist(do.call("rbind", F_a)) #n*k
F_h2 = Reduce("+", F_h2)/nl # k taking average
B = Reduce("+",B)/nl # b*p taking average
E_a = unlist(do.call("rbind", E_a)) # n*k
W = Reduce("+",W)/nl # k*p taking average
resid_Y_prec = Reduce("+",resid_Y_prec)/nl #p taking average
E_a_prec = Reduce("+",E_a_prec)/nl #p
W_prec = Reduce("+",W_prec)/nl #p

#sp = ncol(Posterior$Lambda)
sp = ncol(Posterior$Lambda)
#size(Posterior$Lambda,2)
#save factor samples
# change the if condition to be if sp_num == 1. So we don't need to run the update k part and we can also assign new value to out posterior variables.
if(sp_num==1){
# expand factor sample matrix if necessary
Posterior$Lambda = rbind(Posterior$Lambda, matrix(0,nr = length(Lambda)-nrow(Posterior$Lambda),nc = sp))
Posterior[["F"]] = rbind(Posterior[["F"]], matrix(0,nr = length(F) -nrow(Posterior[["F"]]), nc = sp))
Posterior[["F_a"]] = rbind(Posterior[["F_a"]], matrix(0,nr = length(F_a) -nrow(Posterior[["F_a"]]), nc = sp))
Posterior[["delta"]] = rbind(Posterior[["delta"]], matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]), nc = sp))
Posterior[["F_h2"]] = rbind(Posterior[["F_h2"]], matrix(0,nr = length(F_h2) -nrow(Posterior[["F_h2"]]), nc = sp))
}
Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda)
Posterior$F[1:length(F),sp_num] = c(F)
Posterior$F_a[1:length(F_a),sp_num] = c(F_a)
Posterior$delta[1:length(delta),sp_num] = delta
Posterior$F_h2[1:length(F_h2),sp_num] = F_h2

Posterior$resid_Y_prec[,sp_num] = resid_Y_prec
Posterior$E_a_prec[,sp_num] = E_a_prec
Posterior$W_prec[,sp_num] = W_prec

# save B,U,W
Posterior$B = (Posterior$B*(sp_num-1) + B)/sp_num
Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
Posterior$W = (Posterior$W*(sp_num-1) + W)/sp_num

return(Posterior)
}


save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,resid_Y_prec,E_a_prec,W_prec) {
# save posteriors. Full traces are kept of the more interesting parameters.
# Only the posterior means are kept of less interesting parameters. These
Expand All @@ -430,12 +540,12 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re
Posterior$E_a_prec[,j] = E_a_prec
Posterior$W_prec[,j] = W_prec

# save B,U,W
# save U,W

Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j
Posterior$W = (Posterior$W*(j-1) + W)/j

return(Posterior)
return(Posterior)
}


Expand Down Expand Up @@ -520,6 +630,54 @@ G_Matrix_Comp = function(BSFG_state){
return(GMatrix)
}

G_Matrix_comb_Comp = function(BSFG_state){
Posterior = BSFG_state$Posterior
traitnames = BSFG_state$traitnames
run_variables = BSFG_state$run_variables
sp_num = ncol(Posterior$Lambda)
#n = run_variables$n
k = 10
p = run_variables$p
n = nrow(Posterior$F_a)/k
k1 = nrow(Posterior$F_a)/n;
k2 = nrow(Posterior$Lambda)/p;
if (k2 >= k1){
k = k1
}else{
k=k2
}
h2s = Posterior$F_h2[,1:sp_num]
#G_Lambdas = array(0,dim = dim(Posterior$Lambda))
#Lambda_est = matrix(0,p,k)
G_est = E_est = matrix(0,p,p)
#traces_G = matrix(,p*(p+1)/2,sp_num)
#traces_G_cor = matrix(,p*(p+1)/2,sp_num)
#traces_E = matrix(,p*(p+1)/2,sp_num)
GMatrix = NULL
for(j in 1:sp_num) {
Lj = matrix(Posterior$Lambda[,j],p,k)
h2j = Posterior$F_h2[,j]
G_Lj = Lj %*% diag(sqrt(h2j))
#G_Lambdas[,j] = c(G_Lj)
Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
rownames(Gj) = traitnames
#posterior mean
GMatrix[[j]] = Gj
G_est = G_est + Gj/sp_num
#library(gdata)
#traces_G[,j] = lowerTriangle(Gj,diag = TRUE)
#traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE)

#E_Lj = Lj %*% diag(1-h2j) %*% t(Lj) + diag(1/Posterior$resid_Y_prec[,j])
#E_est = E_est + E_Lj/sp_num;
#posterior mean for lambda
#Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num;
#traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE)
}
#G_Lambda = matrix(rowMeans(G_Lambdas),p,k)
return(G_est)
}

G_Traces_Comp = function(BSFG_state){
Posterior = BSFG_state$Posterior
traitnames = BSFG_state$traitnames
Expand Down
143 changes: 143 additions & 0 deletions R_BSFG/Comparing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# First method
# Fixed lambda
# all the BSFG G matrix
load("~/../Desktop/G_BSFG_sc.RData")
# 5 G matrix
BSFG.sc5 = G_BSFG_sc[5]

model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
library(Rcpp)
library(RcppArmadillo)
source(paste(model_path,'BSFG_functions.R',sep='/'))
sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))

run_parameters = list(
b0 = 1,
b1 = 0.0005,
epsilon = 1e-2,
prop = 1.00,
h2_divisions = 50,
save_freq = 100,
burn = 2000, #100
thin = 400, #2
draw_iter = 200
)
# 5 BSFG_state
setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
load("BSFG_state.RData")
priors = BSFG_state$priors

YNew="setup6.RData"
YOld="setup5.RData"
BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld)
BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld)

#load("BSFG_fixedlambda65.RData")
BSFGOld = "BSFG_state.RData"
BSFGNew = "BSFG_fixedlambda65.RData"
BSFG3 =
# Comparisons:

load(BSFGOld)
spn = dim(BSFG_state$Posterior[[target]])[2]
old.n = dim(BSFG_state$data_matrices$Y)[1]
old.k = nrow(BSFG_state$Posterior[[target]])
#pos = BSFG_state$Posterior[[target]][,spn]
#pos = BSFG_state$Posterior[[target]]
old.pos.fa = BSFG_state$Posterior$F_a
old.pos.fa.rb = vector()
l=5
for (i in 1:spn){
old.pos.fa.rb = c(old.pos.fa.rb,old.pos.fa[1:(l*old.n),i])
}
#combine posterior of F_a into a big matrix

#old.pos.fa.matrix = matrix(0,nr=0,nc=l)
#for (i in 1:spn){
# old.fa.matrix = matrix(old.pos.fa[,i],nr=old.n,nc=k)
# old.pos.fa.matrix = rbind(old.pos.fa.matrix,old.fa.matrix[,1:l])
#}
#boxplot(old.pos.fa.matrix)
# combine a column which specify the latent factor


load(BSFGNew)
spn = dim(BSFG_state$Posterior[[target]])[2]
new.n = dim(BSFG_state$data_matrices$Y)[1]
new.k = nrow(BSFG_state$Posterior[[target]])
#pos = BSFG_state$Posterior[[target]][,spn]
#pos = BSFG_state$Posterior[[target]]
new.pos.fa = BSFG_state$Posterior$F_a
#combine posterior of F_a into a big matrix
l=5
new.pos.fa.rb = vector()
for (i in 1:spn){
new.pos.fa.rb = c(new.pos.fa.rb,new.pos.fa[1:(l*new.n),i])
}
#new.pos.fa.matrix = matrix(0,nr=0,nc=l)
#for (i in 1:spn){
# new.fa.matrix = matrix(new.pos.fa[,i],nr=new.n,nc=k)
# new.pos.fa.matrix = rbind(new.pos.fa.matrix,new.fa.matrix[,1:l])
#}
#boxplot(new.pos.fa.matrix)



#combine old pos.fa.matrix and new pos.fa.matrix into one dataframe
# use ggplot to plot
#it looks like we do not need so much latent factors. choose the first8 factors.
#pos.fa.matrix = cbind(rbind(old.pos.fa.matrix,new.pos.fa.matrix),rep(c("old","new"),c(old.n*spn,new.n*spn)))
# column "old", "new"
pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb),rep(c("old","new"),c(old.n*l*spn,new.n*l*spn)))
# column "factor"
pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn)))
colnames(pos.fa.b) = c("Fa","model", "factor")
pos.fa.b=as.data.frame(pos.fa.b)
#pos.fa.sample = sample(1:nrow(pos.fa.b),200*(old.n+new.n))
#pos.fa.sample = pos.fa.b[pos.fa.sample,]
#plot boxplot of distribution of Fa
library(ggplot2)
ggplot(pos.fa.b,aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+
geom_boxplot()+
geom_abline(a=0,b=0,lty=2)
# For each trait, compare the correlation of G1 vs G2

#


if (target!="F_h2"){
pos = matrix(pos,nr=n)
k = nrow(BSFG_state$Posterior[[target]])/n
}
#load data from new population
load(BSFGNew)
n = dim(BSFG_state$data_matrices$Y)[1]
star = BSFG_state$Posterior[[target]][,spn]
if (target!="F_h2"){
star = matrix(star,nr=n)
pdf(sprintf("comparing_%s_densityplot.pdf",target))
for(i in 1:k){
plot(density(pos[,i]),main = sprintf("%s %d",target,i),col = "blue",type = "l",xlab = "#obs")
# plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
lines(density(star[,i]),col="red",type = "l")
abline
legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
}
dev.off()
}else{
pdf(sprintf("comparing_%s_densityplot.pdf",target))
plot(density(pos),main = sprintf("%s",target),col = "blue",type = "l",xlab = "#obs")
# plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
lines(density(star),col="red",type = "l")
abline
legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
dev.off()
}
}

ComparingGMatrix_plot("F_h2",BSFGNew,BSFGOld)



Loading