deruncie · xinxin63 · Oct 21, 2016 · Oct 21, 2016 · Oct 21, 2016 · Oct 21, 2016
diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
@@ -43,9 +43,12 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	Lambda = matrix(0,nr = p,nc = k)
 
 	for(j in 1:p) {
+	  means = c()
+	  Qlam = c()
 		FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
-		means = FUDi %*% UtY[,j]
-		Qlam = FUDi %*% t(FtU) + diag(Plam[j,])
+		means = means + FUDi %*% UtY[,j]
+		Qlam = Qlam + FUDi %*% t(FtU) 
+	  Qlam = Qlam + diag(Plam[j,])
 
 		# recover()
 		Llam = t(chol(Qlam))
@@ -60,6 +63,59 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	return(Lambda)
 }
 
+sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) {
+
+  #Sample factor loadings Lambda while marginalizing over residual
+  #genetic effects: Y - Z_2W = F*Lambda' + E, vec(E)~N(0,kron(Psi_E,In) + kron(Psi_U, ZAZ^T))
+  #note: conditioning on F, but marginalizing over E_a.
+  #sampling is done separately by trait because each column of Lambda is
+  #independent in the conditional posterior
+  #note: invert_aI_bZAZ has parameters that diagonalize aI + bZAZ for fast
+  #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U'
+
+  p = length(resid_Y_prec[[1]])
+  k = ncol(F[[1]])
+
+  Zlams = matrix(rnorm(k*p),nr = k, nc = p)
+  Lambda = matrix(0,nr = p,nc = k)
+
+
+  for(j in 1:p) {
+    #means = matrix(0,nr=p,nc=1)
+    means = matrix(0,nr=k,nc=1)
+    Qlam = matrix(0,k,k)
+    Y_tilde = list();
+    for(pop in pops){
+      #conditioning on W, B, F, marginalizing over E _a
+      Y_tilde[[pop]] = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_2[[pop]] %*% W[[pop]]
+      Y_tilde[[pop]] = as.matrix(Y_tilde[[pop]])
+
+      U = invert_aI_bZAZ[[pop]]$U
+      s = invert_aI_bZAZ[[pop]]$s
+
+      FtU = t(F[[pop]]) %*% U
+      UtY = t(U) %*% Y_tilde[[pop]]
+
+      FUDi = E_a_prec[[pop]][j] * sweep(FtU,2,(s + E_a_prec[[pop]][j]/resid_Y_prec[[pop]][j]),'/')
+      means = means + FUDi %*% UtY[,j]
+      Qlam = Qlam + FUDi %*% t(FtU) 
+      #recover()
+    }
+    Qlam = Qlam + diag(Plam[j,])
+
+     #recover()
+    Llam = t(chol(Qlam))
+    vlam = forwardsolve(Llam,means)
+    mlam = backsolve(t(Llam),vlam)
+    ylam = backsolve(t(Llam),Zlams[,j])
+
+    Lambda[j,] = ylam + mlam
+
+  } 
+
+  return(Lambda)
+}
+
 sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) {
 	#sample factor heritibilties conditional on a given residual precision
 	#(res_precision)
@@ -201,6 +257,7 @@ sample_means = function( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDes
 	return(location_sample)
 }
 
+
 sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
 	#samples genetic effects on factors (F_a) conditional on the factor scores F:
 	# F_i = F_{a_i} + E_i, E_i~N(0,s2*(1-h2)*I) for each latent trait i
@@ -241,6 +298,8 @@ sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
 	return(F_a)
 }
 
+
+
 sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
 #Sample factor scores given factor loadings (F_a), factor heritabilities (F_h2) and
 #phenotype residuals
@@ -269,6 +328,7 @@ sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
 }
 
 
+
 sample_delta = function( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) {
 	#sample delta and tauh parameters that control the magnitudes of higher
 	#index factor loadings.
@@ -336,10 +396,12 @@ update_k = function( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_W,Lambda_df
 			tauh = cumprod(delta)
 			Plam = sweep(Lambda_prec,2,tauh,'*')
 			Lambda = cbind(Lambda,rnorm(p,0,sqrt(1/Plam[,k])))
+
 			F_h2[k] = runif(1)
 			F_a = cbind(F_a,rnorm(r,0,sqrt(F_h2[k])))
 			F = cbind(F,rnorm(n,Z_W %*% F_a[,k],sqrt(1-F_h2[k])))
-		} else if(num > 0) { # drop redundant columns
+
+			} else if(num > 0) { # drop redundant columns
 			nonred = which(vec == 0) # non-redundant loadings columns
 			Lambda = Lambda[,nonred]
 			Lambda_prec = Lambda_prec[,nonred]
@@ -403,10 +465,58 @@ save_posterior_samples = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F
 	Posterior$B   = (Posterior$B*(sp_num-1) + B)/sp_num
 	Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
 	Posterior$W   = (Posterior$W*(sp_num-1) + W)/sp_num
-
 	return(Posterior)
 }
-
+
+save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec) {
+  # save posteriors. Full traces are kept of the more interesting parameters.
+  # Only the posterior means are kept of less interesting parameters. These
+  # should be correctly calculated over several re-starts of the sampler.
+
+  # first unlist F, F_a, F_h2, Y, B, E_a
+  nl = length(F_h2)
+  F = unlist(do.call("rbind", F)) # n*k
+  F_a = unlist(do.call("rbind", F_a)) #n*k
+  F_h2 = Reduce("+", F_h2)/nl # k taking average
+  B = Reduce("+",B)/nl # b*p taking average
+  E_a = unlist(do.call("rbind", E_a)) # n*k
+  W = Reduce("+",W)/nl  # k*p taking average
+  resid_Y_prec = Reduce("+",resid_Y_prec)/nl #p taking average
+  E_a_prec = Reduce("+",E_a_prec)/nl #p
+  W_prec = Reduce("+",W_prec)/nl  #p
+
+  #sp = ncol(Posterior$Lambda)
+  sp = ncol(Posterior$Lambda)
+  #size(Posterior$Lambda,2)
+  #save factor samples
+  # change the if condition to be if sp_num == 1. So we don't need to run the update k part and we can also assign new value to out posterior variables.
+  if(sp_num==1){  
+     # expand factor sample matrix if necessary
+     Posterior$Lambda = rbind(Posterior$Lambda, 	matrix(0,nr = length(Lambda)-nrow(Posterior$Lambda),nc = sp))
+     Posterior[["F"]]      = rbind(Posterior[["F"]], 	   	matrix(0,nr = length(F)     -nrow(Posterior[["F"]]),		nc = sp))
+     Posterior[["F_a"]]    = rbind(Posterior[["F_a"]], 	matrix(0,nr = length(F_a) 	-nrow(Posterior[["F_a"]]),	nc = sp))
+     Posterior[["delta"]]  = rbind(Posterior[["delta"]], 	matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]),	nc = sp))
+     Posterior[["F_h2"]]   = rbind(Posterior[["F_h2"]], 	matrix(0,nr = length(F_h2) 	-nrow(Posterior[["F_h2"]]),	nc = sp))
+   }
+  Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda)
+  Posterior$F[1:length(F),sp_num]     = c(F)
+  Posterior$F_a[1:length(F_a),sp_num] = c(F_a)
+  Posterior$delta[1:length(delta),sp_num] = delta
+  Posterior$F_h2[1:length(F_h2),sp_num] = F_h2
+
+  Posterior$resid_Y_prec[,sp_num] = resid_Y_prec
+  Posterior$E_a_prec[,sp_num]     = E_a_prec
+  Posterior$W_prec[,sp_num]       = W_prec
+
+  # save B,U,W
+  Posterior$B   = (Posterior$B*(sp_num-1) + B)/sp_num
+  Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
+  Posterior$W   = (Posterior$W*(sp_num-1) + W)/sp_num
+
+  return(Posterior)
+}
+
+
 save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,resid_Y_prec,E_a_prec,W_prec) {
   # save posteriors. Full traces are kept of the more interesting parameters.
   # Only the posterior means are kept of less interesting parameters. These
@@ -430,12 +540,12 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re
   Posterior$E_a_prec[,j]     = E_a_prec
   Posterior$W_prec[,j]       = W_prec
 
-  # save B,U,W
+  # save U,W
 
   Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j
   Posterior$W   = (Posterior$W*(j-1) + W)/j
 
-  return(Posterior)
+  return(Posterior) 
 }
 
 
@@ -520,6 +630,54 @@ G_Matrix_Comp = function(BSFG_state){
   return(GMatrix)
 }
 
+G_Matrix_comb_Comp = function(BSFG_state){
+  Posterior      = BSFG_state$Posterior
+  traitnames     = BSFG_state$traitnames
+  run_variables  = BSFG_state$run_variables
+  sp_num = ncol(Posterior$Lambda) 
+  #n = run_variables$n
+  k = 10
+  p = run_variables$p
+  n = nrow(Posterior$F_a)/k
+  k1 = nrow(Posterior$F_a)/n;
+  k2 = nrow(Posterior$Lambda)/p;
+  if (k2 >= k1){
+    k = k1
+  }else{
+    k=k2
+  }
+  h2s = Posterior$F_h2[,1:sp_num]
+  #G_Lambdas = array(0,dim = dim(Posterior$Lambda))
+  #Lambda_est = matrix(0,p,k)
+  G_est = E_est = matrix(0,p,p)
+  #traces_G = matrix(,p*(p+1)/2,sp_num)
+  #traces_G_cor = matrix(,p*(p+1)/2,sp_num)
+  #traces_E = matrix(,p*(p+1)/2,sp_num)
+  GMatrix = NULL
+  for(j in 1:sp_num) {
+    Lj = matrix(Posterior$Lambda[,j],p,k)
+    h2j = Posterior$F_h2[,j]
+    G_Lj = Lj %*%  diag(sqrt(h2j))
+    #G_Lambdas[,j] = c(G_Lj)
+    Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
+    rownames(Gj) = traitnames
+    #posterior mean
+    GMatrix[[j]] = Gj
+    G_est = G_est + Gj/sp_num
+    #library(gdata)
+    #traces_G[,j] = lowerTriangle(Gj,diag = TRUE)
+    #traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE)
+
+    #E_Lj = Lj  %*% diag(1-h2j) %*%  t(Lj) + diag(1/Posterior$resid_Y_prec[,j])
+    #E_est = E_est + E_Lj/sp_num;
+    #posterior mean for lambda
+    #Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num;
+    #traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE)
+  }
+  #G_Lambda = matrix(rowMeans(G_Lambdas),p,k)
+  return(G_est)
+}
+
 G_Traces_Comp = function(BSFG_state){
   Posterior      = BSFG_state$Posterior
   traitnames     = BSFG_state$traitnames

diff --git a/R_BSFG/Comparing.R b/R_BSFG/Comparing.R
@@ -0,0 +1,143 @@
+# First method
+# Fixed lambda
+# all the BSFG G matrix
+load("~/../Desktop/G_BSFG_sc.RData")
+# 5 G matrix
+BSFG.sc5 = G_BSFG_sc[5]
+
+model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+library(Rcpp)
+library(RcppArmadillo)
+source(paste(model_path,'BSFG_functions.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+
+run_parameters = list(
+  b0           = 1,
+  b1           = 0.0005,
+  epsilon      = 1e-2,
+  prop         = 1.00,
+  h2_divisions = 50,
+  save_freq    = 100,
+  burn = 2000,       #100
+  thin = 400,         #2
+  draw_iter = 200
+)
+# 5 BSFG_state
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+load("BSFG_state.RData")
+priors = BSFG_state$priors
+
+YNew="setup6.RData"
+YOld="setup5.RData"
+BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld)
+BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld)
+
+#load("BSFG_fixedlambda65.RData")
+BSFGOld = "BSFG_state.RData"
+BSFGNew = "BSFG_fixedlambda65.RData"
+BSFG3 = 
+# Comparisons:
+
+  load(BSFGOld)
+  spn = dim(BSFG_state$Posterior[[target]])[2]
+  old.n   = dim(BSFG_state$data_matrices$Y)[1]
+  old.k   = nrow(BSFG_state$Posterior[[target]])
+  #pos = BSFG_state$Posterior[[target]][,spn]
+  #pos = BSFG_state$Posterior[[target]]
+  old.pos.fa = BSFG_state$Posterior$F_a
+  old.pos.fa.rb = vector()
+  l=5
+  for (i in 1:spn){
+  old.pos.fa.rb = c(old.pos.fa.rb,old.pos.fa[1:(l*old.n),i])
+  }
+  #combine posterior of F_a into a big matrix
+
+  #old.pos.fa.matrix = matrix(0,nr=0,nc=l)
+  #for (i in 1:spn){
+  #  old.fa.matrix = matrix(old.pos.fa[,i],nr=old.n,nc=k)
+  #  old.pos.fa.matrix = rbind(old.pos.fa.matrix,old.fa.matrix[,1:l])
+  #}
+  #boxplot(old.pos.fa.matrix)
+  # combine a column which specify the latent factor
+
+
+  load(BSFGNew)
+  spn = dim(BSFG_state$Posterior[[target]])[2]
+  new.n   = dim(BSFG_state$data_matrices$Y)[1]
+  new.k   = nrow(BSFG_state$Posterior[[target]])
+  #pos = BSFG_state$Posterior[[target]][,spn]
+  #pos = BSFG_state$Posterior[[target]]
+  new.pos.fa = BSFG_state$Posterior$F_a
+  #combine posterior of F_a into a big matrix
+  l=5
+  new.pos.fa.rb = vector()
+  for (i in 1:spn){
+    new.pos.fa.rb = c(new.pos.fa.rb,new.pos.fa[1:(l*new.n),i])
+  }
+  #new.pos.fa.matrix = matrix(0,nr=0,nc=l)
+  #for (i in 1:spn){
+  #  new.fa.matrix = matrix(new.pos.fa[,i],nr=new.n,nc=k)
+  #  new.pos.fa.matrix = rbind(new.pos.fa.matrix,new.fa.matrix[,1:l])
+  #}
+  #boxplot(new.pos.fa.matrix)
+
+
+
+#combine old pos.fa.matrix and new pos.fa.matrix into one dataframe 
+# use ggplot to plot
+#it looks like we do not need so much latent factors. choose the first8 factors.
+  #pos.fa.matrix = cbind(rbind(old.pos.fa.matrix,new.pos.fa.matrix),rep(c("old","new"),c(old.n*spn,new.n*spn)))
+  # column "old", "new"
+  pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb),rep(c("old","new"),c(old.n*l*spn,new.n*l*spn)))
+  # column "factor"
+  pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn)))
+  colnames(pos.fa.b) = c("Fa","model", "factor")
+  pos.fa.b=as.data.frame(pos.fa.b)
+  #pos.fa.sample = sample(1:nrow(pos.fa.b),200*(old.n+new.n))
+  #pos.fa.sample = pos.fa.b[pos.fa.sample,]
+  #plot boxplot of distribution of Fa
+  library(ggplot2)
+  ggplot(pos.fa.b,aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+
+    geom_boxplot()+
+    geom_abline(a=0,b=0,lty=2)
+  # For each trait, compare the correlation of G1 vs G2
+
+  # 
+
+
+  if (target!="F_h2"){
+    pos = matrix(pos,nr=n)
+    k   = nrow(BSFG_state$Posterior[[target]])/n
+  }  
+  #load data from new population
+  load(BSFGNew)
+  n   = dim(BSFG_state$data_matrices$Y)[1]
+  star = BSFG_state$Posterior[[target]][,spn]    
+  if (target!="F_h2"){
+    star = matrix(star,nr=n)
+    pdf(sprintf("comparing_%s_densityplot.pdf",target))
+    for(i in 1:k){
+      plot(density(pos[,i]),main = sprintf("%s %d",target,i),col = "blue",type = "l",xlab = "#obs")
+      # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
+      lines(density(star[,i]),col="red",type = "l")
+      abline
+      legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
+    }
+    dev.off()
+  }else{
+    pdf(sprintf("comparing_%s_densityplot.pdf",target))
+    plot(density(pos),main = sprintf("%s",target),col = "blue",type = "l",xlab = "#obs")
+    # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
+    lines(density(star),col="red",type = "l")
+    abline
+    legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
+    dev.off()
+  }
+}
+
+ComparingGMatrix_plot("F_h2",BSFGNew,BSFGOld)
+
+
+