From 9a95fd339bd4c01512fe4ec267e440c22e82e25e Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 21 Oct 2016 10:50:43 -0700
Subject: [PATCH 01/18] subtract the mean of itself

---
 R_BSFG/fast_BSFG_sampler_init_fixedlambda.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R b/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R
index b22b3fc..a092137 100644
--- a/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R
+++ b/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R
@@ -71,7 +71,7 @@ fast_BSFG_sampler_init_fixedlambda = function(priors,run_parameters,YNew,YOld){
     load(paste0("../",YOld))
     Y_old = setup$Y
 
-    Mean_Y  = colMeans(Y_old,na.rm=T)
+    Mean_Y  = colMeans(Y,na.rm=T) # subtract the mean of newY if we only care about the variance of F_a
     VY      = apply(Y_old,2,var,na.rm=T)
  
   

From 8568f75a0f5b6b4ad0f49f959ed7015b460919f1 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 21 Oct 2016 10:53:51 -0700
Subject: [PATCH 02/18] do not fix B matrix

---
 R_BSFG/BSFG_functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index 41d430a..59f8560 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -430,7 +430,7 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re
   Posterior$E_a_prec[,j]     = E_a_prec
   Posterior$W_prec[,j]       = W_prec
   
-  # save B,U,W
+  # save U,W
 
   Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j
   Posterior$W   = (Posterior$W*(j-1) + W)/j

From 07eee42526091a87ab9cec26c4e51675a8d28c4f Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 21 Oct 2016 10:54:33 -0700
Subject: [PATCH 03/18] adding options for saving G_est

---
 R_BSFG/plotting_diagnostics.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R_BSFG/plotting_diagnostics.R b/R_BSFG/plotting_diagnostics.R
index e2f6cf2..a8344f1 100644
--- a/R_BSFG/plotting_diagnostics.R
+++ b/R_BSFG/plotting_diagnostics.R
@@ -238,7 +238,7 @@ draw_results_diagnostics = function(sp_num,params,run_variables,Lambda, F_h2, Po
         }
         G_Lambda = matrix(rowMeans(G_Lambdas),p,k)
         #dev.off()
-
+        save(G_est,file = "G_est.RData")
         # Figure of posterior mean estimates
         #dev.set(devices[4])
         #pdf('Figure of posterior mean estimates.pdf')
@@ -380,3 +380,4 @@ ComparingGMatrix_plot = function(target){
   }
 }
 
+

From 2965af18d12eb076dcb3d2a51ee3e9c6f9f4a22b Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 21 Oct 2016 10:55:22 -0700
Subject: [PATCH 04/18] changes of ways to plots

---
 R_BSFG/plottting_comparing.R | 93 +++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 23 deletions(-)

diff --git a/R_BSFG/plottting_comparing.R b/R_BSFG/plottting_comparing.R
index e4ac4f6..15ab005 100644
--- a/R_BSFG/plottting_comparing.R
+++ b/R_BSFG/plottting_comparing.R
@@ -5,11 +5,12 @@ BSFGStar = "BSFG_fixedlambda75.RData"
 target = "F_h2"
 #it looks like we do not need so much latent factors. choose the first l factors.
 #choose the first l latent traits and make them a vector for all iterations 
-l=5  # 2:6
+l=5 # 2:6
 pos_fa = function(spn,pos.fa,n){
   pos.fa.rb = vector()
   for (i in 1:spn){
-    pos.fa.rb = c(pos.fa.rb,pos.fa[1:(l*n),i])
+    #pos.fa.rb = c(pos.fa.rb,pos.fa[1:(l*n),i])
+    pos.fa.rb = c(pos.fa.rb,pos.fa[1:l,i]) #F_h2
   }
   return(pos.fa.rb)}
 # old
@@ -17,53 +18,69 @@ load(BSFGOld)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 old.n   = dim(BSFG_state$data_matrices$Y)[1]
 old.k   = nrow(BSFG_state$Posterior[[target]])
-old.pos.fa = BSFG_state$Posterior$F_a
+#old.pos.fa = BSFG_state$Posterior$F_a
+old.pos.fh2 = BSFG_state$Posterior$F_h2
 
-old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n)
+#old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n)
+old.pos.fh2.rb = pos_fa(spn,old.pos.fh2,old.n)
 # new
 load(BSFGNew)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 new.n   = dim(BSFG_state$data_matrices$Y)[1]
 new.k   = nrow(BSFG_state$Posterior[[target]])
-new.pos.fa = BSFG_state$Posterior$F_a
-new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n)
+#new.pos.fa = BSFG_state$Posterior$F_a
+new.pos.fh2 = BSFG_state$Posterior$F_h2
+#new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n)
+new.pos.fh2.rb = pos_fa(spn,new.pos.fh2,new.n)
 # star : represent the third comparison
 load(BSFGStar)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 star.n   = dim(BSFG_state$data_matrices$Y)[1]
 star.k   = nrow(BSFG_state$Posterior[[target]])
-star.pos.fa = BSFG_state$Posterior$F_a
-star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n)
+#star.pos.fa = BSFG_state$Posterior$F_a
+star.pos.fh2 = BSFG_state$Posterior$F_h2
+#star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n)
+star.pos.fh2.rb = pos_fa(spn,star.pos.fh2,star.n)
 # combine
 # column "old", "new", "star"
-pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb,star.pos.fa.rb),rep(c("old","new","star"),c(old.n*l*spn,new.n*l*spn,star.n*l*spn)))
+#pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb,star.pos.fa.rb),rep(c("old","new","star"),c(old.n*l*spn,new.n*l*spn,star.n*l*spn)))
+pos.fh2.b = data.frame(c(old.pos.fh2.rb,new.pos.fh2.rb,star.pos.fh2.rb),rep(c("old","new","star"),c(l*spn,l*spn,l*spn)))
 # column "factor"
-pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn),rep(rep(sprintf("f%d",1:l),each=star.n),spn)))
-colnames(pos.fa.b) = c("Fa","model", "factor")
-pos.fa.b=as.data.frame(pos.fa.b)
+#pos.fa.b = data.frame(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn),rep(rep(sprintf("f%d",1:l),each=star.n),spn)))
+pos.fh2.b = data.frame(pos.fh2.b,c(rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d",1:l),spn)))
+#colnames(pos.fa.b) = c("Fa","model", "factor")
+colnames(pos.fh2.b) = c("Fh2","model", "factor")
+#pos.fa.b=as.data.frame(pos.fa.b)
 #save(pos.fa.b,file="pos_fa_bind.RData")
-load("pos_fa_bind.RData")
+save(pos.fh2.b,file="pos_fh2_bind.RData")
+#load("pos_fa_bind.RData")
+load("pos_fh2_bind.RData")
+
 # plot
 # 1.boxplot of distribution of Fa
 library(ggplot2)
-ggplot(pos.fa.b, aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+
-  geom_boxplot()
-  #geom_tile("The distribution of Latent traits")
+ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+
+  geom_boxplot()+
+  ggtitle("The distribution of Latent traits")
   #geom_abline(intercept=0,slope=0,lty=2)
 
-ggplot(pos.fa.b)+
-  geom_density(aes(x = factor, y = as.numeric(Fa), color=model))
-  #geom_tile("The distribution of Latent traits")
+ggplot(pos.fh2.b,aes( x = Fh2, color=model))+
+  geom_density()+
+  facet_wrap(~factor,scales = "free_y")+
+  ggtitle("The distribution of Latent traits")
+
 # 2.For each trait, compare the correlation of G1 vs G2
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
 load(BSFGOld)
-old.traces.G = G_Traces_Comp(BSFG_state)
+#old.traces.G = G_Traces_Comp(BSFG_state)
 old.G = G_Matrix_Comp(BSFG_state)
 p = BSFG_state$run_variables$p
 # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
 Lambda = BSFG_state$Posterior$Lambda
 load(BSFGNew)
+load(BSFGStar)
 BSFG_state$Posterior$Lambda = Lambda
-new.traces.G = G_Traces_Comp(BSFG_state)
+#new.traces.G = G_Traces_Comp(BSFG_state)
 new.G = G_Matrix_Comp(BSFG_state)
 #For all the elements in G
 #pairs(t(old.traces.G)~t(new.traces.G))
@@ -72,6 +89,10 @@ old.G = array(unlist(old.G),dim = c(nrow(old.G[[1]]),ncol(old.G[[1]]),length(old
 new.G = array(unlist(new.G),dim = c(nrow(new.G[[1]]),ncol(new.G[[1]]),length(new.G)))
 old.G.b = apply(old.G,2,rbind)
 new.G.b = apply(new.G,2,rbind)
+save(old.G.b,file = "old_G_b.RData")
+load("old_G_b.RData")
+save(new.G.b,file = "new_G_b.RData")
+load("new_G_b.RData")
 #plot(old.G.b[,2],new.G.b[,2])
 #combine old.G and new.G as a dataframe
 #G.b = c(old.G.b[,2],new.G.b[,2])
@@ -79,6 +100,7 @@ new.G.b = apply(new.G,2,rbind)
 #G.b = as.data.frame(G.b)
 #colnames(G.b) = c("covariance","model")
 # 1st triat
+
 each_trait_G_pair = function(i){
 G.b = cbind(old.G.b[,i],new.G.b[,i])
 G.b = as.data.frame(G.b)
@@ -87,11 +109,36 @@ library(ggplot2)
 ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+
   #geom_dotplot(binwidth = 15)
   geom_point()+
-  geom_abline(intercept = 0,slope = 1)
+  geom_abline(intercept = 0,slope = 1)+
+  ggtitle(paste0(BSFG_state$traitnames[i]))
 }
+pdf("each_trait_G_pair.pdf")
+#par(mfrow = c(3,3))
+for (i in 1:p){
+  each_trait_G_pair(i)
+}
+dev.off()
 each_trait_G_pair(1)
 each_trait_G_pair(2)
 each_trait_G_pair(3)
+each_trait_G_pair(4)
+each_trait_G_pair(5)
 # 3.delta z
 # The distance of two matrix(in predicted phenotype value/level)
-
+#selection gradient
+beta = 0.3609
+beta_v = c(0,beta,rep(0,16))
+traces_traits = matrix(,p,sp_num)
+for(j in 1:sp_num) {
+  Lj = matrix(Posterior$Lambda[,j],p,k)
+  h2j = Posterior$F_h2[,j]
+  G_Lj = Lj %*%  diag(sqrt(h2j))
+  Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
+  Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY)))
+  traces_traits[,j] = Gj%*%beta_v
+}
+boxplot(t(traces_traits))
+save(traces_traits,file = 'traces_traits.RData')
+# For each G matrix
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/19/19/Lambda1.5_delta2shape3")
+load("G_BSFG_sc.RData")

From e22f2719de9e7c2596b3f822f81f0d8515f6faa1 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 9 Dec 2016 14:42:08 -0800
Subject: [PATCH 05/18] newest version update

---
 R_BSFG/BSFG_functions.R               |  15 +-
 R_BSFG/Comparing.R                    | 143 ++++++++
 R_BSFG/G_matrix.R                     | 114 ++++++
 R_BSFG/G_matrix_output.R              |   2 +-
 R_BSFG/Sim_2/model_setup_generation.R |  84 +++++
 R_BSFG/code_comparing_plotting4.R     | 277 +++++++++++++++
 R_BSFG/deltaZ_posterior_mean.R        | 147 ++++++++
 R_BSFG/factor_3Dplot.R                |  49 ++-
 R_BSFG/fast_BSFG_sampler_current.R    |  22 +-
 R_BSFG/model_setup.R                  |  43 ++-
 R_BSFG/model_setup_fixedlambda.R      |  39 ++
 R_BSFG/model_setup_generation.R       |  70 ++++
 R_BSFG/plottting_comparing.R          | 489 +++++++++++++++++++++++---
 R_BSFG/setup_pedigree.R               |   6 +-
 14 files changed, 1400 insertions(+), 100 deletions(-)
 create mode 100644 R_BSFG/Comparing.R
 create mode 100644 R_BSFG/G_matrix.R
 create mode 100644 R_BSFG/Sim_2/model_setup_generation.R
 create mode 100644 R_BSFG/code_comparing_plotting4.R
 create mode 100644 R_BSFG/deltaZ_posterior_mean.R
 create mode 100644 R_BSFG/model_setup_fixedlambda.R
 create mode 100644 R_BSFG/model_setup_generation.R

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index 59f8560..ff340d0 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -43,9 +43,14 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	Lambda = matrix(0,nr = p,nc = k)
 
 	for(j in 1:p) {
-		FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
-		means = FUDi %*% UtY[,j]
-		Qlam = FUDi %*% t(FtU) + diag(Plam[j,])
+	  means = c()
+	  Qlam = c()
+	  for(pop in pops){
+		  FUDi = E_a_prec[[pop]][j] * sweep(FtU[[pop]],2,(s[[pop]] + E_a_prec[[pop]][j]/resid_Y_prec[j]),'/')
+		  means = means + FUDi %*% UtY[[pop]][,j]
+		  Qlam = Qlam + FUDi %*% t(FtU[[pop]]) 
+		}
+	  Qlam = Qlam + diag(Plam[j,])
 
 		# recover()
 		Llam = t(chol(Qlam))
@@ -336,10 +341,12 @@ update_k = function( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_W,Lambda_df
 			tauh = cumprod(delta)
 			Plam = sweep(Lambda_prec,2,tauh,'*')
 			Lambda = cbind(Lambda,rnorm(p,0,sqrt(1/Plam[,k])))
+			
 			F_h2[k] = runif(1)
 			F_a = cbind(F_a,rnorm(r,0,sqrt(F_h2[k])))
 			F = cbind(F,rnorm(n,Z_W %*% F_a[,k],sqrt(1-F_h2[k])))
-		} else if(num > 0) { # drop redundant columns
+		
+			} else if(num > 0) { # drop redundant columns
 			nonred = which(vec == 0) # non-redundant loadings columns
 			Lambda = Lambda[,nonred]
 			Lambda_prec = Lambda_prec[,nonred]
diff --git a/R_BSFG/Comparing.R b/R_BSFG/Comparing.R
new file mode 100644
index 0000000..895ab8e
--- /dev/null
+++ b/R_BSFG/Comparing.R
@@ -0,0 +1,143 @@
+# First method
+# Fixed lambda
+# all the BSFG G matrix
+load("~/../Desktop/G_BSFG_sc.RData")
+# 5 G matrix
+BSFG.sc5 = G_BSFG_sc[5]
+
+model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+library(Rcpp)
+library(RcppArmadillo)
+source(paste(model_path,'BSFG_functions.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+
+run_parameters = list(
+  b0           = 1,
+  b1           = 0.0005,
+  epsilon      = 1e-2,
+  prop         = 1.00,
+  h2_divisions = 50,
+  save_freq    = 100,
+  burn = 2000,       #100
+  thin = 400,         #2
+  draw_iter = 200
+)
+# 5 BSFG_state
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+load("BSFG_state.RData")
+priors = BSFG_state$priors
+
+YNew="setup6.RData"
+YOld="setup5.RData"
+BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld)
+BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld)
+
+#load("BSFG_fixedlambda65.RData")
+BSFGOld = "BSFG_state.RData"
+BSFGNew = "BSFG_fixedlambda65.RData"
+BSFG3 = 
+# Comparisons:
+
+  load(BSFGOld)
+  spn = dim(BSFG_state$Posterior[[target]])[2]
+  old.n   = dim(BSFG_state$data_matrices$Y)[1]
+  old.k   = nrow(BSFG_state$Posterior[[target]])
+  #pos = BSFG_state$Posterior[[target]][,spn]
+  #pos = BSFG_state$Posterior[[target]]
+  old.pos.fa = BSFG_state$Posterior$F_a
+  old.pos.fa.rb = vector()
+  l=5
+  for (i in 1:spn){
+  old.pos.fa.rb = c(old.pos.fa.rb,old.pos.fa[1:(l*old.n),i])
+  }
+  #combine posterior of F_a into a big matrix
+
+  #old.pos.fa.matrix = matrix(0,nr=0,nc=l)
+  #for (i in 1:spn){
+  #  old.fa.matrix = matrix(old.pos.fa[,i],nr=old.n,nc=k)
+  #  old.pos.fa.matrix = rbind(old.pos.fa.matrix,old.fa.matrix[,1:l])
+  #}
+  #boxplot(old.pos.fa.matrix)
+  # combine a column which specify the latent factor
+  
+  
+  load(BSFGNew)
+  spn = dim(BSFG_state$Posterior[[target]])[2]
+  new.n   = dim(BSFG_state$data_matrices$Y)[1]
+  new.k   = nrow(BSFG_state$Posterior[[target]])
+  #pos = BSFG_state$Posterior[[target]][,spn]
+  #pos = BSFG_state$Posterior[[target]]
+  new.pos.fa = BSFG_state$Posterior$F_a
+  #combine posterior of F_a into a big matrix
+  l=5
+  new.pos.fa.rb = vector()
+  for (i in 1:spn){
+    new.pos.fa.rb = c(new.pos.fa.rb,new.pos.fa[1:(l*new.n),i])
+  }
+  #new.pos.fa.matrix = matrix(0,nr=0,nc=l)
+  #for (i in 1:spn){
+  #  new.fa.matrix = matrix(new.pos.fa[,i],nr=new.n,nc=k)
+  #  new.pos.fa.matrix = rbind(new.pos.fa.matrix,new.fa.matrix[,1:l])
+  #}
+  #boxplot(new.pos.fa.matrix)
+  
+ 
+  
+#combine old pos.fa.matrix and new pos.fa.matrix into one dataframe 
+# use ggplot to plot
+#it looks like we do not need so much latent factors. choose the first8 factors.
+  #pos.fa.matrix = cbind(rbind(old.pos.fa.matrix,new.pos.fa.matrix),rep(c("old","new"),c(old.n*spn,new.n*spn)))
+  # column "old", "new"
+  pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb),rep(c("old","new"),c(old.n*l*spn,new.n*l*spn)))
+  # column "factor"
+  pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn)))
+  colnames(pos.fa.b) = c("Fa","model", "factor")
+  pos.fa.b=as.data.frame(pos.fa.b)
+  #pos.fa.sample = sample(1:nrow(pos.fa.b),200*(old.n+new.n))
+  #pos.fa.sample = pos.fa.b[pos.fa.sample,]
+  #plot boxplot of distribution of Fa
+  library(ggplot2)
+  ggplot(pos.fa.b,aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+
+    geom_boxplot()+
+    geom_abline(a=0,b=0,lty=2)
+  # For each trait, compare the correlation of G1 vs G2
+  
+  # 
+  
+  
+  if (target!="F_h2"){
+    pos = matrix(pos,nr=n)
+    k   = nrow(BSFG_state$Posterior[[target]])/n
+  }  
+  #load data from new population
+  load(BSFGNew)
+  n   = dim(BSFG_state$data_matrices$Y)[1]
+  star = BSFG_state$Posterior[[target]][,spn]    
+  if (target!="F_h2"){
+    star = matrix(star,nr=n)
+    pdf(sprintf("comparing_%s_densityplot.pdf",target))
+    for(i in 1:k){
+      plot(density(pos[,i]),main = sprintf("%s %d",target,i),col = "blue",type = "l",xlab = "#obs")
+      # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
+      lines(density(star[,i]),col="red",type = "l")
+      abline
+      legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
+    }
+    dev.off()
+  }else{
+    pdf(sprintf("comparing_%s_densityplot.pdf",target))
+    plot(density(pos),main = sprintf("%s",target),col = "blue",type = "l",xlab = "#obs")
+    # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs")
+    lines(density(star),col="red",type = "l")
+    abline
+    legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1)
+    dev.off()
+  }
+}
+
+ComparingGMatrix_plot("F_h2",BSFGNew,BSFGOld)
+
+
+
diff --git a/R_BSFG/G_matrix.R b/R_BSFG/G_matrix.R
new file mode 100644
index 0000000..4b5756c
--- /dev/null
+++ b/R_BSFG/G_matrix.R
@@ -0,0 +1,114 @@
+CovToCor = function(X){
+  # Normalize a covariance matrix into a correlation matrix
+  corX=diag(1/sqrt(diag(X))) %*% X %*% diag(1/sqrt(diag(X)));
+  return(corX)
+}
+
+run_variables  = BSFG_state$run_variables
+Posterior      = BSFG_state$Posterior
+E_a_prec       = BSFG_state$current_state$E_a_prec
+resid_Y_prec   = BSFG_state$current_state$resid_Y_prec
+traitnames     = BSFG_state$traitnames
+
+sp_num = ncol(BSFG_state$Posterior$Lambda)   
+p = run_variables$p
+k = nrow(Posterior$Lambda)/p;
+
+G_Lambdas = array(0,dim = dim(Posterior$Lambda))
+Lambda_est = matrix(0,p,k)
+G_est = E_est = matrix(0,p,p)
+traces_G = matrix(,p*(p+1)/2,sp_num)
+diag_G = matrix(,p,sp_num)
+traces_G_cor = matrix(,p*(p+1)/2,sp_num)
+traces_E = matrix(,p*(p+1)/2,sp_num)
+diag_E = matrix(,p,sp_num)
+
+
+
+for(j in 1:sp_num) {
+  Lj = matrix(Posterior$Lambda[,j],p,k)
+  h2j = Posterior$F_h2[,j]
+  G_Lj = Lj %*%  diag(sqrt(h2j))
+  G_Lambdas[,j] = c(G_Lj)
+  Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
+  G_est = G_est + Gj/sp_num
+  traces_traits[,j] = Gj%*%beta_v
+  library(gdata)
+  traces_G[,j] = lowerTriangle(Gj,diag = TRUE)
+  diag_G[,j] = diag(Gj)
+  traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE)
+  
+  E_Lj = Lj  %*% diag(1-h2j) %*%  t(Lj) + diag(1/Posterior$resid_Y_prec[,j])
+  E_est = E_est + E_Lj/sp_num;
+  Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num;
+  traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE)
+  diag_E[,j] = diag(E_Lj)
+}
+G_Lambda = matrix(rowMeans(G_Lambdas),p,k)
+
+
+
+load("C:/Users/Xin~/Desktop/setup_LineCode1.RData")
+Y = setup$Y
+VY = apply(Y,2,var,na.rm=T)
+
+#selection gradient
+beta = 0.3609
+beta_v = c(0,beta,rep(0,16))
+traces_traits = matrix(,p,sp_num)
+for(j in 1:sp_num) {
+  Lj = matrix(Posterior$Lambda[,j],p,k)
+  h2j = Posterior$F_h2[,j]
+  G_Lj = Lj %*%  diag(sqrt(h2j))
+  Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
+  Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY)))
+  traces_traits[,j] = Gj%*%beta_v
+}
+boxplot(t(traces_traits))
+save(traces_traits,file = 'traces_traits.RData')
+
+#heritabiity of posterior mean
+G_est1 = sqrt(diag(VY))%*%G_est%*%t(sqrt(diag(VY)))
+save(G_est1,file = "G_est_posterior.RData")
+
+E_est1 = sqrt(diag(VY))%*%E_est%*%t(sqrt(diag(VY)))
+heritability = diag(G_est1)/diag(G_est1+E_est1)
+names(heritability) = traitnames
+plot(heritability)
+
+#heritability confidence interval 95%  
+diag_E1 = sweep(diag_E,1,VY,'*')
+diag_G1 = sweep(diag_G,1,VY,'*')
+P = diag_G1+diag_E1
+trace_h2 = matrix(mapply(function(x,y)x/y, diag_G1,P),nr=p)
+
+CI_h2 = t(apply(trace_h2,1,function(x)quantile(x,c(0.025,0.975))))
+q_h2 = cbind(CI_h2,heritability)
+colnames(q_h2)[3] = "posterior"
+save(q_h2, file = "q_h2.RData")
+
+#G_matrix confidence interval 95%
+CI_G = t(apply(traces_G,1,function(x)quantile(x,c(0.025,0.975))))
+q_G = cbind(CI_G,lowerTriangle(G_est,diag = TRUE))
+colnames(q_G)[3] = "posterior"
+
+#2.5%
+G_low = matrix(,nr=p,nc=p)
+G_low[lower.tri(G_low,diag=TRUE)]=q_G[,1]
+G_low[upper.tri(G_low)]=upperTriangle(t(G_low))
+G_low1 = sqrt(diag(VY))%*%G_low%*%t(sqrt(diag(VY)))
+save(G_low1,file = "G_est_low.RData")
+#97.5%
+G_high = matrix(,nr=p,nc=p)
+G_high[lower.tri(G_high,diag=TRUE)]=q_G[,2]
+G_high[upper.tri(G_high)]=upperTriangle(t(G_high))
+G_high1 = sqrt(diag(VY))%*%G_high%*%t(sqrt(diag(VY)))
+save(G_high1,file = "G_est_high.RData")
+
+library(lattice)
+levelplot(CovToCor(G_est1))
+
+load("C:/Users/Xin~/Desktop/Gmatrix_MCMCglmm.r")
+library(gdata)
+plot(upperTriangle(G_est1),upperTriangle(G),xlab = "BSFG",ylab = "MCMCglm")
+lines(x = c(-2,100), y = c(-2,100))
diff --git a/R_BSFG/G_matrix_output.R b/R_BSFG/G_matrix_output.R
index 8c9be4d..71aef1e 100644
--- a/R_BSFG/G_matrix_output.R
+++ b/R_BSFG/G_matrix_output.R
@@ -21,7 +21,7 @@ setwd(sprintf('~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/%d/%d',i,i))
 setwd("Lambda1.5_delta2shape3")
 load("BSFG_state.RData")
 # Scaled G Matrix
-G_BSFG_sc[[i]] = G_Matrix_Comp(BSFG_state)
+G_BSFG_sc[i] = G_Matrix_Comp(BSFG_state)
 G_est_all = G_BSFG_sc[[i]]
 sp_num = length(G_est_all)
 p = BSFG_state$run_variables$P
diff --git a/R_BSFG/Sim_2/model_setup_generation.R b/R_BSFG/Sim_2/model_setup_generation.R
new file mode 100644
index 0000000..cd739a8
--- /dev/null
+++ b/R_BSFG/Sim_2/model_setup_generation.R
@@ -0,0 +1,84 @@
+setwd("~/Runcie Lab/pedigree")
+
+data=read.csv('IDDammSirelnUpdate2.csv',header = TRUE)
+source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R")
+setup=setup_pedigree_GeneLine(data,GenerationCode=c(0,1),LineCode=1)
+
+
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG")
+model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+library(Rcpp)
+library(RcppArmadillo)
+source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/'))
+#source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/'))
+source(paste(model_path,'BSFG_functions.R',sep='/'))
+source(paste(model_path,'plotting_diagnostics.R',sep='/'))
+#source(paste(model_path,'setup_pedigree.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+#source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+#source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/'))
+
+setwd('Sim_3')
+
+folder = "generation0"
+folder = "Line1Gene1"
+folder = "Line1Gene01"
+try(dir.create(folder))
+setwd(folder)
+
+# initialize priors
+run_parameters = list(
+  b0           = 1,
+  b1           = 0.0005,
+  epsilon      = 1e-2,
+  prop         = 1.00,
+  h2_divisions = 50,
+  save_freq    = 100,
+  burn = 2000,       #100
+  thin = 400,         #2
+  draw_iter = 200
+)
+
+#load("./priors.RData")
+priors = list(
+  k_init              =   20,
+  resid_Y_prec_shape  =   2,
+  resid_Y_prec_rate   =   1/10,
+  E_a_prec_shape      =   2,
+  E_a_prec_rate       =   1/10,
+  W_prec_shape        =   2,
+  W_prec_rate         =   1/10,
+  Lambda_df           =   1.5,         # 3/2, 3, 1
+  delta_1_shape       =   2.1,
+  delta_1_rate        =   1/20,
+  delta_2_shape       =   3,           # 3, 3/2, 9/2
+  delta_2_rate        =   1,
+  h2_priors_factors   =   c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1))
+)
+
+print('Initializing')
+save(priors,file = 'Priors.RData')
+
+# Initialize Chain, prep runs
+BSFG_state = fast_BSFG_sampler_init(priors,run_parameters)
+save(BSFG_state,file="BSFG_state.RData")
+
+n_samples = 4000;
+for(i  in 1:40) {
+  print(sprintf('Run %d',i))
+  BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples)
+  print(i)
+} 
+#filenames = list.files("~/Runcie Lab/pedigree",pattern="*.RData")
+#setwd("~/Runcie Lab/pedigree")
+#allfiles = lapply(filenames,function(x){get(load(x))})
+#names(allfiles) = sapply(filenames ,function(x)substr(x,1,(nchar(x)-6)))
+
+for (i in 1:4){
+  load(filenames[i])
+}
+
+a = load(filenames[1])
+  A=  get(a)
+  
\ No newline at end of file
diff --git a/R_BSFG/code_comparing_plotting4.R b/R_BSFG/code_comparing_plotting4.R
new file mode 100644
index 0000000..b64c288
--- /dev/null
+++ b/R_BSFG/code_comparing_plotting4.R
@@ -0,0 +1,277 @@
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC")
+MCMC = readRDS("G_MCMC.Rdata")
+
+traitnames = rownames(MCMC[[1]][[1]])
+
+old.G = MCMC[[5]]
+
+p = 18
+spn = 2000
+
+new.G = MCMC[[1]]
+
+star.G = MCMC[[8]]
+
+four.G = MCMC[[12]]
+
+# The distance of two matrix(in predicted phenotype value/level)
+#selection gradient
+beta = 0.3609
+beta.v = c(0,beta,rep(0,16))
+traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+#old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+#new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+four.traces.traits = sapply(four.G,function(x)x%*%beta.v)
+#star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+#boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+#boxplot(t(new.traces.traits))
+# use ggplot 
+traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits),c(four.traces.traits)))
+traces.traits.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn))
+
+traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star","four"))
+#traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old","four"))
+
+traces.traits.df$traits = rep(traitnames,4*spn)
+names(traces.traits.df) = c("response","model","traits")
+
+summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
+                      conf.interval=.95, .drop=TRUE) {
+  library(plyr)
+  
+  # New version of length which can handle NA's: if na.rm==T, don't count them
+  length2 <- function (x, na.rm=FALSE) {
+    if (na.rm) sum(!is.na(x))
+    else       length(x)
+  }
+  
+  # This does the summary. For each group's data frame, return a vector with
+  # N, mean, and sd
+  datac <- ddply(data, groupvars, .drop=.drop,
+                 .fun = function(xx, col) {
+                   c(
+                     q1 = quantile   (xx[[col]],0.025, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     q2   = quantile     (xx[[col]],0.975, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     mean = mean   (xx[[col]], na.rm=na.rm)
+                     
+                   )
+                 },
+                 measurevar
+  )
+  
+  # Rename the "mean" column    
+  datac <- rename(datac, c("mean" = measurevar))
+  
+  #datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
+  
+  # Confidence interval multiplier for standard error
+  # Calculate t-statistic for confidence interval: 
+  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
+  #ciMult <- qt(conf.interval/2 + .5, datac$N-1)
+  #datac$ci <- datac$se * ciMult
+  
+  return(datac)
+}
+
+traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits"))
+names(traces.traits.df.sum) = c("model","traits","q1","q2","response")
+
+
+#save(traces.traits.df.sum,file = "plot6.RData")
+
+
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+# geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+pd <- position_dodge(width = 0.6)
+#load("plot5.RData")
+ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)
+
+
+#5. PCA decomposition
+
+old.traces.eigen = sapply(old.G, function(x)eigen(x)$values)
+new.traces.eigen = sapply(new.G, function(x)eigen(x)$values)
+star.traces.eigen = sapply(star.G, function(x)eigen(x)$values)
+four.traces.eigen = sapply(four.G, function(x)eigen(x)$values)
+
+traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen),c(four.traces.eigen)))
+traces.eigen.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn))
+
+traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star","four"))
+#traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old","four"))
+
+traces.eigen.df$lambda = rep(paste0("PC",1:18),4*spn)
+names(traces.eigen.df) = c("response","model","PC")
+
+traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC"))
+names(traces.eigen.df.sum) = c("model","PC","q1","q2","response")
+
+# reorder the PC value in the data
+traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18))
+
+# size dataframe
+old.traces.size = apply(old.traces.eigen,2,sum)
+new.traces.size = apply(new.traces.eigen,2,sum)
+star.traces.size = apply(star.traces.eigen,2,sum)
+four.traces.size = apply(four.traces.eigen,2,sum)
+size.df = data.frame(old.traces.size,new.traces.size,star.traces.size,four.traces.size)
+library(reshape2)
+size.df = melt(size.df,value.name = "Size")
+size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size","four.traces.size"))
+
+
+old.traces.pc1c = apply(old.traces.eigen,2,function(x)sum(x[2])/sum(x))
+new.traces.pc1c = apply(new.traces.eigen,2,function(x)sum(x[2])/sum(x))
+star.traces.pc1c = apply(star.traces.eigen,2,function(x)sum(x[2])/sum(x))
+four.traces.pc1c = apply(four.traces.eigen,2,function(x)sum(x[2])/sum(x))
+
+# PC1
+old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x))
+new.traces.pc1c = apply(new.traces.eigen,2,function(x)x[1]/sum(x))
+star.traces.pc1c = apply(star.traces.eigen,2,function(x)x[1]/sum(x))
+four.traces.pc1c = apply(four.traces.eigen,2,function(x)x[1]/sum(x))
+pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c,four.traces.pc1c)
+library(reshape2)
+pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution")
+pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c","four.traces.pc1c"))
+
+
+# size of each model 
+library(dplyr)
+traces.eigen.df.sum %>%
+  group_by(model) %>%
+  summarize(size = sum(response))
+
+library(ggplot2)
+ggplot(size.df, aes(x="",y=Size, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+    theme(axis.title.x = element_blank())
+
+
+ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+  theme(axis.title.x = element_blank())
+
+
+pd <- position_dodge(width = 0.6)
+
+ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+
+   ylab("Variance")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  xlab("Principle Components")
+
+
+# 6. angle and length of deltaZ and first eigenvector
+
+# distribution within population
+dist_within_theta = function(traces.traits){
+  theta = vector(,spn)
+  for (i in 1:spn){
+    pairs = randcomb(1:spn, 2)
+    a = traces.traits[,pairs[1]]
+    b = traces.traits[,pairs[2]]
+    a.length = sqrt(sum(a * a)) 
+    b.length = sqrt(sum(b * b)) 
+    theta[i] = acos(sum(a*b) / (a.length * b.length))
+  }
+  return(theta)
+}
+
+# first eigen
+old.traces.vector = sapply(old.G, function(x)eigen(x)$vector[,1])
+new.traces.vector = sapply(new.G, function(x)eigen(x)$vector[,1])
+star.traces.vector = sapply(star.G, function(x)eigen(x)$vector[,1])
+four.traces.vector = sapply(four.G, function(x)eigen(x)$vector[,1])
+
+old.theta = dist_within_theta(old.traces.vector)
+new.theta = dist_within_theta(new.traces.vector)
+star.theta = dist_within_theta(star.traces.vector)
+four.theta = dist_within_theta(four.traces.vector)
+# deltaZ
+old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean
+new.theta = dist_within_theta(new.traces.traits)
+star.theta = dist_within_theta(star.traces.traits)
+four.theta = dist_within_theta(four.traces.traits)
+
+# distribution between population
+dist_between_theta = function(Atraces.traits,Btraces.traits){
+  theta = vector(,spn)
+  for (i in 1:spn){
+    pairs = randcomb(1:spn, 2)
+    a = Atraces.traits[,pairs[1]]
+    b = Btraces.traits[,pairs[2]]
+    a.length = sqrt(sum(a * a)) 
+    b.length = sqrt(sum(b * b)) 
+    theta[i] = acos(sum(a*b) / (a.length * b.length))
+  }
+  return(theta)
+}
+
+# first eigen vector
+old.new.theta = dist_between_theta(old.traces.vector,new.traces.vector)
+# K-S test
+#ks.test(old.theta,old.new.theta) #different
+old.star.theta = dist_between_theta(old.traces.vector,star.traces.vector)
+four.star.theta = dist_between_theta(four.traces.vector,star.traces.vector)
+new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector)
+new.four.theta = dist_between_theta(new.traces.vector,four.traces.vector)
+old.four.theta = dist_between_theta(old.traces.vector,four.traces.vector)
+
+# deltaZ
+old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits)
+# K-S test
+#ks.test(old.theta,old.new.theta) #different
+old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits)
+new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits)
+#--------------------------------
+# plot
+theta.df = data.frame(old.theta,new.theta,star.theta,four.theta, old.new.theta, old.star.theta, new.star.theta,four.star.theta,new.four.theta,old.four.theta)
+
+library(reshape2)
+theta.df = melt(theta.df, value.name = "Degree")
+names(theta.df) = c("labels", "theta")
+ggplot(theta.df, aes(x = theta, col=labels))+
+  geom_density(aes(fill=labels),alpha=0.1)
+#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"),
+#                     values=rainbow(6))
+
+#-------------------------------
+# 7.length of deltaZ
+
+length_deltaZ = function(traces.traits){
+  #posmean.deltaZ = apply(traces.traits,1,mean)
+  #deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) 
+  deltaZ.length = apply(traces.traits,2,function(x)sqrt(sum(x * x)))
+  return(deltaZ.length)
+}
+old.deltaZ.length = length_deltaZ(old.traces.traits)
+new.deltaZ.length = length_deltaZ(new.traces.traits)
+star.deltaZ.length = length_deltaZ(star.traces.traits)
+four.deltaZ.length = length_deltaZ(four.traces.traits)
+
+deltaZ.length.df = data.frame(old.deltaZ.length,new.deltaZ.length,star.deltaZ.length,four.deltaZ.length)
+library(reshape2)
+deltaZ.length.df = melt(deltaZ.length.df,value.name = "Length")
+deltaZ.length.df$variable = factor(deltaZ.length.df$variable,levels = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length","four.deltaZ.length"))
+
+
+
+ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+   theme(axis.title.x = element_blank())
+ 
+
diff --git a/R_BSFG/deltaZ_posterior_mean.R b/R_BSFG/deltaZ_posterior_mean.R
new file mode 100644
index 0000000..5810972
--- /dev/null
+++ b/R_BSFG/deltaZ_posterior_mean.R
@@ -0,0 +1,147 @@
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+BSFGOld = "BSFG_state.RData"
+#BSFGNew = "BSFG_fixedlambda65.RData"
+#BSFGStar = "BSFG_fixedlambda75.RData"
+BSFGNew = "BSFG_state6.RData"
+BSFGStar = "BSFG_state7.RData"
+target = "F_h2"
+load(BSFGOld)
+traitnames = BSFG_state$traitnames
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+spn = dim(BSFG_state$Posterior[[target]])[2]
+# posterior mean
+# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
+#Lambda = BSFG_state$Posterior$Lambda
+load(BSFGNew)
+#BSFG_state$Posterior$Lambda = Lambda
+new.G = G_Matrix_Comp(BSFG_state)
+
+load(BSFGStar)
+#BSFG_state$Posterior$Lambda = Lambda
+star.G = G_Matrix_Comp(BSFG_state)
+#new.traces.G = G_Traces_Comp(BSFG_state)
+
+
+# The distance of two matrix(in predicted phenotype value/level)
+#selection gradient
+beta = 0.3609
+beta.v = c(0,beta,rep(0,16))
+traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+#old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+#new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+#star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+#boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+#boxplot(t(new.traces.traits))
+# use ggplot 
+traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits)))
+traces.traits.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn))
+
+traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star"))
+traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old"))
+
+traces.traits.df$traits = rep(traitnames,3*spn)
+names(traces.traits.df) = c("response","model","traits")
+
+summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
+                      conf.interval=.95, .drop=TRUE) {
+  library(plyr)
+  
+  # New version of length which can handle NA's: if na.rm==T, don't count them
+  length2 <- function (x, na.rm=FALSE) {
+    if (na.rm) sum(!is.na(x))
+    else       length(x)
+  }
+  
+  # This does the summary. For each group's data frame, return a vector with
+  # N, mean, and sd
+  datac <- ddply(data, groupvars, .drop=.drop,
+                 .fun = function(xx, col) {
+                   c(
+                     q1 = quantile   (xx[[col]],0.025, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     q2   = quantile     (xx[[col]],0.975, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     mean = mean   (xx[[col]], na.rm=na.rm)
+                     
+                   )
+                 },
+                 measurevar
+  )
+  
+  # Rename the "mean" column    
+  datac <- rename(datac, c("mean" = measurevar))
+  
+  #datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
+  
+  # Confidence interval multiplier for standard error
+  # Calculate t-statistic for confidence interval: 
+  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
+  #ciMult <- qt(conf.interval/2 + .5, datac$N-1)
+  #datac$ci <- datac$se * ciMult
+  
+  return(datac)
+}
+
+traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits"))
+names(traces.traits.df.sum) = c("model","traits","q1","q2","response")
+
+
+save(traces.traits.df.sum,file = "plot6.RData")
+
+
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+# geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+pd <- position_dodge(width = 0.6)
+load("plot5.RData")
+plot5 <- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (F3)",color = "G (F3)")
+
+
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+# geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+#pd <- position_dodge(width = 0.3)
+load("plot6.RData")
+plot6<- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (Control)",color = "G (Control)")
+
+
+plots1=list()
+  plots1=lapply(c(1,3),function(i) get(paste0("plot",i)))
+plots2=list()
+plots2=lapply(c(2,4),function(i) get(paste0("plot",i)))
+plots3=list()
+plots3=lapply(c(5,6),function(i) get(paste0("plot",i)))
+plots4=list(g11,g22) 
+library(gridExtra)
+ggsave("BSFG_comparison_plots1.pdf", marrangeGrob(grobs = plots1, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("BSFG_comparison_plots2.pdf", marrangeGrob(grobs = plots2, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("BSFG_comparison_plots3.pdf", marrangeGrob(grobs = plots3, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("EigenvaluesComparison.pdf", marrangeGrob(grobs = plots4, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
diff --git a/R_BSFG/factor_3Dplot.R b/R_BSFG/factor_3Dplot.R
index 8caa2de..95407c9 100644
--- a/R_BSFG/factor_3Dplot.R
+++ b/R_BSFG/factor_3Dplot.R
@@ -1,5 +1,6 @@
-setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_test')
-setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_fixedlambda')
+#setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_test')
+#setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_fixedlambda')
+setwd('~/../Desktop/5/Lambda1.5_delta2shape3')
 
 #function for plotting 3D plot
 rgl_add_axes <- function(x, y, z, axis.col = "grey",
@@ -46,13 +47,13 @@ F_a_old = matrix(F_a_old[,ncol(F_a_old)],nr=n_old)
 #Y_sd = apply(na.omit(BSFG_state$data_matrices$Y),2,sd)
 #F_a_old = apply(F_a_old,2,function(x) x*Y_sd+Y_mean)
 
-load('BSFG_fixedlambda2.RData')
+load('BSFG_fixedlambda65.RData')
 F_a_new2 = BSFG_state$Posterior$F_a
 n_new2 = BSFG_state$run_variables$n
 F_a_new2 = matrix(F_a_new2[,ncol(F_a_new2)],nr=n_new2)
 #F_a_new = F_a_new*Y_sd+Y_mean
 
-load('BSFG_fixedlambda3.RData')
+load('BSFG_fixedlambda75.RData')
 F_a_new3 = BSFG_state$Posterior$F_a
 n_new3 = BSFG_state$run_variables$n
 F_a_new3 = matrix(F_a_new3[,ncol(F_a_new3)],nr=n_new3)
@@ -61,7 +62,9 @@ F_a_new3 = matrix(F_a_new3[,ncol(F_a_new3)],nr=n_new3)
 
 
 
-#=======================================================
+#===========================================
+#---------- 3D plot Function----------------
+#===========================================
 factor_3Dplot = function(x1,x2,x3){
   
   if(exists('n_new3')){
@@ -94,15 +97,19 @@ factor_3Dplot = function(x1,x2,x3){
   movie3d(spin3d(axis = c(0, 0, 1)), duration = 10,dir = getwd())
 }
 
-#-------------------------------------------
-#plot~~~~~
+#---------------------------------
+#------------- 3D plot -----------
+#---------------------------------
 #latent factor we choose
 x1=1
-x2=5
-x3=10
+x2=2
+x3=3
 factor_3Dplot(x1,x2,x3)
-
-#How much contribution for each of these factors
+#---------------------------------
+#---Contribution of ith Factor----
+#---------------------------------
+# How much contribution for each of these factors
+# You can load different BSFG_state here
 load('BSFG_state.RData')
 p = BSFG_state$run_variables$p
 
@@ -113,8 +120,28 @@ Lambda = matrix(Lambda[,ncol(Lambda)],nr=p)
 Lambda_x1 = t(Lambda[,x1])%*%Lambda[,x1]
 Lambda_x2 = t(Lambda[,x2])%*%Lambda[,x2]
 Lambda_x3 = t(Lambda[,x3])%*%Lambda[,x3]
+
+# The way to get G_est:
+# Run plot_diagnostics(BSFG_state)
+# Be careful: The BSFG_state is the one you load in line113
+# a file called 'G_est.RData' will be saved in the current work-directory
+# load('G_est.RData')
 S = sum(diag(G_est))
+
 #The proportion of total sample variance explained by the i-th factor
 Lambda_x1/S
 Lambda_x2/S
 Lambda_x3/S
+
+#===================================
+#--------  Unscale G_est------------
+#===================================
+# Load setup dataset which is used for BSFG_state you load before
+load('../setup5.RData')
+Y = setup$Y
+n = nrow(Y)
+p = ncol(Y)
+Y_missing = is.na(Y)        # matrix recording missing data in Y
+Mean_Y = colMeans(Y,na.rm=T)
+VY = apply(Y,2,var,na.rm=T)
+G_est_unscale =sqrt(diag(VY))%*%G_est%*%sqrt(diag(VY))
diff --git a/R_BSFG/fast_BSFG_sampler_current.R b/R_BSFG/fast_BSFG_sampler_current.R
index efd1c14..497daf2 100644
--- a/R_BSFG/fast_BSFG_sampler_current.R
+++ b/R_BSFG/fast_BSFG_sampler_current.R
@@ -185,7 +185,7 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) {
     # recover()
     
     # -----Sample Lambda------------------ #
-    #conditioning on W, B, F, marginalizing over E_a
+    #conditioning on W, B, F, marginalizing over E _a
     Y_tilde = Y - X %*% B - Z_2 %*% W
     Y_tilde = as.matrix(Y_tilde)
     # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
@@ -201,15 +201,17 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) {
     
     # -----Sample B and E_a--------------- #
     #conditioning on W, F, Lambda
-    Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W
-    Y_tilde = as.matrix(Y_tilde)
-    # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
-    location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
-      B   = location_sample[1:b,]
-      if(length(B)==p){
-        B = matrix(0,nr=0,nc=p)
-      }
-    E_a = location_sample[b+(1:r),]
+    for��pop in pops){
+      Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W
+      Y_tilde = as.matrix(Y_tilde)
+      # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
+      location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
+        B   = location_sample[1:b,]
+        if(length(B)==p){
+          B = matrix(0,nr=0,nc=p)
+        }
+      E_a = location_sample[b+(1:r),]
+    }
     
     # -----Sample W ---------------------- #
     #conditioning on B, E_a, F, Lambda
diff --git a/R_BSFG/model_setup.R b/R_BSFG/model_setup.R
index bffa643..af1f218 100644
--- a/R_BSFG/model_setup.R
+++ b/R_BSFG/model_setup.R
@@ -1,32 +1,38 @@
 
-setwd("~/Runcie Lab/SparseFactorMixedModel/R_BSFG")
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG")
 #model_path = '~/Box Sync/DER_projects/BSFG/R_BSFG'
 #model_path = "../SparseFactorMixedModel/R_BSFG"
-model_path = "~/Runcie Lab/SparseFactorMixedModel/R_BSFG"
+#model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+
+model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
 source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/'))
 source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/'))
 source(paste(model_path,'BSFG_functions.R',sep='/'))
-source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
-source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
 source(paste(model_path,'plotting_diagnostics.R',sep='/'))
+source(paste(model_path,'setup_pedigree.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/'))
+
 library(Rcpp)
 library(RcppArmadillo)
 
-sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
 
 
 # set the directory to the location of the setup.RData or setup.mat file
 #setwd('Example_simulation')
-setwd('Sim_1')
+setwd('1')
 # choose a seed for the random number generator. This can be a random seed (for analysis), or you can choose your seed so that 
 # you can repeat the MCMC exactly
 #seed = sample(1:1e3,1)
 #set.seed(seed)
 
 # create a folder for holding the posterior samples of the current chain (multiple folders could be used for different chains)
-rep = "Lambda1.5_delta2shape4.5"   #original
+folder = "Lambda1.5_delta2shape3"   #original
 #rep = "fixedlambda1"
-#rep = "test9"
+rep = "test666"
+folder = "generation0"
 folder = sprintf('R_rep_%s',rep)
 try(dir.create(folder))
 setwd(folder)
@@ -54,10 +60,10 @@ priors = list(
     E_a_prec_rate       =   1/10,
     W_prec_shape        =   2,
     W_prec_rate         =   1/10,
-    Lambda_df           =   3,         # 3/2, 3, 1
+    Lambda_df           =   1.5,         # 3/2, 3, 1
     delta_1_shape       =   2.1,
     delta_1_rate        =   1/20,
-    delta_2_shape       =   4.5,           # 3, 3/2, 9/2
+    delta_2_shape       =   3,           # 3, 3/2, 9/2
     delta_2_rate        =   1,
     h2_priors_factors   =   c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1))
 )
@@ -83,30 +89,33 @@ save(BSFG_state,file="BSFG_state.RData")
 # Run Gibbs sampler. Run in smallish chunks. Output can be used to re-start chain where it left off.
 # c1=fix(clock);
 n_samples = 4000;
-for(i  in 1:50) {
+for(i  in 1:1) {
     print(sprintf('Run %d',i))
     BSFG_state = fast_BSFG_sampler_current(BSFG_state,n_samples)
+    BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples)
     print(i)
 } 
 
 ## function only for plotting
+setwd('~/Runcie Lab/SparseFactorMixedModel/R_BSFG/R_rep/R_tune_5')
 load("BSFG_state.RData")
 plot_diagnostics(BSFG_state)
 
 #fixed lambda
 #load("BSFG_state.RData")
 #priors = BSFG_state$priors
-BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew="setup_LineCode1.RData",YOld="setup_LineCode1.RData")
+BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew="setup_LineCode2.RData",YOld="setup_LineCode1.RData")
 
 load("BSFG_fixedlambda.RData")
 BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state)
 
 
 
+setwd('~/Runcie Lab/SparseFactorMixedModel/R_BSFG/Sim_1/R_rep_test')
+ComparingGMatrix_plot("F_a")
 
-ComparingGMatrix_plot("F_h2")
-
-
-
-
+setwd('~/../Desktop')
+load('BSFG_state.RData')
 
+data = read.csv('ScentDataAll (2).csv')
+setup = setup_pedigree(data,3)
diff --git a/R_BSFG/model_setup_fixedlambda.R b/R_BSFG/model_setup_fixedlambda.R
new file mode 100644
index 0000000..615723b
--- /dev/null
+++ b/R_BSFG/model_setup_fixedlambda.R
@@ -0,0 +1,39 @@
+#model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+model_path="."
+library(Rcpp)
+library(RcppArmadillo)
+source(paste(model_path,'BSFG_functions.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+
+run_parameters = list(
+  b0           = 1,
+  b1           = 0.0005,
+  epsilon      = 1e-2,
+  prop         = 1.00,
+  h2_divisions = 50,
+  save_freq    = 100,
+  burn = 2000,       #100
+  thin = 400,         #2
+  draw_iter = 200
+)
+
+# For the 1st comparation: choose 5 as old BSFG_state. Pairs(5,6; 5,7)
+# For the 2nd comparation: choose 5 as old BSFG_state. Pairs(5,14; 5,17)
+setwd('~/../Desktop/5/Lambda1.5_delta2shape3')
+# fixed lambda
+# load 'BSFG_state' as the old BSFG_state
+load("BSFG_state.RData")
+priors = BSFG_state$priors
+
+# YNew and YOld dataset should be in the upper directory of current dir
+# 'setup datasets' will have the same file names by using setup_pedigree.R function :'setup.RData'. 
+# Need to change the names of 'setup dataset' files as the example below
+YNew="setup6.RData"
+YOld="setup5.RData"
+BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld)
+
+# load 'BSFG_fixedlambda' as the new BSFG_state
+BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld)
+load("BSFG_fixedlambda.RData")
diff --git a/R_BSFG/model_setup_generation.R b/R_BSFG/model_setup_generation.R
new file mode 100644
index 0000000..2cf6708
--- /dev/null
+++ b/R_BSFG/model_setup_generation.R
@@ -0,0 +1,70 @@
+setwd("~/Runcie Lab/pedigree")
+
+data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE)
+source("~/Runcie Lab/pedigree/setup_pedigree_GeneLine.R")
+setup=setup_pedigree_GeneLine(data,GenerationCode=0,LineCode=c(1,2,3))
+
+
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG")
+model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG"
+source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/'))
+source(paste(model_path,'BSFG_functions.R',sep='/'))
+source(paste(model_path,'plotting_diagnostics.R',sep='/'))
+source(paste(model_path,'setup_pedigree.R',sep='/'))
+sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/'))
+source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/'))
+
+library(Rcpp)
+library(RcppArmadillo)
+
+setwd('Sim_2')
+folder = "generation0"
+try(dir.create(folder))
+setwd(folder)
+
+# initialize priors
+run_parameters = list(
+  b0           = 1,
+  b1           = 0.0005,
+  epsilon      = 1e-2,
+  prop         = 1.00,
+  h2_divisions = 50,
+  save_freq    = 100,
+  burn = 2000,       #100
+  thin = 400,         #2
+  draw_iter = 200
+)
+
+#load("./priors.RData")
+priors = list(
+  k_init              =   20,
+  resid_Y_prec_shape  =   2,
+  resid_Y_prec_rate   =   1/10,
+  E_a_prec_shape      =   2,
+  E_a_prec_rate       =   1/10,
+  W_prec_shape        =   2,
+  W_prec_rate         =   1/10,
+  Lambda_df           =   1.5,         # 3/2, 3, 1
+  delta_1_shape       =   2.1,
+  delta_1_rate        =   1/20,
+  delta_2_shape       =   3,           # 3, 3/2, 9/2
+  delta_2_rate        =   1,
+  h2_priors_factors   =   c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1))
+)
+
+print('Initializing')
+save(priors,file = 'Priors.RData')
+
+# Initialize Chain, prep runs
+BSFG_state = fast_BSFG_sampler_init(priors,run_parameters)
+save(BSFG_state,file="BSFG_state.RData")
+
+n_samples = 4000;
+for(i  in 1:10) {
+  print(sprintf('Run %d',i))
+  BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples)
+  print(i)
+} 
diff --git a/R_BSFG/plottting_comparing.R b/R_BSFG/plottting_comparing.R
index 15ab005..1e76a67 100644
--- a/R_BSFG/plottting_comparing.R
+++ b/R_BSFG/plottting_comparing.R
@@ -1,11 +1,38 @@
 setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
-BSFGOld = "BSFG_state.RData"
-BSFGNew = "BSFG_fixedlambda65.RData"
-BSFGStar = "BSFG_fixedlambda75.RData"
+BSFGOld = "BSFG_state1.RData"
+BSFGNew = "BSFG_state2.RData"
+BSFGStar = "BSFG_state3.RData"
 target = "F_h2"
+
+#function change list into a array
+L_to_A = function(l){
+  l.to.a = array(unlist(l),dim = c(nrow(l[[1]]),ncol(l[[1]]),length(l)))
+  return(l.to.a)
+}
+
+#To interpret lambda/latent factors
+#Lambda.array = L_to_A(BSFG_state$Posterior$Lambda)
+#take posterior mean
+Lambda.pos = apply(Lambda,1,mean)
+Lambda.pos = matrix(Lambda.pos,nr=p)
+
+Lambda.pos[,2]=(abs(Lambda.pos[,1])+abs(Lambda.pos[,2]))/2
+Lambda.pos=Lambda.pos[,-1]
+
+rownames(Lambda.pos) = BSFG_state$traitnames
+pdf("interpret_lambda1.pdf")
+par(mfrow=c(3,3))
+for (i in 1:17){
+  names(Lambda.pos[,i]) = rownames(Lambda.pos)
+plot(Lambda.pos[,i], xlab = paste0(i),col=1:18,)
+  
+abline(0,0)
+}
+dev.off()
+save(Lambda.pos,file="LambdaMatrix.RData")
 #it looks like we do not need so much latent factors. choose the first l factors.
 #choose the first l latent traits and make them a vector for all iterations 
-l=5 # 2:6
+l=8 # 2:6
 pos_fa = function(spn,pos.fa,n){
   pos.fa.rb = vector()
   for (i in 1:spn){
@@ -17,28 +44,33 @@ pos_fa = function(spn,pos.fa,n){
 load(BSFGOld)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 old.n   = dim(BSFG_state$data_matrices$Y)[1]
-old.k   = nrow(BSFG_state$Posterior[[target]])
+old.k   = nrow(BSFG_state$Posterior[[target]])-1
 #old.pos.fa = BSFG_state$Posterior$F_a
 old.pos.fh2 = BSFG_state$Posterior$F_h2
-
+old.pos.fh2[2,] = (old.pos.fh2[1,]+old.pos.fh2[2,])/2
+old.pos.fh2 = old.pos.fh2[-1,]
 #old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n)
 old.pos.fh2.rb = pos_fa(spn,old.pos.fh2,old.n)
 # new
 load(BSFGNew)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 new.n   = dim(BSFG_state$data_matrices$Y)[1]
-new.k   = nrow(BSFG_state$Posterior[[target]])
+new.k   = nrow(BSFG_state$Posterior[[target]])-1
 #new.pos.fa = BSFG_state$Posterior$F_a
 new.pos.fh2 = BSFG_state$Posterior$F_h2
+new.pos.fh2[2,] = (new.pos.fh2[1,]+new.pos.fh2[2,])/2
+new.pos.fh2 = new.pos.fh2[-1,]
 #new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n)
 new.pos.fh2.rb = pos_fa(spn,new.pos.fh2,new.n)
 # star : represent the third comparison
 load(BSFGStar)
 spn = dim(BSFG_state$Posterior[[target]])[2]
 star.n   = dim(BSFG_state$data_matrices$Y)[1]
-star.k   = nrow(BSFG_state$Posterior[[target]])
+star.k   = nrow(BSFG_state$Posterior[[target]])-1
 #star.pos.fa = BSFG_state$Posterior$F_a
 star.pos.fh2 = BSFG_state$Posterior$F_h2
+star.pos.fh2[2,] = (star.pos.fh2[1,]+star.pos.fh2[2,])/2
+star.pos.fh2 = star.pos.fh2[-1,]
 #star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n)
 star.pos.fh2.rb = pos_fa(spn,star.pos.fh2,star.n)
 # combine
@@ -52,32 +84,155 @@ pos.fh2.b = data.frame(pos.fh2.b,c(rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d"
 colnames(pos.fh2.b) = c("Fh2","model", "factor")
 #pos.fa.b=as.data.frame(pos.fa.b)
 #save(pos.fa.b,file="pos_fa_bind.RData")
-save(pos.fh2.b,file="pos_fh2_bind.RData")
+#save(pos.fh2.b,file="pos_fh2_bind.RData")
 #load("pos_fa_bind.RData")
-load("pos_fh2_bind.RData")
+#load("pos_fh2_bind.RData")
+pos.fh2.b$model = factor(pos.fh2.b$model, level=c("old","new","star"))
+pos.fh2.b$model = factor(pos.fh2.b$model, level=c("star","new","old"))
+summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
+                      conf.interval=.95, .drop=TRUE) {
+  library(plyr)
+  
+  # New version of length which can handle NA's: if na.rm==T, don't count them
+  length2 <- function (x, na.rm=FALSE) {
+    if (na.rm) sum(!is.na(x))
+    else       length(x)
+  }
+  
+  # This does the summary. For each group's data frame, return a vector with
+  # N, mean, and sd
+  datac <- ddply(data, groupvars, .drop=.drop,
+                 .fun = function(xx, col) {
+                   c(
+                     q0 = min(xx[[col]], na.rm=na.rm),
+                     q1 = quantile   (xx[[col]],0.025, na.rm=na.rm),
+                     q2 = median(xx[[col]], na.rm=na.rm),
+                      q3   = quantile     (xx[[col]],0.975, na.rm=na.rm),
+                     q4 = max  (xx[[col]], na.rm=na.rm)
+                     
+                   )
+                 },
+                 measurevar
+  )
+  
+  # Rename the "mean" column    
+ # datac <- rename(datac, c("mmm" = measurevar))
+  
+  #datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
+  
+  # Confidence interval multiplier for standard error
+  # Calculate t-statistic for confidence interval: 
+  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
+  #ciMult <- qt(conf.interval/2 + .5, datac$N-1)
+  #datac$ci <- datac$se * ciMult
+  
+  return(datac)
+}
+
+pos.fh2.b.sum = summarySE(pos.fh2.b,measurevar = "Fh2",groupvars = c("model","factor"))
+
+names(pos.fh2.b.sum) = c("model","factor","q0","q1" ,"q2","q3","q4" )
+
+save(pos.fh2.b,file = "plot3.RData")
+
+save(pos.fh2.b.sum,file="plot4.RData")
 
 # plot
-# 1.boxplot of distribution of Fa
+# 1.boxplot of distribution of Fh2
 library(ggplot2)
-ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+
-  geom_boxplot()+
-  ggtitle("The distribution of Latent traits")
+load("plot1.RData")
+plot1 <- ggplot(pos.fh2.b, aes(x = factor, y = Fh2, fill=model,color=model))+
+  geom_boxplot(alpha = 0.6,position="dodge")+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  scale_fill_manual(breaks =rev(c("star","new","old")),
+                    values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  ylab("Variance")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (F3)",color = "G (F3)")
+
+
+# 95% boxplot
+load("plot2.RData")
+plot2<- ggplot(pos.fh2.b.sum, aes(x = factor, color=model,fill=model))+
+  geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  scale_fill_manual(breaks =rev(c("star","new","old")),
+                    values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  #geom_boxplot(alpha = 0.5)+
+  ylab("Variance")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (F3)",color = "G (F3)")
+
   #geom_abline(intercept=0,slope=0,lty=2)
+load("plot3.RData")
+plot3<- ggplot(pos.fh2.b, aes(x = factor,y = Fh2, color=model,fill=model))+
+  geom_boxplot(alpha = 0.5)+
+  #geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  scale_fill_manual(breaks = c("star","new","old"),
+                    values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Variance")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+
+  labs(fill = "G (Control)",color = "G (Control)")
 
-ggplot(pos.fh2.b,aes( x = Fh2, color=model))+
-  geom_density()+
-  facet_wrap(~factor,scales = "free_y")+
-  ggtitle("The distribution of Latent traits")
+load("plot4.RData")
+plot4 <- ggplot(pos.fh2.b.sum, aes(x = factor, color=model,fill=model))+
+  #geom_boxplot(alpha = 0.5)+
+  geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  scale_fill_manual(breaks = c("star","new","old"),
+                    values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Variance")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+
+  labs(fill = "G (Control)",color = "G (Control)")
+# percentage og heribility
+load('BSFG_state.RData')
+p = BSFG_state$run_variables$p
 
+Lambda = BSFG_state$Posterior$Lambda
+Lambda = matrix(Lambda[,ncol(Lambda)],nr=p)
+old.G = G_Matrix_Comp(BSFG_state)
+
+G_pos = function(G){
+G = array(unlist(G),dim = c(nrow(G[[1]]),ncol(G[[1]]),length(G)))
+G.pos = apply(G,c(1,2),mean)
+return(G.pos)
+}
+old.G.pos = G_pos(old.G)
+#the size of x1, x2, x3 column
+
+#Contribution of ith Factor
+con_factor = function(l,G.pos){
+Lambda_l = t(Lambda[,l])%*%Lambda[,l]
+S = sum(diag(G.pos))
+return(Lambda_l/S)
+}
+
+con_factor(3,old.G.pos)
+sapply(1:8,function(x)con_factor(x,old.G.pos))
 # 2.For each trait, compare the correlation of G1 vs G2
 source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
 load(BSFGOld)
 #old.traces.G = G_Traces_Comp(BSFG_state)
 old.G = G_Matrix_Comp(BSFG_state)
 p = BSFG_state$run_variables$p
+# posterior mean
 # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
 Lambda = BSFG_state$Posterior$Lambda
 load(BSFGNew)
+load("BSFG_state7.RData")
 load(BSFGStar)
 BSFG_state$Posterior$Lambda = Lambda
 #new.traces.G = G_Traces_Comp(BSFG_state)
@@ -85,14 +240,16 @@ new.G = G_Matrix_Comp(BSFG_state)
 #For all the elements in G
 #pairs(t(old.traces.G)~t(new.traces.G))
 #For each trait
+
 old.G = array(unlist(old.G),dim = c(nrow(old.G[[1]]),ncol(old.G[[1]]),length(old.G)))
 new.G = array(unlist(new.G),dim = c(nrow(new.G[[1]]),ncol(new.G[[1]]),length(new.G)))
-old.G.b = apply(old.G,2,rbind)
-new.G.b = apply(new.G,2,rbind)
-save(old.G.b,file = "old_G_b.RData")
-load("old_G_b.RData")
-save(new.G.b,file = "new_G_b.RData")
-load("new_G_b.RData")
+old.G.pos = apply(old.G,c(1,2),mean)
+new.G.pos = apply(new.G,c(1,2),mean)
+#new.G.b = apply(new.G,2,rbind)
+#save(old.G.pos,file = "old_G_b.RData")
+#load("old_G_b.RData")
+#save(new.G.pos,file = "new_G_b.RData")
+#load("new_G_b.RData")
 #plot(old.G.b[,2],new.G.b[,2])
 #combine old.G and new.G as a dataframe
 #G.b = c(old.G.b[,2],new.G.b[,2])
@@ -101,44 +258,266 @@ load("new_G_b.RData")
 #colnames(G.b) = c("covariance","model")
 # 1st triat
 
-each_trait_G_pair = function(i){
-G.b = cbind(old.G.b[,i],new.G.b[,i])
-G.b = as.data.frame(G.b)
-colnames(G.b) = c("Old","New")
-library(ggplot2)
-ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+
+#variance
+par(mfrow=c(1,2))
+plot(diag(new.G.pos),diag(old.G.pos),main = "Variance")
+abline(0,1)
+#covariance
+diag(new.G.pos)=NA
+diag(old.G.pos)=NA
+plot(old.G.pos,new.G.pos, main="Covariance")
+abline(0,1)
+par(mfrow=c(1,1))
+#each_trait_G_pair = function(i){
+#G.b = cbind(old.G.b[,i],new.G.b[,i])
+#G.b = as.data.frame(G.b)
+#colnames(G.b) = c("Old","New")
+#library(ggplot2)
+#ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+
   #geom_dotplot(binwidth = 15)
-  geom_point()+
-  geom_abline(intercept = 0,slope = 1)+
-  ggtitle(paste0(BSFG_state$traitnames[i]))
-}
-pdf("each_trait_G_pair.pdf")
+#  geom_point()+
+#  geom_abline(intercept = 0,slope = 1)+
+#  ggtitle(paste0(BSFG_state$traitnames[i]))
+#}
+#pdf("each_trait_G_pair.pdf")
 #par(mfrow = c(3,3))
-for (i in 1:p){
-  each_trait_G_pair(i)
-}
-dev.off()
-each_trait_G_pair(1)
-each_trait_G_pair(2)
-each_trait_G_pair(3)
-each_trait_G_pair(4)
-each_trait_G_pair(5)
+#for (i in 1:p){
+#  each_trait_G_pair(i)
+#}
+#dev.off()
+#each_trait_G_pair(1)
+#each_trait_G_pair(2)
+#each_trait_G_pair(3)
+#each_trait_G_pair(4)
+#each_trait_G_pair(5)
+
 # 3.delta z
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+load(BSFGOld)
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+# posterior mean
+# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
+Lambda = BSFG_state$Posterior$Lambda
+load(BSFGNew)
+load("BSFG_state6.RData")
+load(BSFGStar)
+BSFG_state$Posterior$Lambda = Lambda
+#new.traces.G = G_Traces_Comp(BSFG_state)
+new.G = G_Matrix_Comp(BSFG_state)
+
 # The distance of two matrix(in predicted phenotype value/level)
 #selection gradient
 beta = 0.3609
-beta_v = c(0,beta,rep(0,16))
-traces_traits = matrix(,p,sp_num)
-for(j in 1:sp_num) {
-  Lj = matrix(Posterior$Lambda[,j],p,k)
-  h2j = Posterior$F_h2[,j]
-  G_Lj = Lj %*%  diag(sqrt(h2j))
-  Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
-  Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY)))
-  traces_traits[,j] = Gj%*%beta_v
-}
-boxplot(t(traces_traits))
+beta.v = c(0,beta,rep(0,16))
+#traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+boxplot(t(new.traces.traits))
+
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),each=18),type = "o")
 save(traces_traits,file = 'traces_traits.RData')
 # For each G matrix
 setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/19/19/Lambda1.5_delta2shape3")
 load("G_BSFG_sc.RData")
+
+ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+
+  geom_boxplot(alpha = 0.5)+
+  scale_color_manual(breaks = c("old", "new", "star"),
+                     values=c("green", "red", "blue"))+
+  scale_fill_manual(breaks = c("old", "new", "star"),
+                    values=c("green", "red", "blue"))+
+  ggtitle("The distribution of variance (Latent traits)")+
+  scale_color_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+
+  scale_fill_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+
+  ylab("Variance")+
+  geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd)
+
+#4. E(beta(G1-G2)beta)
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+load(BSFGOld)
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+# posterior mean
+# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
+Lambda = BSFG_state$Posterior$Lambda
+load(BSFGNew)
+load(BSFGStar)
+BSFG_state$Posterior$Lambda = Lambda
+#new.traces.G = G_Traces_Comp(BSFG_state)
+load("BSFG_state6.RData")
+new.G = G_Matrix_Comp(BSFG_state)
+load("BSFG_state7.RData")
+star.G = G_Matrix_Comp(BSFG_state)
+beta = 0.3609
+beta.v = as.vector(c(0,beta,rep(0,16)))
+# calcualte mean diff response value
+diff.traces = mapply(function(x,y) t(beta.v)%*%t(x-y)%*%(x-y)%*%beta.v, old.G,new.G)
+boxplot(diff.traces)
+mean(sqrt(diff.traces))
+
+#5. PCA decomposition
+
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+load(BSFGOld)
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+traitnames = BSFG_state$traitnames
+#load(BSFGNew)
+load("BSFG_state2.RData")
+new.G = G_Matrix_Comp(BSFG_state)
+load("BSFG_state3.RData")
+star.G = G_Matrix_Comp(BSFG_state)
+
+old.traces.eigen = sapply(old.G, function(x)eigen(x)$values)
+new.traces.eigen = sapply(new.G, function(x)eigen(x)$values)
+star.traces.eigen = sapply(star.G, function(x)eigen(x)$values)
+
+traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen)))
+traces.eigen.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn))
+
+traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star"))
+traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old"))
+
+traces.eigen.df$lambda = rep(paste0("PC",1:18),3*spn)
+names(traces.eigen.df) = c("response","model","PC")
+
+traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC"))
+names(traces.eigen.df.sum) = c("model","PC","q1","q2","response")
+
+# reorder the PC value in the data
+traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18))
+
+
+
+save(traces.eigen.df.sum , file = "eigen567.RData")
+save(traces.eigen.df.sum, file = "eigen51417.RData")
+
+library(ggplot2)
+pd <- position_dodge(width = 0.6)
+load("eigen567.RData")
+
+# size of each model
+library(dplyr)
+traces.eigen.df.sum %>%
+  group_by(model) %>%
+  summarize(size = sum(response))
+
+g11<-ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  ylab("Variance")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  xlab("Principle Components")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(color = "G (F3)")
+
+
+
+load("eigen51417.RData")
+# size of each model
+library(dplyr)
+traces.eigen.df.sum %>%
+  group_by(model) %>%
+  summarize(size = sum(response))
+
+g22<-ggplot(traces.eigen.df.sum,aes(x = reorder(PC,PC), y = response, color=model))+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+
+  labs(color = "G (Control)")+
+  xlab("Principle Components")+
+  ylab("Variance")
+# statistical test for the difference
+# a.ANOVA test
+class(traces.eigen.df)
+anova(lm(response~model, data = traces.eigen.df))
+anova(lm(response~model:PC, data = traces.eigen.df)) #which one to use?
+anova(lm(response~model*PC, data = traces.eigen.df))
+
+# b.Bartlett test
+
+# 6. angle and length of deltaZ
+
+# distribution within population
+dist_within_theta = function(traces.traits){
+theta = vector(,spn)
+for (i in 1:spn){
+  pairs = randcomb(1:spn, 2)
+  a = traces.traits[,pairs[1]]
+  b = traces.traits[,pairs[2]]
+  a.length = sqrt(sum(a * a)) 
+  b.length = sqrt(sum(b * b)) 
+  theta[i] = acos(sum(a*b) / (a.length * b.length))
+}
+return(theta)
+}
+old.theta = dist_within_theta(old.traces.traits)
+
+new.theta = dist_within_theta(new.traces.traits)
+
+star.theta = dist_within_theta(star.traces.traits)
+
+
+# distribution between population
+dist_between_theta = function(Atraces.traits,Btraces.traits){
+  theta = vector(,spn)
+  for (i in 1:spn){
+    pairs = randcomb(1:spn, 2)
+    a = Atraces.traits[,pairs[1]]
+    b = Btraces.traits[,pairs[2]]
+    a.length = sqrt(sum(a * a)) 
+    b.length = sqrt(sum(b * b)) 
+    theta[i] = acos(sum(a*b) / (a.length * b.length))
+  }
+  return(theta)
+}
+old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits)
+# K-S test
+ks.test(old.theta,old.new.theta) #different
+
+old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits)
+new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits)
+
+theta.df = data.frame(old.theta,new.theta,star.theta, old.new.theta, old.star.theta, new.star.theta)
+
+library(reshape2)
+theta.df = melt(theta.df, value.name = "Degree")
+names(theta.df) = c("labels", "theta")
+ggplot(theta.df, aes(x = theta, col=labels))+
+  geom_density(aes(fill=labels),alpha=0.1)
+#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"),
+#                     values=rainbow(6))
+  
+# length
+length_deltaZ = function(traces.traits){
+    posmean.deltaZ = apply(traces.traits,1,mean)
+    deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) 
+    return(deltaZ.length)
+}
+old.deltaZ.length = length_deltaZ(old.traces.traits)
+old.deltaZ.length
+new.deltaZ.length = length_deltaZ(new.trace s.traits)
+new.deltaZ.length
+star.deltaZ.length = length_deltaZ(star.traces.traits)
+star.deltaZ.length
diff --git a/R_BSFG/setup_pedigree.R b/R_BSFG/setup_pedigree.R
index b96f952..619a359 100644
--- a/R_BSFG/setup_pedigree.R
+++ b/R_BSFG/setup_pedigree.R
@@ -1,5 +1,5 @@
 
-setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfactor=TRUE){  
+setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfactor=FALSE){  
   
   generation_pos <- data$Generation %in% GenerationCode
   LineCode_pos <- data$LineCode%in%LineCode|data$LineCode==0
@@ -34,7 +34,9 @@ setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfa
   }
     reg=lm(as.matrix(Y)~0+X, na.action= na.omit)
     B_act = reg$coefficients
-  }else{X=matrix(0,nr=n,nc=0);B_act=matrix(0,nr=0,nc=ncol(Y))}
+  }else{
+    # there should be a column of 1 in design matrix as mean value
+    X=matrix(0,nr=n,nc=0);B_act=matrix(0,nr=0,nc=ncol(Y))}
   
   
   traitnames <- names(data)[7:ncol(data)]

From f37181a4b46e82e3778b5b7c640f6c647d1859a6 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Mon, 23 Jan 2017 16:09:09 -0800
Subject: [PATCH 06/18] Adding functions for saving posterior samples in
 combined sampling cases

---
 R_BSFG/BSFG_functions.R | 63 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index ff340d0..921255a 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -46,9 +46,9 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	  means = c()
 	  Qlam = c()
 	  for(pop in pops){
-		  FUDi = E_a_prec[[pop]][j] * sweep(FtU[[pop]],2,(s[[pop]] + E_a_prec[[pop]][j]/resid_Y_prec[j]),'/')
-		  means = means + FUDi %*% UtY[[pop]][,j]
-		  Qlam = Qlam + FUDi %*% t(FtU[[pop]]) 
+		  FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
+		  means = means + FUDi %*% UtY[,j]
+		  Qlam = Qlam + FUDi %*% t(FtU) 
 		}
 	  Qlam = Qlam + diag(Plam[j,])
 
@@ -65,6 +65,7 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	return(Lambda)
 }
 
+
 sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) {
 	#sample factor heritibilties conditional on a given residual precision
 	#(res_precision)
@@ -206,6 +207,7 @@ sample_means = function( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDes
 	return(location_sample)
 }
 
+
 sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
 	#samples genetic effects on factors (F_a) conditional on the factor scores F:
 	# F_i = F_{a_i} + E_i, E_i~N(0,s2*(1-h2)*I) for each latent trait i
@@ -246,6 +248,8 @@ sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) {
 	return(F_a)
 }
 
+
+
 sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
 #Sample factor scores given factor loadings (F_a), factor heritabilities (F_h2) and
 #phenotype residuals
@@ -274,6 +278,7 @@ sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) {
 }
 
 
+
 sample_delta = function( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) {
 	#sample delta and tauh parameters that control the magnitudes of higher
 	#index factor loadings.
@@ -410,10 +415,56 @@ save_posterior_samples = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F
 	Posterior$B   = (Posterior$B*(sp_num-1) + B)/sp_num
 	Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
 	Posterior$W   = (Posterior$W*(sp_num-1) + W)/sp_num
-
 	return(Posterior)
 }
-                                               
+  
+save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec) {
+  # save posteriors. Full traces are kept of the more interesting parameters.
+  # Only the posterior means are kept of less interesting parameters. These
+  # should be correctly calculated over several re-starts of the sampler.
+  
+  # first unlist F, F_a, F_h2, Y, B, E_a
+  nl = length(F_h2)
+  F = unlist(do.call("rbind", F)) # n*k
+  F_a = unlist(do.call("rbind", F_a)) #n*k
+  F_h2 = Reduce("+", F_h2)/nl # k taking average
+  B = Reduce("+",B)/nl # b*p taking average
+  E_a = unlist(do.call("rbind", E_a)) # n*k
+  W = Reduce("+",W)/nl  # k*p taking average
+  resid_Y_prec = Reduce("+",resid_Y_prec)/nl #p taking average
+  E_a_prec = Reduce("+",E_a_prec)/nl #p
+  W_prec = Reduce("+",W_prec)/nl  #p
+  
+  #sp = ncol(Posterior$Lambda)
+  sp = ncol(Posterior$Lambda)
+  #size(Posterior$Lambda,2)
+  #save factor samples
+  # #if(length(Lambda) > nrow(Posterior[["Lambda"]])){  this will not happen since we stop updatek
+  #   # expand factor sample matrix if necessary
+  #   Posterior[["F"]]      = rbind(Posterior[["F"]], 	   	matrix(0,nr = length(F)     -nrow(Posterior[["F"]]),		nc = sp))
+  #   Posterior[["F_a"]]    = rbind(Posterior[["F_a"]], 	matrix(0,nr = length(F_a) 	-nrow(Posterior[["F_a"]]),	nc = sp))
+  #   Posterior[["delta"]]  = rbind(Posterior[["delta"]], 	matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]),	nc = sp))
+  #   Posterior[["F_h2"]]   = rbind(Posterior[["F_h2"]], 	matrix(0,nr = length(F_h2) 	-nrow(Posterior[["F_h2"]]),	nc = sp))
+  # }
+  Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda)
+  Posterior$F[1:length(F),sp_num]     = c(F)
+  Posterior$F_a[1:length(F_a),sp_num] = c(F_a)
+  Posterior$delta[1:length(delta),sp_num] = delta
+  Posterior$F_h2[1:length(F_h2),sp_num] = F_h2
+  
+  Posterior$resid_Y_prec[,sp_num] = resid_Y_prec
+  Posterior$E_a_prec[,sp_num]     = E_a_prec
+  Posterior$W_prec[,sp_num]       = W_prec
+  
+  # save B,U,W
+  Posterior$B   = (Posterior$B*(sp_num-1) + B)/sp_num
+  Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num
+  Posterior$W   = (Posterior$W*(sp_num-1) + W)/sp_num
+  
+  return(Posterior)
+}
+
+
 save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,resid_Y_prec,E_a_prec,W_prec) {
   # save posteriors. Full traces are kept of the more interesting parameters.
   # Only the posterior means are kept of less interesting parameters. These
@@ -442,7 +493,7 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re
   Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j
   Posterior$W   = (Posterior$W*(j-1) + W)/j
   
-  return(Posterior)
+  return(Posterior) 
 }
 
 

From baccd0488eb528d6853c206bab8dfeb23e632205 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Mon, 23 Jan 2017 16:10:10 -0800
Subject: [PATCH 07/18] minor changes

---
 R_BSFG/code_comparing_plotting4.R | 224 +++++++++++++++++++++++++-----
 R_BSFG/deltaZ_posterior_mean.R    |  42 +++---
 2 files changed, 209 insertions(+), 57 deletions(-)

diff --git a/R_BSFG/code_comparing_plotting4.R b/R_BSFG/code_comparing_plotting4.R
index b64c288..413788a 100644
--- a/R_BSFG/code_comparing_plotting4.R
+++ b/R_BSFG/code_comparing_plotting4.R
@@ -94,10 +94,20 @@ library(ggplot2)
 #plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
 pd <- position_dodge(width = 0.6)
 #load("plot5.RData")
-ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
-  ylab("Response")+
-  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
-  geom_point(position=pd,size=1)
+#ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+#  ylab("Response")+
+#  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+#  geom_point(position=pd,size=1)
+ggplot(traces.traits.df,aes(x = traits, y = response, fill=model))+
+  ylab("Variance")+
+  geom_boxplot( position="dodge")+
+  scale_fill_manual(breaks = c("old","new","star","four"),
+                     values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G")
+
 
 
 #5. PCA decomposition
@@ -106,21 +116,24 @@ old.traces.eigen = sapply(old.G, function(x)eigen(x)$values)
 new.traces.eigen = sapply(new.G, function(x)eigen(x)$values)
 star.traces.eigen = sapply(star.G, function(x)eigen(x)$values)
 four.traces.eigen = sapply(four.G, function(x)eigen(x)$values)
+# we only need the first 8 of them
+cumsum(old.traces.eigen[,1]/sum(old.traces.eigen[,1]))
+l = 8
 
-traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen),c(four.traces.eigen)))
-traces.eigen.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn))
+traces.eigen.df = data.frame(c(c(old.traces.eigen[1:8,]),c(new.traces.eigen[1:8,]),c(star.traces.eigen[1:8,]),c(four.traces.eigen[1:8,])))
+traces.eigen.df$model = rep(c("old","new","star","four"),c(l*spn,l*spn,l*spn,l*spn))
 
 traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star","four"))
 #traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old","four"))
 
-traces.eigen.df$lambda = rep(paste0("PC",1:18),4*spn)
+traces.eigen.df$lambda = rep(paste0("PC",1:l),4*spn)
 names(traces.eigen.df) = c("response","model","PC")
 
-traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC"))
-names(traces.eigen.df.sum) = c("model","PC","q1","q2","response")
+#traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC"))
+#names(traces.eigen.df.sum) = c("model","PC","q1","q2","response")
 
 # reorder the PC value in the data
-traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18))
+traces.eigen.df$PC = factor(traces.eigen.df$PC,level=paste0("PC",1:8))
 
 # size dataframe
 old.traces.size = apply(old.traces.eigen,2,sum)
@@ -130,13 +143,8 @@ four.traces.size = apply(four.traces.eigen,2,sum)
 size.df = data.frame(old.traces.size,new.traces.size,star.traces.size,four.traces.size)
 library(reshape2)
 size.df = melt(size.df,value.name = "Size")
-size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size","four.traces.size"))
-
+size.df$variable = factor(size.df$variable,levels = c("old.traces.size","new.traces.size","star.traces.size","four.traces.size"))
 
-old.traces.pc1c = apply(old.traces.eigen,2,function(x)sum(x[2])/sum(x))
-new.traces.pc1c = apply(new.traces.eigen,2,function(x)sum(x[2])/sum(x))
-star.traces.pc1c = apply(star.traces.eigen,2,function(x)sum(x[2])/sum(x))
-four.traces.pc1c = apply(four.traces.eigen,2,function(x)sum(x[2])/sum(x))
 
 # PC1
 old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x))
@@ -146,7 +154,7 @@ four.traces.pc1c = apply(four.traces.eigen,2,function(x)x[1]/sum(x))
 pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c,four.traces.pc1c)
 library(reshape2)
 pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution")
-pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c","four.traces.pc1c"))
+pc1c.df$variable = factor(pc1c.df$variable,levels = c("old.traces.pc1c","new.traces.pc1c","star.traces.pc1c","four.traces.pc1c"))
 
 
 # size of each model 
@@ -156,23 +164,66 @@ traces.eigen.df.sum %>%
   summarize(size = sum(response))
 
 library(ggplot2)
-ggplot(size.df, aes(x="",y=Size, fill=variable))+
-  geom_boxplot(alpha=0.85)+
-    theme(axis.title.x = element_blank())
-
-
-ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+
-  geom_boxplot(alpha=0.85)+
-  theme(axis.title.x = element_blank())
-
-
-pd <- position_dodge(width = 0.6)
-
-ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+
-   ylab("Variance")+
-  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
-  geom_point(position=pd,size=1)+
-  xlab("Principle Components")
+plot1<- ggplot(size.df, aes(x="",y=Size, fill=variable))+
+  geom_boxplot( position="dodge",outlier.shape = NA)+
+  labs(fill = "G")+
+  scale_fill_manual( values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+
+  theme(axis.text=element_text(size=10),
+      axis.title=element_text(size=15),legend.position="none",axis.title.x = element_blank(),
+      panel.border = element_blank(),
+      panel.grid.major = element_blank(),
+      panel.grid.minor = element_blank(),
+      panel.background = element_rect(fill="white",color = "black"))
+
+plot2<- ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+
+  geom_boxplot( position="dodge",outlier.shape = NA)+
+  labs(fill = "G")+
+  scale_fill_manual( values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.position="none",axis.title.x = element_blank(),
+        panel.border = element_blank(),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        panel.background = element_rect(fill="white",color = "black"))+
+  ylab("PC1")
+
+plot3 <- ggplot(traces.eigen.df,aes(x = PC, y = response, fill=model))+
+  ylab("Variance")+
+  xlab("")+
+  geom_boxplot( position="dodge",outlier.shape = NA)+
+  scale_fill_manual(breaks = c("old","new","star","four"),
+                    values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),
+        panel.border = element_blank(),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        panel.background = element_rect(fill="white",color = "black"))+
+  labs(fill = "G")
+
+plot5 <- ggplot(traces.eigen.df,aes(x = PC, y = response, fill=model))+
+  ylab("Variance")+
+  xlab("")+
+  geom_boxplot( position="dodge",outlier.shape = NA)+
+  scale_fill_manual(breaks = c("old","new","star","four"),
+                    values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),
+        panel.border = element_blank(),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        panel.background = element_rect(fill="white",color = "black"))+
+  labs(fill = "G")+
+  ylim(c(0,2))
+#pd <- position_dodge(width = 0.6)
+
+#ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+
+#   ylab("Variance")+
+#  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+#  geom_point(position=pd,size=1)+
+#  xlab("Principle Components")
 
 
 # 6. angle and length of deltaZ and first eigenvector
@@ -201,6 +252,29 @@ old.theta = dist_within_theta(old.traces.vector)
 new.theta = dist_within_theta(new.traces.vector)
 star.theta = dist_within_theta(star.traces.vector)
 four.theta = dist_within_theta(four.traces.vector)
+# 95% C.I
+theta_ci = function(theta.trace){
+  quantile(theta.trace,c(0.05,0.95))
+}
+theta_ci(old.theta)
+theta_ci(new.theta)
+theta_ci(star.theta)
+theta_ci(four.theta)
+# mode
+#library(modeest)
+#mlv(old.theta, method = "mfv")
+#mlv(new.theta, method = "mfv")
+#mlv(star.theta, method = "mfv")
+#mlv(four.theta, method = "mfv")
+
+dmode <- function(x) {
+  den <- density(x, kernel=c("gaussian"))
+  ( den$x[den$y==max(den$y)] )   
+}
+dmode(old.theta)
+dmode(new.theta)
+dmode(star.theta)
+dmode(four.theta)
 # deltaZ
 old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean
 new.theta = dist_within_theta(new.traces.traits)
@@ -231,6 +305,26 @@ new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector)
 new.four.theta = dist_between_theta(new.traces.vector,four.traces.vector)
 old.four.theta = dist_between_theta(old.traces.vector,four.traces.vector)
 
+# 95% C.I
+theta_ci(old.new.theta)
+theta_ci(old.star.theta)
+theta_ci(four.star.theta)
+theta_ci(new.star.theta)
+theta_ci(new.four.theta)
+theta_ci(old.four.theta)
+# mode
+# mlv(old.new.theta, method = "mfv")
+# mlv(old.star.theta, method = "mfv")
+# mlv(four.star.theta, method = "mfv")
+# mlv(new.star.theta, method = "mfv")
+# mlv(new.four.theta, method = "mfv")
+# mlv(old.four.theta, method = "mfv")
+dmode(old.new.theta)
+dmode(old.star.theta)
+dmode(old.four.theta)
+dmode(new.star.theta)
+dmode(new.four.theta)
+dmode(four.star.theta)
 # deltaZ
 old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits)
 # K-S test
@@ -244,11 +338,69 @@ theta.df = data.frame(old.theta,new.theta,star.theta,four.theta, old.new.theta,
 library(reshape2)
 theta.df = melt(theta.df, value.name = "Degree")
 names(theta.df) = c("labels", "theta")
-ggplot(theta.df, aes(x = theta, col=labels))+
-  geom_density(aes(fill=labels),alpha=0.1)
+theta.df$labels = factor(theta.df$labels,levels = c("old.theta","new.theta","star.theta","four.theta", "old.new.theta", "old.star.theta", "old.four.theta","new.star.theta","four.star.theta","new.four.theta"))
+
+plot4 <- ggplot(theta.df, aes(x="",y = theta))+
+  geom_boxplot(aes(fill=labels),outlier.shape = NA)+
+  labs(fill = "G")+
+  scale_fill_manual( values=c("grey0","grey30","red2","blue","grey50","red3","blue2","red4","blue4","purple4"),labels=c("Overall","Control","Tall","Short","Overall_Control",
+                                                                                                            "Overall_Tall","Overall_Short","Control_Tall","Control_Short","Low_Tall"))+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),axis.title.x = element_blank(),
+        panel.border = element_blank(),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        panel.background = element_rect(fill="white",color = "black"))+
+  ylab("Theta")
+
+
 #+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"),
 #                     values=rainbow(6))
 
+save(plot1,file = "plot1.RData")
+save(plot2,file = "plot2.RData")
+save(plot3,file = "plot3.RData")
+save(plot4,file = "plot4.RData")
+
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+  library(grid)
+  
+  # Make a list from the ... arguments and plotlist
+  plots <- c(list(...), plotlist)
+  
+  numPlots = length(plots)
+  
+  # If layout is NULL, then use 'cols' to determine layout
+  if (is.null(layout)) {
+    # Make the panel
+    # ncol: Number of columns of plots
+    # nrow: Number of rows needed, calculated from # of cols
+    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+                     ncol = cols, nrow = ceiling(numPlots/cols))
+  }
+  
+  if (numPlots==1) {
+    print(plots[[1]])
+    
+  } else {
+    # Set up the page
+    grid.newpage()
+    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+    
+    # Make each plot, in the correct location
+    for (i in 1:numPlots) {
+      # Get the i,j matrix positions of the regions that contain this subplot
+      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+      
+      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+                                      layout.pos.col = matchidx$col))
+    }
+  }
+}
+
+layout <- matrix(c(1, 1, 1, 2, 3, 4), nrow = 2, byrow = TRUE)
+multiplot(plotlist = list(plot5,plot2,plot1,plot4),layout = layout)
 #-------------------------------
 # 7.length of deltaZ
 
diff --git a/R_BSFG/deltaZ_posterior_mean.R b/R_BSFG/deltaZ_posterior_mean.R
index 5810972..f6ae18d 100644
--- a/R_BSFG/deltaZ_posterior_mean.R
+++ b/R_BSFG/deltaZ_posterior_mean.R
@@ -1,42 +1,42 @@
-source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
-setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
-BSFGOld = "BSFG_state.RData"
+#source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+#setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+#BSFGOld = "BSFG_state.RData"
 #BSFGNew = "BSFG_fixedlambda65.RData"
 #BSFGStar = "BSFG_fixedlambda75.RData"
-BSFGNew = "BSFG_state6.RData"
-BSFGStar = "BSFG_state7.RData"
-target = "F_h2"
-load(BSFGOld)
-traitnames = BSFG_state$traitnames
+#BSFGNew = "BSFG_state6.RData"
+#BSFGStar = "BSFG_state7.RData"
+#target = "F_h2"
+#load(BSFGOld)
+#traitnames = BSFG_state$traitnames
 #old.traces.G = G_Traces_Comp(BSFG_state)
-old.G = G_Matrix_Comp(BSFG_state)
-p = BSFG_state$run_variables$p
-spn = dim(BSFG_state$Posterior[[target]])[2]
+#old.G = G_Matrix_Comp(BSFG_state)
+#p = BSFG_state$run_variables$p
+#spn = dim(BSFG_state$Posterior[[target]])[2]
 # posterior mean
 # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
 #Lambda = BSFG_state$Posterior$Lambda
-load(BSFGNew)
+#load(BSFGNew)
 #BSFG_state$Posterior$Lambda = Lambda
-new.G = G_Matrix_Comp(BSFG_state)
+#new.G = G_Matrix_Comp(BSFG_state)
 
-load(BSFGStar)
+#load(BSFGStar)
 #BSFG_state$Posterior$Lambda = Lambda
-star.G = G_Matrix_Comp(BSFG_state)
+#star.G = G_Matrix_Comp(BSFG_state)
 #new.traces.G = G_Traces_Comp(BSFG_state)
 
 
 # The distance of two matrix(in predicted phenotype value/level)
 #selection gradient
-beta = 0.3609
-beta.v = c(0,beta,rep(0,16))
-traces.traits = matrix(,p,spn)
+#beta = 0.3609
+#beta.v = c(0,beta,rep(0,16))
+#traces.traits = matrix(,p,spn)
 
 #old.G in list form
-old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+#old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
 #old.traits.posmean = apply(old.traces.traits,1,mean)
-new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+#new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
 #new.traits.posmean = apply(new.traces.traits,1,mean)
-star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+#star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
 #star.traits.posmean = apply(star.traces.traits,1,mean)
 #boxplot(old.traces_traits[2,])
 #boxplot(t(old.traces.traits))

From c28ddde824929698c4eed1bd6ec18136c916c00d Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Mon, 23 Jan 2017 17:15:05 -0800
Subject: [PATCH 08/18]  Adding function for sampling combined data

---
 R_BSFG/fast_BSFG_sampler_comb.R | 360 ++++++++++++++++++++++++++++++++
 1 file changed, 360 insertions(+)
 create mode 100644 R_BSFG/fast_BSFG_sampler_comb.R

diff --git a/R_BSFG/fast_BSFG_sampler_comb.R b/R_BSFG/fast_BSFG_sampler_comb.R
new file mode 100644
index 0000000..2e6913e
--- /dev/null
+++ b/R_BSFG/fast_BSFG_sampler_comb.R
@@ -0,0 +1,360 @@
+fast_BSFG_sampler_comb = function(BSFG_state,n_samples) {
+  # -- Daniel Runcie -- #
+  #
+  # Gibbs sampler for genetic covariance estimation based on mixed effects
+  # model, with missing data
+  #
+  # Code for:
+  # 
+  # Runcie and Mukherjee (2013) Dissecting high-dimensional traits
+  # with Bayesian sparse factor analysis of genetic covariance matrices.
+  # GENETICS.
+  #
+  # (c) July 30, 2013
+  #
+  # code based on original provided by Anirban Bhattacharya
+  #
+  #         
+  # This function implements the BSF-G partially collapsed Gibbs sampler.
+  # Variable notation follows the Runcie and Mukherjee manuscript as closely
+  # as possible.
+  #
+  # All input and initialization functions are carried out by the function
+  # fast_BSFG_sampler_init See the documentation of that function for details.
+  # 
+  # The sampler is designed to do a short-medium length run and then return
+  # the state of the chain. After assessing the progress of the chain,
+  # (optionally), the Posterior matrices can be reset, and the chain
+  # re-started where it left off. Right now, the random seed is not saved. Probably should
+  # be if I can figure out how.
+  # 
+  # This function takes the following inputs:
+  #     data_matrices: struct holding the (should be imutable) data, design and incidence matrices:
+  #          Y:  		Full phenotype data. n x p
+  # 		   X: 		Fixed effect design matrix. n x b
+  #          Z_1:     Random effect 1 incidence matrix. n x r1
+  #          Z_2:     Random effect 2 incidence matrix. n x r2
+  #          Y_missing: incidence matrix of missing data in Y
+  #     start_i: iteration number of the end of the last run.
+  #     draw_iter: frequency of updating diagnostic plots
+  #     burn: number of burnin samples
+  #     sp: total number of samples to collect
+  #     thin: thinning rate of chain
+  #     simulation: boolean. Is this a simulation?
+  #     params: struct with chain parameters.
+  #     priors: struct with all relevant prior hyperparameters
+  #     Posterior: struct with posterior matrices, or running posterior means. 
+  #            Note: not sure how well posterior means work after re-starting chain. Probably not well.
+  #     current_state: current (initial) conditions of all model parameters
+  # 
+  # Several diagnostic plots are produced during the run. 
+  #     Their interpretation is described within the source codes:
+  #         draw_simulation_diagnostics.m: For simulated data with known true values
+  #         draw_results_diagnostics.m: Otherwise
+  #         
+  
+  data_matrices  = BSFG_state$data_matrices
+  params         = BSFG_state$params
+  priors         = BSFG_state$priors
+  Posterior      = BSFG_state$Posterior
+  current_state  = BSFG_state$current_state
+  run_parameters = BSFG_state$run_parameters
+  run_variables  = BSFG_state$run_variables
+  traitnames     = BSFG_state$traitnames
+  
+  # ----------------------------------------------- #
+  # ----------------Load data matrices------------- #
+  # ----------------------------------------------- #
+  
+  Y  			  = data_matrices$Y
+  Z_1     	= data_matrices$Z_1
+  Z_2     	= data_matrices$Z_2
+  X       	= data_matrices$X
+  Y_missing = data_matrices$Y_missing
+  
+  p   = run_variables$p
+  n   = run_variables$n
+  r   = run_variables$r
+  r2  = run_variables$r2
+  b   = run_variables$b
+  
+  
+  # ----------------------------------------------- #
+  # ----------------Load priors-------------------- #
+  # ----------------------------------------------- #
+  
+  resid_Y_prec_shape =   priors$resid_Y_prec_shape
+  resid_Y_prec_rate  =   priors$resid_Y_prec_rate
+  E_a_prec_shape     =   priors$E_a_prec_shape
+  E_a_prec_rate      =   priors$E_a_prec_rate
+  W_prec_shape       =   priors$W_prec_shape
+  W_prec_rate        =   priors$W_prec_rate
+  Lambda_df          =   priors$Lambda_df
+  delta_1_shape      =   priors$delta_1_shape
+  delta_1_rate       =   priors$delta_1_rate
+  delta_2_shape      =   priors$delta_2_shape
+  delta_2_rate       =   priors$delta_2_rate
+  h2_priors_factors  =   priors$h2_priors_factors
+  
+  
+  # ----------------------------------------------- #
+  # ----------------Load current state------------- #
+  # ----------------------------------------------- #
+  resid_Y_prec =   current_state$resid_Y_prec
+  F            =   current_state$F
+  Lambda       =   current_state$Lambda
+  E_a          =   current_state$E_a
+  W            =   current_state$W
+  Lambda_prec  =   current_state$Lambda_prec
+  delta        =   current_state$delta
+  tauh         =   current_state$tauh
+  E_a_prec     =   current_state$E_a_prec
+  W_prec       =   current_state$W_prec
+  Plam         =   current_state$Plam
+  F_h2         =   current_state$F_h2
+  F_a          =   current_state$F_a
+  B            =   current_state$B
+  start_i      =   current_state$nrun
+  k = ncol(F)
+  
+  # ----------------------------------------------- #
+  # -----------Reset Global Random Number Stream--- #
+  # ----------------------------------------------- #
+  do.call("RNGkind",as.list(BSFG_state$RNG$RNGkind))  ## must be first!
+  assign(".Random.seed", BSFG_state$RNG$Random.seed, .GlobalEnv)
+  
+  # ----------------------------------------------- #
+  # -----------Load pre-calcualted matrices-------- #
+  # ----------------------------------------------- #
+  Ainv                             = run_variables$Ainv
+  A_2_inv                          = run_variables$A_2_inv
+  invert_aI_bZAZ                   = run_variables$invert_aI_bZAZ  
+  invert_aPXA_bDesignDesignT       = run_variables$invert_aPXA_bDesignDesignT 
+  invert_aZZt_Ainv                 = run_variables$invert_aZZt_Ainv
+  invert_aPXA_bDesignDesignT_rand2 = run_variables$invert_aPXA_bDesignDesignT_rand2  
+  
+  
+  # ----------------------------------------------- #
+  # ----------------Set up run--------------------- #
+  # ----------------------------------------------- #
+  #     b0,b1: parameters controlling rate of adaptation of factor model size
+  #     h2_divisions: number of discrete steps for each factor heritability parameter
+  #     epsilon: truncation point for factor loadings during adaptation
+  b0           = run_parameters$b0
+  b1           = run_parameters$b1
+  h2_divisions = run_parameters$h2_divisions
+  epsilon      = run_parameters$epsilon
+  prop         = run_parameters$prop
+  save_freq    = run_parameters$save_freq
+  burn         = run_parameters$burn
+  thin         = run_parameters$thin
+  draw_iter    = run_parameters$draw_iter
+  simulation   = run_parameters$simulation
+  
+  
+  # ----------------------------------------------- #
+  # ---Extend posterior matrices for new samples--- #
+  # ----------------------------------------------- #
+  # change posterior into a list (can the elements in dataframe be lists?)
+  sp = (start_i + n_samples - burn)/thin - ncol(Posterior$Lambda)
+  if(sp > 0){
+    Posterior$Lambda        = cbind(Posterior$Lambda,matrix(0,nr = nrow(Posterior$Lambda),nc = sp))
+    Posterior$F             = cbind(Posterior$F,matrix(0,nr = nrow(Posterior$F),nc = sp))
+    Posterior$F_a           = cbind(Posterior$F_a,matrix(0,nr = nrow(Posterior$F_a),nc = sp))
+    Posterior$delta         = cbind(Posterior$delta,matrix(0,nr = nrow(Posterior$delta),nc = sp))
+    Posterior$F_h2          = cbind(Posterior$F_h2,matrix(0,nr = nrow(Posterior$F_h2),nc = sp))
+    Posterior$resid_Y_prec  = cbind(Posterior$resid_Y_prec,matrix(0,nr = nrow(Posterior$resid_Y_prec),nc = sp))
+    Posterior$E_a_prec      = cbind(Posterior$E_a_prec,matrix(0,nr = nrow(Posterior$E_a_prec),nc = sp))
+    Posterior$W_prec        = cbind(Posterior$W_prec,matrix(0,nr = nrow(Posterior$W_prec),nc = sp))
+  }
+  
+  # ----------------------------------------------- #
+  # --------------start gibbs sampling------------- #
+  # ----------------------------------------------- #
+  
+  start_time = Sys.time()
+  for(i in start_i+(1:n_samples)){
+    
+    # -----fill in missing phenotypes----- #
+    #conditioning on everything else
+    if(sum(Y_missing)>0) {
+      meanTraits = X %*% B + F %*% t(Lambda) + Z_1 %*% E_a + Z_2 %*% W
+      resids = matrix(rnorm(p*n,0,sqrt(1/resid_Y_prec)),nr = n,nc = p,byrow=T)
+      Y[Y_missing] = meanTraits[Y_missing] + resids[Y_missing]
+    }
+    # recover()
+    
+    # -----Sample Lambda------------------ #
+    #conditioning on W, B, F, marginalizing over E _a
+    Y_tilde = Y - X %*% B - Z_2 %*% W
+    Y_tilde = as.matrix(Y_tilde)
+    # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
+    
+    Lambda = sample_Lambda_comb( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
+
+    # -----Sample Lambda_prec------------- #
+    Lambda2 = Lambda^2
+    Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k)
+    
+    # -----Sample delta, update tauh------ #
+    delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 )
+    tauh  = cumprod(delta)
+    
+    # -----Update Plam-------------------- #
+    Plam = sweep(Lambda_prec,2,tauh,'*')
+    
+    # -----Sample resid_Y_prec ---------------- #
+    # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below.
+    
+    # -----Sample E_a_prec---------------- #
+    #conditioning on W, B, F, marginalizing over E_a 
+    # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
+    # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
+    
+    # pops should be a output from setup datasets and for pop in pops, the row number for 
+    # each pop should also included in the setup dataset
+    # read pops (number of populations)
+    # read row number for each pop
+    # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda)
+   
+    # -----Sample B and E_a--------------- #
+    #conditioning on W, F, Lambda
+    for(pop in pops){
+      Y_tilde = Y[[pop]] - F[[pop]] %*% t(Lambda) - Z_2[[pop]] %*% W[[pop]]
+      Y_tilde = as.matrix(Y_tilde)
+      # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
+      location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], E_a_prec[[pop]], invert_aPXA_bDesignDesignT[[pop]] )
+      B[[pop]]   = location_sample[1:b,]
+      if(length(B[[pop]])==p){
+        B[[pop]] = matrix(0,nr=0,nc=p)
+      }
+      E_a[[pop]] = location_sample[b+(1:r),]
+      
+      
+      # -----Sample W ---------------------- #
+      #conditioning on B, E_a, F, Lambda
+      if(ncol(Z_2) > 0) {
+        Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - F[[pop]] %*% t(Lambda)
+        Y_tilde = as.matrix(Y_tilde)
+        # location_sample = sample_means( Y_tilde, resid_Y_prec, W_prec, invert_aPXA_bDesignDesignT_rand2 )
+        location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], W_prec[[pop]], invert_aPXA_bDesignDesignT_rand2[[pop]] )
+        W[[pop]] = location_sample
+      }
+      
+      # -----Sample F_h2-------------------- #
+      #conditioning on F, marginalizing over F_a
+      # F_h2 = sample_h2s_discrete(F,h2_divisions,h2_priors_factors,invert_aI_bZAZ)
+      F_h2[[pop]] = sample_h2s_discrete(F[[pop]],h2_divisions,h2_priors_factors,invert_aI_bZAZ[[pop]])
+      
+      # -----Sample F_a--------------------- #
+      #conditioning on F, F_h2
+      # F_a = sample_F_a_c(F,Z_1,F_h2,invert_aZZt_Ainv)
+      F_a[[pop]] = sample_F_a(F[[pop]],Z_1[[pop]],F_h2[[pop]],invert_aZZt_Ainv[[pop]]);
+      
+      # -----Sample F----------------------- #
+      #conditioning on B,F_a,E_a,W,Lambda, F_h2
+      Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]]
+      Y_tilde = as.matrix(Y_tilde)
+      # F1 = sample_factors_scores( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 )
+      F[[pop]] = sample_factors_scores(Y_tilde, Z_1[[pop]],Lambda,resid_Y_prec[[pop]],F_a[[pop]],F_h2[[pop]] )
+      
+      
+      # -----Sample resid_Y_prec------------ #
+      Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - F[[pop]] %*% t(Lambda) - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]]
+      resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision
+      # n is the sum of all populations
+      
+      # -----Sample E_a_prec---------------- #
+      #     #random effect 1 (D) residual precision
+      E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]]))
+      
+      # -----Sample W_prec------------------ #
+      if(ncol(Z_2) > 0) {
+        W_prec[[pop]] =  rgamma(p, W_prec_shape + r2/2,rate = W_prec_rate + 1/2*diag(t(W[[pop]]) %*% A_2_inv[[pop]] %*% W[[pop]]))
+      }
+    }
+      # -- adapt number of factors to samples ---#
+      # adapt_result = update_k( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_1,Lambda_df,delta_2_shape,delta_2_rate,b0,b1,i,epsilon,prop )
+      # F           = adapt_result$F
+      # Lambda      = adapt_result$Lambda
+      # F_a         = adapt_result$F_a
+      # F_h2        = adapt_result$F_h2
+      # Lambda_prec = adapt_result$Lambda_prec
+      # Plam        = adapt_result$Plam
+      # delta       = adapt_result$delta
+      # tauh        = adapt_result$tauh
+      # k = ncol(F)
+      
+      # -- save sampled values (after thinning) -- #
+      if( (i-burn) %% thin == 0 && i > burn) {
+        
+        sp_num = (i-burn)/thin
+        
+        Posterior = save_posterior_samples_comb( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec)
+        
+        # reorder elements in Lambda
+        Lambda_pos  = Posterior$Lambda
+        Lam_rowmean = rowMeans(Lambda_pos)
+        Lambda_hat  = matrix(Lam_rowmean,nr=p)
+        
+        
+        if(sp_num %% save_freq == 0) save(Posterior,file='Posterior.RData')
+      }
+        
+      
+      # -- provide run diagnostics and plots -- #
+      # if(i %% draw_iter  == 0) {
+      # directory=strread(pwd,'#s','delimiter','/')
+      # disp(directory(end))
+      # disp(i)
+      # elapsed=toc
+      # #output some running statistics on the current factors and their genetic variances
+      # # each row represents a factor:
+      # #    delta[i], [i], F_h2[i], V_P(F_i), V_A(F_i)
+      # [delta,[1:size(F,2)]',F_h2,sum(F.^2)'./(size(F,1)-1),sum(F_a.^2)'./(size(F_a,1)-1)]
+      # disp(strcat('Time remaining:',num2str((nrun-i) * (elapsed/i) * 1/60)))
+      
+      #make some plots of some running statistics
+      
+      #   draw_results_diagnostics(sp_num,run_parameters,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames)
+      # }
+    }
+
+     end_time = Sys.time()
+     print(end_time - start_time)
+     # save(Posterior,file = 'Posterior.RData')
+
+
+  # ----------------------------------------------- #
+  # ------------Save state for restart------------- #
+  # ----------------------------------------------- #
+  
+  current_state[["resid_Y_prec"]]  = resid_Y_prec
+  current_state[["Lambda_prec"]]   = Lambda_prec
+  current_state[["delta"]]         = delta
+  current_state[["tauh"]]          = tauh
+  current_state[["Plam"]]          = Plam
+  current_state[["Lambda"]]        = Lambda
+  current_state[["F_h2"]]          = F_h2
+  current_state[["E_a_prec"]]      = E_a_prec
+  current_state[["W_prec"]]        = W_prec
+  current_state[["F_a"]]           = F_a
+  current_state[["F"]]             = F
+  current_state[["E_a"]]           = E_a
+  current_state[["B"]]             = B
+  current_state[["W"]]             = W
+  current_state[["nrun"]] 			= i
+  
+  # save(current_state,file='current_state.RData')
+  
+
+  BSFG_state$current_state = current_state
+  BSFG_state$Posterior = Posterior
+  BSFG_state$RNG = list(
+    Random.seed = .Random.seed,
+    RNGkind = RNGkind()
+  )
+  
+  save(BSFG_state,file = 'BSFG_state.RData')
+  return(BSFG_state)
+} 
\ No newline at end of file

From 6c7a40472d9d8893bc85dd07be70ad4cf59e720b Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Mon, 23 Jan 2017 18:00:23 -0800
Subject: [PATCH 09/18] Adding function of initial sampler for combined
 datasets

---
 R_BSFG/fast_BSFG_sampler_init_comb.R | 370 +++++++++++++++++++++++++++
 1 file changed, 370 insertions(+)
 create mode 100644 R_BSFG/fast_BSFG_sampler_init_comb.R

diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R
new file mode 100644
index 0000000..cb6f5fd
--- /dev/null
+++ b/R_BSFG/fast_BSFG_sampler_init_comb.R
@@ -0,0 +1,370 @@
+
+fast_BSFG_sampler_init = function(priors,run_parameters){
+  # require(PEIP)
+  require(Matrix)
+  
+  # data_matrices,run_parameters,priors,current_state,Posterior,simulation = F)
+  #   Preps everything for an analysis with the function MA_sampler. 
+  #       Loads data from setup.mat
+  #       Decides if the data is from a simulation
+  #       Uses prior to find initial values for all parameters
+  #       Initializes Posterior struct
+  #       Saves run parameters
+  #
+  #   Input file setup.mat should contain:
+  #     Y      gene expression data data: n x p
+  #     X      fixed-effect design matrix (optional)
+  #     Z_1    random effects 1 incidence matrix n x r1
+  #     Z_2    random effects 2 incidence matrix n x r2 (optional)
+  #     A      kinship matrix of lines: r1 x r1
+  #     optional:
+  #         U_act
+  #         gen_factor_Lambda
+  #         error_factor_Lambda
+  #         h2_act
+  #         G
+  #         R
+  #         B
+  #         factor_h2s
+  #         name
+  
+  # ----------------------- #
+  # ------read data-------- #
+  # ----------------------- #
+  if(file.exists('../setup.RData')) {
+    load('../setup.RData')
+  }
+  # else{
+  #require(R.matlab)
+  #setup = readMat('../setup.mat')
+  #for(i in 1:10) names(setup) = sub('.','_',names(setup),fixed=T)
+  #}
+  Y       = setup$Y
+  U_act   = setup$U_act
+  E_act   = setup$E_act
+  B       = setup$B_act   #Is B = B_act? There is no use of B_act in this code.
+  Z_1     = setup$Z_1
+  A       = setup$A	
+  X       = setup$X	
+  n       = setup$n	
+  r       = setup$r	
+  traitnames = setup$traitnames
+  
+  #Determine if 'setup.mat' contains output of a simulation, based on if
+  #known factor loadings are included. Affects plotting functions
+  simulation = F
+  if('gen_factor_Lambda' %in% names(setup)){
+    simulation = T
+    print('simulated data')
+    run_parameters$setup = setup
+  }
+  run_parameters$simulation = simulation
+  
+  # Factors:
+  #  initial number of factors
+  k = priors$k_init
+  resid_Y_prec_shape  = priors$resid_Y_prec_shape
+  resid_Y_prec_rate   = priors$resid_Y_prec_rate
+  E_a_prec_shape = priors$E_a_prec_shape
+  E_a_prec_rate  = priors$E_a_prec_rate
+  W_prec_shape = priors$W_prec_shape
+  W_prec_rate  = priors$W_prec_rate
+  p = ncol(Y[[1]])
+  
+  # do a loop for each Y in the list
+  for (pop in pops){
+  #normalize Y to have zero mean and unit variances among observed values,
+  #allowing for NaNs.
+  n = nrow(Y[[pop]])
+  r = ncol(Z_1[[pop]])
+  
+  Y_missing = is.na(Y[[pop]])        # matrix recording missing data in Y
+  Mean_Y = colMeans(Y[[pop]],na.rm=T)
+  VY = apply(Y[[pop]],2,var,na.rm=T)
+  # Don't remove the mean and standardize the variance if it's a
+  # simulation because this makes comparing to the simulated values more
+  # difficult. 
+  # Do we want to do this for real data, or let the user do it?
+  if(simulation) {
+    VY = rep(1,p)
+  } else{
+    Y[[pop]] = sweep(Y[[pop]],2,Mean_Y,'-')
+    Y[[pop]] = sweep(Y[[pop]],2,sqrt(VY),'/')
+  }
+  
+  
+  #determine if a design matrix (X) exists (is loaded from setup.mat). If
+  #not, make a dummy X-matrix with no columns.
+  if(is.null(X[[pop]])) {
+    X[[pop]]=matrix(0,nr = n,nc = 0)
+    B[[pop]]=matrix(0,nr = 0,nc = p)
+  }
+  if(nrow(X[[pop]]) == 1) X[[pop]] = t(X[[pop]])       # because I used to give X transposed
+  stopifnot(nrow(X[[pop]]) == n)
+  b = ncol(X[[pop]])
+  
+  
+  #Determine if a second random effects design matrix exists. If not, make a
+  #dummy matrix
+  if(! 'Z_2' %in% ls()) {
+    Z_2[[pop]]=matrix(0,nr = n,nc = 0)
+  }
+  stopifnot(nrow(Z_2[[pop]]) == n)
+  r2 = ncol(Z_2[[pop]])
+  
+
+  
+  #fixed effect priors
+  
+  priors$b_X_prec[[pop]]            =   1e-10*diag(1,b)   
+  
+  # ----------------------------- #
+  # -----Initialize variables---- #
+  # ----------------------------- # 
+  
+  # --- transcript-level model
+  # p-vector of probe residual precisions. 
+  #  Prior: Gamma distribution for each element
+  #       shape = resid_Y_prec_shape
+  #       rate = resid_Y_prec_rate
+  resid_Y_prec[[pop]]        = rgamma(p,shape = resid_Y_prec_shape,rate = resid_Y_prec_rate)
+  
+  
+  # g-vector of specific precisions of genetic effects. 
+  #  Prior: Gamma distribution for each element
+  #       shape = E_a_prec_shape
+  #       rate = E_a_prec_rate
+  E_a_prec[[pop]]       = rgamma(p,shape = E_a_prec_shape,rate = E_a_prec_rate)
+  
+  # Genetic effects not accounted for by factors.
+  #   Prior: Normal distribution on each element.
+  #       mean = 0
+  #       sd = 1./sqrt(E_a_prec)' on each row
+  E_a[[pop]] = matrix(rnorm(p*r,0,sqrt(1/E_a_prec[[pop]])),nr = r,nc = p, byrow = T)
+  
+  # Latent factor heritabilties. h2 can take h2_divisions values
+  #   between 0 and 1.
+  #   Prior: 0.5: h2=0, .05: h2 > 0. 
+  F_h2[[pop]] = runif(k)
+  
+  # Genetic effects on the factor scores.
+  #  Prior: Normal distribution for each element
+  #       mean = 0
+  #       sd = sqrt(F_h2') for each row.
+  F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T)
+  
+  # Full Factor scores. Combination of genetic and residual variation on
+  # each factor.
+  #  Prior: Normal distribution for each element
+  #       mean = Z_1 * F_a
+  #       sd = sqrt(1-F_h2') for each row.
+  F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n,0,sqrt(1-F_h2[[pop]])),nr = n,nc = k, byrow = T)    
+  
+  # g-vector of specific precisions of genetic effects. 
+  #  Prior: Gamma distribution for each element
+  #       shape = E_a_prec_shape
+  #       rate = E_a_prec_rate
+  W_prec[[pop]]       = rgamma(p,shape = W_prec_shape,rate = W_prec_rate)
+  
+  # Genetic effects not accounted for by factors.
+  #   Prior: Normal distribution on each element.
+  #       mean = 0
+  #       sd = 1./sqrt(E_a_prec)' on each row
+  W[[pop]] = matrix(rnorm(p*r2,0,sqrt(1/W_prec[[pop]])),nr = r2, nc = p, byrow = T)
+  
+  # Fixed effect coefficients.
+  #  Prior: Normal distribution for each element
+  #       mean = 0
+  #       sd = sqrt(1/fixed_effects_prec)
+  B[[pop]] = matrix(rnorm(b*p),nr = b, nc = p)
+  
+  # ------------------------------------ #
+  # ----Precalculate some matrices------ #
+  # ------------------------------------ #
+  
+  # recover()
+  #invert the random effect covariance matrices
+  Ainv[[pop]] = solve(A[[pop]])
+  A_2_inv[[pop]] = diag(1,r2) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified.
+  
+  #pre-calculate transformation parameters to diagonalize aI + bZAZ for fast
+  #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U'
+  #uses singular value decomposition of ZAZ for stability when ZAZ is low
+  #rank
+  #     XZ = [X_f Z_1]
+  #     [U,S,~]          = svd(XZ*blkdiag(1e6*eye(b_f),A)*XZ')
+  # recover()
+  # r = rgamma(nrow(Z_1),1,1)
+  # result = GSVD_2_c(cholcov(diag(r)),cholcov(Z_1 %*% A %*% t(Z_1)))
+  # r2 = rgamma(nrow(Z_1),1,1)
+  # result2 = GSVD_2_c(cholcov(diag(r2)),cholcov(Z_1 %*% A %*% t(Z_1)))
+  
+  result = svd(Z_1[[pop]] %*% A[[pop]] %*% t(Z_1[[pop]]))
+  invert_aI_bZAZ[[pop]] = list(
+    U = result$u,
+    s = result$d
+  )
+  
+  #fixed effects + random effects 1
+  #diagonalize mixed model equations for fast inversion: 
+  #inv(a*bdiag(priors$b_X_prec,Ainv) + b*t(cbind(X,Z_1)) %*% cbind(X,Z_1)) = U %*% diag(1/(a*s1+b*s2)) %*% t(U)
+  Design= cbind(X[[pop]],Z_1[[pop]])
+  Design2 = t(Design) %*% Design
+  result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec,Ainv[[pop]])),cholcov(Design2))
+  invert_aPXA_bDesignDesignT[[pop]] = list(
+    U = t(solve(result$X)),
+    s1 = diag(result$C)^2,
+    s2 = diag(result$S)^2
+  )
+  invert_aPXA_bDesignDesignT[[pop]]$Design_U = Design %*% invert_aPXA_bDesignDesignT[[pop]]$U
+  
+  #random effects 2
+  #diagonalize mixed model equations for fast inversion: 
+  #inv(a*A_2_inv + b*Z_2'Z_2]) = U*diag(1./(a.*s1+b.*s2))*U'
+  if(r2 > 0) {
+    Design = Z_2[[pop]]
+    Design2 = t(Design) %*% Design
+    result = GSVD_2_c(cholcov(A_2_inv[[pop]]),cholcov(Design2))
+    invert_aPXA_bDesignDesignT_rand2[[pop]] = list(
+      U = t(solve(result$X)),
+      s1 = diag(result$C)^2,
+      s2 = diag(result$S)^2
+    )
+    invert_aPXA_bDesignDesignT_rand2[[pop]]$Design_U = Design %*% invert_aPXA_bDesignDesignT_rand2[[pop]]$U
+  } else{
+    invert_aPXA_bDesignDesignT_rand2[[pop]] = list()
+  }
+  
+  #genetic effect variances of factor traits
+  # diagonalizing a*Z_1'*Z_1 + b*Ainv for fast inversion
+  #diagonalize mixed model equations for fast inversion: 
+  # inv(a*Z_1'*Z_1 + b*Ainv) = U*diag(1./(a.*s1+b.*s2))*U'
+  #similar to fixed effects + random effects 1 above, but no fixed effects.
+  ZZt = t(Z_1[[pop]]) %*% Z_1[[pop]]
+  result = GSVD_2_c(cholcov(ZZt),cholcov(Ainv[[pop]]))
+  invert_aZZt_Ainv = list(
+    U = t(solve(result$X)),
+    s1 = diag(result$C)^2,
+    s2 = diag(result$S)^2
+  )
+  # the end of loop for pops
+  }
+  
+  
+  data_matrices = list(
+    Y         = Y,
+    Z_1       = Z_1,
+    Z_2       = Z_2,
+    X         = X,
+    Y_missing = Y_missing
+  )
+  
+  # ----------------------------- #
+  # -----Initialize lambda   ---- #
+  # ----------------------------- # 
+  # Factor loading precisions (except column penalty tauh).
+  #  Prior: Gamma distribution for each element. 
+  #       shape = Lambda_df/2
+  #       rate = Lambda_df/2
+  #    Marginilizes to t-distribution with Lambda_df degrees of freedom
+  #    on each factor loading, conditional on tauh
+  Lambda_df   = priors$Lambda_df
+  Lambda_prec = matrix(rgamma(p*k,shape = Lambda_df/2,rate = Lambda_df/2),nr = p,nc = k)
+  
+  # Factor penalty. tauh(h) = \prod_{i=1}^h \delta_i
+  #  Prior: Gamma distribution for each element of delta
+  #     delta_1:
+  #       shape = delta_1_shape
+  #       rate = delta_1_rate
+  #     delta_2 ... delta_m:
+  #       shape = delta_2_shape
+  #       rate = delta_2_rate
+  delta_1_shape  = priors$delta_1_shape
+  delta_1_rate   = priors$delta_1_rate
+  delta_2_shape  = priors$delta_2_shape
+  delta_2_rate   = priors$delta_2_rate
+  delta          = c(rgamma(1,shape = delta_1_shape,rate = delta_1_rate),rgamma(k-1,shape = delta_2_shape,rate = delta_2_rate))
+  tauh           = cumprod(delta)
+  
+  # Total Factor loading precisions Lambda_prec * tauh
+  Plam = sweep(Lambda_prec,2,tauh,'*')
+  
+  # Lambda - factor loadings
+  #   Prior: Normal distribution for each element.
+  #       mu = 0
+  #       sd = sqrt(1/Plam)
+  Lambda = matrix(rnorm(p*k,0,sqrt(1/Plam)),nr = p,nc = k)
+  
+  # ----------------------- #
+  # -Initialize Posterior-- #
+  # ----------------------- #
+  Posterior = list(
+    Lambda        = matrix(0,nr=0,nc=0),
+    F_a           = matrix(0,nr=0,nc=0),
+    F             = matrix(0,nr=0,nc=0),
+    delta         = matrix(0,nr=0,nc=0),
+    F_h2          = matrix(0,nr=0,nc=0),
+    resid_Y_prec  = matrix(0,nr = p,nc = 0),
+    E_a_prec      = matrix(0,nr = p,nc = 0),
+    W_prec        = matrix(0,nr = p,nc = 0),
+    B             = matrix(0,nr = b,nc = p),
+    W             = matrix(0,nr = r2,nc = p),
+    E_a           = matrix(0,nr = r,nc = p)
+  )
+  # ----------------------- #
+  # ---Save initial values- #
+  # ----------------------- #
+  current_state = list(
+    resid_Y_prec  = resid_Y_prec,
+    Lambda_prec   = Lambda_prec,
+    delta         = delta,
+    tauh          = tauh,
+    Plam          = Plam,
+    Lambda        = Lambda,
+    F_h2          = F_h2,
+    E_a_prec      = E_a_prec,
+    W_prec        = W_prec,
+    F_a           = F_a,
+    F             = F,
+    E_a           = E_a,
+    B             = B,         
+    W             = W,
+    nrun 		  = 0
+  )
+  # ----------------------------- #
+  # ----Save run parameters------ #
+  # ----------------------------- #
+  
+  run_variables = list(
+    p       = p,
+    n       = n,
+    r       = r,
+    r2      = r2,
+    b       = b,
+    Mean_Y  = Mean_Y,
+    VY      = VY,
+    Ainv    = Ainv,
+    A_2_inv = A_2_inv,
+    invert_aI_bZAZ                   = invert_aI_bZAZ,
+    invert_aPXA_bDesignDesignT       = invert_aPXA_bDesignDesignT,
+    invert_aZZt_Ainv                 = invert_aZZt_Ainv,
+    invert_aPXA_bDesignDesignT_rand2 = invert_aPXA_bDesignDesignT_rand2
+  )
+  
+  RNG = list(
+    Random.seed = .Random.seed,
+    RNGkind = RNGkind()
+  )
+  
+  return(list(
+    data_matrices  = data_matrices,
+    run_parameters = run_parameters,
+    run_variables  = run_variables,
+    priors         = priors,
+    current_state  = current_state,
+    Posterior      = Posterior,
+    simulation     = simulation,
+    RNG            = RNG,
+    traitnames     = traitnames
+  ))
+}

From ff98738124aac8e442bb468c72d42288f883e508 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 3 Feb 2017 20:43:20 -0800
Subject: [PATCH 10/18] Simulation data based on pedigree in pengjuan's
 datasets

---
 R_BSFG/data_sim.R | 128 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 R_BSFG/data_sim.R

diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R
new file mode 100644
index 0000000..0edca7a
--- /dev/null
+++ b/R_BSFG/data_sim.R
@@ -0,0 +1,128 @@
+library(pedantics)
+#library(R.matlab)
+# define E_a_prec; resid_Y_prec; name
+# get A
+
+
+prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){
+  n = dim(A_chol)[1]  #A_chol is  Cholesky decomposition of A
+  p = nrow(Lambda) 
+  
+  r = n
+  # n = 2*n
+  
+  k = ncol(Lambda)
+  F_a = t(A_chol) %*% sweep(matrix(rnorm(n*k),n),2,sqrt(factor_h2s),'*') # make sure MN(0,A,sigma)
+  F_r = sweep(matrix(rnorm(n*k),n),2,sqrt(1-factor_h2s),'*') # factor_h2s is the variance of factor
+  F = F_a + F_r
+  
+  E_a_act = t(A_chol) %*% sweep(matrix(rnorm(n*p),n),2,sqrt(1/E_a_prec),'*') #
+  E_r_act = sweep(matrix(rnorm(n*p),n),2,sqrt(1/resid_Y_prec),'*')
+  
+  Y = F %*% t(Lambda) + E_a_act + E_r_act
+  
+  X = matrix(1,nrow=1,ncol=n)
+  B = rep(0,p)
+  Z_1 = diag(r)[,rep(1:r,n/r)] 
+  Va = colSums(Lambda^2) * factor_h2s + 1/E_a_prec  #diagonal elements in G
+  Ve = colSums(Lambda^2) * (1-factor_h2s) + 1/resid_Y_prec #diagonal elements in R
+  h2 = Va/(Va + Ve)   # heribility
+  G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + diag(1/E_a_prec) 
+  R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + diag(1/resid_Y_prec)
+ # h2 = diag(G)/(diag(G)+diag(R))  
+  
+  # Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4))
+  
+  # plot(h2,type="l")
+  
+  print(name)
+  setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R,
+               gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name)
+  
+  return(setup)
+  #do.call(writeMat,c("setup.mat",setup))
+  #save(pedigree,file="pedigree.Robj")
+}
+
+ 
+#==================================================================
+
+
+
+counter = 0
+
+#sample_size simulations
+num_reps = 10
+p = 100
+k=10
+
+
+#the same lambda
+Lambda = matrix(0,p,k)
+numeff = sample((p/30):(p/4),k,replace=T)
+for(h in 1:k){   # even though numeff[h] is not a whole number, by applying sample, it only use the int part. It doesn't round off.
+  Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+}
+Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+cols=1:k
+E_a_prec = rep(1/0.2,p)
+resid_Y_prec = rep(1/0.2,p)
+# pedigree from pengjuan's dataset to test if diff populations are indep
+setwd("~/Runcie Lab/pedigree")
+data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE)
+source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R")
+# find way to change this according to the size(different pops)
+pops = c("LConG1","LConG2","LConG3")
+ori.setup = list()
+ori.setup[[pops[1]]] = setup_pedigree(data,GenerationCode=3,LineCode=c(0,1),fixedfactor=NA) # no fixed factor because we seperate the dataset based on generation
+ori.setup[[pops[2]]] = setup_pedigree(data,GenerationCode=2,LineCode=1,fixedfactor=NA)
+ori.setup[[pops[3]]] = setup_pedigree(data,GenerationCode=1,LineCode=1,fixedfactor=NA)
+
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Simulation")
+ #sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10))
+# set seed for repeatability
+set.seed(1)
+for(rep in 1:num_reps){
+  # number of individuals that have the same mother:
+  # c(50,5) in this case means that for 50 fathers, each of them have 5 children. There are total 250 individuals.
+  # and for each individual, it has distinct mother
+  setup = list()
+  for(pop in pops){
+    # nM = sample_size[[size]][1]	
+    # nI = sample_size[[size]][2]	
+    # pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+    # pedigree<-fixPedigree(pedigree)  #A
+    # children = !is.na(pedigree[,3])
+    # 
+    # #generate A matrix as 2* kinship matrix from whole pedigree
+    # A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+    
+    # use pedigree from pengjuan's dataset
+    A = ori.setup[[pop]]$A
+    A_chol = chol(A)
+    
+    # this should be different for each pops
+    factor_h2s = runif(k) # k numbers in [0-1]
+    # order by descending order
+    factor_h2s = factor_h2s[order(factor_h2s,decreasing = TRUE)]
+    factor_h2s[seq(1,k,by=2)]=0  # make 5 of them to be zero
+    
+    g_cols = factor_h2s>0
+    Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+    # reordering lambda
+    
+    name = paste(pop,rep,sep='_')
+    setup[[pop]]= prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name)
+   
+  }
+  counter = counter+1
+  dir = paste('Sim',counter,sep="_")
+  try(dir.create(dir))
+  setwd(dir)
+  save(setup,file = 'setup.RData')
+  setwd('..')
+  
+}
+
+
+

From a743cd821e3f32e7689414547ded522e994b3ac6 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 3 Mar 2017 21:55:46 -0800
Subject: [PATCH 11/18] Generate initiating and sampling functions for BSFG
 model with several populations in the same time.

---
 R_BSFG/BSFG_functions.R              |  76 ++++++++++++++++---
 R_BSFG/fast_BSFG_sampler_comb.R      |  98 +++++++++++++-----------
 R_BSFG/fast_BSFG_sampler_init_comb.R | 109 +++++++++++++++------------
 3 files changed, 180 insertions(+), 103 deletions(-)

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index 921255a..52c086b 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -45,11 +45,9 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	for(j in 1:p) {
 	  means = c()
 	  Qlam = c()
-	  for(pop in pops){
-		  FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
-		  means = means + FUDi %*% UtY[,j]
-		  Qlam = Qlam + FUDi %*% t(FtU) 
-		}
+		FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/')
+		means = means + FUDi %*% UtY[,j]
+		Qlam = Qlam + FUDi %*% t(FtU) 
 	  Qlam = Qlam + diag(Plam[j,])
 
 		# recover()
@@ -65,6 +63,58 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
 	return(Lambda)
 }
 
+sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) {
+  
+  #Sample factor loadings Lambda while marginalizing over residual
+  #genetic effects: Y - Z_2W = F*Lambda' + E, vec(E)~N(0,kron(Psi_E,In) + kron(Psi_U, ZAZ^T))
+  #note: conditioning on F, but marginalizing over E_a.
+  #sampling is done separately by trait because each column of Lambda is
+  #independent in the conditional posterior
+  #note: invert_aI_bZAZ has parameters that diagonalize aI + bZAZ for fast
+  #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U'
+  
+  p = length(resid_Y_prec[[1]])
+  k = ncol(F[[1]])
+  
+  Zlams = matrix(rnorm(k*p),nr = k, nc = p)
+  Lambda = matrix(0,nr = p,nc = k)
+  
+  
+  for(j in 1:p) {
+    means = matrix(0,nr=p,nc=1)
+    Qlam = matrix(0,p,p)
+    # Is this part correct????????????????????
+    Y_tilde = list();
+    for(pop in pops){
+      #conditioning on W, B, F, marginalizing over E _a
+      Y_tilde[[pop]] = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_2[[pop]] %*% W[[pop]]
+      Y_tilde[[pop]] = as.matrix(Y_tilde[[pop]])
+      
+      U = invert_aI_bZAZ[[pop]]$U
+      s = invert_aI_bZAZ[[pop]]$s
+      
+      FtU = t(F[[pop]]) %*% U
+      UtY = t(U) %*% Y_tilde[[pop]]
+      
+      FUDi = E_a_prec[[pop]][j] * sweep(FtU,2,(s + E_a_prec[[pop]][j]/resid_Y_prec[[pop]][j]),'/')
+      means = means + FUDi %*% UtY[,j]
+      Qlam = Qlam + FUDi %*% t(FtU) 
+      #recover()
+    }
+    Qlam = Qlam + diag(Plam[j,])
+    
+     #recover()
+    Llam = t(chol(Qlam))
+    vlam = forwardsolve(Llam,means)
+    mlam = backsolve(t(Llam),vlam)
+    ylam = backsolve(t(Llam),Zlams[,j])
+    
+    Lambda[j,] = ylam + mlam
+    
+  } 
+  
+  return(Lambda)
+}
 
 sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) {
 	#sample factor heritibilties conditional on a given residual precision
@@ -439,13 +489,15 @@ save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,de
   sp = ncol(Posterior$Lambda)
   #size(Posterior$Lambda,2)
   #save factor samples
-  # #if(length(Lambda) > nrow(Posterior[["Lambda"]])){  this will not happen since we stop updatek
-  #   # expand factor sample matrix if necessary
-  #   Posterior[["F"]]      = rbind(Posterior[["F"]], 	   	matrix(0,nr = length(F)     -nrow(Posterior[["F"]]),		nc = sp))
-  #   Posterior[["F_a"]]    = rbind(Posterior[["F_a"]], 	matrix(0,nr = length(F_a) 	-nrow(Posterior[["F_a"]]),	nc = sp))
-  #   Posterior[["delta"]]  = rbind(Posterior[["delta"]], 	matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]),	nc = sp))
-  #   Posterior[["F_h2"]]   = rbind(Posterior[["F_h2"]], 	matrix(0,nr = length(F_h2) 	-nrow(Posterior[["F_h2"]]),	nc = sp))
-  # }
+  # change the if condition to be if sp_num == 1. So we don't need to run the update k part and we can also assign new value to out posterior variables.
+  if(sp_num==1){  
+     # expand factor sample matrix if necessary
+     Posterior$Lambda = rbind(Posterior$Lambda, 	matrix(0,nr = length(Lambda)-nrow(Posterior$Lambda),nc = sp))
+     Posterior[["F"]]      = rbind(Posterior[["F"]], 	   	matrix(0,nr = length(F)     -nrow(Posterior[["F"]]),		nc = sp))
+     Posterior[["F_a"]]    = rbind(Posterior[["F_a"]], 	matrix(0,nr = length(F_a) 	-nrow(Posterior[["F_a"]]),	nc = sp))
+     Posterior[["delta"]]  = rbind(Posterior[["delta"]], 	matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]),	nc = sp))
+     Posterior[["F_h2"]]   = rbind(Posterior[["F_h2"]], 	matrix(0,nr = length(F_h2) 	-nrow(Posterior[["F_h2"]]),	nc = sp))
+   }
   Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda)
   Posterior$F[1:length(F),sp_num]     = c(F)
   Posterior$F_a[1:length(F_a),sp_num] = c(F_a)
diff --git a/R_BSFG/fast_BSFG_sampler_comb.R b/R_BSFG/fast_BSFG_sampler_comb.R
index 2e6913e..1777c25 100644
--- a/R_BSFG/fast_BSFG_sampler_comb.R
+++ b/R_BSFG/fast_BSFG_sampler_comb.R
@@ -115,7 +115,7 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) {
   F_a          =   current_state$F_a
   B            =   current_state$B
   start_i      =   current_state$nrun
-  k = ncol(F)
+  k = ncol(F[[1]])
   
   # ----------------------------------------------- #
   # -----------Reset Global Random Number Stream--- #
@@ -184,39 +184,6 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) {
     }
     # recover()
     
-    # -----Sample Lambda------------------ #
-    #conditioning on W, B, F, marginalizing over E _a
-    Y_tilde = Y - X %*% B - Z_2 %*% W
-    Y_tilde = as.matrix(Y_tilde)
-    # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
-    
-    Lambda = sample_Lambda_comb( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
-
-    # -----Sample Lambda_prec------------- #
-    Lambda2 = Lambda^2
-    Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k)
-    
-    # -----Sample delta, update tauh------ #
-    delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 )
-    tauh  = cumprod(delta)
-    
-    # -----Update Plam-------------------- #
-    Plam = sweep(Lambda_prec,2,tauh,'*')
-    
-    # -----Sample resid_Y_prec ---------------- #
-    # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below.
-    
-    # -----Sample E_a_prec---------------- #
-    #conditioning on W, B, F, marginalizing over E_a 
-    # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
-    # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
-    
-    # pops should be a output from setup datasets and for pop in pops, the row number for 
-    # each pop should also included in the setup dataset
-    # read pops (number of populations)
-    # read row number for each pop
-    # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda)
-   
     # -----Sample B and E_a--------------- #
     #conditioning on W, F, Lambda
     for(pop in pops){
@@ -224,16 +191,12 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) {
       Y_tilde = as.matrix(Y_tilde)
       # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
       location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], E_a_prec[[pop]], invert_aPXA_bDesignDesignT[[pop]] )
-      B[[pop]]   = location_sample[1:b,]
-      if(length(B[[pop]])==p){
-        B[[pop]] = matrix(0,nr=0,nc=p)
-      }
-      E_a[[pop]] = location_sample[b+(1:r),]
-      
+      B[[pop]]   = location_sample[1:b[[pop]],,drop=FALSE]
+      E_a[[pop]] = location_sample[b[[pop]]+(1:r[[pop]]),]
       
       # -----Sample W ---------------------- #
       #conditioning on B, E_a, F, Lambda
-      if(ncol(Z_2) > 0) {
+      if(ncol(Z_2[[pop]]) > 0) {
         Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - F[[pop]] %*% t(Lambda)
         Y_tilde = as.matrix(Y_tilde)
         # location_sample = sample_means( Y_tilde, resid_Y_prec, W_prec, invert_aPXA_bDesignDesignT_rand2 )
@@ -261,18 +224,65 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) {
       
       # -----Sample resid_Y_prec------------ #
       Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - F[[pop]] %*% t(Lambda) - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]]
-      resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision
+      resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n[[pop]]/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision
       # n is the sum of all populations
       
       # -----Sample E_a_prec---------------- #
       #     #random effect 1 (D) residual precision
-      E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]]))
+      E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r[[pop]]/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]]))
       
       # -----Sample W_prec------------------ #
-      if(ncol(Z_2) > 0) {
+      if(ncol(Z_2[[pop]]) > 0) {
         W_prec[[pop]] =  rgamma(p, W_prec_shape + r2/2,rate = W_prec_rate + 1/2*diag(t(W[[pop]]) %*% A_2_inv[[pop]] %*% W[[pop]]))
       }
     }
+    
+    # -----Sample Lambda------------------ #
+    # create a new set of variables for sampling Lambda
+    #Y_lab = do.call(rbind,Y)
+    # Combine X as block diagonal matrix  
+    #library(Matrix)
+    #X_lab = Reduce(bdiag,X)
+    #B_lab = do.call(rbind,B)
+                          
+    #Z_2_lab = do.call(rbind,Z_2)
+    #W_lab = Reduce("+",W)/length(W)
+    #F_lab = do.call(rbind,F)
+    #resid_Y_prec_lab = Reduce("+",resid_Y_prec)/length(resid_Y_prec)
+    #E_a_prec_lab = Reduce("+",E_a_prec)/length(E_a_prec)
+    
+    #conditioning on W, B, F, marginalizing over E _a
+    #Y_tilde = Y - X %*% B - Z_2 %*% W
+    #Y_tilde = as.matrix(Y_tilde)
+    # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
+    
+    Lambda = sample_Lambda_comb( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ )
+    
+    # -----Sample Lambda_prec------------- #
+    Lambda2 = Lambda^2
+    Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k)
+    
+    # -----Sample delta, update tauh------ #
+    delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 )
+    tauh  = cumprod(delta)
+    
+    # -----Update Plam-------------------- #
+    Plam = sweep(Lambda_prec,2,tauh,'*')
+    
+    # -----Sample resid_Y_prec ---------------- #
+    # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below.
+    
+    # -----Sample E_a_prec---------------- #
+    #conditioning on W, B, F, marginalizing over E_a 
+    # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
+    # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec)
+    
+    # pops should be a output from setup datasets and for pop in pops, the row number for 
+    # each pop should also included in the setup dataset
+    # read pops (number of populations)
+    # read row number for each pop
+    # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda)
+    
       # -- adapt number of factors to samples ---#
       # adapt_result = update_k( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_1,Lambda_df,delta_2_shape,delta_2_rate,b0,b1,i,epsilon,prop )
       # F           = adapt_result$F
diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R
index cb6f5fd..8b75d9e 100644
--- a/R_BSFG/fast_BSFG_sampler_init_comb.R
+++ b/R_BSFG/fast_BSFG_sampler_init_comb.R
@@ -1,5 +1,4 @@
-
-fast_BSFG_sampler_init = function(priors,run_parameters){
+fast_BSFG_sampler_init_comb = function(priors,run_parameters){
   # require(PEIP)
   require(Matrix)
   
@@ -34,24 +33,9 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   if(file.exists('../setup.RData')) {
     load('../setup.RData')
   }
-  # else{
-  #require(R.matlab)
-  #setup = readMat('../setup.mat')
-  #for(i in 1:10) names(setup) = sub('.','_',names(setup),fixed=T)
-  #}
-  Y       = setup$Y
-  U_act   = setup$U_act
-  E_act   = setup$E_act
-  B       = setup$B_act   #Is B = B_act? There is no use of B_act in this code.
-  Z_1     = setup$Z_1
-  A       = setup$A	
-  X       = setup$X	
-  n       = setup$n	
-  r       = setup$r	
-  traitnames = setup$traitnames
   
-  #Determine if 'setup.mat' contains output of a simulation, based on if
-  #known factor loadings are included. Affects plotting functions
+  #k = priors$k_init
+  
   simulation = F
   if('gen_factor_Lambda' %in% names(setup)){
     simulation = T
@@ -60,23 +44,47 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   }
   run_parameters$simulation = simulation
   
+  pops = c("LConG1","LConG2","LConG3")
+  # do a loop for each Y in the list
+  Y =list();U_act=list();E_act=list();B=list();Z_1=list();A=list()
+  X=list();n=list();r=list();r2=list();b=list();traitnames=list()
+  Z_2=list();priors$b_X_prec = list();resid_Y_prec=list()
+  E_a_prec=list();E_a=list();F_h2=list();F_a=list();F=list();W_prec=list();W=list()
+  Ainv=list();A_2_inv=list();invert_aI_bZAZ=list();invert_aZZt_Ainv=list();invert_aPXA_bDesignDesignT=list()
+  invert_aPXA_bDesignDesignT_rand2=list()
+  
+  for (pop in pops){
+  Y[[pop]]       = setup[[pop]]$Y
+  U_act[[pop]]   = setup[[pop]]$U_act
+  E_act[[pop]]   = setup[[pop]]$E_act
+  B[[pop]]       = setup[[pop]]$B_act   #Is B = B_act? There is no use of B_act in this code.
+  Z_1[[pop]]     = setup[[pop]]$Z_1
+  A[[pop]]       = setup[[pop]]$A	
+  X[[pop]]       = setup[[pop]]$X	
+  n[[pop]]       = setup[[pop]]$n	
+  r[[pop]]       = setup[[pop]]$r	
+  traitnames[[pop]] = setup[[pop]]$traitnames
+  
+  
+  #Determine if 'setup.mat' contains output of a simulation, based on if
+  #known factor loadings are included. Affects plotting functions
+  
   # Factors:
   #  initial number of factors
-  k = priors$k_init
+  
   resid_Y_prec_shape  = priors$resid_Y_prec_shape
   resid_Y_prec_rate   = priors$resid_Y_prec_rate
   E_a_prec_shape = priors$E_a_prec_shape
   E_a_prec_rate  = priors$E_a_prec_rate
   W_prec_shape = priors$W_prec_shape
   W_prec_rate  = priors$W_prec_rate
-  p = ncol(Y[[1]])
-  
-  # do a loop for each Y in the list
-  for (pop in pops){
+  p = ncol(Y[[1]]) 
+  k = p
+
   #normalize Y to have zero mean and unit variances among observed values,
   #allowing for NaNs.
-  n = nrow(Y[[pop]])
-  r = ncol(Z_1[[pop]])
+  n[[pop]] = nrow(Y[[pop]])
+  r[[pop]] = ncol(Z_1[[pop]])
   
   Y_missing = is.na(Y[[pop]])        # matrix recording missing data in Y
   Mean_Y = colMeans(Y[[pop]],na.rm=T)
@@ -96,27 +104,30 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #determine if a design matrix (X) exists (is loaded from setup.mat). If
   #not, make a dummy X-matrix with no columns.
   if(is.null(X[[pop]])) {
-    X[[pop]]=matrix(0,nr = n,nc = 0)
+    X[[pop]]=matrix(0,nr = n[[pop]],nc = 0)
     B[[pop]]=matrix(0,nr = 0,nc = p)
   }
   if(nrow(X[[pop]]) == 1) X[[pop]] = t(X[[pop]])       # because I used to give X transposed
-  stopifnot(nrow(X[[pop]]) == n)
-  b = ncol(X[[pop]])
+  stopifnot(nrow(X[[pop]]) == n[[pop]])
+  b[[pop]] = ncol(X[[pop]])
   
   
   #Determine if a second random effects design matrix exists. If not, make a
   #dummy matrix
-  if(! 'Z_2' %in% ls()) {
-    Z_2[[pop]]=matrix(0,nr = n,nc = 0)
-  }
-  stopifnot(nrow(Z_2[[pop]]) == n)
-  r2 = ncol(Z_2[[pop]])
+  # if(! 'Z_2' %in% ls()) {
+  #   Z_2[[pop]]=matrix(0,nr = n,nc = 0)
+  # }
+    if(is.null(Z_2[[pop]])){
+      Z_2[[pop]]=matrix(0,nr = n[[pop]],nc = 0)
+    }
+    stopifnot(nrow(Z_2[[pop]]) == n[[pop]])
+    r2[[pop]] = ncol(Z_2[[pop]])
   
 
   
   #fixed effect priors
   
-  priors$b_X_prec[[pop]]            =   1e-10*diag(1,b)   
+  priors$b_X_prec[[pop]]            =   1e-10*diag(1,b[[pop]])   
   
   # ----------------------------- #
   # -----Initialize variables---- #
@@ -140,25 +151,28 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #   Prior: Normal distribution on each element.
   #       mean = 0
   #       sd = 1./sqrt(E_a_prec)' on each row
-  E_a[[pop]] = matrix(rnorm(p*r,0,sqrt(1/E_a_prec[[pop]])),nr = r,nc = p, byrow = T)
+  E_a[[pop]] = matrix(rnorm(p*r[[pop]],0,sqrt(1/E_a_prec[[pop]])),nr = r[[pop]],nc = p, byrow = T)
   
   # Latent factor heritabilties. h2 can take h2_divisions values
   #   between 0 and 1.
   #   Prior: 0.5: h2=0, .05: h2 > 0. 
+  
+  # change k back  to p
   F_h2[[pop]] = runif(k)
   
   # Genetic effects on the factor scores.
   #  Prior: Normal distribution for each element
   #       mean = 0
   #       sd = sqrt(F_h2') for each row.
-  F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T)
+  #F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T)
+  F_a[[pop]] = matrix(rnorm(k*r[[pop]],0,sqrt(F_h2[[pop]])),nr = r[[pop]],nc = k, byrow = T)
   
   # Full Factor scores. Combination of genetic and residual variation on
   # each factor.
   #  Prior: Normal distribution for each element
   #       mean = Z_1 * F_a
   #       sd = sqrt(1-F_h2') for each row.
-  F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n,0,sqrt(1-F_h2[[pop]])),nr = n,nc = k, byrow = T)    
+  F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n[[pop]],0,sqrt(1-F_h2[[pop]])),nr = n[[pop]],nc = k, byrow = T)    
   
   # g-vector of specific precisions of genetic effects. 
   #  Prior: Gamma distribution for each element
@@ -170,13 +184,13 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #   Prior: Normal distribution on each element.
   #       mean = 0
   #       sd = 1./sqrt(E_a_prec)' on each row
-  W[[pop]] = matrix(rnorm(p*r2,0,sqrt(1/W_prec[[pop]])),nr = r2, nc = p, byrow = T)
+  W[[pop]] = matrix(rnorm(p*r2[[pop]],0,sqrt(1/W_prec[[pop]])),nr = r2[[pop]], nc = p, byrow = T)
   
   # Fixed effect coefficients.
   #  Prior: Normal distribution for each element
   #       mean = 0
   #       sd = sqrt(1/fixed_effects_prec)
-  B[[pop]] = matrix(rnorm(b*p),nr = b, nc = p)
+  B[[pop]] = matrix(rnorm(b[[pop]]*p),nr = b[[pop]], nc = p)
   
   # ------------------------------------ #
   # ----Precalculate some matrices------ #
@@ -185,7 +199,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   # recover()
   #invert the random effect covariance matrices
   Ainv[[pop]] = solve(A[[pop]])
-  A_2_inv[[pop]] = diag(1,r2) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified.
+  A_2_inv[[pop]] = diag(1,r2[[pop]]) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified.
   
   #pre-calculate transformation parameters to diagonalize aI + bZAZ for fast
   #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U'
@@ -210,7 +224,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #inv(a*bdiag(priors$b_X_prec,Ainv) + b*t(cbind(X,Z_1)) %*% cbind(X,Z_1)) = U %*% diag(1/(a*s1+b*s2)) %*% t(U)
   Design= cbind(X[[pop]],Z_1[[pop]])
   Design2 = t(Design) %*% Design
-  result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec,Ainv[[pop]])),cholcov(Design2))
+  result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec[[pop]],Ainv[[pop]])),cholcov(Design2))
   invert_aPXA_bDesignDesignT[[pop]] = list(
     U = t(solve(result$X)),
     s1 = diag(result$C)^2,
@@ -221,7 +235,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #random effects 2
   #diagonalize mixed model equations for fast inversion: 
   #inv(a*A_2_inv + b*Z_2'Z_2]) = U*diag(1./(a.*s1+b.*s2))*U'
-  if(r2 > 0) {
+  if(r2[[pop]] > 0) {
     Design = Z_2[[pop]]
     Design2 = t(Design) %*% Design
     result = GSVD_2_c(cholcov(A_2_inv[[pop]]),cholcov(Design2))
@@ -242,7 +256,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   #similar to fixed effects + random effects 1 above, but no fixed effects.
   ZZt = t(Z_1[[pop]]) %*% Z_1[[pop]]
   result = GSVD_2_c(cholcov(ZZt),cholcov(Ainv[[pop]]))
-  invert_aZZt_Ainv = list(
+  invert_aZZt_Ainv[[pop]] = list(
     U = t(solve(result$X)),
     s1 = diag(result$C)^2,
     s2 = diag(result$S)^2
@@ -300,6 +314,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
   # ----------------------- #
   Posterior = list(
     Lambda        = matrix(0,nr=0,nc=0),
+    E_a           = matrix(0,nr =nrow(do.call(rbind,F_a)) ,nc = p),
     F_a           = matrix(0,nr=0,nc=0),
     F             = matrix(0,nr=0,nc=0),
     delta         = matrix(0,nr=0,nc=0),
@@ -307,9 +322,9 @@ fast_BSFG_sampler_init = function(priors,run_parameters){
     resid_Y_prec  = matrix(0,nr = p,nc = 0),
     E_a_prec      = matrix(0,nr = p,nc = 0),
     W_prec        = matrix(0,nr = p,nc = 0),
-    B             = matrix(0,nr = b,nc = p),
-    W             = matrix(0,nr = r2,nc = p),
-    E_a           = matrix(0,nr = r,nc = p)
+    B             = matrix(0,nr = b[[1]],nc = p),
+    W             = matrix(0,nr = r2[[1]],nc = p)
+    #E_a           = matrix(0,nr = r[[1]],nc = p)
   )
   # ----------------------- #
   # ---Save initial values- #

From 5b5793fb37f6f44751dc899c5283225208647220 Mon Sep 17 00:00:00 2001
From: xin li <xxrli@ucdavis.edu>
Date: Fri, 3 Mar 2017 21:58:34 -0800
Subject: [PATCH 12/18] Data simulation for testing BSFG_comb.

---
 R_BSFG/data_sim.R | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R
index 0edca7a..146f062 100644
--- a/R_BSFG/data_sim.R
+++ b/R_BSFG/data_sim.R
@@ -1,7 +1,5 @@
 library(pedantics)
 #library(R.matlab)
-# define E_a_prec; resid_Y_prec; name
-# get A
 
 
 prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){
@@ -37,8 +35,8 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,
   
   print(name)
   setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R,
-               gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name)
-  
+               gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name,pops)
+  # ????????????????????gen_factor_Lambda ; error_factor_Lambda
   return(setup)
   #do.call(writeMat,c("setup.mat",setup))
   #save(pedigree,file="pedigree.Robj")
@@ -46,9 +44,6 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,
 
  
 #==================================================================
-
-
-
 counter = 0
 
 #sample_size simulations
@@ -72,7 +67,7 @@ setwd("~/Runcie Lab/pedigree")
 data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE)
 source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R")
 # find way to change this according to the size(different pops)
-pops = c("LConG1","LConG2","LConG3")
+
 ori.setup = list()
 ori.setup[[pops[1]]] = setup_pedigree(data,GenerationCode=3,LineCode=c(0,1),fixedfactor=NA) # no fixed factor because we seperate the dataset based on generation
 ori.setup[[pops[2]]] = setup_pedigree(data,GenerationCode=2,LineCode=1,fixedfactor=NA)
@@ -82,6 +77,7 @@ setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Simulation")
  #sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10))
 # set seed for repeatability
 set.seed(1)
+pops = c("LConG1","LConG2","LConG3")
 for(rep in 1:num_reps){
   # number of individuals that have the same mother:
   # c(50,5) in this case means that for 50 fathers, each of them have 5 children. There are total 250 individuals.
@@ -107,7 +103,7 @@ for(rep in 1:num_reps){
     factor_h2s = factor_h2s[order(factor_h2s,decreasing = TRUE)]
     factor_h2s[seq(1,k,by=2)]=0  # make 5 of them to be zero
     
-    g_cols = factor_h2s>0
+    g_cols = factor_h2s>0  # will this step makes lambda matrix to be different in pops?????
     Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
     # reordering lambda
     
@@ -121,7 +117,6 @@ for(rep in 1:num_reps){
   setwd(dir)
   save(setup,file = 'setup.RData')
   setwd('..')
-  
 }
 
 

From 4ed81e79c49873195d6d353181a5fa26cd8428ed Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Mon, 13 Mar 2017 22:37:14 -0700
Subject: [PATCH 13/18] change k = 10.

---
 R_BSFG/BSFG_functions.R              | 6 +++---
 R_BSFG/data_sim.R                    | 4 ++--
 R_BSFG/fast_BSFG_sampler_init_comb.R | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index 52c086b..1f9dec7 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -81,9 +81,9 @@ sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_
   
   
   for(j in 1:p) {
-    means = matrix(0,nr=p,nc=1)
-    Qlam = matrix(0,p,p)
-    # Is this part correct????????????????????
+    #means = matrix(0,nr=p,nc=1)
+    means = matrix(0,nr=k,nc=1)
+    Qlam = matrix(0,k,k)
     Y_tilde = list();
     for(pop in pops){
       #conditioning on W, B, F, marginalizing over E _a
diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R
index 146f062..0d7558d 100644
--- a/R_BSFG/data_sim.R
+++ b/R_BSFG/data_sim.R
@@ -47,7 +47,7 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,
 counter = 0
 
 #sample_size simulations
-num_reps = 10
+num_reps = 5
 p = 100
 k=10
 
@@ -104,7 +104,7 @@ for(rep in 1:num_reps){
     factor_h2s[seq(1,k,by=2)]=0  # make 5 of them to be zero
     
     g_cols = factor_h2s>0  # will this step makes lambda matrix to be different in pops?????
-    Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+    #Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
     # reordering lambda
     
     name = paste(pop,rep,sep='_')
diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R
index 8b75d9e..6f2cd67 100644
--- a/R_BSFG/fast_BSFG_sampler_init_comb.R
+++ b/R_BSFG/fast_BSFG_sampler_init_comb.R
@@ -34,7 +34,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){
     load('../setup.RData')
   }
   
-  #k = priors$k_init
+  k = priors$k_init
   
   simulation = F
   if('gen_factor_Lambda' %in% names(setup)){
@@ -79,7 +79,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){
   W_prec_shape = priors$W_prec_shape
   W_prec_rate  = priors$W_prec_rate
   p = ncol(Y[[1]]) 
-  k = p
+  #k = p
 
   #normalize Y to have zero mean and unit variances among observed values,
   #allowing for NaNs.

From fa47849b18d4323ae6871f2bf9995658e673cd82 Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Fri, 24 Mar 2017 03:44:26 -0700
Subject: [PATCH 14/18]  standardize in initial function: subtract the
 mean(overall mean), but not divide the sd.

---
 R_BSFG/fast_BSFG_sampler_init_comb.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R
index 6f2cd67..1f24618 100644
--- a/R_BSFG/fast_BSFG_sampler_init_comb.R
+++ b/R_BSFG/fast_BSFG_sampler_init_comb.R
@@ -32,6 +32,8 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){
   # ----------------------- #
   if(file.exists('../setup.RData')) {
     load('../setup.RData')
+    Y = lapply(setup, function(x) x$Y)
+    Mean_Y = colMeans(do.call(rbind,Y),na.rm = TRUE)
   }
   
   k = priors$k_init
@@ -87,7 +89,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){
   r[[pop]] = ncol(Z_1[[pop]])
   
   Y_missing = is.na(Y[[pop]])        # matrix recording missing data in Y
-  Mean_Y = colMeans(Y[[pop]],na.rm=T)
+  #Mean_Y = colMeans(Y[[pop]],na.rm=T)
   VY = apply(Y[[pop]],2,var,na.rm=T)
   # Don't remove the mean and standardize the variance if it's a
   # simulation because this makes comparing to the simulated values more

From d292075324d3190998b10c051a9b6ee9daaafaaf Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Fri, 24 Mar 2017 03:44:48 -0700
Subject: [PATCH 15/18] minor fix

---
 R_BSFG/fast_BSFG_sampler.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/R_BSFG/fast_BSFG_sampler.R b/R_BSFG/fast_BSFG_sampler.R
index 7d2d29b..a58df8b 100644
--- a/R_BSFG/fast_BSFG_sampler.R
+++ b/R_BSFG/fast_BSFG_sampler.R
@@ -203,7 +203,11 @@ fast_BSFG_sampler = function(BSFG_state,n_samples) {
 		Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W
 		# location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
 		location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
-		B   = location_sample[1:b,]
+		if(b>0) {
+		  B   = location_sample[1:b,]
+		} else {
+		  B = matrix(0,0,p)
+		}
 		E_a = location_sample[b+(1:r),]
 
 	 # -----Sample W ---------------------- #

From 2764585446cd3cd820a68157662ae77a2bfa4599 Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Fri, 24 Mar 2017 03:45:20 -0700
Subject: [PATCH 16/18] minor fix

---
 R_BSFG/fast_BSFG_sampler_current.R | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/R_BSFG/fast_BSFG_sampler_current.R b/R_BSFG/fast_BSFG_sampler_current.R
index 497daf2..af3c40c 100644
--- a/R_BSFG/fast_BSFG_sampler_current.R
+++ b/R_BSFG/fast_BSFG_sampler_current.R
@@ -201,17 +201,13 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) {
     
     # -----Sample B and E_a--------------- #
     #conditioning on W, F, Lambda
-    for��pop in pops){
       Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W
       Y_tilde = as.matrix(Y_tilde)
       # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
       location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT )
-        B   = location_sample[1:b,]
-        if(length(B)==p){
-          B = matrix(0,nr=0,nc=p)
-        }
+      B   = location_sample[1:b,,drop=FALSE]
       E_a = location_sample[b+(1:r),]
-    }
+
     
     # -----Sample W ---------------------- #
     #conditioning on B, E_a, F, Lambda
@@ -349,4 +345,4 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) {
   
   save(BSFG_state,file = 'BSFG_state.RData')
   return(BSFG_state)
-}
\ No newline at end of file
+}

From bb906abe99cd119f305e8be2d6337960a6f17d50 Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Tue, 25 Apr 2017 00:06:24 -0700
Subject: [PATCH 17/18] functions for plotting

---
 R_BSFG/code_comparing_plotting.R           | 409 +++++++++++++++++++++
 R_BSFG/deltaZ_posterior_mean.Rmd           | 117 ++++++
 R_BSFG/generate_simulations_halfSib_fast.R | 338 +++++++++++++++++
 R_BSFG/test_indep_Ablocks.R                |  50 +++
 4 files changed, 914 insertions(+)
 create mode 100644 R_BSFG/code_comparing_plotting.R
 create mode 100644 R_BSFG/deltaZ_posterior_mean.Rmd
 create mode 100644 R_BSFG/generate_simulations_halfSib_fast.R
 create mode 100644 R_BSFG/test_indep_Ablocks.R

diff --git a/R_BSFG/code_comparing_plotting.R b/R_BSFG/code_comparing_plotting.R
new file mode 100644
index 0000000..27dedce
--- /dev/null
+++ b/R_BSFG/code_comparing_plotting.R
@@ -0,0 +1,409 @@
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC")
+MCMC = readRDS("G_MCMC.Rdata")
+#setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+#BSFGOld = "BSFG_state.RData"
+#BSFGNew = "BSFG_state14.RData"
+#BSFGStar = "BSFG_state17.RData"
+#target = "F_h2"
+#load(BSFGOld)
+#traitnames = BSFG_state$traitnames
+traitnames = rownames(MCMC[[1]][[1]])
+
+#old.G = G_Matrix_Comp(BSFG_state)
+old.G = MCMC[[4]]
+#p = BSFG_state$run_variables$p
+#spn = dim(BSFG_state$Posterior[[target]])[2]
+p = 18
+spn = 2000
+#load(BSFGNew)
+#new.G = G_Matrix_Comp(BSFG_state)
+new.G = MCMC[[3]]
+
+#load(BSFGStar)
+#star.G = G_Matrix_Comp(BSFG_state)
+star.G = MCMC[[2]]
+
+
+# The distance of two matrix(in predicted phenotype value/level)
+#selection gradient
+beta = 0.3609
+beta.v = c(0,beta,rep(0,16))
+traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+#old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+#new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+#star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+#boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+#boxplot(t(new.traces.traits))
+# use ggplot 
+traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits)))
+traces.traits.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn))
+
+traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star"))
+traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old"))
+
+traces.traits.df$traits = rep(traitnames,3*spn)
+names(traces.traits.df) = c("response","model","traits")
+
+summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
+                      conf.interval=.95, .drop=TRUE) {
+  library(plyr)
+  
+  # New version of length which can handle NA's: if na.rm==T, don't count them
+  length2 <- function (x, na.rm=FALSE) {
+    if (na.rm) sum(!is.na(x))
+    else       length(x)
+  }
+  
+  # This does the summary. For each group's data frame, return a vector with
+  # N, mean, and sd
+  datac <- ddply(data, groupvars, .drop=.drop,
+                 .fun = function(xx, col) {
+                   c(
+                     q1 = quantile   (xx[[col]],0.025, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     q2   = quantile     (xx[[col]],0.975, na.rm=na.rm)- mean   (xx[[col]], na.rm=na.rm),
+                     mean = mean   (xx[[col]], na.rm=na.rm)
+                     
+                   )
+                 },
+                 measurevar
+  )
+  
+  # Rename the "mean" column    
+  datac <- rename(datac, c("mean" = measurevar))
+  
+  #datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
+  
+  # Confidence interval multiplier for standard error
+  # Calculate t-statistic for confidence interval: 
+  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
+  #ciMult <- qt(conf.interval/2 + .5, datac$N-1)
+  #datac$ci <- datac$se * ciMult
+  
+  return(datac)
+}
+
+traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits"))
+names(traces.traits.df.sum) = c("model","traits","q1","q2","response")
+
+
+save(traces.traits.df.sum,file = "plot6.RData")
+
+
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+# geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+pd <- position_dodge(width = 0.6)
+load("plot5.RData")
+plot5 <- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (F3)",color = "G (F3)")
+
+
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+# geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+#pd <- position_dodge(width = 0.3)
+load("plot6.RData")
+plot6<- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(fill = "G (Control)",color = "G (Control)")
+
+
+plots1=list()
+plots1=lapply(c(1,3),function(i) get(paste0("plot",i)))
+plots2=list()
+plots2=lapply(c(2,4),function(i) get(paste0("plot",i)))
+plots3=list()
+plots3=lapply(c(5,6),function(i) get(paste0("plot",i)))
+plots4=list(g11,g22) 
+library(gridExtra)
+ggsave("BSFG_comparison_plots1.pdf", marrangeGrob(grobs = plots1, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("BSFG_comparison_plots2.pdf", marrangeGrob(grobs = plots2, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("BSFG_comparison_plots3.pdf", marrangeGrob(grobs = plots3, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+ggsave("EigenvaluesComparison.pdf", marrangeGrob(grobs = plots4, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm")
+
+
+#5. PCA decomposition
+
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+#load(BSFGOld)
+#old.G = G_Matrix_Comp(BSFG_state)
+old.G = MCMC[[4]]
+new.G = MCMC[[3]]
+star.G = MCMC[[2]]
+#p = BSFG_state$run_variables$p
+#traitnames = BSFG_state$traitnames
+#load("BSFG_state6.RData")
+#new.G = G_Matrix_Comp(BSFG_state)
+#load("BSFG_state7.RData")
+#star.G = G_Matrix_Comp(BSFG_state)
+
+old.traces.eigen = sapply(old.G, function(x)eigen(x)$values)
+new.traces.eigen = sapply(new.G, function(x)eigen(x)$values)
+star.traces.eigen = sapply(star.G, function(x)eigen(x)$values)
+
+traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen)))
+traces.eigen.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn))
+
+traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star"))
+traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old"))
+
+traces.eigen.df$lambda = rep(paste0("PC",1:18),3*spn)
+names(traces.eigen.df) = c("response","model","PC")
+
+traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC"))
+names(traces.eigen.df.sum) = c("model","PC","q1","q2","response")
+
+# reorder the PC value in the data
+traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18))
+
+# size dataframe
+old.traces.size = apply(old.traces.eigen,2,sum)
+new.traces.size = apply(new.traces.eigen,2,sum)
+star.traces.size = apply(star.traces.eigen,2,sum)
+size.df = data.frame(old.traces.size,new.traces.size,star.traces.size)
+library(reshape2)
+size.df = melt(size.df,value.name = "Size")
+size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size"))
+
+# PC1
+old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x))
+new.traces.pc1c = apply(new.traces.eigen,2,function(x)x[1]/sum(x))
+star.traces.pc1c = apply(star.traces.eigen,2,function(x)x[1]/sum(x))
+pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c)
+library(reshape2)
+pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution")
+pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c"))
+
+
+
+save(traces.eigen.df.sum , file = "eigen567.RData")
+save(traces.eigen.df.sum, file = "eigen51417.RData")
+save(size.df, file = "size567.RData")
+save(size.df, file = "size51417.RData")
+save(pc1c.df, file = "pc1c567.RData")
+save(pc1c.df, file = "pc1c51417.RData")
+
+load("eigen567.RData")
+load("size567.RData")
+load("pc1c567.RData")
+# size of each model 
+library(dplyr)
+traces.eigen.df.sum %>%
+  group_by(model) %>%
+  summarize(size = sum(response))
+
+library(ggplot2)
+size567.boxplot <- ggplot(size.df, aes(x="",y=Size, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+  scale_fill_manual(breaks =rev(c("star.traces.size","new.traces.size","old.traces.size")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (F3)")
+
+pc1c567.boxplot <- ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+  scale_fill_manual(breaks =rev(c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c")),
+                    values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (F3)")
+
+pd <- position_dodge(width = 0.6)
+
+g11<-ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+
+  scale_color_manual(breaks =rev(c("star","new","old")),
+                     values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  ylab("Variance")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  xlab("Principle Components")+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12))+
+  labs(color = "G (F3)")
+#-------------------------------
+# combine two boxplot
+library(gridExtra)
+library("cowplot")
+plot_grid(size567.boxplot, size51417.boxplot, ncol = 2, nrow = 1)
+plot_grid(pc1c567.boxplot, pc1c51417.boxplot, ncol = 2, nrow = 1)
+#-------------------------------
+load("eigen51417.RData")
+load("size51417.RData")
+load("pc1c51417.RData")
+# size of each model
+library(dplyr)
+traces.eigen.df.sum %>%
+  group_by(model) %>%
+  summarize(size = sum(response))
+
+# boxplot of size of each model
+library(ggplot2)
+size51417.boxplot <- ggplot(size.df, aes(x="",y=Size, fill=variable))+
+  geom_boxplot()+
+  scale_fill_manual(breaks = c("star.traces.size","new.traces.size","old.traces.size"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (Control)")
+
+pc1c51417.boxplot = ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+
+  geom_boxplot()+
+  scale_fill_manual(breaks = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c"),
+                    values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (Control)")
+
+g22<-ggplot(traces.eigen.df.sum,aes(x = reorder(PC,PC), y = response, color=model))+
+  scale_color_manual(breaks = c("star","new","old"),
+                     values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+
+  geom_point(position=pd,size=1)+
+  theme(axis.text=element_text(size=10),
+        axis.title=element_text(size=15),legend.text=element_text(size=12),
+        legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+
+  labs(color = "G (Control)")+
+  xlab("Principle Components")+
+  ylab("Variance")
+# statistical test for the difference
+# a.ANOVA test
+class(traces.eigen.df)
+anova(lm(response~model, data = traces.eigen.df))
+anova(lm(response~model:PC, data = traces.eigen.df)) #which one to use?
+anova(lm(response~model*PC, data = traces.eigen.df))
+
+# b.Bartlett test
+
+# 6. angle and length of deltaZ and first eigenvector
+
+# distribution within population
+dist_within_theta = function(traces.traits){
+  theta = vector(,spn)
+  for (i in 1:spn){
+    pairs = randcomb(1:spn, 2)
+    a = traces.traits[,pairs[1]]
+    b = traces.traits[,pairs[2]]
+    a.length = sqrt(sum(a * a)) 
+    b.length = sqrt(sum(b * b)) 
+    theta[i] = acos(sum(a*b) / (a.length * b.length))
+  }
+  return(theta)
+}
+
+# first eigen
+old.traces.vector = sapply(old.G, function(x)eigen(x)$vector[,1])
+new.traces.vector = sapply(new.G, function(x)eigen(x)$vector[,1])
+star.traces.vector = sapply(star.G, function(x)eigen(x)$vector[,1])
+
+old.theta = dist_within_theta(old.traces.vector)
+new.theta = dist_within_theta(new.traces.vector)
+star.theta = dist_within_theta(star.traces.vector)
+# deltaZ
+old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean
+new.theta = dist_within_theta(new.traces.traits)
+star.theta = dist_within_theta(star.traces.traits)
+
+
+# distribution between population
+dist_between_theta = function(Atraces.traits,Btraces.traits){
+  theta = vector(,spn)
+  for (i in 1:spn){
+    pairs = randcomb(1:spn, 2)
+    a = Atraces.traits[,pairs[1]]
+    b = Btraces.traits[,pairs[2]]
+    a.length = sqrt(sum(a * a)) 
+    b.length = sqrt(sum(b * b)) 
+    theta[i] = acos(sum(a*b) / (a.length * b.length))
+  }
+  return(theta)
+}
+
+# first eigen vector
+old.new.theta = dist_between_theta(old.traces.vector,new.traces.vector)
+# K-S test
+#ks.test(old.theta,old.new.theta) #different
+old.star.theta = dist_between_theta(old.traces.vector,star.traces.vector)
+new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector)
+
+
+# deltaZ
+old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits)
+# K-S test
+#ks.test(old.theta,old.new.theta) #different
+old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits)
+new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits)
+#--------------------------------
+# plot
+theta.df = data.frame(old.theta,new.theta,star.theta, old.new.theta, old.star.theta, new.star.theta)
+
+library(reshape2)
+theta.df = melt(theta.df, value.name = "Degree")
+names(theta.df) = c("labels", "theta")
+ggplot(theta.df, aes(x = theta, col=labels))+
+  geom_density(aes(fill=labels),alpha=0.1)
+#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"),
+#                     values=rainbow(6))
+
+#-------------------------------
+# 7.length of deltaZ
+
+length_deltaZ = function(traces.traits){
+  #posmean.deltaZ = apply(traces.traits,1,mean)
+  #deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) 
+  deltaZ.length = apply(traces.traits,2,function(x)sqrt(sum(x * x)))
+  return(deltaZ.length)
+}
+old.deltaZ.length = length_deltaZ(old.traces.traits)
+new.deltaZ.length = length_deltaZ(new.traces.traits)
+star.deltaZ.length = length_deltaZ(star.traces.traits)
+
+deltaZ.length.df = data.frame(old.deltaZ.length,new.deltaZ.length,star.deltaZ.length)
+library(reshape2)
+deltaZ.length.df = melt(deltaZ.length.df,value.name = "Length")
+deltaZ.length.df$variable = factor(deltaZ.length.df$variable,levels = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length"))
+save(deltaZ.length.df, file="traitslength567.RData")
+save(deltaZ.length.df, file="traitslength51417.RData")
+
+load("traitslength567.RData")
+traits.length567.boxplot <- ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+
+  geom_boxplot(alpha=0.85)+
+  scale_fill_manual(breaks =rev(c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length")),
+                    values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (F3)")
+
+load("traitslength51417.RData")
+traits.length51417.boxplot <- ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+
+  geom_boxplot()+
+  scale_fill_manual(breaks = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length"),
+                    values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+
+  theme(axis.title.x = element_blank())+
+  labs(fill = "G (Control)")
+
+library(gridExtra)
+library("cowplot")
+plot_grid(traits.length567.boxplot, traits.length51417.boxplot, ncol = 2, nrow = 1)
diff --git a/R_BSFG/deltaZ_posterior_mean.Rmd b/R_BSFG/deltaZ_posterior_mean.Rmd
new file mode 100644
index 0000000..f83a428
--- /dev/null
+++ b/R_BSFG/deltaZ_posterior_mean.Rmd
@@ -0,0 +1,117 @@
+---
+title: "deltaZ_posterior_mean"
+output: html_document
+---
+
+```{r}
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+BSFGOld = "BSFG_state.RData"
+BSFGNew = "BSFG_fixedlambda65.RData"
+BSFGStar = "BSFG_fixedlambda75.RData"
+#BSFGNew = "BSFG_fixedlambdaR5.RData"
+#BSFGStar = "BSFG_fixedlambdaS5.RData"
+target = "F_h2"
+load(BSFGOld)
+traitnames = BSFG_state$traitnames
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+spn = dim(BSFG_state$Posterior[[target]])[2]
+# posterior mean
+# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case
+Lambda = BSFG_state$Posterior$Lambda
+load(BSFGNew)
+BSFG_state$Posterior$Lambda = Lambda
+new.G = G_Matrix_Comp(BSFG_state)
+
+load(BSFGStar)
+BSFG_state$Posterior$Lambda = Lambda
+star.G = G_Matrix_Comp(BSFG_state)
+#new.traces.G = G_Traces_Comp(BSFG_state)
+
+
+# The distance of two matrix(in predicted phenotype value/level)
+#selection gradient
+beta = 0.3609
+beta.v = c(0,beta,rep(0,16))
+traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+#boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+#boxplot(t(new.traces.traits))
+# use ggplot 
+traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits)))
+traces.traits.df$model = rep(c("old","new","star"),each=spn*p)
+traces.traits.df$traits = rep(traitnames,3*spn)
+names(traces.traits.df) = c("response","model","traits")
+library(ggplot2)
+#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+
+ # geom_boxplot()
+#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o")
+ggplot(traces.traits.df, aes(x = traits, y = response, color=model))+
+  #geom_boxplot(alpha = 0.5)+
+  scale_color_manual(breaks = c("old", "new", "star"),
+                     values=c("green", "red", "blue"))+
+  #scale_fill_manual(breaks = c("old", "new", "star"),
+  #                  values=c("green", "red", "blue"))+
+  ggtitle("The distribution of variance (Latent traits)")+
+  scale_color_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+
+ # scale_fill_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+
+  ylab("Response")+
+  geom_errorbar(aes(ymin=quantile(response,0.025), ymax=quantile(response,0.975)), width=.1, position=pd)
+
+```
+
+```{r}
+#when lambda is not fixed
+source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R')
+setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3")
+BSFGOld = "BSFG_state.RData"
+BSFGNew = "BSFG_state14.RData"
+BSFGStar = "BSFG_state17.RData"
+target = "F_h2"
+load(BSFGOld)
+traitnames = BSFG_state$traitnames
+#old.traces.G = G_Traces_Comp(BSFG_state)
+old.G = G_Matrix_Comp(BSFG_state)
+p = BSFG_state$run_variables$p
+spn = dim(BSFG_state$Posterior[[target]])[2]
+load(BSFGNew)
+new.G = G_Matrix_Comp(BSFG_state)
+load(BSFGStar)
+#BSFG_state$Posterior$Lambda = Lambda
+star.G = G_Matrix_Comp(BSFG_state)
+
+beta = 0.3609
+beta.v = c(0,beta,rep(0,16))
+traces.traits = matrix(,p,spn)
+
+#old.G in list form
+old.traces.traits = sapply(old.G,function(x)x%*%beta.v)
+old.traits.posmean = apply(old.traces.traits,1,mean)
+new.traces.traits = sapply(new.G,function(x)x%*%beta.v)
+new.traits.posmean = apply(new.traces.traits,1,mean)
+star.traces.traits = sapply(star.G,function(x)x%*%beta.v)
+star.traits.posmean = apply(star.traces.traits,1,mean)
+#boxplot(old.traces_traits[2,])
+#boxplot(t(old.traces.traits))
+#boxplot(new.traces_traits[2,])
+#boxplot(t(new.traces.traits))
+# use ggplot 
+traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits)))
+traces.traits.df$model = rep(c("old","new","star"),each=spn*p)
+traces.traits.df$traits = rep(traitnames,3*spn)
+names(traces.traits.df) = c("response","model","traits")
+library(ggplot2)
+ggplot(traces.traits.df,aes(x =traits,y=response,col=model))+
+  geom_boxplot()
+```
diff --git a/R_BSFG/generate_simulations_halfSib_fast.R b/R_BSFG/generate_simulations_halfSib_fast.R
new file mode 100644
index 0000000..578ff91
--- /dev/null
+++ b/R_BSFG/generate_simulations_halfSib_fast.R
@@ -0,0 +1,338 @@
+library(pedantics)
+library(R.matlab)
+
+prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){
+	n = dim(A_chol)[1]  # ??difference of A_chol and A
+	p = nrow(Lambda) 
+	
+	r = n
+	# n = 2*n
+
+	k = ncol(Lambda)
+	F_a = t(A_chol) %*% sweep(matrix(rnorm(n*k),n),2,sqrt(factor_h2s),'*') # make sure MN(0,A,sigma)??
+	F_r = sweep(matrix(rnorm(n*k),n),2,sqrt(1-factor_h2s),'*') # factor_h2s is the variance of factor??
+	F = F_a + F_r
+
+	E_a_act = t(A_chol) %*% sweep(matrix(rnorm(n*p),n),2,sqrt(1/E_a_prec),'*')
+	E_r_act = sweep(matrix(rnorm(n*p),n),2,sqrt(1/resid_Y_prec),'*')
+
+	Y = F %*% t(Lambda) + E_a_act + E_r_act
+	
+	X = matrix(1,nrow=1,ncol=n)
+	B = rep(0,p)
+	Z_1 = diag(r)[,rep(1:r,n/r)]  #?? we defined r = n at 8
+	Va = colSums(Lambda^2) * factor_h2s + 1/E_a_prec  #?? G
+	Ve = colSums(Lambda^2) * (1-factor_h2s) + 1/resid_Y_prec # R
+	h2 = Va/(Va + Ve)   # heribility
+	G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + diag(1/E_a_prec) 
+	R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + diag(1/resid_Y_prec)
+	h2 = diag(G)/(diag(G)+diag(R))  # ?? difference with 26
+	
+	# Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4))
+	
+	# plot(h2,type="l")
+	
+	print(name)
+	setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R,
+		gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name)
+
+	save(setup,file = 'setup.RData')
+	do.call(writeMat,c("setup.mat",setup))
+	save(pedigree,file="pedigree.Robj")
+}
+
+prepare_simulation = function(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name){
+	n = dim(A_chol)[1]
+	p = dim(G_chol)[1]
+	
+	r = n
+	# n = 2*n
+
+	Y=U_act=E_act=c()
+	U_act = t(A_chol) %*% matrix(rnorm(p*r),r,p) %*% G_chol
+	E_act = matrix(rnorm(p*n),n,p) %*% R_chol
+	Y=rbind(U_act) + E_act 
+	# Y=rbind(U_act,U_act) + E_act 
+
+	
+	X = matrix(1,nrow=1,ncol=n)
+	B = rep(0,p)
+	Z_1 = diag(r)[,rep(1:r,n/r)]
+	h2 = diag(G)/(diag(G)+diag(R))
+	
+	# Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4))
+	
+	plot(h2,type="l")
+	
+	print(name)
+	setup = list(
+		Y=Y,U_act=U_act,E_act=E_act,Z_1=Z_1,A=A,X=X,
+		n=n,r=r,p=p,
+		gen_factor_Lambda = gen_factor_Lambda,error_factor_Lambda = error_factor_Lambda,
+		h2=h2,G=G,R=R,B=B, factor_h2s=factor_h2s,name=name
+	)
+	save(setup,file = 'setup.RData')
+
+	do.call(writeMat,c("setup.mat",setup))
+	save(pedigree,file="pedigree.Robj")
+}
+
+
+CovToCor=function(x) diag(1/sqrt(diag(x))) %*% x %*% diag(1/sqrt(diag(x))) 
+
+
+# set seed for repeatability
+set.seed(1)
+
+counter = 0
+
+#sample_size simulations
+num_reps = 10
+p = 100
+k=10
+factor_h2s = seq(1,0,by=-0.1)[1:k]
+factor_h2s[seq(1,k,by=2)]=0
+
+
+# ways to simulate pedigree?? 
+sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10))
+
+
+for(size in names(sample_size)){
+	nM = sample_size[[size]][1]	
+	nI = sample_size[[size]][2]	
+	pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+	pedigree<-fixPedigree(pedigree)  #A
+	children = !is.na(pedigree[,3])
+
+	#generate A matrix as 2* kinship matrix from whole pedigree
+	A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+	A_chol = chol(A)
+	for(rep in 1:num_reps){
+		counter = counter+1
+		Lambda = matrix(0,p,k)
+		numeff = sample((p/30):(p/4),k,replace=T)
+		for(h in 1:k){
+			Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+		}
+		Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+		cols=1:k
+		g_cols = factor_h2s>0
+		Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+		
+
+		E_a_prec = rep(1/0.2,p)
+		resid_Y_prec = rep(1/0.2,p)
+
+		name = paste('SampleSize',size,rep,sep='_')
+		dir = paste('Sim',counter,sep="_")
+		try(dir.create(dir))
+		setwd(dir)
+		prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name)
+		setwd('..')
+	}
+}
+
+#k simulations
+num_reps = 10
+p = 100
+factor_h2 = 0.5
+
+ks_size = list(Small = c(5,10), Medium = c(15,25), Large = c(30,50))
+
+nM = 100
+nI = 10
+pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+pedigree<-fixPedigree(pedigree)
+children = !is.na(pedigree[,3])
+#generate A matrix as 2* kinship matrix from whole pedigree
+A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+A_chol = chol(A)
+
+for(size in names(ks_size)){
+	for(rep in 1:num_reps){
+		k = ks_size[[size]][2]
+		counter = counter+1
+		Lambda = matrix(0,p,k)
+		numeff = sample((p/30):(p/4),k,replace=T)
+		for(h in 1:k){
+			Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+		}
+		Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+		cols=1:k
+		factor_h2s = rep(0,k)
+		factor_h2s[1:ks_size[[size]][1]] = factor_h2
+		# factor_h2s[sample(1:k,ks_size[[size]][1])] = factor_h2
+		# g_cols = factor_h2s>0
+		# Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+		
+		E_a_prec = rep(1/0.2,p)
+		resid_Y_prec = rep(1/0.2,p)
+
+		name = paste('k',size,rep,sep='_')
+		dir = paste('Sim',counter,sep="_")
+		try(dir.create(dir))
+		setwd(dir)
+		prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name)
+		setwd('..')
+	}
+}
+
+
+#unconstrained R
+library(MCMCpack)
+num_reps = 10
+p = 100
+kG = 5
+
+nM = 100
+nI = 10
+pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+pedigree<-fixPedigree(pedigree)
+children = !is.na(pedigree[,3])
+#generate A matrix as 2* kinship matrix from whole pedigree
+A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+A_chol = chol(A)
+
+	for(rep in 1:num_reps){
+		k = kG
+		counter = counter+1
+		Lambda = matrix(0,p,k)
+		numeff = sample((p/30):(p/4),k,replace=T)
+		for(h in 1:k){
+			Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+		}
+		Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+		cols=1:k
+		factor_h2s = rep(1,k)
+		g_cols = factor_h2s>0
+		Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+		
+
+		G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + 0.2*diag(p)
+		R = rwish(p+1,diag(diag(G))/p)
+		G_chol = chol(G)
+		R_chol = chol(R)
+		h2 = diag(G)/(diag(G)+diag(R))
+		hist(h2,breaks=seq(0,1,length=30))
+		image(CovToCor(G)[,p:1])
+		image(CovToCor(R)[,p:1])
+		  
+		gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0]
+		error_factor_Lambda = Lambda
+
+		name = paste('Rwish',rep,sep='_')
+		dir = paste('Sim',counter,sep="_")
+		try(dir.create(dir))
+		setwd(dir)
+		prepare_simulation(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name)
+		setwd('..')
+	}
+
+
+
+#non-sparse R
+num_reps = 10
+p = 100
+kG = 5
+kR = 5
+
+nM = 100
+nI = 10
+pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+pedigree<-fixPedigree(pedigree)
+children = !is.na(pedigree[,3])
+#generate A matrix as 2* kinship matrix from whole pedigree
+A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+A_chol = chol(A)
+
+	for(rep in 1:num_reps){
+		k = kG
+		counter = counter+1
+		Lambda = matrix(0,p,k)
+		numeff = sample((p/30):(p/4),k,replace=T)
+		for(h in 1:k){
+			Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+		}
+		Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+		cols=1:k
+		factor_h2s = rep(0.5,k)
+		g_cols = factor_h2s>0
+		Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+		
+		Lambda = cbind(Lambda,matrix(rnorm(kR*p),p,kR))
+		factor_h2s = c(factor_h2s,rep(0,kR))
+		
+
+		G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + 0.2*diag(p)
+		R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + 0.2*diag(p)
+		G_chol = chol(G)
+		R_chol = chol(R)
+		h2 = diag(G)/(diag(G)+diag(R))
+		hist(h2,breaks=seq(0,1,length=30))
+		image(CovToCor(G)[,p:1])
+		image(CovToCor(R)[,p:1])
+		  
+		gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0]
+		error_factor_Lambda = Lambda
+
+		name = paste('R_notSparse',rep,sep='_')
+		dir = paste('Sim',counter,sep="_")
+		try(dir.create(dir))
+		setwd(dir)
+		prepare_simulation(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name)
+		setwd('..')
+	}
+
+
+
+#p simulations
+num_reps = 10
+factor_h2 = 0.5
+k = 10
+k_G = 5
+
+ks_size = list(Small = 20, Large = 1000)
+
+nM = 100
+nI = 10
+pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI))
+pedigree<-fixPedigree(pedigree)
+children = !is.na(pedigree[,3])
+#generate A matrix as 2* kinship matrix from whole pedigree
+A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children]
+A_chol = chol(A)
+
+for(size in names(ks_size)){
+	for(rep in 1:num_reps){
+		p = ks_size[[size]]
+		counter = counter+1
+		Lambda = matrix(0,p,k)
+		numeff = sample((p/30):(p/4),k,replace=T)
+		numeff[numeff<3]=3
+		for(h in 1:k){
+			Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h])
+		}
+		Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))]
+		cols=1:k
+		factor_h2s = rep(0,k)
+		factor_h2s[1:k_G] = factor_h2
+		# factor_h2s[sample(1:k,ks_size[[size]][1])] = factor_h2
+		# g_cols = factor_h2s>0
+		# Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),]
+		
+		E_a_prec = rep(1/0.2,p)
+		resid_Y_prec = rep(1/0.2,p)
+		  
+		gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0]
+		error_factor_Lambda = Lambda
+
+		name = paste('p',size,rep,sep='_')
+		dir = paste('Sim',counter,sep="_")
+		try(dir.create(dir))
+		setwd(dir)
+		prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name)
+		setwd('..')
+	}
+}
+
diff --git a/R_BSFG/test_indep_Ablocks.R b/R_BSFG/test_indep_Ablocks.R
new file mode 100644
index 0000000..4dbe5d8
--- /dev/null
+++ b/R_BSFG/test_indep_Ablocks.R
@@ -0,0 +1,50 @@
+setwd("~/Runcie Lab/pedigree")
+data=read.csv('IDDammSirelnUpdate2.csv',header = TRUE)
+# 5
+GenerationCode = c(0,1,2,3)
+LineCode = c(1)
+
+
+# 6
+GenerationCode=3
+LineCode = c(0,2)
+
+
+# 7
+GenerationCode=3
+LineCode = c(0,3)
+
+ped <- data[,1:3]
+for(i in 1:3) ped[,i] <- as.factor(ped[,i])
+library(MCMCglmm)
+invA <- inverseA(ped)
+A <- solve(invA$Ainv)
+
+data$Gen_Code = paste(data$Generation,data$LineCode,sep='_')
+o = order(data$Gen_Code)
+
+A_order = A[o,o]
+image(A_order>0)
+
+
+generation_pos <- data$Generation %in% GenerationCode
+LineCode_pos <- data$LineCode%in%LineCode|data$LineCode==0
+data_pos <- which((generation_pos&LineCode_pos)|data$LineCode==0)
+#nodes_names <- as.factor(as.character(data$animal[data_pos]))
+n = length(data_pos)
+
+# reorder the data according to specific rows in each model
+data.trans = data
+data.trans[1:n,] = data[data_pos,]
+data.trans[185:(184+n),] = data[data_pos,]
+data.trans[366:(365+n),] = data[data_pos,]
+
+
+ped <- data.trans[,1:3]
+ped <- data[,1:3]
+for(i in 1:3) ped[,i] <- as.factor(ped[,i])
+library(MCMCglmm)
+invA2 <- inverseA(ped)#, node = data.trans$animal[1:545])
+invA <- inverseA(ped, node = data.trans$animal[1:545])
+A <- solve(invA$Ainv)
+A <- matrix(A@x, nrow<-n, ncol<-n, byrow <- FALSE)

From 2255e85f4a3e37fbf6bd68ab966b4061135ed64b Mon Sep 17 00:00:00 2001
From: "Xin Li(Lexie)" <xxrli@ucdavis.edu>
Date: Tue, 25 Apr 2017 00:07:59 -0700
Subject: [PATCH 18/18] update functions

---
 R_BSFG/BSFG_functions.R       | 48 +++++++++++++++++++++++++++++++++++
 R_BSFG/plotting_diagnostics.R | 27 ++++++++++++++++++--
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R
index 1f9dec7..e1dddab 100644
--- a/R_BSFG/BSFG_functions.R
+++ b/R_BSFG/BSFG_functions.R
@@ -630,6 +630,54 @@ G_Matrix_Comp = function(BSFG_state){
   return(GMatrix)
 }
 
+G_Matrix_comb_Comp = function(BSFG_state){
+  Posterior      = BSFG_state$Posterior
+  traitnames     = BSFG_state$traitnames
+  run_variables  = BSFG_state$run_variables
+  sp_num = ncol(Posterior$Lambda) 
+  #n = run_variables$n
+  k = 10
+  p = run_variables$p
+  n = nrow(Posterior$F_a)/k
+  k1 = nrow(Posterior$F_a)/n;
+  k2 = nrow(Posterior$Lambda)/p;
+  if (k2 >= k1){
+    k = k1
+  }else{
+    k=k2
+  }
+  h2s = Posterior$F_h2[,1:sp_num]
+  #G_Lambdas = array(0,dim = dim(Posterior$Lambda))
+  #Lambda_est = matrix(0,p,k)
+  G_est = E_est = matrix(0,p,p)
+  #traces_G = matrix(,p*(p+1)/2,sp_num)
+  #traces_G_cor = matrix(,p*(p+1)/2,sp_num)
+  #traces_E = matrix(,p*(p+1)/2,sp_num)
+  GMatrix = NULL
+  for(j in 1:sp_num) {
+    Lj = matrix(Posterior$Lambda[,j],p,k)
+    h2j = Posterior$F_h2[,j]
+    G_Lj = Lj %*%  diag(sqrt(h2j))
+    #G_Lambdas[,j] = c(G_Lj)
+    Gj = G_Lj %*%  t(G_Lj) + diag(1/Posterior$E_a_prec[,j])
+    rownames(Gj) = traitnames
+    #posterior mean
+    GMatrix[[j]] = Gj
+    G_est = G_est + Gj/sp_num
+    #library(gdata)
+    #traces_G[,j] = lowerTriangle(Gj,diag = TRUE)
+    #traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE)
+    
+    #E_Lj = Lj  %*% diag(1-h2j) %*%  t(Lj) + diag(1/Posterior$resid_Y_prec[,j])
+    #E_est = E_est + E_Lj/sp_num;
+    #posterior mean for lambda
+    #Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num;
+    #traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE)
+  }
+  #G_Lambda = matrix(rowMeans(G_Lambdas),p,k)
+  return(G_est)
+}
+
 G_Traces_Comp = function(BSFG_state){
   Posterior      = BSFG_state$Posterior
   traitnames     = BSFG_state$traitnames
diff --git a/R_BSFG/plotting_diagnostics.R b/R_BSFG/plotting_diagnostics.R
index a8344f1..4c35905 100644
--- a/R_BSFG/plotting_diagnostics.R
+++ b/R_BSFG/plotting_diagnostics.R
@@ -139,7 +139,7 @@ draw_simulation_diagnostics = function(sp_num,run_parameters,run_variables,Poste
     }
 }
 
-draw_results_diagnostics = function(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames){
+draw_results_diagnostics = function(sp_num,paras,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames){
     # devices = dev.list()
     # while(length(devices) < 7){
     #     if(.Platform$OS.type != "windows") {
@@ -338,11 +338,34 @@ plot_diagnostics = function(BSFG_state){
   sp_num = ncol(BSFG_state$Posterior$Lambda)    
     draw_results_diagnostics(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames)
     
-  # }
 }
 # load("BSFG_state.RData")
 # plot_diagnostics(BSFG_state)
 
+plot_diagnostics_comb = function(BSFG_state){
+  burn           = BSFG_state$run_parameters$burn
+  thin           = BSFG_state$run_parameters$thin
+  start_i        = BSFG_state$current_state$nrun
+  params         = BSFG_state$params
+  run_variables  = BSFG_state$run_variables
+  Lambda         = BSFG_state$current_state$Lambda
+  F_h2           = BSFG_state$current_state$F_h2[[pop]]
+  Posterior      = BSFG_state$Posterior
+  E_a_prec       = BSFG_state$current_state$E_a_prec[[pop]]
+  resid_Y_prec   = BSFG_state$current_state$resid_Y_prec[[pop]]
+  traitnames     = BSFG_state$traitnames
+  draw_iter      = BSFG_state$run_parameters$draw_iter
+  #   for(i in start_i+(1:n_samples)){
+  #   if( (i-burn) %% thin == 0 && i > burn) {
+  #       sp_num = (i-burn)/thin    
+  #   }
+  #     if(i %% draw_iter  == 0) {
+  sp_num = ncol(BSFG_state$Posterior$Lambda)    
+  draw_results_diagnostics(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames)
+  
+}
+
+
 ComparingGMatrix_plot = function(target){
   #load data from the original population
   load("BSFG_state.RData")