From 9a95fd339bd4c01512fe4ec267e440c22e82e25e Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 21 Oct 2016 10:50:43 -0700 Subject: [PATCH 01/18] subtract the mean of itself --- R_BSFG/fast_BSFG_sampler_init_fixedlambda.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R b/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R index b22b3fc..a092137 100644 --- a/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R +++ b/R_BSFG/fast_BSFG_sampler_init_fixedlambda.R @@ -71,7 +71,7 @@ fast_BSFG_sampler_init_fixedlambda = function(priors,run_parameters,YNew,YOld){ load(paste0("../",YOld)) Y_old = setup$Y - Mean_Y = colMeans(Y_old,na.rm=T) + Mean_Y = colMeans(Y,na.rm=T) # subtract the mean of newY if we only care about the variance of F_a VY = apply(Y_old,2,var,na.rm=T) From 8568f75a0f5b6b4ad0f49f959ed7015b460919f1 Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 21 Oct 2016 10:53:51 -0700 Subject: [PATCH 02/18] do not fix B matrix --- R_BSFG/BSFG_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index 41d430a..59f8560 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -430,7 +430,7 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re Posterior$E_a_prec[,j] = E_a_prec Posterior$W_prec[,j] = W_prec - # save B,U,W + # save U,W Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j Posterior$W = (Posterior$W*(j-1) + W)/j From 07eee42526091a87ab9cec26c4e51675a8d28c4f Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 21 Oct 2016 10:54:33 -0700 Subject: [PATCH 03/18] adding options for saving G_est --- R_BSFG/plotting_diagnostics.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R_BSFG/plotting_diagnostics.R b/R_BSFG/plotting_diagnostics.R index e2f6cf2..a8344f1 100644 --- a/R_BSFG/plotting_diagnostics.R +++ b/R_BSFG/plotting_diagnostics.R @@ -238,7 +238,7 @@ draw_results_diagnostics = function(sp_num,params,run_variables,Lambda, F_h2, Po } G_Lambda = matrix(rowMeans(G_Lambdas),p,k) #dev.off() - + save(G_est,file = "G_est.RData") # Figure of posterior mean estimates #dev.set(devices[4]) #pdf('Figure of posterior mean estimates.pdf') @@ -380,3 +380,4 @@ ComparingGMatrix_plot = function(target){ } } + From 2965af18d12eb076dcb3d2a51ee3e9c6f9f4a22b Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 21 Oct 2016 10:55:22 -0700 Subject: [PATCH 04/18] changes of ways to plots --- R_BSFG/plottting_comparing.R | 93 +++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/R_BSFG/plottting_comparing.R b/R_BSFG/plottting_comparing.R index e4ac4f6..15ab005 100644 --- a/R_BSFG/plottting_comparing.R +++ b/R_BSFG/plottting_comparing.R @@ -5,11 +5,12 @@ BSFGStar = "BSFG_fixedlambda75.RData" target = "F_h2" #it looks like we do not need so much latent factors. choose the first l factors. #choose the first l latent traits and make them a vector for all iterations -l=5 # 2:6 +l=5 # 2:6 pos_fa = function(spn,pos.fa,n){ pos.fa.rb = vector() for (i in 1:spn){ - pos.fa.rb = c(pos.fa.rb,pos.fa[1:(l*n),i]) + #pos.fa.rb = c(pos.fa.rb,pos.fa[1:(l*n),i]) + pos.fa.rb = c(pos.fa.rb,pos.fa[1:l,i]) #F_h2 } return(pos.fa.rb)} # old @@ -17,53 +18,69 @@ load(BSFGOld) spn = dim(BSFG_state$Posterior[[target]])[2] old.n = dim(BSFG_state$data_matrices$Y)[1] old.k = nrow(BSFG_state$Posterior[[target]]) -old.pos.fa = BSFG_state$Posterior$F_a +#old.pos.fa = BSFG_state$Posterior$F_a +old.pos.fh2 = BSFG_state$Posterior$F_h2 -old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n) +#old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n) +old.pos.fh2.rb = pos_fa(spn,old.pos.fh2,old.n) # new load(BSFGNew) spn = dim(BSFG_state$Posterior[[target]])[2] new.n = dim(BSFG_state$data_matrices$Y)[1] new.k = nrow(BSFG_state$Posterior[[target]]) -new.pos.fa = BSFG_state$Posterior$F_a -new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n) +#new.pos.fa = BSFG_state$Posterior$F_a +new.pos.fh2 = BSFG_state$Posterior$F_h2 +#new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n) +new.pos.fh2.rb = pos_fa(spn,new.pos.fh2,new.n) # star : represent the third comparison load(BSFGStar) spn = dim(BSFG_state$Posterior[[target]])[2] star.n = dim(BSFG_state$data_matrices$Y)[1] star.k = nrow(BSFG_state$Posterior[[target]]) -star.pos.fa = BSFG_state$Posterior$F_a -star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n) +#star.pos.fa = BSFG_state$Posterior$F_a +star.pos.fh2 = BSFG_state$Posterior$F_h2 +#star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n) +star.pos.fh2.rb = pos_fa(spn,star.pos.fh2,star.n) # combine # column "old", "new", "star" -pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb,star.pos.fa.rb),rep(c("old","new","star"),c(old.n*l*spn,new.n*l*spn,star.n*l*spn))) +#pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb,star.pos.fa.rb),rep(c("old","new","star"),c(old.n*l*spn,new.n*l*spn,star.n*l*spn))) +pos.fh2.b = data.frame(c(old.pos.fh2.rb,new.pos.fh2.rb,star.pos.fh2.rb),rep(c("old","new","star"),c(l*spn,l*spn,l*spn))) # column "factor" -pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn),rep(rep(sprintf("f%d",1:l),each=star.n),spn))) -colnames(pos.fa.b) = c("Fa","model", "factor") -pos.fa.b=as.data.frame(pos.fa.b) +#pos.fa.b = data.frame(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn),rep(rep(sprintf("f%d",1:l),each=star.n),spn))) +pos.fh2.b = data.frame(pos.fh2.b,c(rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d",1:l),spn))) +#colnames(pos.fa.b) = c("Fa","model", "factor") +colnames(pos.fh2.b) = c("Fh2","model", "factor") +#pos.fa.b=as.data.frame(pos.fa.b) #save(pos.fa.b,file="pos_fa_bind.RData") -load("pos_fa_bind.RData") +save(pos.fh2.b,file="pos_fh2_bind.RData") +#load("pos_fa_bind.RData") +load("pos_fh2_bind.RData") + # plot # 1.boxplot of distribution of Fa library(ggplot2) -ggplot(pos.fa.b, aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+ - geom_boxplot() - #geom_tile("The distribution of Latent traits") +ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+ + geom_boxplot()+ + ggtitle("The distribution of Latent traits") #geom_abline(intercept=0,slope=0,lty=2) -ggplot(pos.fa.b)+ - geom_density(aes(x = factor, y = as.numeric(Fa), color=model)) - #geom_tile("The distribution of Latent traits") +ggplot(pos.fh2.b,aes( x = Fh2, color=model))+ + geom_density()+ + facet_wrap(~factor,scales = "free_y")+ + ggtitle("The distribution of Latent traits") + # 2.For each trait, compare the correlation of G1 vs G2 +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') load(BSFGOld) -old.traces.G = G_Traces_Comp(BSFG_state) +#old.traces.G = G_Traces_Comp(BSFG_state) old.G = G_Matrix_Comp(BSFG_state) p = BSFG_state$run_variables$p # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case Lambda = BSFG_state$Posterior$Lambda load(BSFGNew) +load(BSFGStar) BSFG_state$Posterior$Lambda = Lambda -new.traces.G = G_Traces_Comp(BSFG_state) +#new.traces.G = G_Traces_Comp(BSFG_state) new.G = G_Matrix_Comp(BSFG_state) #For all the elements in G #pairs(t(old.traces.G)~t(new.traces.G)) @@ -72,6 +89,10 @@ old.G = array(unlist(old.G),dim = c(nrow(old.G[[1]]),ncol(old.G[[1]]),length(old new.G = array(unlist(new.G),dim = c(nrow(new.G[[1]]),ncol(new.G[[1]]),length(new.G))) old.G.b = apply(old.G,2,rbind) new.G.b = apply(new.G,2,rbind) +save(old.G.b,file = "old_G_b.RData") +load("old_G_b.RData") +save(new.G.b,file = "new_G_b.RData") +load("new_G_b.RData") #plot(old.G.b[,2],new.G.b[,2]) #combine old.G and new.G as a dataframe #G.b = c(old.G.b[,2],new.G.b[,2]) @@ -79,6 +100,7 @@ new.G.b = apply(new.G,2,rbind) #G.b = as.data.frame(G.b) #colnames(G.b) = c("covariance","model") # 1st triat + each_trait_G_pair = function(i){ G.b = cbind(old.G.b[,i],new.G.b[,i]) G.b = as.data.frame(G.b) @@ -87,11 +109,36 @@ library(ggplot2) ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+ #geom_dotplot(binwidth = 15) geom_point()+ - geom_abline(intercept = 0,slope = 1) + geom_abline(intercept = 0,slope = 1)+ + ggtitle(paste0(BSFG_state$traitnames[i])) } +pdf("each_trait_G_pair.pdf") +#par(mfrow = c(3,3)) +for (i in 1:p){ + each_trait_G_pair(i) +} +dev.off() each_trait_G_pair(1) each_trait_G_pair(2) each_trait_G_pair(3) +each_trait_G_pair(4) +each_trait_G_pair(5) # 3.delta z # The distance of two matrix(in predicted phenotype value/level) - +#selection gradient +beta = 0.3609 +beta_v = c(0,beta,rep(0,16)) +traces_traits = matrix(,p,sp_num) +for(j in 1:sp_num) { + Lj = matrix(Posterior$Lambda[,j],p,k) + h2j = Posterior$F_h2[,j] + G_Lj = Lj %*% diag(sqrt(h2j)) + Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j]) + Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY))) + traces_traits[,j] = Gj%*%beta_v +} +boxplot(t(traces_traits)) +save(traces_traits,file = 'traces_traits.RData') +# For each G matrix +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/19/19/Lambda1.5_delta2shape3") +load("G_BSFG_sc.RData") From e22f2719de9e7c2596b3f822f81f0d8515f6faa1 Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 9 Dec 2016 14:42:08 -0800 Subject: [PATCH 05/18] newest version update --- R_BSFG/BSFG_functions.R | 15 +- R_BSFG/Comparing.R | 143 ++++++++ R_BSFG/G_matrix.R | 114 ++++++ R_BSFG/G_matrix_output.R | 2 +- R_BSFG/Sim_2/model_setup_generation.R | 84 +++++ R_BSFG/code_comparing_plotting4.R | 277 +++++++++++++++ R_BSFG/deltaZ_posterior_mean.R | 147 ++++++++ R_BSFG/factor_3Dplot.R | 49 ++- R_BSFG/fast_BSFG_sampler_current.R | 22 +- R_BSFG/model_setup.R | 43 ++- R_BSFG/model_setup_fixedlambda.R | 39 ++ R_BSFG/model_setup_generation.R | 70 ++++ R_BSFG/plottting_comparing.R | 489 +++++++++++++++++++++++--- R_BSFG/setup_pedigree.R | 6 +- 14 files changed, 1400 insertions(+), 100 deletions(-) create mode 100644 R_BSFG/Comparing.R create mode 100644 R_BSFG/G_matrix.R create mode 100644 R_BSFG/Sim_2/model_setup_generation.R create mode 100644 R_BSFG/code_comparing_plotting4.R create mode 100644 R_BSFG/deltaZ_posterior_mean.R create mode 100644 R_BSFG/model_setup_fixedlambda.R create mode 100644 R_BSFG/model_setup_generation.R diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index 59f8560..ff340d0 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -43,9 +43,14 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) Lambda = matrix(0,nr = p,nc = k) for(j in 1:p) { - FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/') - means = FUDi %*% UtY[,j] - Qlam = FUDi %*% t(FtU) + diag(Plam[j,]) + means = c() + Qlam = c() + for(pop in pops){ + FUDi = E_a_prec[[pop]][j] * sweep(FtU[[pop]],2,(s[[pop]] + E_a_prec[[pop]][j]/resid_Y_prec[j]),'/') + means = means + FUDi %*% UtY[[pop]][,j] + Qlam = Qlam + FUDi %*% t(FtU[[pop]]) + } + Qlam = Qlam + diag(Plam[j,]) # recover() Llam = t(chol(Qlam)) @@ -336,10 +341,12 @@ update_k = function( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_W,Lambda_df tauh = cumprod(delta) Plam = sweep(Lambda_prec,2,tauh,'*') Lambda = cbind(Lambda,rnorm(p,0,sqrt(1/Plam[,k]))) + F_h2[k] = runif(1) F_a = cbind(F_a,rnorm(r,0,sqrt(F_h2[k]))) F = cbind(F,rnorm(n,Z_W %*% F_a[,k],sqrt(1-F_h2[k]))) - } else if(num > 0) { # drop redundant columns + + } else if(num > 0) { # drop redundant columns nonred = which(vec == 0) # non-redundant loadings columns Lambda = Lambda[,nonred] Lambda_prec = Lambda_prec[,nonred] diff --git a/R_BSFG/Comparing.R b/R_BSFG/Comparing.R new file mode 100644 index 0000000..895ab8e --- /dev/null +++ b/R_BSFG/Comparing.R @@ -0,0 +1,143 @@ +# First method +# Fixed lambda +# all the BSFG G matrix +load("~/../Desktop/G_BSFG_sc.RData") +# 5 G matrix +BSFG.sc5 = G_BSFG_sc[5] + +model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" +library(Rcpp) +library(RcppArmadillo) +source(paste(model_path,'BSFG_functions.R',sep='/')) +sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) + +run_parameters = list( + b0 = 1, + b1 = 0.0005, + epsilon = 1e-2, + prop = 1.00, + h2_divisions = 50, + save_freq = 100, + burn = 2000, #100 + thin = 400, #2 + draw_iter = 200 +) +# 5 BSFG_state +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +load("BSFG_state.RData") +priors = BSFG_state$priors + +YNew="setup6.RData" +YOld="setup5.RData" +BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld) +BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld) + +#load("BSFG_fixedlambda65.RData") +BSFGOld = "BSFG_state.RData" +BSFGNew = "BSFG_fixedlambda65.RData" +BSFG3 = +# Comparisons: + + load(BSFGOld) + spn = dim(BSFG_state$Posterior[[target]])[2] + old.n = dim(BSFG_state$data_matrices$Y)[1] + old.k = nrow(BSFG_state$Posterior[[target]]) + #pos = BSFG_state$Posterior[[target]][,spn] + #pos = BSFG_state$Posterior[[target]] + old.pos.fa = BSFG_state$Posterior$F_a + old.pos.fa.rb = vector() + l=5 + for (i in 1:spn){ + old.pos.fa.rb = c(old.pos.fa.rb,old.pos.fa[1:(l*old.n),i]) + } + #combine posterior of F_a into a big matrix + + #old.pos.fa.matrix = matrix(0,nr=0,nc=l) + #for (i in 1:spn){ + # old.fa.matrix = matrix(old.pos.fa[,i],nr=old.n,nc=k) + # old.pos.fa.matrix = rbind(old.pos.fa.matrix,old.fa.matrix[,1:l]) + #} + #boxplot(old.pos.fa.matrix) + # combine a column which specify the latent factor + + + load(BSFGNew) + spn = dim(BSFG_state$Posterior[[target]])[2] + new.n = dim(BSFG_state$data_matrices$Y)[1] + new.k = nrow(BSFG_state$Posterior[[target]]) + #pos = BSFG_state$Posterior[[target]][,spn] + #pos = BSFG_state$Posterior[[target]] + new.pos.fa = BSFG_state$Posterior$F_a + #combine posterior of F_a into a big matrix + l=5 + new.pos.fa.rb = vector() + for (i in 1:spn){ + new.pos.fa.rb = c(new.pos.fa.rb,new.pos.fa[1:(l*new.n),i]) + } + #new.pos.fa.matrix = matrix(0,nr=0,nc=l) + #for (i in 1:spn){ + # new.fa.matrix = matrix(new.pos.fa[,i],nr=new.n,nc=k) + # new.pos.fa.matrix = rbind(new.pos.fa.matrix,new.fa.matrix[,1:l]) + #} + #boxplot(new.pos.fa.matrix) + + + +#combine old pos.fa.matrix and new pos.fa.matrix into one dataframe +# use ggplot to plot +#it looks like we do not need so much latent factors. choose the first8 factors. + #pos.fa.matrix = cbind(rbind(old.pos.fa.matrix,new.pos.fa.matrix),rep(c("old","new"),c(old.n*spn,new.n*spn))) + # column "old", "new" + pos.fa.b = cbind(c(old.pos.fa.rb,new.pos.fa.rb),rep(c("old","new"),c(old.n*l*spn,new.n*l*spn))) + # column "factor" + pos.fa.b = cbind(pos.fa.b,c(rep(rep(sprintf("f%d",1:l),each=old.n),spn),rep(rep(sprintf("f%d",1:l),each=new.n),spn))) + colnames(pos.fa.b) = c("Fa","model", "factor") + pos.fa.b=as.data.frame(pos.fa.b) + #pos.fa.sample = sample(1:nrow(pos.fa.b),200*(old.n+new.n)) + #pos.fa.sample = pos.fa.b[pos.fa.sample,] + #plot boxplot of distribution of Fa + library(ggplot2) + ggplot(pos.fa.b,aes(x = factor, y = as.numeric(Fa), color=model,fill=model))+ + geom_boxplot()+ + geom_abline(a=0,b=0,lty=2) + # For each trait, compare the correlation of G1 vs G2 + + # + + + if (target!="F_h2"){ + pos = matrix(pos,nr=n) + k = nrow(BSFG_state$Posterior[[target]])/n + } + #load data from new population + load(BSFGNew) + n = dim(BSFG_state$data_matrices$Y)[1] + star = BSFG_state$Posterior[[target]][,spn] + if (target!="F_h2"){ + star = matrix(star,nr=n) + pdf(sprintf("comparing_%s_densityplot.pdf",target)) + for(i in 1:k){ + plot(density(pos[,i]),main = sprintf("%s %d",target,i),col = "blue",type = "l",xlab = "#obs") + # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs") + lines(density(star[,i]),col="red",type = "l") + abline + legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1) + } + dev.off() + }else{ + pdf(sprintf("comparing_%s_densityplot.pdf",target)) + plot(density(pos),main = sprintf("%s",target),col = "blue",type = "l",xlab = "#obs") + # plot(density(F_a_pos[,i]),main = sprintf("%d",i),col = "blue",type = "l",ylim = c(min(F_a_pos)-5,max(F_a_pos)+5),xlab = "#obs") + lines(density(star),col="red",type = "l") + abline + legend("topright",legend = c("original","new"),col = c("blue","red"),text.col = c("blue","red"),bty = "n",pch = 1) + dev.off() + } +} + +ComparingGMatrix_plot("F_h2",BSFGNew,BSFGOld) + + + diff --git a/R_BSFG/G_matrix.R b/R_BSFG/G_matrix.R new file mode 100644 index 0000000..4b5756c --- /dev/null +++ b/R_BSFG/G_matrix.R @@ -0,0 +1,114 @@ +CovToCor = function(X){ + # Normalize a covariance matrix into a correlation matrix + corX=diag(1/sqrt(diag(X))) %*% X %*% diag(1/sqrt(diag(X))); + return(corX) +} + +run_variables = BSFG_state$run_variables +Posterior = BSFG_state$Posterior +E_a_prec = BSFG_state$current_state$E_a_prec +resid_Y_prec = BSFG_state$current_state$resid_Y_prec +traitnames = BSFG_state$traitnames + +sp_num = ncol(BSFG_state$Posterior$Lambda) +p = run_variables$p +k = nrow(Posterior$Lambda)/p; + +G_Lambdas = array(0,dim = dim(Posterior$Lambda)) +Lambda_est = matrix(0,p,k) +G_est = E_est = matrix(0,p,p) +traces_G = matrix(,p*(p+1)/2,sp_num) +diag_G = matrix(,p,sp_num) +traces_G_cor = matrix(,p*(p+1)/2,sp_num) +traces_E = matrix(,p*(p+1)/2,sp_num) +diag_E = matrix(,p,sp_num) + + + +for(j in 1:sp_num) { + Lj = matrix(Posterior$Lambda[,j],p,k) + h2j = Posterior$F_h2[,j] + G_Lj = Lj %*% diag(sqrt(h2j)) + G_Lambdas[,j] = c(G_Lj) + Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j]) + G_est = G_est + Gj/sp_num + traces_traits[,j] = Gj%*%beta_v + library(gdata) + traces_G[,j] = lowerTriangle(Gj,diag = TRUE) + diag_G[,j] = diag(Gj) + traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE) + + E_Lj = Lj %*% diag(1-h2j) %*% t(Lj) + diag(1/Posterior$resid_Y_prec[,j]) + E_est = E_est + E_Lj/sp_num; + Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num; + traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE) + diag_E[,j] = diag(E_Lj) +} +G_Lambda = matrix(rowMeans(G_Lambdas),p,k) + + + +load("C:/Users/Xin~/Desktop/setup_LineCode1.RData") +Y = setup$Y +VY = apply(Y,2,var,na.rm=T) + +#selection gradient +beta = 0.3609 +beta_v = c(0,beta,rep(0,16)) +traces_traits = matrix(,p,sp_num) +for(j in 1:sp_num) { + Lj = matrix(Posterior$Lambda[,j],p,k) + h2j = Posterior$F_h2[,j] + G_Lj = Lj %*% diag(sqrt(h2j)) + Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j]) + Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY))) + traces_traits[,j] = Gj%*%beta_v +} +boxplot(t(traces_traits)) +save(traces_traits,file = 'traces_traits.RData') + +#heritabiity of posterior mean +G_est1 = sqrt(diag(VY))%*%G_est%*%t(sqrt(diag(VY))) +save(G_est1,file = "G_est_posterior.RData") + +E_est1 = sqrt(diag(VY))%*%E_est%*%t(sqrt(diag(VY))) +heritability = diag(G_est1)/diag(G_est1+E_est1) +names(heritability) = traitnames +plot(heritability) + +#heritability confidence interval 95% +diag_E1 = sweep(diag_E,1,VY,'*') +diag_G1 = sweep(diag_G,1,VY,'*') +P = diag_G1+diag_E1 +trace_h2 = matrix(mapply(function(x,y)x/y, diag_G1,P),nr=p) + +CI_h2 = t(apply(trace_h2,1,function(x)quantile(x,c(0.025,0.975)))) +q_h2 = cbind(CI_h2,heritability) +colnames(q_h2)[3] = "posterior" +save(q_h2, file = "q_h2.RData") + +#G_matrix confidence interval 95% +CI_G = t(apply(traces_G,1,function(x)quantile(x,c(0.025,0.975)))) +q_G = cbind(CI_G,lowerTriangle(G_est,diag = TRUE)) +colnames(q_G)[3] = "posterior" + +#2.5% +G_low = matrix(,nr=p,nc=p) +G_low[lower.tri(G_low,diag=TRUE)]=q_G[,1] +G_low[upper.tri(G_low)]=upperTriangle(t(G_low)) +G_low1 = sqrt(diag(VY))%*%G_low%*%t(sqrt(diag(VY))) +save(G_low1,file = "G_est_low.RData") +#97.5% +G_high = matrix(,nr=p,nc=p) +G_high[lower.tri(G_high,diag=TRUE)]=q_G[,2] +G_high[upper.tri(G_high)]=upperTriangle(t(G_high)) +G_high1 = sqrt(diag(VY))%*%G_high%*%t(sqrt(diag(VY))) +save(G_high1,file = "G_est_high.RData") + +library(lattice) +levelplot(CovToCor(G_est1)) + +load("C:/Users/Xin~/Desktop/Gmatrix_MCMCglmm.r") +library(gdata) +plot(upperTriangle(G_est1),upperTriangle(G),xlab = "BSFG",ylab = "MCMCglm") +lines(x = c(-2,100), y = c(-2,100)) diff --git a/R_BSFG/G_matrix_output.R b/R_BSFG/G_matrix_output.R index 8c9be4d..71aef1e 100644 --- a/R_BSFG/G_matrix_output.R +++ b/R_BSFG/G_matrix_output.R @@ -21,7 +21,7 @@ setwd(sprintf('~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/%d/%d',i,i)) setwd("Lambda1.5_delta2shape3") load("BSFG_state.RData") # Scaled G Matrix -G_BSFG_sc[[i]] = G_Matrix_Comp(BSFG_state) +G_BSFG_sc[i] = G_Matrix_Comp(BSFG_state) G_est_all = G_BSFG_sc[[i]] sp_num = length(G_est_all) p = BSFG_state$run_variables$P diff --git a/R_BSFG/Sim_2/model_setup_generation.R b/R_BSFG/Sim_2/model_setup_generation.R new file mode 100644 index 0000000..cd739a8 --- /dev/null +++ b/R_BSFG/Sim_2/model_setup_generation.R @@ -0,0 +1,84 @@ +setwd("~/Runcie Lab/pedigree") + +data=read.csv('IDDammSirelnUpdate2.csv',header = TRUE) +source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R") +setup=setup_pedigree_GeneLine(data,GenerationCode=c(0,1),LineCode=1) + + +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG") +model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" +library(Rcpp) +library(RcppArmadillo) +source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/')) +#source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/')) +source(paste(model_path,'BSFG_functions.R',sep='/')) +source(paste(model_path,'plotting_diagnostics.R',sep='/')) +#source(paste(model_path,'setup_pedigree.R',sep='/')) +sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) +#source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) +#source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/')) + +setwd('Sim_3') + +folder = "generation0" +folder = "Line1Gene1" +folder = "Line1Gene01" +try(dir.create(folder)) +setwd(folder) + +# initialize priors +run_parameters = list( + b0 = 1, + b1 = 0.0005, + epsilon = 1e-2, + prop = 1.00, + h2_divisions = 50, + save_freq = 100, + burn = 2000, #100 + thin = 400, #2 + draw_iter = 200 +) + +#load("./priors.RData") +priors = list( + k_init = 20, + resid_Y_prec_shape = 2, + resid_Y_prec_rate = 1/10, + E_a_prec_shape = 2, + E_a_prec_rate = 1/10, + W_prec_shape = 2, + W_prec_rate = 1/10, + Lambda_df = 1.5, # 3/2, 3, 1 + delta_1_shape = 2.1, + delta_1_rate = 1/20, + delta_2_shape = 3, # 3, 3/2, 9/2 + delta_2_rate = 1, + h2_priors_factors = c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1)) +) + +print('Initializing') +save(priors,file = 'Priors.RData') + +# Initialize Chain, prep runs +BSFG_state = fast_BSFG_sampler_init(priors,run_parameters) +save(BSFG_state,file="BSFG_state.RData") + +n_samples = 4000; +for(i in 1:40) { + print(sprintf('Run %d',i)) + BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples) + print(i) +} +#filenames = list.files("~/Runcie Lab/pedigree",pattern="*.RData") +#setwd("~/Runcie Lab/pedigree") +#allfiles = lapply(filenames,function(x){get(load(x))}) +#names(allfiles) = sapply(filenames ,function(x)substr(x,1,(nchar(x)-6))) + +for (i in 1:4){ + load(filenames[i]) +} + +a = load(filenames[1]) + A= get(a) + \ No newline at end of file diff --git a/R_BSFG/code_comparing_plotting4.R b/R_BSFG/code_comparing_plotting4.R new file mode 100644 index 0000000..b64c288 --- /dev/null +++ b/R_BSFG/code_comparing_plotting4.R @@ -0,0 +1,277 @@ +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC") +MCMC = readRDS("G_MCMC.Rdata") + +traitnames = rownames(MCMC[[1]][[1]]) + +old.G = MCMC[[5]] + +p = 18 +spn = 2000 + +new.G = MCMC[[1]] + +star.G = MCMC[[8]] + +four.G = MCMC[[12]] + +# The distance of two matrix(in predicted phenotype value/level) +#selection gradient +beta = 0.3609 +beta.v = c(0,beta,rep(0,16)) +traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +#old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +#new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +four.traces.traits = sapply(four.G,function(x)x%*%beta.v) +#star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +#boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +#boxplot(t(new.traces.traits)) +# use ggplot +traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits),c(four.traces.traits))) +traces.traits.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn)) + +traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star","four")) +#traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old","four")) + +traces.traits.df$traits = rep(traitnames,4*spn) +names(traces.traits.df) = c("response","model","traits") + +summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, + conf.interval=.95, .drop=TRUE) { + library(plyr) + + # New version of length which can handle NA's: if na.rm==T, don't count them + length2 <- function (x, na.rm=FALSE) { + if (na.rm) sum(!is.na(x)) + else length(x) + } + + # This does the summary. For each group's data frame, return a vector with + # N, mean, and sd + datac <- ddply(data, groupvars, .drop=.drop, + .fun = function(xx, col) { + c( + q1 = quantile (xx[[col]],0.025, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + q2 = quantile (xx[[col]],0.975, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + mean = mean (xx[[col]], na.rm=na.rm) + + ) + }, + measurevar + ) + + # Rename the "mean" column + datac <- rename(datac, c("mean" = measurevar)) + + #datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean + + # Confidence interval multiplier for standard error + # Calculate t-statistic for confidence interval: + # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 + #ciMult <- qt(conf.interval/2 + .5, datac$N-1) + #datac$ci <- datac$se * ciMult + + return(datac) +} + +traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits")) +names(traces.traits.df.sum) = c("model","traits","q1","q2","response") + + +#save(traces.traits.df.sum,file = "plot6.RData") + + +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ +# geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +pd <- position_dodge(width = 0.6) +#load("plot5.RData") +ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1) + + +#5. PCA decomposition + +old.traces.eigen = sapply(old.G, function(x)eigen(x)$values) +new.traces.eigen = sapply(new.G, function(x)eigen(x)$values) +star.traces.eigen = sapply(star.G, function(x)eigen(x)$values) +four.traces.eigen = sapply(four.G, function(x)eigen(x)$values) + +traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen),c(four.traces.eigen))) +traces.eigen.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn)) + +traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star","four")) +#traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old","four")) + +traces.eigen.df$lambda = rep(paste0("PC",1:18),4*spn) +names(traces.eigen.df) = c("response","model","PC") + +traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC")) +names(traces.eigen.df.sum) = c("model","PC","q1","q2","response") + +# reorder the PC value in the data +traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18)) + +# size dataframe +old.traces.size = apply(old.traces.eigen,2,sum) +new.traces.size = apply(new.traces.eigen,2,sum) +star.traces.size = apply(star.traces.eigen,2,sum) +four.traces.size = apply(four.traces.eigen,2,sum) +size.df = data.frame(old.traces.size,new.traces.size,star.traces.size,four.traces.size) +library(reshape2) +size.df = melt(size.df,value.name = "Size") +size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size","four.traces.size")) + + +old.traces.pc1c = apply(old.traces.eigen,2,function(x)sum(x[2])/sum(x)) +new.traces.pc1c = apply(new.traces.eigen,2,function(x)sum(x[2])/sum(x)) +star.traces.pc1c = apply(star.traces.eigen,2,function(x)sum(x[2])/sum(x)) +four.traces.pc1c = apply(four.traces.eigen,2,function(x)sum(x[2])/sum(x)) + +# PC1 +old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x)) +new.traces.pc1c = apply(new.traces.eigen,2,function(x)x[1]/sum(x)) +star.traces.pc1c = apply(star.traces.eigen,2,function(x)x[1]/sum(x)) +four.traces.pc1c = apply(four.traces.eigen,2,function(x)x[1]/sum(x)) +pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c,four.traces.pc1c) +library(reshape2) +pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution") +pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c","four.traces.pc1c")) + + +# size of each model +library(dplyr) +traces.eigen.df.sum %>% + group_by(model) %>% + summarize(size = sum(response)) + +library(ggplot2) +ggplot(size.df, aes(x="",y=Size, fill=variable))+ + geom_boxplot(alpha=0.85)+ + theme(axis.title.x = element_blank()) + + +ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+ + geom_boxplot(alpha=0.85)+ + theme(axis.title.x = element_blank()) + + +pd <- position_dodge(width = 0.6) + +ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+ + ylab("Variance")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + xlab("Principle Components") + + +# 6. angle and length of deltaZ and first eigenvector + +# distribution within population +dist_within_theta = function(traces.traits){ + theta = vector(,spn) + for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = traces.traits[,pairs[1]] + b = traces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) + } + return(theta) +} + +# first eigen +old.traces.vector = sapply(old.G, function(x)eigen(x)$vector[,1]) +new.traces.vector = sapply(new.G, function(x)eigen(x)$vector[,1]) +star.traces.vector = sapply(star.G, function(x)eigen(x)$vector[,1]) +four.traces.vector = sapply(four.G, function(x)eigen(x)$vector[,1]) + +old.theta = dist_within_theta(old.traces.vector) +new.theta = dist_within_theta(new.traces.vector) +star.theta = dist_within_theta(star.traces.vector) +four.theta = dist_within_theta(four.traces.vector) +# deltaZ +old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean +new.theta = dist_within_theta(new.traces.traits) +star.theta = dist_within_theta(star.traces.traits) +four.theta = dist_within_theta(four.traces.traits) + +# distribution between population +dist_between_theta = function(Atraces.traits,Btraces.traits){ + theta = vector(,spn) + for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = Atraces.traits[,pairs[1]] + b = Btraces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) + } + return(theta) +} + +# first eigen vector +old.new.theta = dist_between_theta(old.traces.vector,new.traces.vector) +# K-S test +#ks.test(old.theta,old.new.theta) #different +old.star.theta = dist_between_theta(old.traces.vector,star.traces.vector) +four.star.theta = dist_between_theta(four.traces.vector,star.traces.vector) +new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector) +new.four.theta = dist_between_theta(new.traces.vector,four.traces.vector) +old.four.theta = dist_between_theta(old.traces.vector,four.traces.vector) + +# deltaZ +old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits) +# K-S test +#ks.test(old.theta,old.new.theta) #different +old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits) +new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits) +#-------------------------------- +# plot +theta.df = data.frame(old.theta,new.theta,star.theta,four.theta, old.new.theta, old.star.theta, new.star.theta,four.star.theta,new.four.theta,old.four.theta) + +library(reshape2) +theta.df = melt(theta.df, value.name = "Degree") +names(theta.df) = c("labels", "theta") +ggplot(theta.df, aes(x = theta, col=labels))+ + geom_density(aes(fill=labels),alpha=0.1) +#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"), +# values=rainbow(6)) + +#------------------------------- +# 7.length of deltaZ + +length_deltaZ = function(traces.traits){ + #posmean.deltaZ = apply(traces.traits,1,mean) + #deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) + deltaZ.length = apply(traces.traits,2,function(x)sqrt(sum(x * x))) + return(deltaZ.length) +} +old.deltaZ.length = length_deltaZ(old.traces.traits) +new.deltaZ.length = length_deltaZ(new.traces.traits) +star.deltaZ.length = length_deltaZ(star.traces.traits) +four.deltaZ.length = length_deltaZ(four.traces.traits) + +deltaZ.length.df = data.frame(old.deltaZ.length,new.deltaZ.length,star.deltaZ.length,four.deltaZ.length) +library(reshape2) +deltaZ.length.df = melt(deltaZ.length.df,value.name = "Length") +deltaZ.length.df$variable = factor(deltaZ.length.df$variable,levels = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length","four.deltaZ.length")) + + + +ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+ + geom_boxplot(alpha=0.85)+ + theme(axis.title.x = element_blank()) + + diff --git a/R_BSFG/deltaZ_posterior_mean.R b/R_BSFG/deltaZ_posterior_mean.R new file mode 100644 index 0000000..5810972 --- /dev/null +++ b/R_BSFG/deltaZ_posterior_mean.R @@ -0,0 +1,147 @@ +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +BSFGOld = "BSFG_state.RData" +#BSFGNew = "BSFG_fixedlambda65.RData" +#BSFGStar = "BSFG_fixedlambda75.RData" +BSFGNew = "BSFG_state6.RData" +BSFGStar = "BSFG_state7.RData" +target = "F_h2" +load(BSFGOld) +traitnames = BSFG_state$traitnames +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +spn = dim(BSFG_state$Posterior[[target]])[2] +# posterior mean +# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case +#Lambda = BSFG_state$Posterior$Lambda +load(BSFGNew) +#BSFG_state$Posterior$Lambda = Lambda +new.G = G_Matrix_Comp(BSFG_state) + +load(BSFGStar) +#BSFG_state$Posterior$Lambda = Lambda +star.G = G_Matrix_Comp(BSFG_state) +#new.traces.G = G_Traces_Comp(BSFG_state) + + +# The distance of two matrix(in predicted phenotype value/level) +#selection gradient +beta = 0.3609 +beta.v = c(0,beta,rep(0,16)) +traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +#old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +#new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +#star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +#boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +#boxplot(t(new.traces.traits)) +# use ggplot +traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits))) +traces.traits.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn)) + +traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star")) +traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old")) + +traces.traits.df$traits = rep(traitnames,3*spn) +names(traces.traits.df) = c("response","model","traits") + +summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, + conf.interval=.95, .drop=TRUE) { + library(plyr) + + # New version of length which can handle NA's: if na.rm==T, don't count them + length2 <- function (x, na.rm=FALSE) { + if (na.rm) sum(!is.na(x)) + else length(x) + } + + # This does the summary. For each group's data frame, return a vector with + # N, mean, and sd + datac <- ddply(data, groupvars, .drop=.drop, + .fun = function(xx, col) { + c( + q1 = quantile (xx[[col]],0.025, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + q2 = quantile (xx[[col]],0.975, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + mean = mean (xx[[col]], na.rm=na.rm) + + ) + }, + measurevar + ) + + # Rename the "mean" column + datac <- rename(datac, c("mean" = measurevar)) + + #datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean + + # Confidence interval multiplier for standard error + # Calculate t-statistic for confidence interval: + # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 + #ciMult <- qt(conf.interval/2 + .5, datac$N-1) + #datac$ci <- datac$se * ciMult + + return(datac) +} + +traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits")) +names(traces.traits.df.sum) = c("model","traits","q1","q2","response") + + +save(traces.traits.df.sum,file = "plot6.RData") + + +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ +# geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +pd <- position_dodge(width = 0.6) +load("plot5.RData") +plot5 <- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (F3)",color = "G (F3)") + + +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ +# geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +#pd <- position_dodge(width = 0.3) +load("plot6.RData") +plot6<- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (Control)",color = "G (Control)") + + +plots1=list() + plots1=lapply(c(1,3),function(i) get(paste0("plot",i))) +plots2=list() +plots2=lapply(c(2,4),function(i) get(paste0("plot",i))) +plots3=list() +plots3=lapply(c(5,6),function(i) get(paste0("plot",i))) +plots4=list(g11,g22) +library(gridExtra) +ggsave("BSFG_comparison_plots1.pdf", marrangeGrob(grobs = plots1, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("BSFG_comparison_plots2.pdf", marrangeGrob(grobs = plots2, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("BSFG_comparison_plots3.pdf", marrangeGrob(grobs = plots3, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("EigenvaluesComparison.pdf", marrangeGrob(grobs = plots4, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") diff --git a/R_BSFG/factor_3Dplot.R b/R_BSFG/factor_3Dplot.R index 8caa2de..95407c9 100644 --- a/R_BSFG/factor_3Dplot.R +++ b/R_BSFG/factor_3Dplot.R @@ -1,5 +1,6 @@ -setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_test') -setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_fixedlambda') +#setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_test') +#setwd('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Sim_1/R_rep_fixedlambda') +setwd('~/../Desktop/5/Lambda1.5_delta2shape3') #function for plotting 3D plot rgl_add_axes <- function(x, y, z, axis.col = "grey", @@ -46,13 +47,13 @@ F_a_old = matrix(F_a_old[,ncol(F_a_old)],nr=n_old) #Y_sd = apply(na.omit(BSFG_state$data_matrices$Y),2,sd) #F_a_old = apply(F_a_old,2,function(x) x*Y_sd+Y_mean) -load('BSFG_fixedlambda2.RData') +load('BSFG_fixedlambda65.RData') F_a_new2 = BSFG_state$Posterior$F_a n_new2 = BSFG_state$run_variables$n F_a_new2 = matrix(F_a_new2[,ncol(F_a_new2)],nr=n_new2) #F_a_new = F_a_new*Y_sd+Y_mean -load('BSFG_fixedlambda3.RData') +load('BSFG_fixedlambda75.RData') F_a_new3 = BSFG_state$Posterior$F_a n_new3 = BSFG_state$run_variables$n F_a_new3 = matrix(F_a_new3[,ncol(F_a_new3)],nr=n_new3) @@ -61,7 +62,9 @@ F_a_new3 = matrix(F_a_new3[,ncol(F_a_new3)],nr=n_new3) -#======================================================= +#=========================================== +#---------- 3D plot Function---------------- +#=========================================== factor_3Dplot = function(x1,x2,x3){ if(exists('n_new3')){ @@ -94,15 +97,19 @@ factor_3Dplot = function(x1,x2,x3){ movie3d(spin3d(axis = c(0, 0, 1)), duration = 10,dir = getwd()) } -#------------------------------------------- -#plot~~~~~ +#--------------------------------- +#------------- 3D plot ----------- +#--------------------------------- #latent factor we choose x1=1 -x2=5 -x3=10 +x2=2 +x3=3 factor_3Dplot(x1,x2,x3) - -#How much contribution for each of these factors +#--------------------------------- +#---Contribution of ith Factor---- +#--------------------------------- +# How much contribution for each of these factors +# You can load different BSFG_state here load('BSFG_state.RData') p = BSFG_state$run_variables$p @@ -113,8 +120,28 @@ Lambda = matrix(Lambda[,ncol(Lambda)],nr=p) Lambda_x1 = t(Lambda[,x1])%*%Lambda[,x1] Lambda_x2 = t(Lambda[,x2])%*%Lambda[,x2] Lambda_x3 = t(Lambda[,x3])%*%Lambda[,x3] + +# The way to get G_est: +# Run plot_diagnostics(BSFG_state) +# Be careful: The BSFG_state is the one you load in line113 +# a file called 'G_est.RData' will be saved in the current work-directory +# load('G_est.RData') S = sum(diag(G_est)) + #The proportion of total sample variance explained by the i-th factor Lambda_x1/S Lambda_x2/S Lambda_x3/S + +#=================================== +#-------- Unscale G_est------------ +#=================================== +# Load setup dataset which is used for BSFG_state you load before +load('../setup5.RData') +Y = setup$Y +n = nrow(Y) +p = ncol(Y) +Y_missing = is.na(Y) # matrix recording missing data in Y +Mean_Y = colMeans(Y,na.rm=T) +VY = apply(Y,2,var,na.rm=T) +G_est_unscale =sqrt(diag(VY))%*%G_est%*%sqrt(diag(VY)) diff --git a/R_BSFG/fast_BSFG_sampler_current.R b/R_BSFG/fast_BSFG_sampler_current.R index efd1c14..497daf2 100644 --- a/R_BSFG/fast_BSFG_sampler_current.R +++ b/R_BSFG/fast_BSFG_sampler_current.R @@ -185,7 +185,7 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) { # recover() # -----Sample Lambda------------------ # - #conditioning on W, B, F, marginalizing over E_a + #conditioning on W, B, F, marginalizing over E _a Y_tilde = Y - X %*% B - Z_2 %*% W Y_tilde = as.matrix(Y_tilde) # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) @@ -201,15 +201,17 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) { # -----Sample B and E_a--------------- # #conditioning on W, F, Lambda - Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W - Y_tilde = as.matrix(Y_tilde) - # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) - location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) - B = location_sample[1:b,] - if(length(B)==p){ - B = matrix(0,nr=0,nc=p) - } - E_a = location_sample[b+(1:r),] + for£¨pop in pops){ + Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W + Y_tilde = as.matrix(Y_tilde) + # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) + location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) + B = location_sample[1:b,] + if(length(B)==p){ + B = matrix(0,nr=0,nc=p) + } + E_a = location_sample[b+(1:r),] + } # -----Sample W ---------------------- # #conditioning on B, E_a, F, Lambda diff --git a/R_BSFG/model_setup.R b/R_BSFG/model_setup.R index bffa643..af1f218 100644 --- a/R_BSFG/model_setup.R +++ b/R_BSFG/model_setup.R @@ -1,32 +1,38 @@ -setwd("~/Runcie Lab/SparseFactorMixedModel/R_BSFG") +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG") #model_path = '~/Box Sync/DER_projects/BSFG/R_BSFG' #model_path = "../SparseFactorMixedModel/R_BSFG" -model_path = "~/Runcie Lab/SparseFactorMixedModel/R_BSFG" +#model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" + +model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/')) source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/')) source(paste(model_path,'BSFG_functions.R',sep='/')) -source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) -source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) source(paste(model_path,'plotting_diagnostics.R',sep='/')) +source(paste(model_path,'setup_pedigree.R',sep='/')) +sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/')) + library(Rcpp) library(RcppArmadillo) -sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) # set the directory to the location of the setup.RData or setup.mat file #setwd('Example_simulation') -setwd('Sim_1') +setwd('1') # choose a seed for the random number generator. This can be a random seed (for analysis), or you can choose your seed so that # you can repeat the MCMC exactly #seed = sample(1:1e3,1) #set.seed(seed) # create a folder for holding the posterior samples of the current chain (multiple folders could be used for different chains) -rep = "Lambda1.5_delta2shape4.5" #original +folder = "Lambda1.5_delta2shape3" #original #rep = "fixedlambda1" -#rep = "test9" +rep = "test666" +folder = "generation0" folder = sprintf('R_rep_%s',rep) try(dir.create(folder)) setwd(folder) @@ -54,10 +60,10 @@ priors = list( E_a_prec_rate = 1/10, W_prec_shape = 2, W_prec_rate = 1/10, - Lambda_df = 3, # 3/2, 3, 1 + Lambda_df = 1.5, # 3/2, 3, 1 delta_1_shape = 2.1, delta_1_rate = 1/20, - delta_2_shape = 4.5, # 3, 3/2, 9/2 + delta_2_shape = 3, # 3, 3/2, 9/2 delta_2_rate = 1, h2_priors_factors = c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1)) ) @@ -83,30 +89,33 @@ save(BSFG_state,file="BSFG_state.RData") # Run Gibbs sampler. Run in smallish chunks. Output can be used to re-start chain where it left off. # c1=fix(clock); n_samples = 4000; -for(i in 1:50) { +for(i in 1:1) { print(sprintf('Run %d',i)) BSFG_state = fast_BSFG_sampler_current(BSFG_state,n_samples) + BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples) print(i) } ## function only for plotting +setwd('~/Runcie Lab/SparseFactorMixedModel/R_BSFG/R_rep/R_tune_5') load("BSFG_state.RData") plot_diagnostics(BSFG_state) #fixed lambda #load("BSFG_state.RData") #priors = BSFG_state$priors -BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew="setup_LineCode1.RData",YOld="setup_LineCode1.RData") +BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew="setup_LineCode2.RData",YOld="setup_LineCode1.RData") load("BSFG_fixedlambda.RData") BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state) +setwd('~/Runcie Lab/SparseFactorMixedModel/R_BSFG/Sim_1/R_rep_test') +ComparingGMatrix_plot("F_a") -ComparingGMatrix_plot("F_h2") - - - - +setwd('~/../Desktop') +load('BSFG_state.RData') +data = read.csv('ScentDataAll (2).csv') +setup = setup_pedigree(data,3) diff --git a/R_BSFG/model_setup_fixedlambda.R b/R_BSFG/model_setup_fixedlambda.R new file mode 100644 index 0000000..615723b --- /dev/null +++ b/R_BSFG/model_setup_fixedlambda.R @@ -0,0 +1,39 @@ +#model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" +model_path="." +library(Rcpp) +library(RcppArmadillo) +source(paste(model_path,'BSFG_functions.R',sep='/')) +sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) + +run_parameters = list( + b0 = 1, + b1 = 0.0005, + epsilon = 1e-2, + prop = 1.00, + h2_divisions = 50, + save_freq = 100, + burn = 2000, #100 + thin = 400, #2 + draw_iter = 200 +) + +# For the 1st comparation: choose 5 as old BSFG_state. Pairs(5,6; 5,7) +# For the 2nd comparation: choose 5 as old BSFG_state. Pairs(5,14; 5,17) +setwd('~/../Desktop/5/Lambda1.5_delta2shape3') +# fixed lambda +# load 'BSFG_state' as the old BSFG_state +load("BSFG_state.RData") +priors = BSFG_state$priors + +# YNew and YOld dataset should be in the upper directory of current dir +# 'setup datasets' will have the same file names by using setup_pedigree.R function :'setup.RData'. +# Need to change the names of 'setup dataset' files as the example below +YNew="setup6.RData" +YOld="setup5.RData" +BSFG_state = fast_BSFG_sampler_init_fixedlambda(priors,run_parameters,YNew,YOld) + +# load 'BSFG_fixedlambda' as the new BSFG_state +BSFG_state = fast_BSFG_sampler_fixedlambda(BSFG_state,YNew,YOld) +load("BSFG_fixedlambda.RData") diff --git a/R_BSFG/model_setup_generation.R b/R_BSFG/model_setup_generation.R new file mode 100644 index 0000000..2cf6708 --- /dev/null +++ b/R_BSFG/model_setup_generation.R @@ -0,0 +1,70 @@ +setwd("~/Runcie Lab/pedigree") + +data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE) +source("~/Runcie Lab/pedigree/setup_pedigree_GeneLine.R") +setup=setup_pedigree_GeneLine(data,GenerationCode=0,LineCode=c(1,2,3)) + + +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG") +model_path = "~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG" +source(paste(model_path,'fast_BSFG_sampler_init.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_current.R',sep='/')) +source(paste(model_path,'BSFG_functions.R',sep='/')) +source(paste(model_path,'plotting_diagnostics.R',sep='/')) +source(paste(model_path,'setup_pedigree.R',sep='/')) +sourceCpp(paste(model_path,'BSFG_functions_c.cpp',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_init_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_fixedlambda.R',sep='/')) +source(paste(model_path,'fast_BSFG_sampler_generation.R',sep='/')) + +library(Rcpp) +library(RcppArmadillo) + +setwd('Sim_2') +folder = "generation0" +try(dir.create(folder)) +setwd(folder) + +# initialize priors +run_parameters = list( + b0 = 1, + b1 = 0.0005, + epsilon = 1e-2, + prop = 1.00, + h2_divisions = 50, + save_freq = 100, + burn = 2000, #100 + thin = 400, #2 + draw_iter = 200 +) + +#load("./priors.RData") +priors = list( + k_init = 20, + resid_Y_prec_shape = 2, + resid_Y_prec_rate = 1/10, + E_a_prec_shape = 2, + E_a_prec_rate = 1/10, + W_prec_shape = 2, + W_prec_rate = 1/10, + Lambda_df = 1.5, # 3/2, 3, 1 + delta_1_shape = 2.1, + delta_1_rate = 1/20, + delta_2_shape = 3, # 3, 3/2, 9/2 + delta_2_rate = 1, + h2_priors_factors = c(run_parameters$h2_divisions-1,rep(1,run_parameters$h2_divisions-1))/(2*(run_parameters$h2_divisions-1)) +) + +print('Initializing') +save(priors,file = 'Priors.RData') + +# Initialize Chain, prep runs +BSFG_state = fast_BSFG_sampler_init(priors,run_parameters) +save(BSFG_state,file="BSFG_state.RData") + +n_samples = 4000; +for(i in 1:10) { + print(sprintf('Run %d',i)) + BSFG_state = fast_BSFG_sampler_generation(BSFG_state,n_samples) + print(i) +} diff --git a/R_BSFG/plottting_comparing.R b/R_BSFG/plottting_comparing.R index 15ab005..1e76a67 100644 --- a/R_BSFG/plottting_comparing.R +++ b/R_BSFG/plottting_comparing.R @@ -1,11 +1,38 @@ setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") -BSFGOld = "BSFG_state.RData" -BSFGNew = "BSFG_fixedlambda65.RData" -BSFGStar = "BSFG_fixedlambda75.RData" +BSFGOld = "BSFG_state1.RData" +BSFGNew = "BSFG_state2.RData" +BSFGStar = "BSFG_state3.RData" target = "F_h2" + +#function change list into a array +L_to_A = function(l){ + l.to.a = array(unlist(l),dim = c(nrow(l[[1]]),ncol(l[[1]]),length(l))) + return(l.to.a) +} + +#To interpret lambda/latent factors +#Lambda.array = L_to_A(BSFG_state$Posterior$Lambda) +#take posterior mean +Lambda.pos = apply(Lambda,1,mean) +Lambda.pos = matrix(Lambda.pos,nr=p) + +Lambda.pos[,2]=(abs(Lambda.pos[,1])+abs(Lambda.pos[,2]))/2 +Lambda.pos=Lambda.pos[,-1] + +rownames(Lambda.pos) = BSFG_state$traitnames +pdf("interpret_lambda1.pdf") +par(mfrow=c(3,3)) +for (i in 1:17){ + names(Lambda.pos[,i]) = rownames(Lambda.pos) +plot(Lambda.pos[,i], xlab = paste0(i),col=1:18,) + +abline(0,0) +} +dev.off() +save(Lambda.pos,file="LambdaMatrix.RData") #it looks like we do not need so much latent factors. choose the first l factors. #choose the first l latent traits and make them a vector for all iterations -l=5 # 2:6 +l=8 # 2:6 pos_fa = function(spn,pos.fa,n){ pos.fa.rb = vector() for (i in 1:spn){ @@ -17,28 +44,33 @@ pos_fa = function(spn,pos.fa,n){ load(BSFGOld) spn = dim(BSFG_state$Posterior[[target]])[2] old.n = dim(BSFG_state$data_matrices$Y)[1] -old.k = nrow(BSFG_state$Posterior[[target]]) +old.k = nrow(BSFG_state$Posterior[[target]])-1 #old.pos.fa = BSFG_state$Posterior$F_a old.pos.fh2 = BSFG_state$Posterior$F_h2 - +old.pos.fh2[2,] = (old.pos.fh2[1,]+old.pos.fh2[2,])/2 +old.pos.fh2 = old.pos.fh2[-1,] #old.pos.fa.rb = pos_fa(spn,old.pos.fa,old.n) old.pos.fh2.rb = pos_fa(spn,old.pos.fh2,old.n) # new load(BSFGNew) spn = dim(BSFG_state$Posterior[[target]])[2] new.n = dim(BSFG_state$data_matrices$Y)[1] -new.k = nrow(BSFG_state$Posterior[[target]]) +new.k = nrow(BSFG_state$Posterior[[target]])-1 #new.pos.fa = BSFG_state$Posterior$F_a new.pos.fh2 = BSFG_state$Posterior$F_h2 +new.pos.fh2[2,] = (new.pos.fh2[1,]+new.pos.fh2[2,])/2 +new.pos.fh2 = new.pos.fh2[-1,] #new.pos.fa.rb = pos_fa(spn,new.pos.fa,new.n) new.pos.fh2.rb = pos_fa(spn,new.pos.fh2,new.n) # star : represent the third comparison load(BSFGStar) spn = dim(BSFG_state$Posterior[[target]])[2] star.n = dim(BSFG_state$data_matrices$Y)[1] -star.k = nrow(BSFG_state$Posterior[[target]]) +star.k = nrow(BSFG_state$Posterior[[target]])-1 #star.pos.fa = BSFG_state$Posterior$F_a star.pos.fh2 = BSFG_state$Posterior$F_h2 +star.pos.fh2[2,] = (star.pos.fh2[1,]+star.pos.fh2[2,])/2 +star.pos.fh2 = star.pos.fh2[-1,] #star.pos.fa.rb = pos_fa(spn,star.pos.fa,star.n) star.pos.fh2.rb = pos_fa(spn,star.pos.fh2,star.n) # combine @@ -52,32 +84,155 @@ pos.fh2.b = data.frame(pos.fh2.b,c(rep(sprintf("f%d",1:l),spn),rep(sprintf("f%d" colnames(pos.fh2.b) = c("Fh2","model", "factor") #pos.fa.b=as.data.frame(pos.fa.b) #save(pos.fa.b,file="pos_fa_bind.RData") -save(pos.fh2.b,file="pos_fh2_bind.RData") +#save(pos.fh2.b,file="pos_fh2_bind.RData") #load("pos_fa_bind.RData") -load("pos_fh2_bind.RData") +#load("pos_fh2_bind.RData") +pos.fh2.b$model = factor(pos.fh2.b$model, level=c("old","new","star")) +pos.fh2.b$model = factor(pos.fh2.b$model, level=c("star","new","old")) +summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, + conf.interval=.95, .drop=TRUE) { + library(plyr) + + # New version of length which can handle NA's: if na.rm==T, don't count them + length2 <- function (x, na.rm=FALSE) { + if (na.rm) sum(!is.na(x)) + else length(x) + } + + # This does the summary. For each group's data frame, return a vector with + # N, mean, and sd + datac <- ddply(data, groupvars, .drop=.drop, + .fun = function(xx, col) { + c( + q0 = min(xx[[col]], na.rm=na.rm), + q1 = quantile (xx[[col]],0.025, na.rm=na.rm), + q2 = median(xx[[col]], na.rm=na.rm), + q3 = quantile (xx[[col]],0.975, na.rm=na.rm), + q4 = max (xx[[col]], na.rm=na.rm) + + ) + }, + measurevar + ) + + # Rename the "mean" column + # datac <- rename(datac, c("mmm" = measurevar)) + + #datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean + + # Confidence interval multiplier for standard error + # Calculate t-statistic for confidence interval: + # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 + #ciMult <- qt(conf.interval/2 + .5, datac$N-1) + #datac$ci <- datac$se * ciMult + + return(datac) +} + +pos.fh2.b.sum = summarySE(pos.fh2.b,measurevar = "Fh2",groupvars = c("model","factor")) + +names(pos.fh2.b.sum) = c("model","factor","q0","q1" ,"q2","q3","q4" ) + +save(pos.fh2.b,file = "plot3.RData") + +save(pos.fh2.b.sum,file="plot4.RData") # plot -# 1.boxplot of distribution of Fa +# 1.boxplot of distribution of Fh2 library(ggplot2) -ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+ - geom_boxplot()+ - ggtitle("The distribution of Latent traits") +load("plot1.RData") +plot1 <- ggplot(pos.fh2.b, aes(x = factor, y = Fh2, fill=model,color=model))+ + geom_boxplot(alpha = 0.6,position="dodge")+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + scale_fill_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + ylab("Variance")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (F3)",color = "G (F3)") + + +# 95% boxplot +load("plot2.RData") +plot2<- ggplot(pos.fh2.b.sum, aes(x = factor, color=model,fill=model))+ + geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + scale_fill_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + #geom_boxplot(alpha = 0.5)+ + ylab("Variance")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (F3)",color = "G (F3)") + #geom_abline(intercept=0,slope=0,lty=2) +load("plot3.RData") +plot3<- ggplot(pos.fh2.b, aes(x = factor,y = Fh2, color=model,fill=model))+ + geom_boxplot(alpha = 0.5)+ + #geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + scale_fill_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Variance")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+ + labs(fill = "G (Control)",color = "G (Control)") -ggplot(pos.fh2.b,aes( x = Fh2, color=model))+ - geom_density()+ - facet_wrap(~factor,scales = "free_y")+ - ggtitle("The distribution of Latent traits") +load("plot4.RData") +plot4 <- ggplot(pos.fh2.b.sum, aes(x = factor, color=model,fill=model))+ + #geom_boxplot(alpha = 0.5)+ + geom_boxplot(alpha = 0.5,aes(ymin=q0,lower=q1,middle=q2,upper=q3,ymax=q4),stat = "identity")+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + scale_fill_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Variance")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+ + labs(fill = "G (Control)",color = "G (Control)") +# percentage og heribility +load('BSFG_state.RData') +p = BSFG_state$run_variables$p +Lambda = BSFG_state$Posterior$Lambda +Lambda = matrix(Lambda[,ncol(Lambda)],nr=p) +old.G = G_Matrix_Comp(BSFG_state) + +G_pos = function(G){ +G = array(unlist(G),dim = c(nrow(G[[1]]),ncol(G[[1]]),length(G))) +G.pos = apply(G,c(1,2),mean) +return(G.pos) +} +old.G.pos = G_pos(old.G) +#the size of x1, x2, x3 column + +#Contribution of ith Factor +con_factor = function(l,G.pos){ +Lambda_l = t(Lambda[,l])%*%Lambda[,l] +S = sum(diag(G.pos)) +return(Lambda_l/S) +} + +con_factor(3,old.G.pos) +sapply(1:8,function(x)con_factor(x,old.G.pos)) # 2.For each trait, compare the correlation of G1 vs G2 source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') load(BSFGOld) #old.traces.G = G_Traces_Comp(BSFG_state) old.G = G_Matrix_Comp(BSFG_state) p = BSFG_state$run_variables$p +# posterior mean # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case Lambda = BSFG_state$Posterior$Lambda load(BSFGNew) +load("BSFG_state7.RData") load(BSFGStar) BSFG_state$Posterior$Lambda = Lambda #new.traces.G = G_Traces_Comp(BSFG_state) @@ -85,14 +240,16 @@ new.G = G_Matrix_Comp(BSFG_state) #For all the elements in G #pairs(t(old.traces.G)~t(new.traces.G)) #For each trait + old.G = array(unlist(old.G),dim = c(nrow(old.G[[1]]),ncol(old.G[[1]]),length(old.G))) new.G = array(unlist(new.G),dim = c(nrow(new.G[[1]]),ncol(new.G[[1]]),length(new.G))) -old.G.b = apply(old.G,2,rbind) -new.G.b = apply(new.G,2,rbind) -save(old.G.b,file = "old_G_b.RData") -load("old_G_b.RData") -save(new.G.b,file = "new_G_b.RData") -load("new_G_b.RData") +old.G.pos = apply(old.G,c(1,2),mean) +new.G.pos = apply(new.G,c(1,2),mean) +#new.G.b = apply(new.G,2,rbind) +#save(old.G.pos,file = "old_G_b.RData") +#load("old_G_b.RData") +#save(new.G.pos,file = "new_G_b.RData") +#load("new_G_b.RData") #plot(old.G.b[,2],new.G.b[,2]) #combine old.G and new.G as a dataframe #G.b = c(old.G.b[,2],new.G.b[,2]) @@ -101,44 +258,266 @@ load("new_G_b.RData") #colnames(G.b) = c("covariance","model") # 1st triat -each_trait_G_pair = function(i){ -G.b = cbind(old.G.b[,i],new.G.b[,i]) -G.b = as.data.frame(G.b) -colnames(G.b) = c("Old","New") -library(ggplot2) -ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+ +#variance +par(mfrow=c(1,2)) +plot(diag(new.G.pos),diag(old.G.pos),main = "Variance") +abline(0,1) +#covariance +diag(new.G.pos)=NA +diag(old.G.pos)=NA +plot(old.G.pos,new.G.pos, main="Covariance") +abline(0,1) +par(mfrow=c(1,1)) +#each_trait_G_pair = function(i){ +#G.b = cbind(old.G.b[,i],new.G.b[,i]) +#G.b = as.data.frame(G.b) +#colnames(G.b) = c("Old","New") +#library(ggplot2) +#ggplot(G.b,aes(y = as.numeric(Old),x=as.numeric(New)))+ #geom_dotplot(binwidth = 15) - geom_point()+ - geom_abline(intercept = 0,slope = 1)+ - ggtitle(paste0(BSFG_state$traitnames[i])) -} -pdf("each_trait_G_pair.pdf") +# geom_point()+ +# geom_abline(intercept = 0,slope = 1)+ +# ggtitle(paste0(BSFG_state$traitnames[i])) +#} +#pdf("each_trait_G_pair.pdf") #par(mfrow = c(3,3)) -for (i in 1:p){ - each_trait_G_pair(i) -} -dev.off() -each_trait_G_pair(1) -each_trait_G_pair(2) -each_trait_G_pair(3) -each_trait_G_pair(4) -each_trait_G_pair(5) +#for (i in 1:p){ +# each_trait_G_pair(i) +#} +#dev.off() +#each_trait_G_pair(1) +#each_trait_G_pair(2) +#each_trait_G_pair(3) +#each_trait_G_pair(4) +#each_trait_G_pair(5) + # 3.delta z +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +load(BSFGOld) +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +# posterior mean +# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case +Lambda = BSFG_state$Posterior$Lambda +load(BSFGNew) +load("BSFG_state6.RData") +load(BSFGStar) +BSFG_state$Posterior$Lambda = Lambda +#new.traces.G = G_Traces_Comp(BSFG_state) +new.G = G_Matrix_Comp(BSFG_state) + # The distance of two matrix(in predicted phenotype value/level) #selection gradient beta = 0.3609 -beta_v = c(0,beta,rep(0,16)) -traces_traits = matrix(,p,sp_num) -for(j in 1:sp_num) { - Lj = matrix(Posterior$Lambda[,j],p,k) - h2j = Posterior$F_h2[,j] - G_Lj = Lj %*% diag(sqrt(h2j)) - Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j]) - Gj = sqrt(diag(VY))%*%Gj%*%t(sqrt(diag(VY))) - traces_traits[,j] = Gj%*%beta_v -} -boxplot(t(traces_traits)) +beta.v = c(0,beta,rep(0,16)) +#traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +boxplot(t(new.traces.traits)) + +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),each=18),type = "o") save(traces_traits,file = 'traces_traits.RData') # For each G matrix setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/19/19/Lambda1.5_delta2shape3") load("G_BSFG_sc.RData") + +ggplot(pos.fh2.b, aes(x = factor, y = Fh2, color=model,fill=model))+ + geom_boxplot(alpha = 0.5)+ + scale_color_manual(breaks = c("old", "new", "star"), + values=c("green", "red", "blue"))+ + scale_fill_manual(breaks = c("old", "new", "star"), + values=c("green", "red", "blue"))+ + ggtitle("The distribution of variance (Latent traits)")+ + scale_color_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+ + scale_fill_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+ + ylab("Variance")+ + geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) + +#4. E(beta(G1-G2)beta) +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +load(BSFGOld) +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +# posterior mean +# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case +Lambda = BSFG_state$Posterior$Lambda +load(BSFGNew) +load(BSFGStar) +BSFG_state$Posterior$Lambda = Lambda +#new.traces.G = G_Traces_Comp(BSFG_state) +load("BSFG_state6.RData") +new.G = G_Matrix_Comp(BSFG_state) +load("BSFG_state7.RData") +star.G = G_Matrix_Comp(BSFG_state) +beta = 0.3609 +beta.v = as.vector(c(0,beta,rep(0,16))) +# calcualte mean diff response value +diff.traces = mapply(function(x,y) t(beta.v)%*%t(x-y)%*%(x-y)%*%beta.v, old.G,new.G) +boxplot(diff.traces) +mean(sqrt(diff.traces)) + +#5. PCA decomposition + +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +load(BSFGOld) +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +traitnames = BSFG_state$traitnames +#load(BSFGNew) +load("BSFG_state2.RData") +new.G = G_Matrix_Comp(BSFG_state) +load("BSFG_state3.RData") +star.G = G_Matrix_Comp(BSFG_state) + +old.traces.eigen = sapply(old.G, function(x)eigen(x)$values) +new.traces.eigen = sapply(new.G, function(x)eigen(x)$values) +star.traces.eigen = sapply(star.G, function(x)eigen(x)$values) + +traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen))) +traces.eigen.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn)) + +traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star")) +traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old")) + +traces.eigen.df$lambda = rep(paste0("PC",1:18),3*spn) +names(traces.eigen.df) = c("response","model","PC") + +traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC")) +names(traces.eigen.df.sum) = c("model","PC","q1","q2","response") + +# reorder the PC value in the data +traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18)) + + + +save(traces.eigen.df.sum , file = "eigen567.RData") +save(traces.eigen.df.sum, file = "eigen51417.RData") + +library(ggplot2) +pd <- position_dodge(width = 0.6) +load("eigen567.RData") + +# size of each model +library(dplyr) +traces.eigen.df.sum %>% + group_by(model) %>% + summarize(size = sum(response)) + +g11<-ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + ylab("Variance")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + xlab("Principle Components")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(color = "G (F3)") + + + +load("eigen51417.RData") +# size of each model +library(dplyr) +traces.eigen.df.sum %>% + group_by(model) %>% + summarize(size = sum(response)) + +g22<-ggplot(traces.eigen.df.sum,aes(x = reorder(PC,PC), y = response, color=model))+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+ + labs(color = "G (Control)")+ + xlab("Principle Components")+ + ylab("Variance") +# statistical test for the difference +# a.ANOVA test +class(traces.eigen.df) +anova(lm(response~model, data = traces.eigen.df)) +anova(lm(response~model:PC, data = traces.eigen.df)) #which one to use? +anova(lm(response~model*PC, data = traces.eigen.df)) + +# b.Bartlett test + +# 6. angle and length of deltaZ + +# distribution within population +dist_within_theta = function(traces.traits){ +theta = vector(,spn) +for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = traces.traits[,pairs[1]] + b = traces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) +} +return(theta) +} +old.theta = dist_within_theta(old.traces.traits) + +new.theta = dist_within_theta(new.traces.traits) + +star.theta = dist_within_theta(star.traces.traits) + + +# distribution between population +dist_between_theta = function(Atraces.traits,Btraces.traits){ + theta = vector(,spn) + for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = Atraces.traits[,pairs[1]] + b = Btraces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) + } + return(theta) +} +old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits) +# K-S test +ks.test(old.theta,old.new.theta) #different + +old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits) +new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits) + +theta.df = data.frame(old.theta,new.theta,star.theta, old.new.theta, old.star.theta, new.star.theta) + +library(reshape2) +theta.df = melt(theta.df, value.name = "Degree") +names(theta.df) = c("labels", "theta") +ggplot(theta.df, aes(x = theta, col=labels))+ + geom_density(aes(fill=labels),alpha=0.1) +#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"), +# values=rainbow(6)) + +# length +length_deltaZ = function(traces.traits){ + posmean.deltaZ = apply(traces.traits,1,mean) + deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) + return(deltaZ.length) +} +old.deltaZ.length = length_deltaZ(old.traces.traits) +old.deltaZ.length +new.deltaZ.length = length_deltaZ(new.trace s.traits) +new.deltaZ.length +star.deltaZ.length = length_deltaZ(star.traces.traits) +star.deltaZ.length diff --git a/R_BSFG/setup_pedigree.R b/R_BSFG/setup_pedigree.R index b96f952..619a359 100644 --- a/R_BSFG/setup_pedigree.R +++ b/R_BSFG/setup_pedigree.R @@ -1,5 +1,5 @@ -setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfactor=TRUE){ +setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfactor=FALSE){ generation_pos <- data$Generation %in% GenerationCode LineCode_pos <- data$LineCode%in%LineCode|data$LineCode==0 @@ -34,7 +34,9 @@ setup_pedigree = function(data=data,GenerationCode,LineCode,fixedfactor,randomfa } reg=lm(as.matrix(Y)~0+X, na.action= na.omit) B_act = reg$coefficients - }else{X=matrix(0,nr=n,nc=0);B_act=matrix(0,nr=0,nc=ncol(Y))} + }else{ + # there should be a column of 1 in design matrix as mean value + X=matrix(0,nr=n,nc=0);B_act=matrix(0,nr=0,nc=ncol(Y))} traitnames <- names(data)[7:ncol(data)] From f37181a4b46e82e3778b5b7c640f6c647d1859a6 Mon Sep 17 00:00:00 2001 From: xin li Date: Mon, 23 Jan 2017 16:09:09 -0800 Subject: [PATCH 06/18] Adding functions for saving posterior samples in combined sampling cases --- R_BSFG/BSFG_functions.R | 63 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index ff340d0..921255a 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -46,9 +46,9 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) means = c() Qlam = c() for(pop in pops){ - FUDi = E_a_prec[[pop]][j] * sweep(FtU[[pop]],2,(s[[pop]] + E_a_prec[[pop]][j]/resid_Y_prec[j]),'/') - means = means + FUDi %*% UtY[[pop]][,j] - Qlam = Qlam + FUDi %*% t(FtU[[pop]]) + FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/') + means = means + FUDi %*% UtY[,j] + Qlam = Qlam + FUDi %*% t(FtU) } Qlam = Qlam + diag(Plam[j,]) @@ -65,6 +65,7 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) return(Lambda) } + sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) { #sample factor heritibilties conditional on a given residual precision #(res_precision) @@ -206,6 +207,7 @@ sample_means = function( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDes return(location_sample) } + sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) { #samples genetic effects on factors (F_a) conditional on the factor scores F: # F_i = F_{a_i} + E_i, E_i~N(0,s2*(1-h2)*I) for each latent trait i @@ -246,6 +248,8 @@ sample_F_a = function(F,Z_1,F_h2,invert_aZZt_Ainv) { return(F_a) } + + sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) { #Sample factor scores given factor loadings (F_a), factor heritabilities (F_h2) and #phenotype residuals @@ -274,6 +278,7 @@ sample_factors_scores = function( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) { } + sample_delta = function( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) { #sample delta and tauh parameters that control the magnitudes of higher #index factor loadings. @@ -410,10 +415,56 @@ save_posterior_samples = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F Posterior$B = (Posterior$B*(sp_num-1) + B)/sp_num Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num Posterior$W = (Posterior$W*(sp_num-1) + W)/sp_num - return(Posterior) } - + +save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec) { + # save posteriors. Full traces are kept of the more interesting parameters. + # Only the posterior means are kept of less interesting parameters. These + # should be correctly calculated over several re-starts of the sampler. + + # first unlist F, F_a, F_h2, Y, B, E_a + nl = length(F_h2) + F = unlist(do.call("rbind", F)) # n*k + F_a = unlist(do.call("rbind", F_a)) #n*k + F_h2 = Reduce("+", F_h2)/nl # k taking average + B = Reduce("+",B)/nl # b*p taking average + E_a = unlist(do.call("rbind", E_a)) # n*k + W = Reduce("+",W)/nl # k*p taking average + resid_Y_prec = Reduce("+",resid_Y_prec)/nl #p taking average + E_a_prec = Reduce("+",E_a_prec)/nl #p + W_prec = Reduce("+",W_prec)/nl #p + + #sp = ncol(Posterior$Lambda) + sp = ncol(Posterior$Lambda) + #size(Posterior$Lambda,2) + #save factor samples + # #if(length(Lambda) > nrow(Posterior[["Lambda"]])){ this will not happen since we stop updatek + # # expand factor sample matrix if necessary + # Posterior[["F"]] = rbind(Posterior[["F"]], matrix(0,nr = length(F) -nrow(Posterior[["F"]]), nc = sp)) + # Posterior[["F_a"]] = rbind(Posterior[["F_a"]], matrix(0,nr = length(F_a) -nrow(Posterior[["F_a"]]), nc = sp)) + # Posterior[["delta"]] = rbind(Posterior[["delta"]], matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]), nc = sp)) + # Posterior[["F_h2"]] = rbind(Posterior[["F_h2"]], matrix(0,nr = length(F_h2) -nrow(Posterior[["F_h2"]]), nc = sp)) + # } + Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda) + Posterior$F[1:length(F),sp_num] = c(F) + Posterior$F_a[1:length(F_a),sp_num] = c(F_a) + Posterior$delta[1:length(delta),sp_num] = delta + Posterior$F_h2[1:length(F_h2),sp_num] = F_h2 + + Posterior$resid_Y_prec[,sp_num] = resid_Y_prec + Posterior$E_a_prec[,sp_num] = E_a_prec + Posterior$W_prec[,sp_num] = W_prec + + # save B,U,W + Posterior$B = (Posterior$B*(sp_num-1) + B)/sp_num + Posterior$E_a = (Posterior$E_a*(sp_num-1) + E_a)/sp_num + Posterior$W = (Posterior$W*(sp_num-1) + W)/sp_num + + return(Posterior) +} + + save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,resid_Y_prec,E_a_prec,W_prec) { # save posteriors. Full traces are kept of the more interesting parameters. # Only the posterior means are kept of less interesting parameters. These @@ -442,7 +493,7 @@ save_posterior_samples_fixedlambda = function( j,Posterior,F,F_a,B,W,E_a,F_h2,re Posterior$E_a = (Posterior$E_a*(j-1) + E_a)/j Posterior$W = (Posterior$W*(j-1) + W)/j - return(Posterior) + return(Posterior) } From baccd0488eb528d6853c206bab8dfeb23e632205 Mon Sep 17 00:00:00 2001 From: xin li Date: Mon, 23 Jan 2017 16:10:10 -0800 Subject: [PATCH 07/18] minor changes --- R_BSFG/code_comparing_plotting4.R | 224 +++++++++++++++++++++++++----- R_BSFG/deltaZ_posterior_mean.R | 42 +++--- 2 files changed, 209 insertions(+), 57 deletions(-) diff --git a/R_BSFG/code_comparing_plotting4.R b/R_BSFG/code_comparing_plotting4.R index b64c288..413788a 100644 --- a/R_BSFG/code_comparing_plotting4.R +++ b/R_BSFG/code_comparing_plotting4.R @@ -94,10 +94,20 @@ library(ggplot2) #plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") pd <- position_dodge(width = 0.6) #load("plot5.RData") -ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ - ylab("Response")+ - geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ - geom_point(position=pd,size=1) +#ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ +# ylab("Response")+ +# geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ +# geom_point(position=pd,size=1) +ggplot(traces.traits.df,aes(x = traits, y = response, fill=model))+ + ylab("Variance")+ + geom_boxplot( position="dodge")+ + scale_fill_manual(breaks = c("old","new","star","four"), + values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G") + #5. PCA decomposition @@ -106,21 +116,24 @@ old.traces.eigen = sapply(old.G, function(x)eigen(x)$values) new.traces.eigen = sapply(new.G, function(x)eigen(x)$values) star.traces.eigen = sapply(star.G, function(x)eigen(x)$values) four.traces.eigen = sapply(four.G, function(x)eigen(x)$values) +# we only need the first 8 of them +cumsum(old.traces.eigen[,1]/sum(old.traces.eigen[,1])) +l = 8 -traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen),c(four.traces.eigen))) -traces.eigen.df$model = rep(c("old","new","star","four"),c(p*spn,p*spn,p*spn,p*spn)) +traces.eigen.df = data.frame(c(c(old.traces.eigen[1:8,]),c(new.traces.eigen[1:8,]),c(star.traces.eigen[1:8,]),c(four.traces.eigen[1:8,]))) +traces.eigen.df$model = rep(c("old","new","star","four"),c(l*spn,l*spn,l*spn,l*spn)) traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star","four")) #traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old","four")) -traces.eigen.df$lambda = rep(paste0("PC",1:18),4*spn) +traces.eigen.df$lambda = rep(paste0("PC",1:l),4*spn) names(traces.eigen.df) = c("response","model","PC") -traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC")) -names(traces.eigen.df.sum) = c("model","PC","q1","q2","response") +#traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC")) +#names(traces.eigen.df.sum) = c("model","PC","q1","q2","response") # reorder the PC value in the data -traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18)) +traces.eigen.df$PC = factor(traces.eigen.df$PC,level=paste0("PC",1:8)) # size dataframe old.traces.size = apply(old.traces.eigen,2,sum) @@ -130,13 +143,8 @@ four.traces.size = apply(four.traces.eigen,2,sum) size.df = data.frame(old.traces.size,new.traces.size,star.traces.size,four.traces.size) library(reshape2) size.df = melt(size.df,value.name = "Size") -size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size","four.traces.size")) - +size.df$variable = factor(size.df$variable,levels = c("old.traces.size","new.traces.size","star.traces.size","four.traces.size")) -old.traces.pc1c = apply(old.traces.eigen,2,function(x)sum(x[2])/sum(x)) -new.traces.pc1c = apply(new.traces.eigen,2,function(x)sum(x[2])/sum(x)) -star.traces.pc1c = apply(star.traces.eigen,2,function(x)sum(x[2])/sum(x)) -four.traces.pc1c = apply(four.traces.eigen,2,function(x)sum(x[2])/sum(x)) # PC1 old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x)) @@ -146,7 +154,7 @@ four.traces.pc1c = apply(four.traces.eigen,2,function(x)x[1]/sum(x)) pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c,four.traces.pc1c) library(reshape2) pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution") -pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c","four.traces.pc1c")) +pc1c.df$variable = factor(pc1c.df$variable,levels = c("old.traces.pc1c","new.traces.pc1c","star.traces.pc1c","four.traces.pc1c")) # size of each model @@ -156,23 +164,66 @@ traces.eigen.df.sum %>% summarize(size = sum(response)) library(ggplot2) -ggplot(size.df, aes(x="",y=Size, fill=variable))+ - geom_boxplot(alpha=0.85)+ - theme(axis.title.x = element_blank()) - - -ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+ - geom_boxplot(alpha=0.85)+ - theme(axis.title.x = element_blank()) - - -pd <- position_dodge(width = 0.6) - -ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+ - ylab("Variance")+ - geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ - geom_point(position=pd,size=1)+ - xlab("Principle Components") +plot1<- ggplot(size.df, aes(x="",y=Size, fill=variable))+ + geom_boxplot( position="dodge",outlier.shape = NA)+ + labs(fill = "G")+ + scale_fill_manual( values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.position="none",axis.title.x = element_blank(), + panel.border = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill="white",color = "black")) + +plot2<- ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+ + geom_boxplot( position="dodge",outlier.shape = NA)+ + labs(fill = "G")+ + scale_fill_manual( values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.position="none",axis.title.x = element_blank(), + panel.border = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill="white",color = "black"))+ + ylab("PC1") + +plot3 <- ggplot(traces.eigen.df,aes(x = PC, y = response, fill=model))+ + ylab("Variance")+ + xlab("")+ + geom_boxplot( position="dodge",outlier.shape = NA)+ + scale_fill_manual(breaks = c("old","new","star","four"), + values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12), + panel.border = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill="white",color = "black"))+ + labs(fill = "G") + +plot5 <- ggplot(traces.eigen.df,aes(x = PC, y = response, fill=model))+ + ylab("Variance")+ + xlab("")+ + geom_boxplot( position="dodge",outlier.shape = NA)+ + scale_fill_manual(breaks = c("old","new","star","four"), + values=c("grey0","grey30","red2","blue"),labels=c("Overall","Control","Tall","Short"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12), + panel.border = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill="white",color = "black"))+ + labs(fill = "G")+ + ylim(c(0,2)) +#pd <- position_dodge(width = 0.6) + +#ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+ +# ylab("Variance")+ +# geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ +# geom_point(position=pd,size=1)+ +# xlab("Principle Components") # 6. angle and length of deltaZ and first eigenvector @@ -201,6 +252,29 @@ old.theta = dist_within_theta(old.traces.vector) new.theta = dist_within_theta(new.traces.vector) star.theta = dist_within_theta(star.traces.vector) four.theta = dist_within_theta(four.traces.vector) +# 95% C.I +theta_ci = function(theta.trace){ + quantile(theta.trace,c(0.05,0.95)) +} +theta_ci(old.theta) +theta_ci(new.theta) +theta_ci(star.theta) +theta_ci(four.theta) +# mode +#library(modeest) +#mlv(old.theta, method = "mfv") +#mlv(new.theta, method = "mfv") +#mlv(star.theta, method = "mfv") +#mlv(four.theta, method = "mfv") + +dmode <- function(x) { + den <- density(x, kernel=c("gaussian")) + ( den$x[den$y==max(den$y)] ) +} +dmode(old.theta) +dmode(new.theta) +dmode(star.theta) +dmode(four.theta) # deltaZ old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean new.theta = dist_within_theta(new.traces.traits) @@ -231,6 +305,26 @@ new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector) new.four.theta = dist_between_theta(new.traces.vector,four.traces.vector) old.four.theta = dist_between_theta(old.traces.vector,four.traces.vector) +# 95% C.I +theta_ci(old.new.theta) +theta_ci(old.star.theta) +theta_ci(four.star.theta) +theta_ci(new.star.theta) +theta_ci(new.four.theta) +theta_ci(old.four.theta) +# mode +# mlv(old.new.theta, method = "mfv") +# mlv(old.star.theta, method = "mfv") +# mlv(four.star.theta, method = "mfv") +# mlv(new.star.theta, method = "mfv") +# mlv(new.four.theta, method = "mfv") +# mlv(old.four.theta, method = "mfv") +dmode(old.new.theta) +dmode(old.star.theta) +dmode(old.four.theta) +dmode(new.star.theta) +dmode(new.four.theta) +dmode(four.star.theta) # deltaZ old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits) # K-S test @@ -244,11 +338,69 @@ theta.df = data.frame(old.theta,new.theta,star.theta,four.theta, old.new.theta, library(reshape2) theta.df = melt(theta.df, value.name = "Degree") names(theta.df) = c("labels", "theta") -ggplot(theta.df, aes(x = theta, col=labels))+ - geom_density(aes(fill=labels),alpha=0.1) +theta.df$labels = factor(theta.df$labels,levels = c("old.theta","new.theta","star.theta","four.theta", "old.new.theta", "old.star.theta", "old.four.theta","new.star.theta","four.star.theta","new.four.theta")) + +plot4 <- ggplot(theta.df, aes(x="",y = theta))+ + geom_boxplot(aes(fill=labels),outlier.shape = NA)+ + labs(fill = "G")+ + scale_fill_manual( values=c("grey0","grey30","red2","blue","grey50","red3","blue2","red4","blue4","purple4"),labels=c("Overall","Control","Tall","Short","Overall_Control", + "Overall_Tall","Overall_Short","Control_Tall","Control_Short","Low_Tall"))+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12),axis.title.x = element_blank(), + panel.border = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill="white",color = "black"))+ + ylab("Theta") + + #+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"), # values=rainbow(6)) +save(plot1,file = "plot1.RData") +save(plot2,file = "plot2.RData") +save(plot3,file = "plot3.RData") +save(plot4,file = "plot4.RData") + +multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { + library(grid) + + # Make a list from the ... arguments and plotlist + plots <- c(list(...), plotlist) + + numPlots = length(plots) + + # If layout is NULL, then use 'cols' to determine layout + if (is.null(layout)) { + # Make the panel + # ncol: Number of columns of plots + # nrow: Number of rows needed, calculated from # of cols + layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), + ncol = cols, nrow = ceiling(numPlots/cols)) + } + + if (numPlots==1) { + print(plots[[1]]) + + } else { + # Set up the page + grid.newpage() + pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) + + # Make each plot, in the correct location + for (i in 1:numPlots) { + # Get the i,j matrix positions of the regions that contain this subplot + matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) + + print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, + layout.pos.col = matchidx$col)) + } + } +} + +layout <- matrix(c(1, 1, 1, 2, 3, 4), nrow = 2, byrow = TRUE) +multiplot(plotlist = list(plot5,plot2,plot1,plot4),layout = layout) #------------------------------- # 7.length of deltaZ diff --git a/R_BSFG/deltaZ_posterior_mean.R b/R_BSFG/deltaZ_posterior_mean.R index 5810972..f6ae18d 100644 --- a/R_BSFG/deltaZ_posterior_mean.R +++ b/R_BSFG/deltaZ_posterior_mean.R @@ -1,42 +1,42 @@ -source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') -setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") -BSFGOld = "BSFG_state.RData" +#source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +#setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +#BSFGOld = "BSFG_state.RData" #BSFGNew = "BSFG_fixedlambda65.RData" #BSFGStar = "BSFG_fixedlambda75.RData" -BSFGNew = "BSFG_state6.RData" -BSFGStar = "BSFG_state7.RData" -target = "F_h2" -load(BSFGOld) -traitnames = BSFG_state$traitnames +#BSFGNew = "BSFG_state6.RData" +#BSFGStar = "BSFG_state7.RData" +#target = "F_h2" +#load(BSFGOld) +#traitnames = BSFG_state$traitnames #old.traces.G = G_Traces_Comp(BSFG_state) -old.G = G_Matrix_Comp(BSFG_state) -p = BSFG_state$run_variables$p -spn = dim(BSFG_state$Posterior[[target]])[2] +#old.G = G_Matrix_Comp(BSFG_state) +#p = BSFG_state$run_variables$p +#spn = dim(BSFG_state$Posterior[[target]])[2] # posterior mean # do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case #Lambda = BSFG_state$Posterior$Lambda -load(BSFGNew) +#load(BSFGNew) #BSFG_state$Posterior$Lambda = Lambda -new.G = G_Matrix_Comp(BSFG_state) +#new.G = G_Matrix_Comp(BSFG_state) -load(BSFGStar) +#load(BSFGStar) #BSFG_state$Posterior$Lambda = Lambda -star.G = G_Matrix_Comp(BSFG_state) +#star.G = G_Matrix_Comp(BSFG_state) #new.traces.G = G_Traces_Comp(BSFG_state) # The distance of two matrix(in predicted phenotype value/level) #selection gradient -beta = 0.3609 -beta.v = c(0,beta,rep(0,16)) -traces.traits = matrix(,p,spn) +#beta = 0.3609 +#beta.v = c(0,beta,rep(0,16)) +#traces.traits = matrix(,p,spn) #old.G in list form -old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +#old.traces.traits = sapply(old.G,function(x)x%*%beta.v) #old.traits.posmean = apply(old.traces.traits,1,mean) -new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +#new.traces.traits = sapply(new.G,function(x)x%*%beta.v) #new.traits.posmean = apply(new.traces.traits,1,mean) -star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +#star.traces.traits = sapply(star.G,function(x)x%*%beta.v) #star.traits.posmean = apply(star.traces.traits,1,mean) #boxplot(old.traces_traits[2,]) #boxplot(t(old.traces.traits)) From c28ddde824929698c4eed1bd6ec18136c916c00d Mon Sep 17 00:00:00 2001 From: xin li Date: Mon, 23 Jan 2017 17:15:05 -0800 Subject: [PATCH 08/18] Adding function for sampling combined data --- R_BSFG/fast_BSFG_sampler_comb.R | 360 ++++++++++++++++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 R_BSFG/fast_BSFG_sampler_comb.R diff --git a/R_BSFG/fast_BSFG_sampler_comb.R b/R_BSFG/fast_BSFG_sampler_comb.R new file mode 100644 index 0000000..2e6913e --- /dev/null +++ b/R_BSFG/fast_BSFG_sampler_comb.R @@ -0,0 +1,360 @@ +fast_BSFG_sampler_comb = function(BSFG_state,n_samples) { + # -- Daniel Runcie -- # + # + # Gibbs sampler for genetic covariance estimation based on mixed effects + # model, with missing data + # + # Code for: + # + # Runcie and Mukherjee (2013) Dissecting high-dimensional traits + # with Bayesian sparse factor analysis of genetic covariance matrices. + # GENETICS. + # + # (c) July 30, 2013 + # + # code based on original provided by Anirban Bhattacharya + # + # + # This function implements the BSF-G partially collapsed Gibbs sampler. + # Variable notation follows the Runcie and Mukherjee manuscript as closely + # as possible. + # + # All input and initialization functions are carried out by the function + # fast_BSFG_sampler_init See the documentation of that function for details. + # + # The sampler is designed to do a short-medium length run and then return + # the state of the chain. After assessing the progress of the chain, + # (optionally), the Posterior matrices can be reset, and the chain + # re-started where it left off. Right now, the random seed is not saved. Probably should + # be if I can figure out how. + # + # This function takes the following inputs: + # data_matrices: struct holding the (should be imutable) data, design and incidence matrices: + # Y: Full phenotype data. n x p + # X: Fixed effect design matrix. n x b + # Z_1: Random effect 1 incidence matrix. n x r1 + # Z_2: Random effect 2 incidence matrix. n x r2 + # Y_missing: incidence matrix of missing data in Y + # start_i: iteration number of the end of the last run. + # draw_iter: frequency of updating diagnostic plots + # burn: number of burnin samples + # sp: total number of samples to collect + # thin: thinning rate of chain + # simulation: boolean. Is this a simulation? + # params: struct with chain parameters. + # priors: struct with all relevant prior hyperparameters + # Posterior: struct with posterior matrices, or running posterior means. + # Note: not sure how well posterior means work after re-starting chain. Probably not well. + # current_state: current (initial) conditions of all model parameters + # + # Several diagnostic plots are produced during the run. + # Their interpretation is described within the source codes: + # draw_simulation_diagnostics.m: For simulated data with known true values + # draw_results_diagnostics.m: Otherwise + # + + data_matrices = BSFG_state$data_matrices + params = BSFG_state$params + priors = BSFG_state$priors + Posterior = BSFG_state$Posterior + current_state = BSFG_state$current_state + run_parameters = BSFG_state$run_parameters + run_variables = BSFG_state$run_variables + traitnames = BSFG_state$traitnames + + # ----------------------------------------------- # + # ----------------Load data matrices------------- # + # ----------------------------------------------- # + + Y = data_matrices$Y + Z_1 = data_matrices$Z_1 + Z_2 = data_matrices$Z_2 + X = data_matrices$X + Y_missing = data_matrices$Y_missing + + p = run_variables$p + n = run_variables$n + r = run_variables$r + r2 = run_variables$r2 + b = run_variables$b + + + # ----------------------------------------------- # + # ----------------Load priors-------------------- # + # ----------------------------------------------- # + + resid_Y_prec_shape = priors$resid_Y_prec_shape + resid_Y_prec_rate = priors$resid_Y_prec_rate + E_a_prec_shape = priors$E_a_prec_shape + E_a_prec_rate = priors$E_a_prec_rate + W_prec_shape = priors$W_prec_shape + W_prec_rate = priors$W_prec_rate + Lambda_df = priors$Lambda_df + delta_1_shape = priors$delta_1_shape + delta_1_rate = priors$delta_1_rate + delta_2_shape = priors$delta_2_shape + delta_2_rate = priors$delta_2_rate + h2_priors_factors = priors$h2_priors_factors + + + # ----------------------------------------------- # + # ----------------Load current state------------- # + # ----------------------------------------------- # + resid_Y_prec = current_state$resid_Y_prec + F = current_state$F + Lambda = current_state$Lambda + E_a = current_state$E_a + W = current_state$W + Lambda_prec = current_state$Lambda_prec + delta = current_state$delta + tauh = current_state$tauh + E_a_prec = current_state$E_a_prec + W_prec = current_state$W_prec + Plam = current_state$Plam + F_h2 = current_state$F_h2 + F_a = current_state$F_a + B = current_state$B + start_i = current_state$nrun + k = ncol(F) + + # ----------------------------------------------- # + # -----------Reset Global Random Number Stream--- # + # ----------------------------------------------- # + do.call("RNGkind",as.list(BSFG_state$RNG$RNGkind)) ## must be first! + assign(".Random.seed", BSFG_state$RNG$Random.seed, .GlobalEnv) + + # ----------------------------------------------- # + # -----------Load pre-calcualted matrices-------- # + # ----------------------------------------------- # + Ainv = run_variables$Ainv + A_2_inv = run_variables$A_2_inv + invert_aI_bZAZ = run_variables$invert_aI_bZAZ + invert_aPXA_bDesignDesignT = run_variables$invert_aPXA_bDesignDesignT + invert_aZZt_Ainv = run_variables$invert_aZZt_Ainv + invert_aPXA_bDesignDesignT_rand2 = run_variables$invert_aPXA_bDesignDesignT_rand2 + + + # ----------------------------------------------- # + # ----------------Set up run--------------------- # + # ----------------------------------------------- # + # b0,b1: parameters controlling rate of adaptation of factor model size + # h2_divisions: number of discrete steps for each factor heritability parameter + # epsilon: truncation point for factor loadings during adaptation + b0 = run_parameters$b0 + b1 = run_parameters$b1 + h2_divisions = run_parameters$h2_divisions + epsilon = run_parameters$epsilon + prop = run_parameters$prop + save_freq = run_parameters$save_freq + burn = run_parameters$burn + thin = run_parameters$thin + draw_iter = run_parameters$draw_iter + simulation = run_parameters$simulation + + + # ----------------------------------------------- # + # ---Extend posterior matrices for new samples--- # + # ----------------------------------------------- # + # change posterior into a list (can the elements in dataframe be lists?) + sp = (start_i + n_samples - burn)/thin - ncol(Posterior$Lambda) + if(sp > 0){ + Posterior$Lambda = cbind(Posterior$Lambda,matrix(0,nr = nrow(Posterior$Lambda),nc = sp)) + Posterior$F = cbind(Posterior$F,matrix(0,nr = nrow(Posterior$F),nc = sp)) + Posterior$F_a = cbind(Posterior$F_a,matrix(0,nr = nrow(Posterior$F_a),nc = sp)) + Posterior$delta = cbind(Posterior$delta,matrix(0,nr = nrow(Posterior$delta),nc = sp)) + Posterior$F_h2 = cbind(Posterior$F_h2,matrix(0,nr = nrow(Posterior$F_h2),nc = sp)) + Posterior$resid_Y_prec = cbind(Posterior$resid_Y_prec,matrix(0,nr = nrow(Posterior$resid_Y_prec),nc = sp)) + Posterior$E_a_prec = cbind(Posterior$E_a_prec,matrix(0,nr = nrow(Posterior$E_a_prec),nc = sp)) + Posterior$W_prec = cbind(Posterior$W_prec,matrix(0,nr = nrow(Posterior$W_prec),nc = sp)) + } + + # ----------------------------------------------- # + # --------------start gibbs sampling------------- # + # ----------------------------------------------- # + + start_time = Sys.time() + for(i in start_i+(1:n_samples)){ + + # -----fill in missing phenotypes----- # + #conditioning on everything else + if(sum(Y_missing)>0) { + meanTraits = X %*% B + F %*% t(Lambda) + Z_1 %*% E_a + Z_2 %*% W + resids = matrix(rnorm(p*n,0,sqrt(1/resid_Y_prec)),nr = n,nc = p,byrow=T) + Y[Y_missing] = meanTraits[Y_missing] + resids[Y_missing] + } + # recover() + + # -----Sample Lambda------------------ # + #conditioning on W, B, F, marginalizing over E _a + Y_tilde = Y - X %*% B - Z_2 %*% W + Y_tilde = as.matrix(Y_tilde) + # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) + + Lambda = sample_Lambda_comb( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) + + # -----Sample Lambda_prec------------- # + Lambda2 = Lambda^2 + Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k) + + # -----Sample delta, update tauh------ # + delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) + tauh = cumprod(delta) + + # -----Update Plam-------------------- # + Plam = sweep(Lambda_prec,2,tauh,'*') + + # -----Sample resid_Y_prec ---------------- # + # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below. + + # -----Sample E_a_prec---------------- # + #conditioning on W, B, F, marginalizing over E_a + # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) + # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) + + # pops should be a output from setup datasets and for pop in pops, the row number for + # each pop should also included in the setup dataset + # read pops (number of populations) + # read row number for each pop + # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda) + + # -----Sample B and E_a--------------- # + #conditioning on W, F, Lambda + for(pop in pops){ + Y_tilde = Y[[pop]] - F[[pop]] %*% t(Lambda) - Z_2[[pop]] %*% W[[pop]] + Y_tilde = as.matrix(Y_tilde) + # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) + location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], E_a_prec[[pop]], invert_aPXA_bDesignDesignT[[pop]] ) + B[[pop]] = location_sample[1:b,] + if(length(B[[pop]])==p){ + B[[pop]] = matrix(0,nr=0,nc=p) + } + E_a[[pop]] = location_sample[b+(1:r),] + + + # -----Sample W ---------------------- # + #conditioning on B, E_a, F, Lambda + if(ncol(Z_2) > 0) { + Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - F[[pop]] %*% t(Lambda) + Y_tilde = as.matrix(Y_tilde) + # location_sample = sample_means( Y_tilde, resid_Y_prec, W_prec, invert_aPXA_bDesignDesignT_rand2 ) + location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], W_prec[[pop]], invert_aPXA_bDesignDesignT_rand2[[pop]] ) + W[[pop]] = location_sample + } + + # -----Sample F_h2-------------------- # + #conditioning on F, marginalizing over F_a + # F_h2 = sample_h2s_discrete(F,h2_divisions,h2_priors_factors,invert_aI_bZAZ) + F_h2[[pop]] = sample_h2s_discrete(F[[pop]],h2_divisions,h2_priors_factors,invert_aI_bZAZ[[pop]]) + + # -----Sample F_a--------------------- # + #conditioning on F, F_h2 + # F_a = sample_F_a_c(F,Z_1,F_h2,invert_aZZt_Ainv) + F_a[[pop]] = sample_F_a(F[[pop]],Z_1[[pop]],F_h2[[pop]],invert_aZZt_Ainv[[pop]]); + + # -----Sample F----------------------- # + #conditioning on B,F_a,E_a,W,Lambda, F_h2 + Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]] + Y_tilde = as.matrix(Y_tilde) + # F1 = sample_factors_scores( Y_tilde, Z_1,Lambda,resid_Y_prec,F_a,F_h2 ) + F[[pop]] = sample_factors_scores(Y_tilde, Z_1[[pop]],Lambda,resid_Y_prec[[pop]],F_a[[pop]],F_h2[[pop]] ) + + + # -----Sample resid_Y_prec------------ # + Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - F[[pop]] %*% t(Lambda) - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]] + resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision + # n is the sum of all populations + + # -----Sample E_a_prec---------------- # + # #random effect 1 (D) residual precision + E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]])) + + # -----Sample W_prec------------------ # + if(ncol(Z_2) > 0) { + W_prec[[pop]] = rgamma(p, W_prec_shape + r2/2,rate = W_prec_rate + 1/2*diag(t(W[[pop]]) %*% A_2_inv[[pop]] %*% W[[pop]])) + } + } + # -- adapt number of factors to samples ---# + # adapt_result = update_k( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_1,Lambda_df,delta_2_shape,delta_2_rate,b0,b1,i,epsilon,prop ) + # F = adapt_result$F + # Lambda = adapt_result$Lambda + # F_a = adapt_result$F_a + # F_h2 = adapt_result$F_h2 + # Lambda_prec = adapt_result$Lambda_prec + # Plam = adapt_result$Plam + # delta = adapt_result$delta + # tauh = adapt_result$tauh + # k = ncol(F) + + # -- save sampled values (after thinning) -- # + if( (i-burn) %% thin == 0 && i > burn) { + + sp_num = (i-burn)/thin + + Posterior = save_posterior_samples_comb( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,delta,F_h2,resid_Y_prec,E_a_prec,W_prec) + + # reorder elements in Lambda + Lambda_pos = Posterior$Lambda + Lam_rowmean = rowMeans(Lambda_pos) + Lambda_hat = matrix(Lam_rowmean,nr=p) + + + if(sp_num %% save_freq == 0) save(Posterior,file='Posterior.RData') + } + + + # -- provide run diagnostics and plots -- # + # if(i %% draw_iter == 0) { + # directory=strread(pwd,'#s','delimiter','/') + # disp(directory(end)) + # disp(i) + # elapsed=toc + # #output some running statistics on the current factors and their genetic variances + # # each row represents a factor: + # # delta[i], [i], F_h2[i], V_P(F_i), V_A(F_i) + # [delta,[1:size(F,2)]',F_h2,sum(F.^2)'./(size(F,1)-1),sum(F_a.^2)'./(size(F_a,1)-1)] + # disp(strcat('Time remaining:',num2str((nrun-i) * (elapsed/i) * 1/60))) + + #make some plots of some running statistics + + # draw_results_diagnostics(sp_num,run_parameters,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames) + # } + } + + end_time = Sys.time() + print(end_time - start_time) + # save(Posterior,file = 'Posterior.RData') + + + # ----------------------------------------------- # + # ------------Save state for restart------------- # + # ----------------------------------------------- # + + current_state[["resid_Y_prec"]] = resid_Y_prec + current_state[["Lambda_prec"]] = Lambda_prec + current_state[["delta"]] = delta + current_state[["tauh"]] = tauh + current_state[["Plam"]] = Plam + current_state[["Lambda"]] = Lambda + current_state[["F_h2"]] = F_h2 + current_state[["E_a_prec"]] = E_a_prec + current_state[["W_prec"]] = W_prec + current_state[["F_a"]] = F_a + current_state[["F"]] = F + current_state[["E_a"]] = E_a + current_state[["B"]] = B + current_state[["W"]] = W + current_state[["nrun"]] = i + + # save(current_state,file='current_state.RData') + + + BSFG_state$current_state = current_state + BSFG_state$Posterior = Posterior + BSFG_state$RNG = list( + Random.seed = .Random.seed, + RNGkind = RNGkind() + ) + + save(BSFG_state,file = 'BSFG_state.RData') + return(BSFG_state) +} \ No newline at end of file From 6c7a40472d9d8893bc85dd07be70ad4cf59e720b Mon Sep 17 00:00:00 2001 From: xin li Date: Mon, 23 Jan 2017 18:00:23 -0800 Subject: [PATCH 09/18] Adding function of initial sampler for combined datasets --- R_BSFG/fast_BSFG_sampler_init_comb.R | 370 +++++++++++++++++++++++++++ 1 file changed, 370 insertions(+) create mode 100644 R_BSFG/fast_BSFG_sampler_init_comb.R diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R new file mode 100644 index 0000000..cb6f5fd --- /dev/null +++ b/R_BSFG/fast_BSFG_sampler_init_comb.R @@ -0,0 +1,370 @@ + +fast_BSFG_sampler_init = function(priors,run_parameters){ + # require(PEIP) + require(Matrix) + + # data_matrices,run_parameters,priors,current_state,Posterior,simulation = F) + # Preps everything for an analysis with the function MA_sampler. + # Loads data from setup.mat + # Decides if the data is from a simulation + # Uses prior to find initial values for all parameters + # Initializes Posterior struct + # Saves run parameters + # + # Input file setup.mat should contain: + # Y gene expression data data: n x p + # X fixed-effect design matrix (optional) + # Z_1 random effects 1 incidence matrix n x r1 + # Z_2 random effects 2 incidence matrix n x r2 (optional) + # A kinship matrix of lines: r1 x r1 + # optional: + # U_act + # gen_factor_Lambda + # error_factor_Lambda + # h2_act + # G + # R + # B + # factor_h2s + # name + + # ----------------------- # + # ------read data-------- # + # ----------------------- # + if(file.exists('../setup.RData')) { + load('../setup.RData') + } + # else{ + #require(R.matlab) + #setup = readMat('../setup.mat') + #for(i in 1:10) names(setup) = sub('.','_',names(setup),fixed=T) + #} + Y = setup$Y + U_act = setup$U_act + E_act = setup$E_act + B = setup$B_act #Is B = B_act? There is no use of B_act in this code. + Z_1 = setup$Z_1 + A = setup$A + X = setup$X + n = setup$n + r = setup$r + traitnames = setup$traitnames + + #Determine if 'setup.mat' contains output of a simulation, based on if + #known factor loadings are included. Affects plotting functions + simulation = F + if('gen_factor_Lambda' %in% names(setup)){ + simulation = T + print('simulated data') + run_parameters$setup = setup + } + run_parameters$simulation = simulation + + # Factors: + # initial number of factors + k = priors$k_init + resid_Y_prec_shape = priors$resid_Y_prec_shape + resid_Y_prec_rate = priors$resid_Y_prec_rate + E_a_prec_shape = priors$E_a_prec_shape + E_a_prec_rate = priors$E_a_prec_rate + W_prec_shape = priors$W_prec_shape + W_prec_rate = priors$W_prec_rate + p = ncol(Y[[1]]) + + # do a loop for each Y in the list + for (pop in pops){ + #normalize Y to have zero mean and unit variances among observed values, + #allowing for NaNs. + n = nrow(Y[[pop]]) + r = ncol(Z_1[[pop]]) + + Y_missing = is.na(Y[[pop]]) # matrix recording missing data in Y + Mean_Y = colMeans(Y[[pop]],na.rm=T) + VY = apply(Y[[pop]],2,var,na.rm=T) + # Don't remove the mean and standardize the variance if it's a + # simulation because this makes comparing to the simulated values more + # difficult. + # Do we want to do this for real data, or let the user do it? + if(simulation) { + VY = rep(1,p) + } else{ + Y[[pop]] = sweep(Y[[pop]],2,Mean_Y,'-') + Y[[pop]] = sweep(Y[[pop]],2,sqrt(VY),'/') + } + + + #determine if a design matrix (X) exists (is loaded from setup.mat). If + #not, make a dummy X-matrix with no columns. + if(is.null(X[[pop]])) { + X[[pop]]=matrix(0,nr = n,nc = 0) + B[[pop]]=matrix(0,nr = 0,nc = p) + } + if(nrow(X[[pop]]) == 1) X[[pop]] = t(X[[pop]]) # because I used to give X transposed + stopifnot(nrow(X[[pop]]) == n) + b = ncol(X[[pop]]) + + + #Determine if a second random effects design matrix exists. If not, make a + #dummy matrix + if(! 'Z_2' %in% ls()) { + Z_2[[pop]]=matrix(0,nr = n,nc = 0) + } + stopifnot(nrow(Z_2[[pop]]) == n) + r2 = ncol(Z_2[[pop]]) + + + + #fixed effect priors + + priors$b_X_prec[[pop]] = 1e-10*diag(1,b) + + # ----------------------------- # + # -----Initialize variables---- # + # ----------------------------- # + + # --- transcript-level model + # p-vector of probe residual precisions. + # Prior: Gamma distribution for each element + # shape = resid_Y_prec_shape + # rate = resid_Y_prec_rate + resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape,rate = resid_Y_prec_rate) + + + # g-vector of specific precisions of genetic effects. + # Prior: Gamma distribution for each element + # shape = E_a_prec_shape + # rate = E_a_prec_rate + E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape,rate = E_a_prec_rate) + + # Genetic effects not accounted for by factors. + # Prior: Normal distribution on each element. + # mean = 0 + # sd = 1./sqrt(E_a_prec)' on each row + E_a[[pop]] = matrix(rnorm(p*r,0,sqrt(1/E_a_prec[[pop]])),nr = r,nc = p, byrow = T) + + # Latent factor heritabilties. h2 can take h2_divisions values + # between 0 and 1. + # Prior: 0.5: h2=0, .05: h2 > 0. + F_h2[[pop]] = runif(k) + + # Genetic effects on the factor scores. + # Prior: Normal distribution for each element + # mean = 0 + # sd = sqrt(F_h2') for each row. + F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T) + + # Full Factor scores. Combination of genetic and residual variation on + # each factor. + # Prior: Normal distribution for each element + # mean = Z_1 * F_a + # sd = sqrt(1-F_h2') for each row. + F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n,0,sqrt(1-F_h2[[pop]])),nr = n,nc = k, byrow = T) + + # g-vector of specific precisions of genetic effects. + # Prior: Gamma distribution for each element + # shape = E_a_prec_shape + # rate = E_a_prec_rate + W_prec[[pop]] = rgamma(p,shape = W_prec_shape,rate = W_prec_rate) + + # Genetic effects not accounted for by factors. + # Prior: Normal distribution on each element. + # mean = 0 + # sd = 1./sqrt(E_a_prec)' on each row + W[[pop]] = matrix(rnorm(p*r2,0,sqrt(1/W_prec[[pop]])),nr = r2, nc = p, byrow = T) + + # Fixed effect coefficients. + # Prior: Normal distribution for each element + # mean = 0 + # sd = sqrt(1/fixed_effects_prec) + B[[pop]] = matrix(rnorm(b*p),nr = b, nc = p) + + # ------------------------------------ # + # ----Precalculate some matrices------ # + # ------------------------------------ # + + # recover() + #invert the random effect covariance matrices + Ainv[[pop]] = solve(A[[pop]]) + A_2_inv[[pop]] = diag(1,r2) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified. + + #pre-calculate transformation parameters to diagonalize aI + bZAZ for fast + #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U' + #uses singular value decomposition of ZAZ for stability when ZAZ is low + #rank + # XZ = [X_f Z_1] + # [U,S,~] = svd(XZ*blkdiag(1e6*eye(b_f),A)*XZ') + # recover() + # r = rgamma(nrow(Z_1),1,1) + # result = GSVD_2_c(cholcov(diag(r)),cholcov(Z_1 %*% A %*% t(Z_1))) + # r2 = rgamma(nrow(Z_1),1,1) + # result2 = GSVD_2_c(cholcov(diag(r2)),cholcov(Z_1 %*% A %*% t(Z_1))) + + result = svd(Z_1[[pop]] %*% A[[pop]] %*% t(Z_1[[pop]])) + invert_aI_bZAZ[[pop]] = list( + U = result$u, + s = result$d + ) + + #fixed effects + random effects 1 + #diagonalize mixed model equations for fast inversion: + #inv(a*bdiag(priors$b_X_prec,Ainv) + b*t(cbind(X,Z_1)) %*% cbind(X,Z_1)) = U %*% diag(1/(a*s1+b*s2)) %*% t(U) + Design= cbind(X[[pop]],Z_1[[pop]]) + Design2 = t(Design) %*% Design + result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec,Ainv[[pop]])),cholcov(Design2)) + invert_aPXA_bDesignDesignT[[pop]] = list( + U = t(solve(result$X)), + s1 = diag(result$C)^2, + s2 = diag(result$S)^2 + ) + invert_aPXA_bDesignDesignT[[pop]]$Design_U = Design %*% invert_aPXA_bDesignDesignT[[pop]]$U + + #random effects 2 + #diagonalize mixed model equations for fast inversion: + #inv(a*A_2_inv + b*Z_2'Z_2]) = U*diag(1./(a.*s1+b.*s2))*U' + if(r2 > 0) { + Design = Z_2[[pop]] + Design2 = t(Design) %*% Design + result = GSVD_2_c(cholcov(A_2_inv[[pop]]),cholcov(Design2)) + invert_aPXA_bDesignDesignT_rand2[[pop]] = list( + U = t(solve(result$X)), + s1 = diag(result$C)^2, + s2 = diag(result$S)^2 + ) + invert_aPXA_bDesignDesignT_rand2[[pop]]$Design_U = Design %*% invert_aPXA_bDesignDesignT_rand2[[pop]]$U + } else{ + invert_aPXA_bDesignDesignT_rand2[[pop]] = list() + } + + #genetic effect variances of factor traits + # diagonalizing a*Z_1'*Z_1 + b*Ainv for fast inversion + #diagonalize mixed model equations for fast inversion: + # inv(a*Z_1'*Z_1 + b*Ainv) = U*diag(1./(a.*s1+b.*s2))*U' + #similar to fixed effects + random effects 1 above, but no fixed effects. + ZZt = t(Z_1[[pop]]) %*% Z_1[[pop]] + result = GSVD_2_c(cholcov(ZZt),cholcov(Ainv[[pop]])) + invert_aZZt_Ainv = list( + U = t(solve(result$X)), + s1 = diag(result$C)^2, + s2 = diag(result$S)^2 + ) + # the end of loop for pops + } + + + data_matrices = list( + Y = Y, + Z_1 = Z_1, + Z_2 = Z_2, + X = X, + Y_missing = Y_missing + ) + + # ----------------------------- # + # -----Initialize lambda ---- # + # ----------------------------- # + # Factor loading precisions (except column penalty tauh). + # Prior: Gamma distribution for each element. + # shape = Lambda_df/2 + # rate = Lambda_df/2 + # Marginilizes to t-distribution with Lambda_df degrees of freedom + # on each factor loading, conditional on tauh + Lambda_df = priors$Lambda_df + Lambda_prec = matrix(rgamma(p*k,shape = Lambda_df/2,rate = Lambda_df/2),nr = p,nc = k) + + # Factor penalty. tauh(h) = \prod_{i=1}^h \delta_i + # Prior: Gamma distribution for each element of delta + # delta_1: + # shape = delta_1_shape + # rate = delta_1_rate + # delta_2 ... delta_m: + # shape = delta_2_shape + # rate = delta_2_rate + delta_1_shape = priors$delta_1_shape + delta_1_rate = priors$delta_1_rate + delta_2_shape = priors$delta_2_shape + delta_2_rate = priors$delta_2_rate + delta = c(rgamma(1,shape = delta_1_shape,rate = delta_1_rate),rgamma(k-1,shape = delta_2_shape,rate = delta_2_rate)) + tauh = cumprod(delta) + + # Total Factor loading precisions Lambda_prec * tauh + Plam = sweep(Lambda_prec,2,tauh,'*') + + # Lambda - factor loadings + # Prior: Normal distribution for each element. + # mu = 0 + # sd = sqrt(1/Plam) + Lambda = matrix(rnorm(p*k,0,sqrt(1/Plam)),nr = p,nc = k) + + # ----------------------- # + # -Initialize Posterior-- # + # ----------------------- # + Posterior = list( + Lambda = matrix(0,nr=0,nc=0), + F_a = matrix(0,nr=0,nc=0), + F = matrix(0,nr=0,nc=0), + delta = matrix(0,nr=0,nc=0), + F_h2 = matrix(0,nr=0,nc=0), + resid_Y_prec = matrix(0,nr = p,nc = 0), + E_a_prec = matrix(0,nr = p,nc = 0), + W_prec = matrix(0,nr = p,nc = 0), + B = matrix(0,nr = b,nc = p), + W = matrix(0,nr = r2,nc = p), + E_a = matrix(0,nr = r,nc = p) + ) + # ----------------------- # + # ---Save initial values- # + # ----------------------- # + current_state = list( + resid_Y_prec = resid_Y_prec, + Lambda_prec = Lambda_prec, + delta = delta, + tauh = tauh, + Plam = Plam, + Lambda = Lambda, + F_h2 = F_h2, + E_a_prec = E_a_prec, + W_prec = W_prec, + F_a = F_a, + F = F, + E_a = E_a, + B = B, + W = W, + nrun = 0 + ) + # ----------------------------- # + # ----Save run parameters------ # + # ----------------------------- # + + run_variables = list( + p = p, + n = n, + r = r, + r2 = r2, + b = b, + Mean_Y = Mean_Y, + VY = VY, + Ainv = Ainv, + A_2_inv = A_2_inv, + invert_aI_bZAZ = invert_aI_bZAZ, + invert_aPXA_bDesignDesignT = invert_aPXA_bDesignDesignT, + invert_aZZt_Ainv = invert_aZZt_Ainv, + invert_aPXA_bDesignDesignT_rand2 = invert_aPXA_bDesignDesignT_rand2 + ) + + RNG = list( + Random.seed = .Random.seed, + RNGkind = RNGkind() + ) + + return(list( + data_matrices = data_matrices, + run_parameters = run_parameters, + run_variables = run_variables, + priors = priors, + current_state = current_state, + Posterior = Posterior, + simulation = simulation, + RNG = RNG, + traitnames = traitnames + )) +} From ff98738124aac8e442bb468c72d42288f883e508 Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 3 Feb 2017 20:43:20 -0800 Subject: [PATCH 10/18] Simulation data based on pedigree in pengjuan's datasets --- R_BSFG/data_sim.R | 128 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 R_BSFG/data_sim.R diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R new file mode 100644 index 0000000..0edca7a --- /dev/null +++ b/R_BSFG/data_sim.R @@ -0,0 +1,128 @@ +library(pedantics) +#library(R.matlab) +# define E_a_prec; resid_Y_prec; name +# get A + + +prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){ + n = dim(A_chol)[1] #A_chol is Cholesky decomposition of A + p = nrow(Lambda) + + r = n + # n = 2*n + + k = ncol(Lambda) + F_a = t(A_chol) %*% sweep(matrix(rnorm(n*k),n),2,sqrt(factor_h2s),'*') # make sure MN(0,A,sigma) + F_r = sweep(matrix(rnorm(n*k),n),2,sqrt(1-factor_h2s),'*') # factor_h2s is the variance of factor + F = F_a + F_r + + E_a_act = t(A_chol) %*% sweep(matrix(rnorm(n*p),n),2,sqrt(1/E_a_prec),'*') # + E_r_act = sweep(matrix(rnorm(n*p),n),2,sqrt(1/resid_Y_prec),'*') + + Y = F %*% t(Lambda) + E_a_act + E_r_act + + X = matrix(1,nrow=1,ncol=n) + B = rep(0,p) + Z_1 = diag(r)[,rep(1:r,n/r)] + Va = colSums(Lambda^2) * factor_h2s + 1/E_a_prec #diagonal elements in G + Ve = colSums(Lambda^2) * (1-factor_h2s) + 1/resid_Y_prec #diagonal elements in R + h2 = Va/(Va + Ve) # heribility + G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + diag(1/E_a_prec) + R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + diag(1/resid_Y_prec) + # h2 = diag(G)/(diag(G)+diag(R)) + + # Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4)) + + # plot(h2,type="l") + + print(name) + setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R, + gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name) + + return(setup) + #do.call(writeMat,c("setup.mat",setup)) + #save(pedigree,file="pedigree.Robj") +} + + +#================================================================== + + + +counter = 0 + +#sample_size simulations +num_reps = 10 +p = 100 +k=10 + + +#the same lambda +Lambda = matrix(0,p,k) +numeff = sample((p/30):(p/4),k,replace=T) +for(h in 1:k){ # even though numeff[h] is not a whole number, by applying sample, it only use the int part. It doesn't round off. + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) +} +Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] +cols=1:k +E_a_prec = rep(1/0.2,p) +resid_Y_prec = rep(1/0.2,p) +# pedigree from pengjuan's dataset to test if diff populations are indep +setwd("~/Runcie Lab/pedigree") +data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE) +source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R") +# find way to change this according to the size(different pops) +pops = c("LConG1","LConG2","LConG3") +ori.setup = list() +ori.setup[[pops[1]]] = setup_pedigree(data,GenerationCode=3,LineCode=c(0,1),fixedfactor=NA) # no fixed factor because we seperate the dataset based on generation +ori.setup[[pops[2]]] = setup_pedigree(data,GenerationCode=2,LineCode=1,fixedfactor=NA) +ori.setup[[pops[3]]] = setup_pedigree(data,GenerationCode=1,LineCode=1,fixedfactor=NA) + +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Simulation") + #sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10)) +# set seed for repeatability +set.seed(1) +for(rep in 1:num_reps){ + # number of individuals that have the same mother: + # c(50,5) in this case means that for 50 fathers, each of them have 5 children. There are total 250 individuals. + # and for each individual, it has distinct mother + setup = list() + for(pop in pops){ + # nM = sample_size[[size]][1] + # nI = sample_size[[size]][2] + # pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) + # pedigree<-fixPedigree(pedigree) #A + # children = !is.na(pedigree[,3]) + # + # #generate A matrix as 2* kinship matrix from whole pedigree + # A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] + + # use pedigree from pengjuan's dataset + A = ori.setup[[pop]]$A + A_chol = chol(A) + + # this should be different for each pops + factor_h2s = runif(k) # k numbers in [0-1] + # order by descending order + factor_h2s = factor_h2s[order(factor_h2s,decreasing = TRUE)] + factor_h2s[seq(1,k,by=2)]=0 # make 5 of them to be zero + + g_cols = factor_h2s>0 + Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + # reordering lambda + + name = paste(pop,rep,sep='_') + setup[[pop]]= prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name) + + } + counter = counter+1 + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + save(setup,file = 'setup.RData') + setwd('..') + +} + + + From a743cd821e3f32e7689414547ded522e994b3ac6 Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 3 Mar 2017 21:55:46 -0800 Subject: [PATCH 11/18] Generate initiating and sampling functions for BSFG model with several populations in the same time. --- R_BSFG/BSFG_functions.R | 76 ++++++++++++++++--- R_BSFG/fast_BSFG_sampler_comb.R | 98 +++++++++++++----------- R_BSFG/fast_BSFG_sampler_init_comb.R | 109 +++++++++++++++------------ 3 files changed, 180 insertions(+), 103 deletions(-) diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index 921255a..52c086b 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -45,11 +45,9 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) for(j in 1:p) { means = c() Qlam = c() - for(pop in pops){ - FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/') - means = means + FUDi %*% UtY[,j] - Qlam = Qlam + FUDi %*% t(FtU) - } + FUDi = E_a_prec[j] * sweep(FtU,2,(s + E_a_prec[j]/resid_Y_prec[j]),'/') + means = means + FUDi %*% UtY[,j] + Qlam = Qlam + FUDi %*% t(FtU) Qlam = Qlam + diag(Plam[j,]) # recover() @@ -65,6 +63,58 @@ sample_Lambda = function( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) return(Lambda) } +sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) { + + #Sample factor loadings Lambda while marginalizing over residual + #genetic effects: Y - Z_2W = F*Lambda' + E, vec(E)~N(0,kron(Psi_E,In) + kron(Psi_U, ZAZ^T)) + #note: conditioning on F, but marginalizing over E_a. + #sampling is done separately by trait because each column of Lambda is + #independent in the conditional posterior + #note: invert_aI_bZAZ has parameters that diagonalize aI + bZAZ for fast + #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U' + + p = length(resid_Y_prec[[1]]) + k = ncol(F[[1]]) + + Zlams = matrix(rnorm(k*p),nr = k, nc = p) + Lambda = matrix(0,nr = p,nc = k) + + + for(j in 1:p) { + means = matrix(0,nr=p,nc=1) + Qlam = matrix(0,p,p) + # Is this part correct???????????????????? + Y_tilde = list(); + for(pop in pops){ + #conditioning on W, B, F, marginalizing over E _a + Y_tilde[[pop]] = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_2[[pop]] %*% W[[pop]] + Y_tilde[[pop]] = as.matrix(Y_tilde[[pop]]) + + U = invert_aI_bZAZ[[pop]]$U + s = invert_aI_bZAZ[[pop]]$s + + FtU = t(F[[pop]]) %*% U + UtY = t(U) %*% Y_tilde[[pop]] + + FUDi = E_a_prec[[pop]][j] * sweep(FtU,2,(s + E_a_prec[[pop]][j]/resid_Y_prec[[pop]][j]),'/') + means = means + FUDi %*% UtY[,j] + Qlam = Qlam + FUDi %*% t(FtU) + #recover() + } + Qlam = Qlam + diag(Plam[j,]) + + #recover() + Llam = t(chol(Qlam)) + vlam = forwardsolve(Llam,means) + mlam = backsolve(t(Llam),vlam) + ylam = backsolve(t(Llam),Zlams[,j]) + + Lambda[j,] = ylam + mlam + + } + + return(Lambda) +} sample_prec_discrete_conditional = function(Y,h2_divisions,h2_priors,invert_aI_bZAZ,res_prec) { #sample factor heritibilties conditional on a given residual precision @@ -439,13 +489,15 @@ save_posterior_samples_comb = function( sp_num,Posterior,Lambda,F,F_a,B,W,E_a,de sp = ncol(Posterior$Lambda) #size(Posterior$Lambda,2) #save factor samples - # #if(length(Lambda) > nrow(Posterior[["Lambda"]])){ this will not happen since we stop updatek - # # expand factor sample matrix if necessary - # Posterior[["F"]] = rbind(Posterior[["F"]], matrix(0,nr = length(F) -nrow(Posterior[["F"]]), nc = sp)) - # Posterior[["F_a"]] = rbind(Posterior[["F_a"]], matrix(0,nr = length(F_a) -nrow(Posterior[["F_a"]]), nc = sp)) - # Posterior[["delta"]] = rbind(Posterior[["delta"]], matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]), nc = sp)) - # Posterior[["F_h2"]] = rbind(Posterior[["F_h2"]], matrix(0,nr = length(F_h2) -nrow(Posterior[["F_h2"]]), nc = sp)) - # } + # change the if condition to be if sp_num == 1. So we don't need to run the update k part and we can also assign new value to out posterior variables. + if(sp_num==1){ + # expand factor sample matrix if necessary + Posterior$Lambda = rbind(Posterior$Lambda, matrix(0,nr = length(Lambda)-nrow(Posterior$Lambda),nc = sp)) + Posterior[["F"]] = rbind(Posterior[["F"]], matrix(0,nr = length(F) -nrow(Posterior[["F"]]), nc = sp)) + Posterior[["F_a"]] = rbind(Posterior[["F_a"]], matrix(0,nr = length(F_a) -nrow(Posterior[["F_a"]]), nc = sp)) + Posterior[["delta"]] = rbind(Posterior[["delta"]], matrix(0,nr = length(delta) -nrow(Posterior[["delta"]]), nc = sp)) + Posterior[["F_h2"]] = rbind(Posterior[["F_h2"]], matrix(0,nr = length(F_h2) -nrow(Posterior[["F_h2"]]), nc = sp)) + } Posterior$Lambda[1:length(Lambda),sp_num] = c(Lambda) Posterior$F[1:length(F),sp_num] = c(F) Posterior$F_a[1:length(F_a),sp_num] = c(F_a) diff --git a/R_BSFG/fast_BSFG_sampler_comb.R b/R_BSFG/fast_BSFG_sampler_comb.R index 2e6913e..1777c25 100644 --- a/R_BSFG/fast_BSFG_sampler_comb.R +++ b/R_BSFG/fast_BSFG_sampler_comb.R @@ -115,7 +115,7 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) { F_a = current_state$F_a B = current_state$B start_i = current_state$nrun - k = ncol(F) + k = ncol(F[[1]]) # ----------------------------------------------- # # -----------Reset Global Random Number Stream--- # @@ -184,39 +184,6 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) { } # recover() - # -----Sample Lambda------------------ # - #conditioning on W, B, F, marginalizing over E _a - Y_tilde = Y - X %*% B - Z_2 %*% W - Y_tilde = as.matrix(Y_tilde) - # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) - - Lambda = sample_Lambda_comb( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) - - # -----Sample Lambda_prec------------- # - Lambda2 = Lambda^2 - Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k) - - # -----Sample delta, update tauh------ # - delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) - tauh = cumprod(delta) - - # -----Update Plam-------------------- # - Plam = sweep(Lambda_prec,2,tauh,'*') - - # -----Sample resid_Y_prec ---------------- # - # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below. - - # -----Sample E_a_prec---------------- # - #conditioning on W, B, F, marginalizing over E_a - # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) - # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) - - # pops should be a output from setup datasets and for pop in pops, the row number for - # each pop should also included in the setup dataset - # read pops (number of populations) - # read row number for each pop - # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda) - # -----Sample B and E_a--------------- # #conditioning on W, F, Lambda for(pop in pops){ @@ -224,16 +191,12 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) { Y_tilde = as.matrix(Y_tilde) # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) location_sample = sample_means( Y_tilde, resid_Y_prec[[pop]], E_a_prec[[pop]], invert_aPXA_bDesignDesignT[[pop]] ) - B[[pop]] = location_sample[1:b,] - if(length(B[[pop]])==p){ - B[[pop]] = matrix(0,nr=0,nc=p) - } - E_a[[pop]] = location_sample[b+(1:r),] - + B[[pop]] = location_sample[1:b[[pop]],,drop=FALSE] + E_a[[pop]] = location_sample[b[[pop]]+(1:r[[pop]]),] # -----Sample W ---------------------- # #conditioning on B, E_a, F, Lambda - if(ncol(Z_2) > 0) { + if(ncol(Z_2[[pop]]) > 0) { Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - Z_1[[pop]] %*% E_a[[pop]] - F[[pop]] %*% t(Lambda) Y_tilde = as.matrix(Y_tilde) # location_sample = sample_means( Y_tilde, resid_Y_prec, W_prec, invert_aPXA_bDesignDesignT_rand2 ) @@ -261,18 +224,65 @@ fast_BSFG_sampler_comb = function(BSFG_state,n_samples) { # -----Sample resid_Y_prec------------ # Y_tilde = Y[[pop]] - X[[pop]] %*% B[[pop]] - F[[pop]] %*% t(Lambda) - Z_1[[pop]] %*% E_a[[pop]] - Z_2[[pop]] %*% W[[pop]] - resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision + resid_Y_prec[[pop]] = rgamma(p,shape = resid_Y_prec_shape + n[[pop]]/2, rate = resid_Y_prec_rate+1/2*colSums(Y_tilde^2)) #model residual precision # n is the sum of all populations # -----Sample E_a_prec---------------- # # #random effect 1 (D) residual precision - E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]])) + E_a_prec[[pop]] = rgamma(p,shape = E_a_prec_shape + r[[pop]]/2, rate = E_a_prec_rate + 1/2*diag(t(E_a[[pop]]) %*% Ainv[[pop]] %*% E_a[[pop]])) # -----Sample W_prec------------------ # - if(ncol(Z_2) > 0) { + if(ncol(Z_2[[pop]]) > 0) { W_prec[[pop]] = rgamma(p, W_prec_shape + r2/2,rate = W_prec_rate + 1/2*diag(t(W[[pop]]) %*% A_2_inv[[pop]] %*% W[[pop]])) } } + + # -----Sample Lambda------------------ # + # create a new set of variables for sampling Lambda + #Y_lab = do.call(rbind,Y) + # Combine X as block diagonal matrix + #library(Matrix) + #X_lab = Reduce(bdiag,X) + #B_lab = do.call(rbind,B) + + #Z_2_lab = do.call(rbind,Z_2) + #W_lab = Reduce("+",W)/length(W) + #F_lab = do.call(rbind,F) + #resid_Y_prec_lab = Reduce("+",resid_Y_prec)/length(resid_Y_prec) + #E_a_prec_lab = Reduce("+",E_a_prec)/length(E_a_prec) + + #conditioning on W, B, F, marginalizing over E _a + #Y_tilde = Y - X %*% B - Z_2 %*% W + #Y_tilde = as.matrix(Y_tilde) + # Lambda = sample_Lambda( Y_tilde,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) + + Lambda = sample_Lambda_comb( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_aI_bZAZ ) + + # -----Sample Lambda_prec------------- # + Lambda2 = Lambda^2 + Lambda_prec = matrix(rgamma(p*k,shape = (Lambda_df + 1)/2,rate = (Lambda_df + sweep(Lambda2,2,tauh,'*'))/2),nr = p,nc = k) + + # -----Sample delta, update tauh------ # + delta = sample_delta( delta,tauh,Lambda_prec,delta_1_shape,delta_1_rate,delta_2_shape,delta_2_rate,Lambda2 ) + tauh = cumprod(delta) + + # -----Update Plam-------------------- # + Plam = sweep(Lambda_prec,2,tauh,'*') + + # -----Sample resid_Y_prec ---------------- # + # should be able to sample this here marginalizing over E_a in the same way that E_a_prec is done below. + + # -----Sample E_a_prec---------------- # + #conditioning on W, B, F, marginalizing over E_a + # E_a_prec = sample_prec_discrete_conditional(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) + # E_a_prec = sample_prec_discrete_conditional_c(Y - X %*% B-F %*% t(Lambda) - Z_2 %*% W,h2_divisions,h2_priors_resids,invert_aI_bZAZ,resid_Y_prec) + + # pops should be a output from setup datasets and for pop in pops, the row number for + # each pop should also included in the setup dataset + # read pops (number of populations) + # read row number for each pop + # in the loop, replace Y as Y[pops,], etc (instead of using lists because we need to use all Y together to sample lambda) + # -- adapt number of factors to samples ---# # adapt_result = update_k( F,Lambda,F_a,F_h2,Lambda_prec,Plam,delta,tauh,Z_1,Lambda_df,delta_2_shape,delta_2_rate,b0,b1,i,epsilon,prop ) # F = adapt_result$F diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R index cb6f5fd..8b75d9e 100644 --- a/R_BSFG/fast_BSFG_sampler_init_comb.R +++ b/R_BSFG/fast_BSFG_sampler_init_comb.R @@ -1,5 +1,4 @@ - -fast_BSFG_sampler_init = function(priors,run_parameters){ +fast_BSFG_sampler_init_comb = function(priors,run_parameters){ # require(PEIP) require(Matrix) @@ -34,24 +33,9 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ if(file.exists('../setup.RData')) { load('../setup.RData') } - # else{ - #require(R.matlab) - #setup = readMat('../setup.mat') - #for(i in 1:10) names(setup) = sub('.','_',names(setup),fixed=T) - #} - Y = setup$Y - U_act = setup$U_act - E_act = setup$E_act - B = setup$B_act #Is B = B_act? There is no use of B_act in this code. - Z_1 = setup$Z_1 - A = setup$A - X = setup$X - n = setup$n - r = setup$r - traitnames = setup$traitnames - #Determine if 'setup.mat' contains output of a simulation, based on if - #known factor loadings are included. Affects plotting functions + #k = priors$k_init + simulation = F if('gen_factor_Lambda' %in% names(setup)){ simulation = T @@ -60,23 +44,47 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ } run_parameters$simulation = simulation + pops = c("LConG1","LConG2","LConG3") + # do a loop for each Y in the list + Y =list();U_act=list();E_act=list();B=list();Z_1=list();A=list() + X=list();n=list();r=list();r2=list();b=list();traitnames=list() + Z_2=list();priors$b_X_prec = list();resid_Y_prec=list() + E_a_prec=list();E_a=list();F_h2=list();F_a=list();F=list();W_prec=list();W=list() + Ainv=list();A_2_inv=list();invert_aI_bZAZ=list();invert_aZZt_Ainv=list();invert_aPXA_bDesignDesignT=list() + invert_aPXA_bDesignDesignT_rand2=list() + + for (pop in pops){ + Y[[pop]] = setup[[pop]]$Y + U_act[[pop]] = setup[[pop]]$U_act + E_act[[pop]] = setup[[pop]]$E_act + B[[pop]] = setup[[pop]]$B_act #Is B = B_act? There is no use of B_act in this code. + Z_1[[pop]] = setup[[pop]]$Z_1 + A[[pop]] = setup[[pop]]$A + X[[pop]] = setup[[pop]]$X + n[[pop]] = setup[[pop]]$n + r[[pop]] = setup[[pop]]$r + traitnames[[pop]] = setup[[pop]]$traitnames + + + #Determine if 'setup.mat' contains output of a simulation, based on if + #known factor loadings are included. Affects plotting functions + # Factors: # initial number of factors - k = priors$k_init + resid_Y_prec_shape = priors$resid_Y_prec_shape resid_Y_prec_rate = priors$resid_Y_prec_rate E_a_prec_shape = priors$E_a_prec_shape E_a_prec_rate = priors$E_a_prec_rate W_prec_shape = priors$W_prec_shape W_prec_rate = priors$W_prec_rate - p = ncol(Y[[1]]) - - # do a loop for each Y in the list - for (pop in pops){ + p = ncol(Y[[1]]) + k = p + #normalize Y to have zero mean and unit variances among observed values, #allowing for NaNs. - n = nrow(Y[[pop]]) - r = ncol(Z_1[[pop]]) + n[[pop]] = nrow(Y[[pop]]) + r[[pop]] = ncol(Z_1[[pop]]) Y_missing = is.na(Y[[pop]]) # matrix recording missing data in Y Mean_Y = colMeans(Y[[pop]],na.rm=T) @@ -96,27 +104,30 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ #determine if a design matrix (X) exists (is loaded from setup.mat). If #not, make a dummy X-matrix with no columns. if(is.null(X[[pop]])) { - X[[pop]]=matrix(0,nr = n,nc = 0) + X[[pop]]=matrix(0,nr = n[[pop]],nc = 0) B[[pop]]=matrix(0,nr = 0,nc = p) } if(nrow(X[[pop]]) == 1) X[[pop]] = t(X[[pop]]) # because I used to give X transposed - stopifnot(nrow(X[[pop]]) == n) - b = ncol(X[[pop]]) + stopifnot(nrow(X[[pop]]) == n[[pop]]) + b[[pop]] = ncol(X[[pop]]) #Determine if a second random effects design matrix exists. If not, make a #dummy matrix - if(! 'Z_2' %in% ls()) { - Z_2[[pop]]=matrix(0,nr = n,nc = 0) - } - stopifnot(nrow(Z_2[[pop]]) == n) - r2 = ncol(Z_2[[pop]]) + # if(! 'Z_2' %in% ls()) { + # Z_2[[pop]]=matrix(0,nr = n,nc = 0) + # } + if(is.null(Z_2[[pop]])){ + Z_2[[pop]]=matrix(0,nr = n[[pop]],nc = 0) + } + stopifnot(nrow(Z_2[[pop]]) == n[[pop]]) + r2[[pop]] = ncol(Z_2[[pop]]) #fixed effect priors - priors$b_X_prec[[pop]] = 1e-10*diag(1,b) + priors$b_X_prec[[pop]] = 1e-10*diag(1,b[[pop]]) # ----------------------------- # # -----Initialize variables---- # @@ -140,25 +151,28 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ # Prior: Normal distribution on each element. # mean = 0 # sd = 1./sqrt(E_a_prec)' on each row - E_a[[pop]] = matrix(rnorm(p*r,0,sqrt(1/E_a_prec[[pop]])),nr = r,nc = p, byrow = T) + E_a[[pop]] = matrix(rnorm(p*r[[pop]],0,sqrt(1/E_a_prec[[pop]])),nr = r[[pop]],nc = p, byrow = T) # Latent factor heritabilties. h2 can take h2_divisions values # between 0 and 1. # Prior: 0.5: h2=0, .05: h2 > 0. + + # change k back to p F_h2[[pop]] = runif(k) # Genetic effects on the factor scores. # Prior: Normal distribution for each element # mean = 0 # sd = sqrt(F_h2') for each row. - F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T) + #F_a[[pop]] = matrix(rnorm(k*r,0,sqrt(F_h2[[pop]])),nr = r,nc = k, byrow = T) + F_a[[pop]] = matrix(rnorm(k*r[[pop]],0,sqrt(F_h2[[pop]])),nr = r[[pop]],nc = k, byrow = T) # Full Factor scores. Combination of genetic and residual variation on # each factor. # Prior: Normal distribution for each element # mean = Z_1 * F_a # sd = sqrt(1-F_h2') for each row. - F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n,0,sqrt(1-F_h2[[pop]])),nr = n,nc = k, byrow = T) + F[[pop]] = Z_1[[pop]] %*% F_a[[pop]] + matrix(rnorm(k*n[[pop]],0,sqrt(1-F_h2[[pop]])),nr = n[[pop]],nc = k, byrow = T) # g-vector of specific precisions of genetic effects. # Prior: Gamma distribution for each element @@ -170,13 +184,13 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ # Prior: Normal distribution on each element. # mean = 0 # sd = 1./sqrt(E_a_prec)' on each row - W[[pop]] = matrix(rnorm(p*r2,0,sqrt(1/W_prec[[pop]])),nr = r2, nc = p, byrow = T) + W[[pop]] = matrix(rnorm(p*r2[[pop]],0,sqrt(1/W_prec[[pop]])),nr = r2[[pop]], nc = p, byrow = T) # Fixed effect coefficients. # Prior: Normal distribution for each element # mean = 0 # sd = sqrt(1/fixed_effects_prec) - B[[pop]] = matrix(rnorm(b*p),nr = b, nc = p) + B[[pop]] = matrix(rnorm(b[[pop]]*p),nr = b[[pop]], nc = p) # ------------------------------------ # # ----Precalculate some matrices------ # @@ -185,7 +199,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ # recover() #invert the random effect covariance matrices Ainv[[pop]] = solve(A[[pop]]) - A_2_inv[[pop]] = diag(1,r2) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified. + A_2_inv[[pop]] = diag(1,r2[[pop]]) #Z_2 random effects are assumed to have covariance proportional to the identity. Can be modified. #pre-calculate transformation parameters to diagonalize aI + bZAZ for fast #inversion: inv(aI + bZAZ) = 1/b*U*diag(1./(s+a/b))*U' @@ -210,7 +224,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ #inv(a*bdiag(priors$b_X_prec,Ainv) + b*t(cbind(X,Z_1)) %*% cbind(X,Z_1)) = U %*% diag(1/(a*s1+b*s2)) %*% t(U) Design= cbind(X[[pop]],Z_1[[pop]]) Design2 = t(Design) %*% Design - result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec,Ainv[[pop]])),cholcov(Design2)) + result = GSVD_2_c(cholcov(bdiag(priors$b_X_prec[[pop]],Ainv[[pop]])),cholcov(Design2)) invert_aPXA_bDesignDesignT[[pop]] = list( U = t(solve(result$X)), s1 = diag(result$C)^2, @@ -221,7 +235,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ #random effects 2 #diagonalize mixed model equations for fast inversion: #inv(a*A_2_inv + b*Z_2'Z_2]) = U*diag(1./(a.*s1+b.*s2))*U' - if(r2 > 0) { + if(r2[[pop]] > 0) { Design = Z_2[[pop]] Design2 = t(Design) %*% Design result = GSVD_2_c(cholcov(A_2_inv[[pop]]),cholcov(Design2)) @@ -242,7 +256,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ #similar to fixed effects + random effects 1 above, but no fixed effects. ZZt = t(Z_1[[pop]]) %*% Z_1[[pop]] result = GSVD_2_c(cholcov(ZZt),cholcov(Ainv[[pop]])) - invert_aZZt_Ainv = list( + invert_aZZt_Ainv[[pop]] = list( U = t(solve(result$X)), s1 = diag(result$C)^2, s2 = diag(result$S)^2 @@ -300,6 +314,7 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ # ----------------------- # Posterior = list( Lambda = matrix(0,nr=0,nc=0), + E_a = matrix(0,nr =nrow(do.call(rbind,F_a)) ,nc = p), F_a = matrix(0,nr=0,nc=0), F = matrix(0,nr=0,nc=0), delta = matrix(0,nr=0,nc=0), @@ -307,9 +322,9 @@ fast_BSFG_sampler_init = function(priors,run_parameters){ resid_Y_prec = matrix(0,nr = p,nc = 0), E_a_prec = matrix(0,nr = p,nc = 0), W_prec = matrix(0,nr = p,nc = 0), - B = matrix(0,nr = b,nc = p), - W = matrix(0,nr = r2,nc = p), - E_a = matrix(0,nr = r,nc = p) + B = matrix(0,nr = b[[1]],nc = p), + W = matrix(0,nr = r2[[1]],nc = p) + #E_a = matrix(0,nr = r[[1]],nc = p) ) # ----------------------- # # ---Save initial values- # From 5b5793fb37f6f44751dc899c5283225208647220 Mon Sep 17 00:00:00 2001 From: xin li Date: Fri, 3 Mar 2017 21:58:34 -0800 Subject: [PATCH 12/18] Data simulation for testing BSFG_comb. --- R_BSFG/data_sim.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R index 0edca7a..146f062 100644 --- a/R_BSFG/data_sim.R +++ b/R_BSFG/data_sim.R @@ -1,7 +1,5 @@ library(pedantics) #library(R.matlab) -# define E_a_prec; resid_Y_prec; name -# get A prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){ @@ -37,8 +35,8 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec, print(name) setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R, - gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name) - + gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name,pops) + # ????????????????????gen_factor_Lambda ; error_factor_Lambda return(setup) #do.call(writeMat,c("setup.mat",setup)) #save(pedigree,file="pedigree.Robj") @@ -46,9 +44,6 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec, #================================================================== - - - counter = 0 #sample_size simulations @@ -72,7 +67,7 @@ setwd("~/Runcie Lab/pedigree") data=read.csv('IDDammSirelnUpdate(1).csv',header = TRUE) source("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/setup_pedigree.R") # find way to change this according to the size(different pops) -pops = c("LConG1","LConG2","LConG3") + ori.setup = list() ori.setup[[pops[1]]] = setup_pedigree(data,GenerationCode=3,LineCode=c(0,1),fixedfactor=NA) # no fixed factor because we seperate the dataset based on generation ori.setup[[pops[2]]] = setup_pedigree(data,GenerationCode=2,LineCode=1,fixedfactor=NA) @@ -82,6 +77,7 @@ setwd("~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/Simulation") #sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10)) # set seed for repeatability set.seed(1) +pops = c("LConG1","LConG2","LConG3") for(rep in 1:num_reps){ # number of individuals that have the same mother: # c(50,5) in this case means that for 50 fathers, each of them have 5 children. There are total 250 individuals. @@ -107,7 +103,7 @@ for(rep in 1:num_reps){ factor_h2s = factor_h2s[order(factor_h2s,decreasing = TRUE)] factor_h2s[seq(1,k,by=2)]=0 # make 5 of them to be zero - g_cols = factor_h2s>0 + g_cols = factor_h2s>0 # will this step makes lambda matrix to be different in pops????? Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] # reordering lambda @@ -121,7 +117,6 @@ for(rep in 1:num_reps){ setwd(dir) save(setup,file = 'setup.RData') setwd('..') - } From 4ed81e79c49873195d6d353181a5fa26cd8428ed Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Mon, 13 Mar 2017 22:37:14 -0700 Subject: [PATCH 13/18] change k = 10. --- R_BSFG/BSFG_functions.R | 6 +++--- R_BSFG/data_sim.R | 4 ++-- R_BSFG/fast_BSFG_sampler_init_comb.R | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index 52c086b..1f9dec7 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -81,9 +81,9 @@ sample_Lambda_comb = function( Y,X,B,Z_2,W,F,resid_Y_prec, E_a_prec,Plam,invert_ for(j in 1:p) { - means = matrix(0,nr=p,nc=1) - Qlam = matrix(0,p,p) - # Is this part correct???????????????????? + #means = matrix(0,nr=p,nc=1) + means = matrix(0,nr=k,nc=1) + Qlam = matrix(0,k,k) Y_tilde = list(); for(pop in pops){ #conditioning on W, B, F, marginalizing over E _a diff --git a/R_BSFG/data_sim.R b/R_BSFG/data_sim.R index 146f062..0d7558d 100644 --- a/R_BSFG/data_sim.R +++ b/R_BSFG/data_sim.R @@ -47,7 +47,7 @@ prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec, counter = 0 #sample_size simulations -num_reps = 10 +num_reps = 5 p = 100 k=10 @@ -104,7 +104,7 @@ for(rep in 1:num_reps){ factor_h2s[seq(1,k,by=2)]=0 # make 5 of them to be zero g_cols = factor_h2s>0 # will this step makes lambda matrix to be different in pops????? - Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + #Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] # reordering lambda name = paste(pop,rep,sep='_') diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R index 8b75d9e..6f2cd67 100644 --- a/R_BSFG/fast_BSFG_sampler_init_comb.R +++ b/R_BSFG/fast_BSFG_sampler_init_comb.R @@ -34,7 +34,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){ load('../setup.RData') } - #k = priors$k_init + k = priors$k_init simulation = F if('gen_factor_Lambda' %in% names(setup)){ @@ -79,7 +79,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){ W_prec_shape = priors$W_prec_shape W_prec_rate = priors$W_prec_rate p = ncol(Y[[1]]) - k = p + #k = p #normalize Y to have zero mean and unit variances among observed values, #allowing for NaNs. From fa47849b18d4323ae6871f2bf9995658e673cd82 Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Fri, 24 Mar 2017 03:44:26 -0700 Subject: [PATCH 14/18] standardize in initial function: subtract the mean(overall mean), but not divide the sd. --- R_BSFG/fast_BSFG_sampler_init_comb.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R_BSFG/fast_BSFG_sampler_init_comb.R b/R_BSFG/fast_BSFG_sampler_init_comb.R index 6f2cd67..1f24618 100644 --- a/R_BSFG/fast_BSFG_sampler_init_comb.R +++ b/R_BSFG/fast_BSFG_sampler_init_comb.R @@ -32,6 +32,8 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){ # ----------------------- # if(file.exists('../setup.RData')) { load('../setup.RData') + Y = lapply(setup, function(x) x$Y) + Mean_Y = colMeans(do.call(rbind,Y),na.rm = TRUE) } k = priors$k_init @@ -87,7 +89,7 @@ fast_BSFG_sampler_init_comb = function(priors,run_parameters){ r[[pop]] = ncol(Z_1[[pop]]) Y_missing = is.na(Y[[pop]]) # matrix recording missing data in Y - Mean_Y = colMeans(Y[[pop]],na.rm=T) + #Mean_Y = colMeans(Y[[pop]],na.rm=T) VY = apply(Y[[pop]],2,var,na.rm=T) # Don't remove the mean and standardize the variance if it's a # simulation because this makes comparing to the simulated values more From d292075324d3190998b10c051a9b6ee9daaafaaf Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Fri, 24 Mar 2017 03:44:48 -0700 Subject: [PATCH 15/18] minor fix --- R_BSFG/fast_BSFG_sampler.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R_BSFG/fast_BSFG_sampler.R b/R_BSFG/fast_BSFG_sampler.R index 7d2d29b..a58df8b 100644 --- a/R_BSFG/fast_BSFG_sampler.R +++ b/R_BSFG/fast_BSFG_sampler.R @@ -203,7 +203,11 @@ fast_BSFG_sampler = function(BSFG_state,n_samples) { Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) - B = location_sample[1:b,] + if(b>0) { + B = location_sample[1:b,] + } else { + B = matrix(0,0,p) + } E_a = location_sample[b+(1:r),] # -----Sample W ---------------------- # From 2764585446cd3cd820a68157662ae77a2bfa4599 Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Fri, 24 Mar 2017 03:45:20 -0700 Subject: [PATCH 16/18] minor fix --- R_BSFG/fast_BSFG_sampler_current.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/R_BSFG/fast_BSFG_sampler_current.R b/R_BSFG/fast_BSFG_sampler_current.R index 497daf2..af3c40c 100644 --- a/R_BSFG/fast_BSFG_sampler_current.R +++ b/R_BSFG/fast_BSFG_sampler_current.R @@ -201,17 +201,13 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) { # -----Sample B and E_a--------------- # #conditioning on W, F, Lambda - for£¨pop in pops){ Y_tilde = Y - F %*% t(Lambda) - Z_2 %*% W Y_tilde = as.matrix(Y_tilde) # location_sample = sample_means( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) location_sample = sample_means_c( Y_tilde, resid_Y_prec, E_a_prec, invert_aPXA_bDesignDesignT ) - B = location_sample[1:b,] - if(length(B)==p){ - B = matrix(0,nr=0,nc=p) - } + B = location_sample[1:b,,drop=FALSE] E_a = location_sample[b+(1:r),] - } + # -----Sample W ---------------------- # #conditioning on B, E_a, F, Lambda @@ -349,4 +345,4 @@ fast_BSFG_sampler_current = function(BSFG_state,n_samples) { save(BSFG_state,file = 'BSFG_state.RData') return(BSFG_state) -} \ No newline at end of file +} From bb906abe99cd119f305e8be2d6337960a6f17d50 Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Tue, 25 Apr 2017 00:06:24 -0700 Subject: [PATCH 17/18] functions for plotting --- R_BSFG/code_comparing_plotting.R | 409 +++++++++++++++++++++ R_BSFG/deltaZ_posterior_mean.Rmd | 117 ++++++ R_BSFG/generate_simulations_halfSib_fast.R | 338 +++++++++++++++++ R_BSFG/test_indep_Ablocks.R | 50 +++ 4 files changed, 914 insertions(+) create mode 100644 R_BSFG/code_comparing_plotting.R create mode 100644 R_BSFG/deltaZ_posterior_mean.Rmd create mode 100644 R_BSFG/generate_simulations_halfSib_fast.R create mode 100644 R_BSFG/test_indep_Ablocks.R diff --git a/R_BSFG/code_comparing_plotting.R b/R_BSFG/code_comparing_plotting.R new file mode 100644 index 0000000..27dedce --- /dev/null +++ b/R_BSFG/code_comparing_plotting.R @@ -0,0 +1,409 @@ +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC") +MCMC = readRDS("G_MCMC.Rdata") +#setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +#BSFGOld = "BSFG_state.RData" +#BSFGNew = "BSFG_state14.RData" +#BSFGStar = "BSFG_state17.RData" +#target = "F_h2" +#load(BSFGOld) +#traitnames = BSFG_state$traitnames +traitnames = rownames(MCMC[[1]][[1]]) + +#old.G = G_Matrix_Comp(BSFG_state) +old.G = MCMC[[4]] +#p = BSFG_state$run_variables$p +#spn = dim(BSFG_state$Posterior[[target]])[2] +p = 18 +spn = 2000 +#load(BSFGNew) +#new.G = G_Matrix_Comp(BSFG_state) +new.G = MCMC[[3]] + +#load(BSFGStar) +#star.G = G_Matrix_Comp(BSFG_state) +star.G = MCMC[[2]] + + +# The distance of two matrix(in predicted phenotype value/level) +#selection gradient +beta = 0.3609 +beta.v = c(0,beta,rep(0,16)) +traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +#old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +#new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +#star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +#boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +#boxplot(t(new.traces.traits)) +# use ggplot +traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits))) +traces.traits.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn)) + +traces.traits.df$model = factor(traces.traits.df$model, level=c("old","new","star")) +traces.traits.df$model = factor(traces.traits.df$model, level=c("star","new","old")) + +traces.traits.df$traits = rep(traitnames,3*spn) +names(traces.traits.df) = c("response","model","traits") + +summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, + conf.interval=.95, .drop=TRUE) { + library(plyr) + + # New version of length which can handle NA's: if na.rm==T, don't count them + length2 <- function (x, na.rm=FALSE) { + if (na.rm) sum(!is.na(x)) + else length(x) + } + + # This does the summary. For each group's data frame, return a vector with + # N, mean, and sd + datac <- ddply(data, groupvars, .drop=.drop, + .fun = function(xx, col) { + c( + q1 = quantile (xx[[col]],0.025, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + q2 = quantile (xx[[col]],0.975, na.rm=na.rm)- mean (xx[[col]], na.rm=na.rm), + mean = mean (xx[[col]], na.rm=na.rm) + + ) + }, + measurevar + ) + + # Rename the "mean" column + datac <- rename(datac, c("mean" = measurevar)) + + #datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean + + # Confidence interval multiplier for standard error + # Calculate t-statistic for confidence interval: + # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 + #ciMult <- qt(conf.interval/2 + .5, datac$N-1) + #datac$ci <- datac$se * ciMult + + return(datac) +} + +traces.traits.df.sum = summarySE(traces.traits.df,measurevar = "response",groupvars = c("model","traits")) +names(traces.traits.df.sum) = c("model","traits","q1","q2","response") + + +save(traces.traits.df.sum,file = "plot6.RData") + + +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ +# geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +pd <- position_dodge(width = 0.6) +load("plot5.RData") +plot5 <- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (F3)",color = "G (F3)") + + +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ +# geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +#pd <- position_dodge(width = 0.3) +load("plot6.RData") +plot6<- ggplot(traces.traits.df.sum,aes(x = traits, y = response, color=model))+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(fill = "G (Control)",color = "G (Control)") + + +plots1=list() +plots1=lapply(c(1,3),function(i) get(paste0("plot",i))) +plots2=list() +plots2=lapply(c(2,4),function(i) get(paste0("plot",i))) +plots3=list() +plots3=lapply(c(5,6),function(i) get(paste0("plot",i))) +plots4=list(g11,g22) +library(gridExtra) +ggsave("BSFG_comparison_plots1.pdf", marrangeGrob(grobs = plots1, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("BSFG_comparison_plots2.pdf", marrangeGrob(grobs = plots2, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("BSFG_comparison_plots3.pdf", marrangeGrob(grobs = plots3, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") +ggsave("EigenvaluesComparison.pdf", marrangeGrob(grobs = plots4, nrow=2, ncol=1,top = NULL),width = 297, height = 210, units = "mm") + + +#5. PCA decomposition + +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +#load(BSFGOld) +#old.G = G_Matrix_Comp(BSFG_state) +old.G = MCMC[[4]] +new.G = MCMC[[3]] +star.G = MCMC[[2]] +#p = BSFG_state$run_variables$p +#traitnames = BSFG_state$traitnames +#load("BSFG_state6.RData") +#new.G = G_Matrix_Comp(BSFG_state) +#load("BSFG_state7.RData") +#star.G = G_Matrix_Comp(BSFG_state) + +old.traces.eigen = sapply(old.G, function(x)eigen(x)$values) +new.traces.eigen = sapply(new.G, function(x)eigen(x)$values) +star.traces.eigen = sapply(star.G, function(x)eigen(x)$values) + +traces.eigen.df = data.frame(c(c(old.traces.eigen),c(new.traces.eigen),c(star.traces.eigen))) +traces.eigen.df$model = rep(c("old","new","star"),c(p*spn,p*spn,p*spn)) + +traces.eigen.df$model = factor(traces.eigen.df$model, level=c("old","new","star")) +traces.eigen.df$model = factor(traces.eigen.df$model, level=c("star","new","old")) + +traces.eigen.df$lambda = rep(paste0("PC",1:18),3*spn) +names(traces.eigen.df) = c("response","model","PC") + +traces.eigen.df.sum = summarySE(traces.eigen.df,measurevar = "response",groupvars = c("model","PC")) +names(traces.eigen.df.sum) = c("model","PC","q1","q2","response") + +# reorder the PC value in the data +traces.eigen.df.sum$PC = factor(traces.eigen.df.sum$PC,level=paste0("PC",1:18)) + +# size dataframe +old.traces.size = apply(old.traces.eigen,2,sum) +new.traces.size = apply(new.traces.eigen,2,sum) +star.traces.size = apply(star.traces.eigen,2,sum) +size.df = data.frame(old.traces.size,new.traces.size,star.traces.size) +library(reshape2) +size.df = melt(size.df,value.name = "Size") +size.df$variable = factor(size.df$variable,levels = c("star.traces.size","new.traces.size","old.traces.size")) + +# PC1 +old.traces.pc1c = apply(old.traces.eigen,2,function(x)x[1]/sum(x)) +new.traces.pc1c = apply(new.traces.eigen,2,function(x)x[1]/sum(x)) +star.traces.pc1c = apply(star.traces.eigen,2,function(x)x[1]/sum(x)) +pc1c.df = data.frame(old.traces.pc1c,new.traces.pc1c,star.traces.pc1c) +library(reshape2) +pc1c.df = melt(pc1c.df,value.name = "PC1.Contribution") +pc1c.df$variable = factor(pc1c.df$variable,levels = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c")) + + + +save(traces.eigen.df.sum , file = "eigen567.RData") +save(traces.eigen.df.sum, file = "eigen51417.RData") +save(size.df, file = "size567.RData") +save(size.df, file = "size51417.RData") +save(pc1c.df, file = "pc1c567.RData") +save(pc1c.df, file = "pc1c51417.RData") + +load("eigen567.RData") +load("size567.RData") +load("pc1c567.RData") +# size of each model +library(dplyr) +traces.eigen.df.sum %>% + group_by(model) %>% + summarize(size = sum(response)) + +library(ggplot2) +size567.boxplot <- ggplot(size.df, aes(x="",y=Size, fill=variable))+ + geom_boxplot(alpha=0.85)+ + scale_fill_manual(breaks =rev(c("star.traces.size","new.traces.size","old.traces.size")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (F3)") + +pc1c567.boxplot <- ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+ + geom_boxplot(alpha=0.85)+ + scale_fill_manual(breaks =rev(c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (F3)") + +pd <- position_dodge(width = 0.6) + +g11<-ggplot(traces.eigen.df.sum,aes(x = PC, y = response, color=model))+ + scale_color_manual(breaks =rev(c("star","new","old")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + ylab("Variance")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + xlab("Principle Components")+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12))+ + labs(color = "G (F3)") +#------------------------------- +# combine two boxplot +library(gridExtra) +library("cowplot") +plot_grid(size567.boxplot, size51417.boxplot, ncol = 2, nrow = 1) +plot_grid(pc1c567.boxplot, pc1c51417.boxplot, ncol = 2, nrow = 1) +#------------------------------- +load("eigen51417.RData") +load("size51417.RData") +load("pc1c51417.RData") +# size of each model +library(dplyr) +traces.eigen.df.sum %>% + group_by(model) %>% + summarize(size = sum(response)) + +# boxplot of size of each model +library(ggplot2) +size51417.boxplot <- ggplot(size.df, aes(x="",y=Size, fill=variable))+ + geom_boxplot()+ + scale_fill_manual(breaks = c("star.traces.size","new.traces.size","old.traces.size"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (Control)") + +pc1c51417.boxplot = ggplot(pc1c.df, aes(x="",y=PC1.Contribution, fill=variable))+ + geom_boxplot()+ + scale_fill_manual(breaks = c("star.traces.pc1c","new.traces.pc1c","old.traces.pc1c"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (Control)") + +g22<-ggplot(traces.eigen.df.sum,aes(x = reorder(PC,PC), y = response, color=model))+ + scale_color_manual(breaks = c("star","new","old"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + ylab("Response")+ + geom_errorbar(aes(ymin=q1+response, ymax=q2+response), position=pd)+ + geom_point(position=pd,size=1)+ + theme(axis.text=element_text(size=10), + axis.title=element_text(size=15),legend.text=element_text(size=12), + legend.title=element_text(size=12),legend.margin=unit(0.455,"cm"))+ + labs(color = "G (Control)")+ + xlab("Principle Components")+ + ylab("Variance") +# statistical test for the difference +# a.ANOVA test +class(traces.eigen.df) +anova(lm(response~model, data = traces.eigen.df)) +anova(lm(response~model:PC, data = traces.eigen.df)) #which one to use? +anova(lm(response~model*PC, data = traces.eigen.df)) + +# b.Bartlett test + +# 6. angle and length of deltaZ and first eigenvector + +# distribution within population +dist_within_theta = function(traces.traits){ + theta = vector(,spn) + for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = traces.traits[,pairs[1]] + b = traces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) + } + return(theta) +} + +# first eigen +old.traces.vector = sapply(old.G, function(x)eigen(x)$vector[,1]) +new.traces.vector = sapply(new.G, function(x)eigen(x)$vector[,1]) +star.traces.vector = sapply(star.G, function(x)eigen(x)$vector[,1]) + +old.theta = dist_within_theta(old.traces.vector) +new.theta = dist_within_theta(new.traces.vector) +star.theta = dist_within_theta(star.traces.vector) +# deltaZ +old.theta = dist_within_theta(old.traces.traits) #old.traces.traits is from deltaZ_posterior_mean +new.theta = dist_within_theta(new.traces.traits) +star.theta = dist_within_theta(star.traces.traits) + + +# distribution between population +dist_between_theta = function(Atraces.traits,Btraces.traits){ + theta = vector(,spn) + for (i in 1:spn){ + pairs = randcomb(1:spn, 2) + a = Atraces.traits[,pairs[1]] + b = Btraces.traits[,pairs[2]] + a.length = sqrt(sum(a * a)) + b.length = sqrt(sum(b * b)) + theta[i] = acos(sum(a*b) / (a.length * b.length)) + } + return(theta) +} + +# first eigen vector +old.new.theta = dist_between_theta(old.traces.vector,new.traces.vector) +# K-S test +#ks.test(old.theta,old.new.theta) #different +old.star.theta = dist_between_theta(old.traces.vector,star.traces.vector) +new.star.theta = dist_between_theta(new.traces.vector,star.traces.vector) + + +# deltaZ +old.new.theta = dist_between_theta(old.traces.traits,new.traces.traits) +# K-S test +#ks.test(old.theta,old.new.theta) #different +old.star.theta = dist_between_theta(old.traces.traits,star.traces.traits) +new.star.theta = dist_between_theta(new.traces.traits,star.traces.traits) +#-------------------------------- +# plot +theta.df = data.frame(old.theta,new.theta,star.theta, old.new.theta, old.star.theta, new.star.theta) + +library(reshape2) +theta.df = melt(theta.df, value.name = "Degree") +names(theta.df) = c("labels", "theta") +ggplot(theta.df, aes(x = theta, col=labels))+ + geom_density(aes(fill=labels),alpha=0.1) +#+scale_color_manual(breaks = c("old.theta","new.theta","star.theta","old.new.theta","old.star.theta","new.star.theta"), +# values=rainbow(6)) + +#------------------------------- +# 7.length of deltaZ + +length_deltaZ = function(traces.traits){ + #posmean.deltaZ = apply(traces.traits,1,mean) + #deltaZ.length = sqrt(sum(posmean.deltaZ * posmean.deltaZ)) + deltaZ.length = apply(traces.traits,2,function(x)sqrt(sum(x * x))) + return(deltaZ.length) +} +old.deltaZ.length = length_deltaZ(old.traces.traits) +new.deltaZ.length = length_deltaZ(new.traces.traits) +star.deltaZ.length = length_deltaZ(star.traces.traits) + +deltaZ.length.df = data.frame(old.deltaZ.length,new.deltaZ.length,star.deltaZ.length) +library(reshape2) +deltaZ.length.df = melt(deltaZ.length.df,value.name = "Length") +deltaZ.length.df$variable = factor(deltaZ.length.df$variable,levels = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length")) +save(deltaZ.length.df, file="traitslength567.RData") +save(deltaZ.length.df, file="traitslength51417.RData") + +load("traitslength567.RData") +traits.length567.boxplot <- ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+ + geom_boxplot(alpha=0.85)+ + scale_fill_manual(breaks =rev(c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length")), + values=rev(c( "blue","red2","grey0")),labels=rev(c("Low Line","High Line","Control Line")))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (F3)") + +load("traitslength51417.RData") +traits.length51417.boxplot <- ggplot(deltaZ.length.df, aes(x="",y=Length, fill=variable))+ + geom_boxplot()+ + scale_fill_manual(breaks = c("star.deltaZ.length","new.deltaZ.length","old.deltaZ.length"), + values=c("grey60","grey30","grey0"),labels=c("F1","F2","F3"))+ + theme(axis.title.x = element_blank())+ + labs(fill = "G (Control)") + +library(gridExtra) +library("cowplot") +plot_grid(traits.length567.boxplot, traits.length51417.boxplot, ncol = 2, nrow = 1) diff --git a/R_BSFG/deltaZ_posterior_mean.Rmd b/R_BSFG/deltaZ_posterior_mean.Rmd new file mode 100644 index 0000000..f83a428 --- /dev/null +++ b/R_BSFG/deltaZ_posterior_mean.Rmd @@ -0,0 +1,117 @@ +--- +title: "deltaZ_posterior_mean" +output: html_document +--- + +```{r} +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +BSFGOld = "BSFG_state.RData" +BSFGNew = "BSFG_fixedlambda65.RData" +BSFGStar = "BSFG_fixedlambda75.RData" +#BSFGNew = "BSFG_fixedlambdaR5.RData" +#BSFGStar = "BSFG_fixedlambdaS5.RData" +target = "F_h2" +load(BSFGOld) +traitnames = BSFG_state$traitnames +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +spn = dim(BSFG_state$Posterior[[target]])[2] +# posterior mean +# do this in case the value of k differ for this two models and there in no lambda in BSFG_state_fixed case +Lambda = BSFG_state$Posterior$Lambda +load(BSFGNew) +BSFG_state$Posterior$Lambda = Lambda +new.G = G_Matrix_Comp(BSFG_state) + +load(BSFGStar) +BSFG_state$Posterior$Lambda = Lambda +star.G = G_Matrix_Comp(BSFG_state) +#new.traces.G = G_Traces_Comp(BSFG_state) + + +# The distance of two matrix(in predicted phenotype value/level) +#selection gradient +beta = 0.3609 +beta.v = c(0,beta,rep(0,16)) +traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +#boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +#boxplot(t(new.traces.traits)) +# use ggplot +traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits))) +traces.traits.df$model = rep(c("old","new","star"),each=spn*p) +traces.traits.df$traits = rep(traitnames,3*spn) +names(traces.traits.df) = c("response","model","traits") +library(ggplot2) +#ggplot(traces.traits.df,aes(x =traits,y=response,col=model,fill=model))+ + # geom_boxplot() +#plot(x=rep(1:18,2),y=c(old.traits.posmean,new.traits.posmean),col=rep(c(1,2),eac#h=18),type = "o") +ggplot(traces.traits.df, aes(x = traits, y = response, color=model))+ + #geom_boxplot(alpha = 0.5)+ + scale_color_manual(breaks = c("old", "new", "star"), + values=c("green", "red", "blue"))+ + #scale_fill_manual(breaks = c("old", "new", "star"), + # values=c("green", "red", "blue"))+ + ggtitle("The distribution of variance (Latent traits)")+ + scale_color_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+ + # scale_fill_discrete(breaks = c("old", "new", "star"),labels=c("LconG3", "LhighG3", "LlowG3"))+ + ylab("Response")+ + geom_errorbar(aes(ymin=quantile(response,0.025), ymax=quantile(response,0.975)), width=.1, position=pd) + +``` + +```{r} +#when lambda is not fixed +source('~/Runcie Lab/SparseFactorMixedModel_v2/R_BSFG/BSFG_functions.R') +setwd("~/Runcie Lab/SparseFactorMixedModel_v2/MCMC/5/5/Lambda1.5_delta2shape3") +BSFGOld = "BSFG_state.RData" +BSFGNew = "BSFG_state14.RData" +BSFGStar = "BSFG_state17.RData" +target = "F_h2" +load(BSFGOld) +traitnames = BSFG_state$traitnames +#old.traces.G = G_Traces_Comp(BSFG_state) +old.G = G_Matrix_Comp(BSFG_state) +p = BSFG_state$run_variables$p +spn = dim(BSFG_state$Posterior[[target]])[2] +load(BSFGNew) +new.G = G_Matrix_Comp(BSFG_state) +load(BSFGStar) +#BSFG_state$Posterior$Lambda = Lambda +star.G = G_Matrix_Comp(BSFG_state) + +beta = 0.3609 +beta.v = c(0,beta,rep(0,16)) +traces.traits = matrix(,p,spn) + +#old.G in list form +old.traces.traits = sapply(old.G,function(x)x%*%beta.v) +old.traits.posmean = apply(old.traces.traits,1,mean) +new.traces.traits = sapply(new.G,function(x)x%*%beta.v) +new.traits.posmean = apply(new.traces.traits,1,mean) +star.traces.traits = sapply(star.G,function(x)x%*%beta.v) +star.traits.posmean = apply(star.traces.traits,1,mean) +#boxplot(old.traces_traits[2,]) +#boxplot(t(old.traces.traits)) +#boxplot(new.traces_traits[2,]) +#boxplot(t(new.traces.traits)) +# use ggplot +traces.traits.df = data.frame(c(c(old.traces.traits),c(new.traces.traits),c(star.traces.traits))) +traces.traits.df$model = rep(c("old","new","star"),each=spn*p) +traces.traits.df$traits = rep(traitnames,3*spn) +names(traces.traits.df) = c("response","model","traits") +library(ggplot2) +ggplot(traces.traits.df,aes(x =traits,y=response,col=model))+ + geom_boxplot() +``` diff --git a/R_BSFG/generate_simulations_halfSib_fast.R b/R_BSFG/generate_simulations_halfSib_fast.R new file mode 100644 index 0000000..578ff91 --- /dev/null +++ b/R_BSFG/generate_simulations_halfSib_fast.R @@ -0,0 +1,338 @@ +library(pedantics) +library(R.matlab) + +prepare_simulation2 = function(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name){ + n = dim(A_chol)[1] # ??difference of A_chol and A + p = nrow(Lambda) + + r = n + # n = 2*n + + k = ncol(Lambda) + F_a = t(A_chol) %*% sweep(matrix(rnorm(n*k),n),2,sqrt(factor_h2s),'*') # make sure MN(0,A,sigma)?? + F_r = sweep(matrix(rnorm(n*k),n),2,sqrt(1-factor_h2s),'*') # factor_h2s is the variance of factor?? + F = F_a + F_r + + E_a_act = t(A_chol) %*% sweep(matrix(rnorm(n*p),n),2,sqrt(1/E_a_prec),'*') + E_r_act = sweep(matrix(rnorm(n*p),n),2,sqrt(1/resid_Y_prec),'*') + + Y = F %*% t(Lambda) + E_a_act + E_r_act + + X = matrix(1,nrow=1,ncol=n) + B = rep(0,p) + Z_1 = diag(r)[,rep(1:r,n/r)] #?? we defined r = n at 8 + Va = colSums(Lambda^2) * factor_h2s + 1/E_a_prec #?? G + Ve = colSums(Lambda^2) * (1-factor_h2s) + 1/resid_Y_prec # R + h2 = Va/(Va + Ve) # heribility + G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + diag(1/E_a_prec) + R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + diag(1/resid_Y_prec) + h2 = diag(G)/(diag(G)+diag(R)) # ?? difference with 26 + + # Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4)) + + # plot(h2,type="l") + + print(name) + setup = list(Y=Y,F_act=F,E_a_act=E_a_act,F_a_act = F_a,Z_1=Z_1,A=A,X=X,n=n,r=r,p=p,G=G,R=R, + gen_factor_Lambda = Lambda,error_factor_Lambda = Lambda,h2=h2,B=B, factor_h2s=factor_h2s,name=name) + + save(setup,file = 'setup.RData') + do.call(writeMat,c("setup.mat",setup)) + save(pedigree,file="pedigree.Robj") +} + +prepare_simulation = function(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name){ + n = dim(A_chol)[1] + p = dim(G_chol)[1] + + r = n + # n = 2*n + + Y=U_act=E_act=c() + U_act = t(A_chol) %*% matrix(rnorm(p*r),r,p) %*% G_chol + E_act = matrix(rnorm(p*n),n,p) %*% R_chol + Y=rbind(U_act) + E_act + # Y=rbind(U_act,U_act) + E_act + + + X = matrix(1,nrow=1,ncol=n) + B = rep(0,p) + Z_1 = diag(r)[,rep(1:r,n/r)] + h2 = diag(G)/(diag(G)+diag(R)) + + # Z_2 = rbind(rep(c(1,0,0,0),n/4),rep(c(0,1,0,0),n/4),rep(c(0,0,1,0),n/4),rep(c(0,0,0,1),n/4)) + + plot(h2,type="l") + + print(name) + setup = list( + Y=Y,U_act=U_act,E_act=E_act,Z_1=Z_1,A=A,X=X, + n=n,r=r,p=p, + gen_factor_Lambda = gen_factor_Lambda,error_factor_Lambda = error_factor_Lambda, + h2=h2,G=G,R=R,B=B, factor_h2s=factor_h2s,name=name + ) + save(setup,file = 'setup.RData') + + do.call(writeMat,c("setup.mat",setup)) + save(pedigree,file="pedigree.Robj") +} + + +CovToCor=function(x) diag(1/sqrt(diag(x))) %*% x %*% diag(1/sqrt(diag(x))) + + +# set seed for repeatability +set.seed(1) + +counter = 0 + +#sample_size simulations +num_reps = 10 +p = 100 +k=10 +factor_h2s = seq(1,0,by=-0.1)[1:k] +factor_h2s[seq(1,k,by=2)]=0 + + +# ways to simulate pedigree?? +sample_size = list(Small = c(50,5), Medium = c(100,10), Large = c(500,10)) + + +for(size in names(sample_size)){ + nM = sample_size[[size]][1] + nI = sample_size[[size]][2] + pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) + pedigree<-fixPedigree(pedigree) #A + children = !is.na(pedigree[,3]) + + #generate A matrix as 2* kinship matrix from whole pedigree + A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] + A_chol = chol(A) + for(rep in 1:num_reps){ + counter = counter+1 + Lambda = matrix(0,p,k) + numeff = sample((p/30):(p/4),k,replace=T) + for(h in 1:k){ + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) + } + Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] + cols=1:k + g_cols = factor_h2s>0 + Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + + + E_a_prec = rep(1/0.2,p) + resid_Y_prec = rep(1/0.2,p) + + name = paste('SampleSize',size,rep,sep='_') + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name) + setwd('..') + } +} + +#k simulations +num_reps = 10 +p = 100 +factor_h2 = 0.5 + +ks_size = list(Small = c(5,10), Medium = c(15,25), Large = c(30,50)) + +nM = 100 +nI = 10 +pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) +pedigree<-fixPedigree(pedigree) +children = !is.na(pedigree[,3]) +#generate A matrix as 2* kinship matrix from whole pedigree +A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] +A_chol = chol(A) + +for(size in names(ks_size)){ + for(rep in 1:num_reps){ + k = ks_size[[size]][2] + counter = counter+1 + Lambda = matrix(0,p,k) + numeff = sample((p/30):(p/4),k,replace=T) + for(h in 1:k){ + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) + } + Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] + cols=1:k + factor_h2s = rep(0,k) + factor_h2s[1:ks_size[[size]][1]] = factor_h2 + # factor_h2s[sample(1:k,ks_size[[size]][1])] = factor_h2 + # g_cols = factor_h2s>0 + # Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + + E_a_prec = rep(1/0.2,p) + resid_Y_prec = rep(1/0.2,p) + + name = paste('k',size,rep,sep='_') + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name) + setwd('..') + } +} + + +#unconstrained R +library(MCMCpack) +num_reps = 10 +p = 100 +kG = 5 + +nM = 100 +nI = 10 +pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) +pedigree<-fixPedigree(pedigree) +children = !is.na(pedigree[,3]) +#generate A matrix as 2* kinship matrix from whole pedigree +A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] +A_chol = chol(A) + + for(rep in 1:num_reps){ + k = kG + counter = counter+1 + Lambda = matrix(0,p,k) + numeff = sample((p/30):(p/4),k,replace=T) + for(h in 1:k){ + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) + } + Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] + cols=1:k + factor_h2s = rep(1,k) + g_cols = factor_h2s>0 + Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + + + G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + 0.2*diag(p) + R = rwish(p+1,diag(diag(G))/p) + G_chol = chol(G) + R_chol = chol(R) + h2 = diag(G)/(diag(G)+diag(R)) + hist(h2,breaks=seq(0,1,length=30)) + image(CovToCor(G)[,p:1]) + image(CovToCor(R)[,p:1]) + + gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0] + error_factor_Lambda = Lambda + + name = paste('Rwish',rep,sep='_') + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + prepare_simulation(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name) + setwd('..') + } + + + +#non-sparse R +num_reps = 10 +p = 100 +kG = 5 +kR = 5 + +nM = 100 +nI = 10 +pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) +pedigree<-fixPedigree(pedigree) +children = !is.na(pedigree[,3]) +#generate A matrix as 2* kinship matrix from whole pedigree +A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] +A_chol = chol(A) + + for(rep in 1:num_reps){ + k = kG + counter = counter+1 + Lambda = matrix(0,p,k) + numeff = sample((p/30):(p/4),k,replace=T) + for(h in 1:k){ + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) + } + Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] + cols=1:k + factor_h2s = rep(0.5,k) + g_cols = factor_h2s>0 + Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + + Lambda = cbind(Lambda,matrix(rnorm(kR*p),p,kR)) + factor_h2s = c(factor_h2s,rep(0,kR)) + + + G = Lambda %*% diag(factor_h2s) %*% t(Lambda) + 0.2*diag(p) + R = Lambda %*% diag(1-factor_h2s) %*% t(Lambda) + 0.2*diag(p) + G_chol = chol(G) + R_chol = chol(R) + h2 = diag(G)/(diag(G)+diag(R)) + hist(h2,breaks=seq(0,1,length=30)) + image(CovToCor(G)[,p:1]) + image(CovToCor(R)[,p:1]) + + gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0] + error_factor_Lambda = Lambda + + name = paste('R_notSparse',rep,sep='_') + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + prepare_simulation(G_chol,R_chol,A_chol,A,gen_factor_Lambda,error_factor_Lambda,factor_h2s,name) + setwd('..') + } + + + +#p simulations +num_reps = 10 +factor_h2 = 0.5 +k = 10 +k_G = 5 + +ks_size = list(Small = 20, Large = 1000) + +nM = 100 +nI = 10 +pedigree = data.frame(ind=nM*nI + nM + 1:(nM*nI),dam=1:(nM*nI) + nM, sire = gl(nM,nI)) +pedigree<-fixPedigree(pedigree) +children = !is.na(pedigree[,3]) +#generate A matrix as 2* kinship matrix from whole pedigree +A = 2*kinship(pedigree[,1], pedigree[,2], pedigree[,3])[children,children] +A_chol = chol(A) + +for(size in names(ks_size)){ + for(rep in 1:num_reps){ + p = ks_size[[size]] + counter = counter+1 + Lambda = matrix(0,p,k) + numeff = sample((p/30):(p/4),k,replace=T) + numeff[numeff<3]=3 + for(h in 1:k){ + Lambda[sample(1:p,numeff[h]),h] = rnorm(numeff[h]) + } + Lambda = Lambda[,order(-diag(t(Lambda) %*% Lambda))] + cols=1:k + factor_h2s = rep(0,k) + factor_h2s[1:k_G] = factor_h2 + # factor_h2s[sample(1:k,ks_size[[size]][1])] = factor_h2 + # g_cols = factor_h2s>0 + # Lambda =Lambda[do.call("order", unname(split(-abs(Lambda[,cols[factor_h2s>0]]), col(Lambda[,cols[factor_h2s>0]])))),] + + E_a_prec = rep(1/0.2,p) + resid_Y_prec = rep(1/0.2,p) + + gen_factor_Lambda = (Lambda %*% diag(sqrt(factor_h2s)))[,factor_h2s>0] + error_factor_Lambda = Lambda + + name = paste('p',size,rep,sep='_') + dir = paste('Sim',counter,sep="_") + try(dir.create(dir)) + setwd(dir) + prepare_simulation2(A_chol,A,Lambda,factor_h2s,E_a_prec,resid_Y_prec,name) + setwd('..') + } +} + diff --git a/R_BSFG/test_indep_Ablocks.R b/R_BSFG/test_indep_Ablocks.R new file mode 100644 index 0000000..4dbe5d8 --- /dev/null +++ b/R_BSFG/test_indep_Ablocks.R @@ -0,0 +1,50 @@ +setwd("~/Runcie Lab/pedigree") +data=read.csv('IDDammSirelnUpdate2.csv',header = TRUE) +# 5 +GenerationCode = c(0,1,2,3) +LineCode = c(1) + + +# 6 +GenerationCode=3 +LineCode = c(0,2) + + +# 7 +GenerationCode=3 +LineCode = c(0,3) + +ped <- data[,1:3] +for(i in 1:3) ped[,i] <- as.factor(ped[,i]) +library(MCMCglmm) +invA <- inverseA(ped) +A <- solve(invA$Ainv) + +data$Gen_Code = paste(data$Generation,data$LineCode,sep='_') +o = order(data$Gen_Code) + +A_order = A[o,o] +image(A_order>0) + + +generation_pos <- data$Generation %in% GenerationCode +LineCode_pos <- data$LineCode%in%LineCode|data$LineCode==0 +data_pos <- which((generation_pos&LineCode_pos)|data$LineCode==0) +#nodes_names <- as.factor(as.character(data$animal[data_pos])) +n = length(data_pos) + +# reorder the data according to specific rows in each model +data.trans = data +data.trans[1:n,] = data[data_pos,] +data.trans[185:(184+n),] = data[data_pos,] +data.trans[366:(365+n),] = data[data_pos,] + + +ped <- data.trans[,1:3] +ped <- data[,1:3] +for(i in 1:3) ped[,i] <- as.factor(ped[,i]) +library(MCMCglmm) +invA2 <- inverseA(ped)#, node = data.trans$animal[1:545]) +invA <- inverseA(ped, node = data.trans$animal[1:545]) +A <- solve(invA$Ainv) +A <- matrix(A@x, nrow<-n, ncol<-n, byrow <- FALSE) From 2255e85f4a3e37fbf6bd68ab966b4061135ed64b Mon Sep 17 00:00:00 2001 From: "Xin Li(Lexie)" Date: Tue, 25 Apr 2017 00:07:59 -0700 Subject: [PATCH 18/18] update functions --- R_BSFG/BSFG_functions.R | 48 +++++++++++++++++++++++++++++++++++ R_BSFG/plotting_diagnostics.R | 27 ++++++++++++++++++-- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/R_BSFG/BSFG_functions.R b/R_BSFG/BSFG_functions.R index 1f9dec7..e1dddab 100644 --- a/R_BSFG/BSFG_functions.R +++ b/R_BSFG/BSFG_functions.R @@ -630,6 +630,54 @@ G_Matrix_Comp = function(BSFG_state){ return(GMatrix) } +G_Matrix_comb_Comp = function(BSFG_state){ + Posterior = BSFG_state$Posterior + traitnames = BSFG_state$traitnames + run_variables = BSFG_state$run_variables + sp_num = ncol(Posterior$Lambda) + #n = run_variables$n + k = 10 + p = run_variables$p + n = nrow(Posterior$F_a)/k + k1 = nrow(Posterior$F_a)/n; + k2 = nrow(Posterior$Lambda)/p; + if (k2 >= k1){ + k = k1 + }else{ + k=k2 + } + h2s = Posterior$F_h2[,1:sp_num] + #G_Lambdas = array(0,dim = dim(Posterior$Lambda)) + #Lambda_est = matrix(0,p,k) + G_est = E_est = matrix(0,p,p) + #traces_G = matrix(,p*(p+1)/2,sp_num) + #traces_G_cor = matrix(,p*(p+1)/2,sp_num) + #traces_E = matrix(,p*(p+1)/2,sp_num) + GMatrix = NULL + for(j in 1:sp_num) { + Lj = matrix(Posterior$Lambda[,j],p,k) + h2j = Posterior$F_h2[,j] + G_Lj = Lj %*% diag(sqrt(h2j)) + #G_Lambdas[,j] = c(G_Lj) + Gj = G_Lj %*% t(G_Lj) + diag(1/Posterior$E_a_prec[,j]) + rownames(Gj) = traitnames + #posterior mean + GMatrix[[j]] = Gj + G_est = G_est + Gj/sp_num + #library(gdata) + #traces_G[,j] = lowerTriangle(Gj,diag = TRUE) + #traces_G_cor[,j] = lowerTriangle(CovToCor(Gj),diag = TRUE) + + #E_Lj = Lj %*% diag(1-h2j) %*% t(Lj) + diag(1/Posterior$resid_Y_prec[,j]) + #E_est = E_est + E_Lj/sp_num; + #posterior mean for lambda + #Lambda_est = Lambda_est + matrix(Posterior$Lambda[,j],p,k)/sp_num; + #traces_E[,j] = lowerTriangle(E_Lj,diag = TRUE) + } + #G_Lambda = matrix(rowMeans(G_Lambdas),p,k) + return(G_est) +} + G_Traces_Comp = function(BSFG_state){ Posterior = BSFG_state$Posterior traitnames = BSFG_state$traitnames diff --git a/R_BSFG/plotting_diagnostics.R b/R_BSFG/plotting_diagnostics.R index a8344f1..4c35905 100644 --- a/R_BSFG/plotting_diagnostics.R +++ b/R_BSFG/plotting_diagnostics.R @@ -139,7 +139,7 @@ draw_simulation_diagnostics = function(sp_num,run_parameters,run_variables,Poste } } -draw_results_diagnostics = function(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames){ +draw_results_diagnostics = function(sp_num,paras,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames){ # devices = dev.list() # while(length(devices) < 7){ # if(.Platform$OS.type != "windows") { @@ -338,11 +338,34 @@ plot_diagnostics = function(BSFG_state){ sp_num = ncol(BSFG_state$Posterior$Lambda) draw_results_diagnostics(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames) - # } } # load("BSFG_state.RData") # plot_diagnostics(BSFG_state) +plot_diagnostics_comb = function(BSFG_state){ + burn = BSFG_state$run_parameters$burn + thin = BSFG_state$run_parameters$thin + start_i = BSFG_state$current_state$nrun + params = BSFG_state$params + run_variables = BSFG_state$run_variables + Lambda = BSFG_state$current_state$Lambda + F_h2 = BSFG_state$current_state$F_h2[[pop]] + Posterior = BSFG_state$Posterior + E_a_prec = BSFG_state$current_state$E_a_prec[[pop]] + resid_Y_prec = BSFG_state$current_state$resid_Y_prec[[pop]] + traitnames = BSFG_state$traitnames + draw_iter = BSFG_state$run_parameters$draw_iter + # for(i in start_i+(1:n_samples)){ + # if( (i-burn) %% thin == 0 && i > burn) { + # sp_num = (i-burn)/thin + # } + # if(i %% draw_iter == 0) { + sp_num = ncol(BSFG_state$Posterior$Lambda) + draw_results_diagnostics(sp_num,params,run_variables,Lambda, F_h2, Posterior,E_a_prec,resid_Y_prec,traitnames) + +} + + ComparingGMatrix_plot = function(target){ #load data from the original population load("BSFG_state.RData")