diff --git a/R/simulate_flocker_data.R b/R/simulate_flocker_data.R index 513824c..4265aeb 100644 --- a/R/simulate_flocker_data.R +++ b/R/simulate_flocker_data.R @@ -28,14 +28,53 @@ #' be simulated without any covariate influence on occupancy. #' @param rep_constant logical: create data with unit covariates only (TRUE) #' or data that includes event covariates (FALSE) -#' @param params a named list containing of parameter values to use in simulation. -#' Any required parameters whose names are not in this list will be assigned their -#' default values. To see the parameter names and structures required, run with -#' `params = NULL` (the default) and examine the `$params` element of the output. -#' @param covariates a dataframe of covariate values to use in simulation, or -#' NULL to simulate values. To see the covariate names and structures required, -#' run with `covariates = NULL` (the default) and examine the `$covariates` element -#' of the output. +#' @param params A named list controlling the coefficients used to generate the +#' latent occupancy state and detections. Supported names are: +#' \itemize{ +#' \item \code{coefs}: A numeric data.frame of per-species coefficient +#' values. Must have \code{n_sp} rows and +#' one column for each coefficient required by the requested model type. +#' Column names must match the required set (order does not matter). +#' If supplied, \code{coef_means} and \code{Sigma} must not be supplied. +#' \item \code{coef_means}: Numeric vector of length \code{n_coef} giving the +#' mean of the multivariate normal distribution used to simulate per-species +#' coefficients when \code{coefs} is not supplied. +#' \item \code{Sigma}: Numeric \code{n_coef x n_coef} symmetric covariance +#' matrix for the multivariate normal distribution used to simulate per-species +#' coefficients when \code{coefs} is not supplied. +#' } +#' +#' If \code{params = NULL} (the default), coefficients are simulated from a +#' multivariate normal prior with \code{coef_means = rep(0, n_coef)} and a +#' default \code{Sigma}. The dimension \code{n_coef} and the required coefficient +#' names depend on the requested model type: +#' \describe{ +#' \item{Always}{\code{det_intercept}, \code{det_slope_unit}} +#' \item{If \code{rep_constant = FALSE}}{\code{det_slope_visit}} +#' \item{If \code{n_season == 1} or \code{multi_init == "explicit"}}{\code{occ_intercept}} +#' \item{If \code{n_season == 1} or \code{multi_init == "explicit"}, and \code{augmented = FALSE}}{\code{occ_slope_unit}} +#' \item{If \code{n_season > 1}}{\code{col_intercept}, \code{col_slope_unit}} +#' \item{If \code{multiseason == "colex"}}{\code{ex_intercept}, \code{ex_slope_unit}} +#' \item{If \code{multiseason == "autologistic"}}{\code{auto_intercept}, \code{auto_slope_unit}} +#' } +#' +#' Notes: +#' \itemize{ +#' \item When \code{augmented = TRUE}, \code{occ_slope_unit} is not used and +#' occupancy is simulated with no covariate effect. +#' \item Any names in \code{params} other than \code{coefs}, \code{coef_means}, +#' and \code{Sigma} are rejected with an error. +#' } +#' @param covariates A named list of realized covariate values to use in the +#' simulation. Supported names are: +#' \itemize{ +#' \item \code{uc1}: numeric vector of length \code{n_pt} giving the unit-level covariate. +#' \item \code{ec1}: numeric vector of length \code{n_pt * n_season * n_rep} +#' giving the event-level covariate (ignored if \code{rep_constant = TRUE}). +#' } +#' If \code{NULL} (the default), covariates are simulated internally from +#' standard normal distributions. +#' \code{ec1} is assumed not to vary by species. #' @param seed random seed. NULL uses (and updates) the existing RNG state. Other values #' do not update the global RNG state. #' @param ragged_rep logical: create data with variable (TRUE) or constant @@ -80,7 +119,7 @@ simulate_flocker_data <- function( ) assertthat::assert_that( is.null(multi_init) | isTRUE(multi_init %in% c("explicit", "equilibrium")), - msg = "multiseason must be NULL, 'explicit', or 'equilibrium'" + msg = "multi_init must be NULL, 'explicit', or 'equilibrium'" ) assertthat::assert_that(is_one_logical(augmented), msg = "augmented must be a single logical") assertthat::assert_that(is_one_logical(rep_constant), msg = "rep_constant must be a single logical") @@ -160,6 +199,72 @@ sfd <- function( # If coefs not specified directly, simulate from the prior (either passed via # params or otherwise the default prior). n_coef <- length(coef_names) + + np <- names(params) + + # Disallow unknown params keys (they don't affect simulation) + allowed_keys <- c("coefs", "coef_means", "Sigma") + unknown_keys <- setdiff(np, allowed_keys) + if (length(unknown_keys) > 0) { + stop( + "simulate_flocker_data(): 'params' contains unsupported names that are ignored: ", + paste(unknown_keys, collapse = ", "), + call. = FALSE + ) + } + + if ("coefs" %in% np) { + if ("coef_means" %in% np || "Sigma" %in% np) { + stop( + "simulate_flocker_data(): if params$coefs is provided, do not also provide ", + "params$coef_means or params$Sigma (they would be ignored).", + call. = FALSE + ) + } + + assertthat::assert_that( + is.data.frame(params$coefs), + msg = "params$coefs, if provided, must be a data.frame" + ) + assertthat::assert_that( + nrow(params$coefs) == n_sp, + msg = "params$coefs, if provided, must have n_sp rows (one per species)" + ) + assertthat::assert_that( + all(vapply(params$coefs, is.numeric, logical(1))), + msg = "params$coefs must have all-numeric columns" + ) + assertthat::assert_that( + setequal(colnames(params$coefs), coef_names), + msg = "params$coefs must contain exactly the required coefficient names" + ) + + params$coefs <- params$coefs[, coef_names, drop = FALSE] + } else { + # If coefs absent, validate coef_means/Sigma if present + if ("coef_means" %in% np) { + assertthat::assert_that( + length(params$coef_means) == length(coef_names), + msg = "params$coef_means must have length equal to the number of coefficients" + ) + } + if ("Sigma" %in% np) { + assertthat::assert_that( + is.matrix(params$Sigma), + nrow(params$Sigma) == length(coef_names), + ncol(params$Sigma) == length(coef_names), + msg = "params$Sigma must be an n_coef x n_coef matrix" + ) + assertthat::assert_that( + isTRUE(all.equal(params$Sigma, t(params$Sigma))), + msg = "params$Sigma must be symmetric" + ) + + # could add a check for PSD here, but for now we'll rely on errors from MASS::mvnorm + } + } + + np <- names(params) if (!("coefs" %in% np)) { # coef values not specified directly if (!("coef_means" %in% np)) { @@ -194,9 +299,15 @@ sfd <- function( names(params$coefs) <- coef_names } + + # Ensure all required coefficient names are present (order irrelevant) assertthat::assert_that( - identical(names(params$coefs), coef_names) + setequal(names(params$coefs), coef_names), + msg = "params$coefs must contain exactly the required coefficient names" ) + + # Reorder columns into canonical order + params$coefs <- params$coefs[, coef_names, drop = FALSE] coefs <- params$coefs # Create covariates (currently assume that unit covariates are the same @@ -209,6 +320,13 @@ sfd <- function( covariates <- list( uc1 = stats::rnorm(n_pt)[unit_backbone$id_point] ) + } else { + assertthat::assert_that(is.list(covariates), msg = "covariates must be a named list") + assertthat::assert_that("uc1" %in% names(covariates), msg = "covariates must include uc1") + assertthat::assert_that( + is.numeric(covariates$uc1) && length(covariates$uc1) == n_pt, + msg = "covariates$uc1 must be numeric of length n_pt" + ) } unit_backbone$uc1 <- covariates$uc1 @@ -285,7 +403,12 @@ sfd <- function( coefs$det_slope_unit[as.integer(visit_full$species)] * visit_full$uc1 if (!rep_constant) { - if (!("ec1" %in% names(covariates))) { + if ("ec1" %in% names(covariates)) { + assertthat::assert_that( + is.numeric(covariates$ec1) && length(covariates$ec1) == n_pt * n_season * n_rep, + msg = "covariates$ec1 must be numeric of length n_pt * n_season * n_rep" + ) + } else { covariates$ec1 <- stats::rnorm(n_pt*n_season*n_rep)[ # we don't allow ec1 to vary by species; # this allows the covariate to play nicely with augmented models. @@ -344,7 +467,7 @@ sfd <- function( out <- list(obs = remove_rownames(obs), unit_covs = remove_rownames(unit_covs), - event_covs = lapply(event_covs, remove_rownames) # this yields an empty list if event_covs is NULL + event_covs = if (is.null(event_covs)) NULL else lapply(event_covs, remove_rownames) ) } else if (augmented) { visit_full$id_unit <- interaction(visit_full$id_point, visit_full$species) @@ -379,7 +502,7 @@ sfd <- function( out <- list( obs = remove_rownames(abind::abind(obs_temp, along = 3)), unit_covs = remove_rownames(unit_covs), - event_covs = lapply(event_covs, remove_rownames) + event_covs = if (is.null(event_covs)) NULL else lapply(event_covs, remove_rownames) ) } else if (n_season > 1) { @@ -405,7 +528,7 @@ sfd <- function( } if(rep_constant){ - event_covs <- list() + event_covs <- NULL } else { event_covs <- list(ec1 = remove_rownames(abind::abind(events_temp, along = 3))) } diff --git a/inst/cache_vignettes.R b/inst/cache_vignettes.R index 2ea51ff..7bd982e 100644 --- a/inst/cache_vignettes.R +++ b/inst/cache_vignettes.R @@ -1,37 +1,119 @@ -# This script creates .Rmd vignettes that include pre-computed R output so that -# whent they get built on CI it's a lightweight operation. +if (!file.exists("DESCRIPTION")) { + stop("Run this script from the package root (where DESCRIPTION lives).") +} -# This script should be run every time there is an update to the vignettes -# or an update to the package for which the vignettes are a desirable -# part of the test harness (but ideally the actual test harness should be -# sufficient for testing, and we shouldn't rely on vignettes). +suppressPackageStartupMessages({ + library(withr) + library(knitr) + library(rmarkdown) +}) -# Edits that touch only the text of the vignettes, and not the R computation, -# can be made with extreme care by changing both the .Rmd.orig files and the -# .Rmd files with computation cached. +find_pkg_root <- function() { + path <- normalizePath(getwd(), winslash = "/", mustWork = TRUE) + repeat { + if (file.exists(file.path(path, "DESCRIPTION"))) return(path) + parent <- normalizePath(file.path(path, ".."), winslash = "/", mustWork = TRUE) + if (identical(parent, path)) stop("Could not find package root (no DESCRIPTION found).") + path <- parent + } +} -old_wd <- getwd() +pkg_root <- find_pkg_root() -setwd(paste0(dirname(rstudioapi::getActiveDocumentContext()$path), "/..")) - -knitr::knit( - "vignettes/augmented_models.Rmd.orig", - output = "vignettes/augmented_models.Rmd" +vigs <- list( + c("vignettes/augmented_models.Rmd.orig", "vignettes/augmented_models.Rmd"), + c("vignettes/flocker_tutorial.Rmd.orig", "vignettes/flocker_tutorial.Rmd"), + c("vignettes/nonlinear_models.Rmd.orig", "vignettes/nonlinear_models.Rmd"), + c("vignettes/articles/sbc.Rmd.orig", "vignettes/articles/sbc.Rmd"), + c("vignettes/articles/sbc_multi.Rmd.orig", "vignettes/articles/sbc_multi.Rmd"), + c("vignettes/articles/sbc_aug.Rmd.orig", "vignettes/articles/sbc_aug.Rmd") ) -knitr::knit( - "vignettes/flocker_tutorial.Rmd.orig", - output = "vignettes/flocker_tutorial.Rmd" -) +with_dir(pkg_root, { + # Ensure relative paths in chunks resolve from package root + old_root <- knitr::opts_knit$get("root.dir") + on.exit(knitr::opts_knit$set(root.dir = old_root), add = TRUE) + knitr::opts_knit$set(root.dir = pkg_root) + + knit_one <- function(infile, outfile, fig_path = NULL) { + dir.create(dirname(outfile), recursive = TRUE, showWarnings = FALSE) + + if (!is.null(fig_path)) { + dir.create(fig_path, recursive = TRUE, showWarnings = FALSE) + old_fig <- knitr::opts_chunk$get("fig.path") + on.exit(knitr::opts_chunk$set(fig.path = old_fig), add = TRUE) + knitr::opts_chunk$set(fig.path = paste0(fig_path, "/")) + } + + message("Knitting ", infile, " -> ", outfile) + + env <- new.env(parent = globalenv()) + env$params <- rmarkdown::yaml_front_matter(infile)$params + + knitr::knit(infile, output = outfile, envir = env) + } + + for(i in 1:3){ + knit_one(vigs[[i]][[1]], vigs[[i]][[2]]) + } + for(i in 4:6){ + # SBC: force figures into man/figures/sbc_vignette + knit_one(vigs[[i]][[1]], vigs[[i]][[2]], fig_path = "man/figures/sbc_vignette") + } -knitr::knit( - "vignettes/nonlinear_models.Rmd.orig", - output = "vignettes/nonlinear_models.Rmd" -) +}) -knitr::knit( - "vignettes/sbc.Rmd.orig", - output = "vignettes/sbc.Rmd" -) +# ---- Post-process cached articles to fix figure paths (Pandoc resolves relative +# paths from the input document directory, so vignettes/articles/* need ../man/...) ---- + +fix_article_fig_paths <- function(rmd_path, + from = "man/figures/sbc_vignette/", + to = "../../man/figures/sbc_vignette/") { + # Only makes changes if the file exists and actually contains the "from" string, + # and avoids double-prepending if it already has "../". + stopifnot(length(rmd_path) == 1) + + if (!file.exists(rmd_path)) { + warning("Skipping missing file: ", rmd_path) + return(invisible(FALSE)) + } + + lines <- readLines(rmd_path, warn = FALSE) + + # 1) HTML: + # Replace src="man/figures/..." but NOT src="../man/figures/..." + lines2 <- gsub( + pattern = paste0('src="', from), + replacement = paste0('src="', to), + x = lines, + fixed = TRUE + ) + + # 2) Markdown: ![](man/figures/...) + # Replace (man/figures/...) but NOT (../man/figures/...) + lines2 <- gsub( + pattern = paste0("](", from), + replacement = paste0("](", to), + x = lines2, + fixed = TRUE + ) + + changed <- !identical(lines, lines2) + if (changed) { + writeLines(lines2, rmd_path) + message("Rewrote figure paths in ", rmd_path) + } else { + message("No figure paths to rewrite in ", rmd_path) + } + + invisible(changed) +} + +# Apply to the cached article outputs (the .Rmd files, not the .orig) +article_outfiles <- vapply(vigs[4:6], `[[`, character(1), 2) + +with_dir(pkg_root, { + for (f in article_outfiles) fix_article_fig_paths(f) +}) -setwd(old_wd) +message("Done.") \ No newline at end of file diff --git a/man/figures/sbc_vignette/data-augmented-1.png b/man/figures/sbc_vignette/data-augmented-1.png index 16106a9..ca96d18 100644 Binary files a/man/figures/sbc_vignette/data-augmented-1.png and b/man/figures/sbc_vignette/data-augmented-1.png differ diff --git a/man/figures/sbc_vignette/data-augmented-2.png b/man/figures/sbc_vignette/data-augmented-2.png index e4760ca..5e1c03f 100644 Binary files a/man/figures/sbc_vignette/data-augmented-2.png and b/man/figures/sbc_vignette/data-augmented-2.png differ diff --git a/man/figures/sbc_vignette/data-augmented-3.png b/man/figures/sbc_vignette/data-augmented-3.png index 4dc8fb4..257aaf9 100644 Binary files a/man/figures/sbc_vignette/data-augmented-3.png and b/man/figures/sbc_vignette/data-augmented-3.png differ diff --git a/man/figures/sbc_vignette/multi-auto-eq-1.png b/man/figures/sbc_vignette/multi-auto-eq-1.png index 754c9b9..21aeac7 100644 Binary files a/man/figures/sbc_vignette/multi-auto-eq-1.png and b/man/figures/sbc_vignette/multi-auto-eq-1.png differ diff --git a/man/figures/sbc_vignette/multi-auto-eq-2.png b/man/figures/sbc_vignette/multi-auto-eq-2.png index d19971f..520bd8a 100644 Binary files a/man/figures/sbc_vignette/multi-auto-eq-2.png and b/man/figures/sbc_vignette/multi-auto-eq-2.png differ diff --git a/man/figures/sbc_vignette/multi-auto-eq-3.png b/man/figures/sbc_vignette/multi-auto-eq-3.png index 4185898..ec61717 100644 Binary files a/man/figures/sbc_vignette/multi-auto-eq-3.png and b/man/figures/sbc_vignette/multi-auto-eq-3.png differ diff --git a/man/figures/sbc_vignette/multi-auto-ex-1.png b/man/figures/sbc_vignette/multi-auto-ex-1.png index 2ecbfc5..2eed728 100644 Binary files a/man/figures/sbc_vignette/multi-auto-ex-1.png and b/man/figures/sbc_vignette/multi-auto-ex-1.png differ diff --git a/man/figures/sbc_vignette/multi-auto-ex-2.png b/man/figures/sbc_vignette/multi-auto-ex-2.png index 85916d6..94ae7a7 100644 Binary files a/man/figures/sbc_vignette/multi-auto-ex-2.png and b/man/figures/sbc_vignette/multi-auto-ex-2.png differ diff --git a/man/figures/sbc_vignette/multi-auto-ex-3.png b/man/figures/sbc_vignette/multi-auto-ex-3.png index c4fd058..1f1d7a2 100644 Binary files a/man/figures/sbc_vignette/multi-auto-ex-3.png and b/man/figures/sbc_vignette/multi-auto-ex-3.png differ diff --git a/man/figures/sbc_vignette/multi-colex-eq-1.png b/man/figures/sbc_vignette/multi-colex-eq-1.png index 4a4d038..e368a7a 100644 Binary files a/man/figures/sbc_vignette/multi-colex-eq-1.png and b/man/figures/sbc_vignette/multi-colex-eq-1.png differ diff --git a/man/figures/sbc_vignette/multi-colex-eq-2.png b/man/figures/sbc_vignette/multi-colex-eq-2.png index e69677c..2b37ee1 100644 Binary files a/man/figures/sbc_vignette/multi-colex-eq-2.png and b/man/figures/sbc_vignette/multi-colex-eq-2.png differ diff --git a/man/figures/sbc_vignette/multi-colex-eq-3.png b/man/figures/sbc_vignette/multi-colex-eq-3.png index c13d51f..916055f 100644 Binary files a/man/figures/sbc_vignette/multi-colex-eq-3.png and b/man/figures/sbc_vignette/multi-colex-eq-3.png differ diff --git a/man/figures/sbc_vignette/multi-colex-ex-1.png b/man/figures/sbc_vignette/multi-colex-ex-1.png index c18217b..b23aea3 100644 Binary files a/man/figures/sbc_vignette/multi-colex-ex-1.png and b/man/figures/sbc_vignette/multi-colex-ex-1.png differ diff --git a/man/figures/sbc_vignette/multi-colex-ex-2.png b/man/figures/sbc_vignette/multi-colex-ex-2.png index 47db5fe..5636b82 100644 Binary files a/man/figures/sbc_vignette/multi-colex-ex-2.png and b/man/figures/sbc_vignette/multi-colex-ex-2.png differ diff --git a/man/figures/sbc_vignette/multi-colex-ex-3.png b/man/figures/sbc_vignette/multi-colex-ex-3.png index 1b94ceb..7f5de85 100644 Binary files a/man/figures/sbc_vignette/multi-colex-ex-3.png and b/man/figures/sbc_vignette/multi-colex-ex-3.png differ diff --git a/man/figures/sbc_vignette/rep-constant-1.png b/man/figures/sbc_vignette/rep-constant-1.png index 363fead..7156645 100644 Binary files a/man/figures/sbc_vignette/rep-constant-1.png and b/man/figures/sbc_vignette/rep-constant-1.png differ diff --git a/man/figures/sbc_vignette/rep-constant-2.png b/man/figures/sbc_vignette/rep-constant-2.png index 8475447..4b31ea8 100644 Binary files a/man/figures/sbc_vignette/rep-constant-2.png and b/man/figures/sbc_vignette/rep-constant-2.png differ diff --git a/man/figures/sbc_vignette/rep-constant-3.png b/man/figures/sbc_vignette/rep-constant-3.png index 9fd124f..3f9ecfe 100644 Binary files a/man/figures/sbc_vignette/rep-constant-3.png and b/man/figures/sbc_vignette/rep-constant-3.png differ diff --git a/man/figures/sbc_vignette/rep-varying-1.png b/man/figures/sbc_vignette/rep-varying-1.png index c1294d3..ef1bd52 100644 Binary files a/man/figures/sbc_vignette/rep-varying-1.png and b/man/figures/sbc_vignette/rep-varying-1.png differ diff --git a/man/figures/sbc_vignette/rep-varying-2.png b/man/figures/sbc_vignette/rep-varying-2.png index c5c402e..650441a 100644 Binary files a/man/figures/sbc_vignette/rep-varying-2.png and b/man/figures/sbc_vignette/rep-varying-2.png differ diff --git a/man/figures/sbc_vignette/rep-varying-3.png b/man/figures/sbc_vignette/rep-varying-3.png index 98a0a69..794f800 100644 Binary files a/man/figures/sbc_vignette/rep-varying-3.png and b/man/figures/sbc_vignette/rep-varying-3.png differ diff --git a/man/simulate_flocker_data.Rd b/man/simulate_flocker_data.Rd index 37dc74f..9fcb2af 100644 --- a/man/simulate_flocker_data.Rd +++ b/man/simulate_flocker_data.Rd @@ -53,15 +53,54 @@ be simulated without any covariate influence on occupancy.} \item{rep_constant}{logical: create data with unit covariates only (TRUE) or data that includes event covariates (FALSE)} -\item{params}{a named list containing of parameter values to use in simulation. -Any required parameters whose names are not in this list will be assigned their -default values. To see the parameter names and structures required, run with -`params = NULL` (the default) and examine the `$params` element of the output.} +\item{params}{A named list controlling the coefficients used to generate the + latent occupancy state and detections. Supported names are: + \itemize{ + \item \code{coefs}: A numeric data.frame of per-species coefficient + values. Must have \code{n_sp} rows and + one column for each coefficient required by the requested model type. + Column names must match the required set (order does not matter). + If supplied, \code{coef_means} and \code{Sigma} must not be supplied. + \item \code{coef_means}: Numeric vector of length \code{n_coef} giving the + mean of the multivariate normal distribution used to simulate per-species + coefficients when \code{coefs} is not supplied. + \item \code{Sigma}: Numeric \code{n_coef x n_coef} symmetric covariance + matrix for the multivariate normal distribution used to simulate per-species + coefficients when \code{coefs} is not supplied. + } -\item{covariates}{a dataframe of covariate values to use in simulation, or -NULL to simulate values. To see the covariate names and structures required, -run with `covariates = NULL` (the default) and examine the `$covariates` element -of the output.} + If \code{params = NULL} (the default), coefficients are simulated from a + multivariate normal prior with \code{coef_means = rep(0, n_coef)} and a + default \code{Sigma}. The dimension \code{n_coef} and the required coefficient + names depend on the requested model type: + \describe{ + \item{Always}{\code{det_intercept}, \code{det_slope_unit}} + \item{If \code{rep_constant = FALSE}}{\code{det_slope_visit}} + \item{If \code{n_season == 1} or \code{multi_init == "explicit"}}{\code{occ_intercept}} + \item{If \code{n_season == 1} or \code{multi_init == "explicit"}, and \code{augmented = FALSE}}{\code{occ_slope_unit}} + \item{If \code{n_season > 1}}{\code{col_intercept}, \code{col_slope_unit}} + \item{If \code{multiseason == "colex"}}{\code{ex_intercept}, \code{ex_slope_unit}} + \item{If \code{multiseason == "autologistic"}}{\code{auto_intercept}, \code{auto_slope_unit}} + } + + Notes: + \itemize{ + \item When \code{augmented = TRUE}, \code{occ_slope_unit} is not used and + occupancy is simulated with no covariate effect. + \item Any names in \code{params} other than \code{coefs}, \code{coef_means}, + and \code{Sigma} are rejected with an error. + }} + +\item{covariates}{A named list of realized covariate values to use in the +simulation. Supported names are: +\itemize{ + \item \code{uc1}: numeric vector of length \code{n_pt} giving the unit-level covariate. + \item \code{ec1}: numeric vector of length \code{n_pt * n_season * n_rep} + giving the event-level covariate (ignored if \code{rep_constant = TRUE}). +} +If \code{NULL} (the default), covariates are simulated internally from +standard normal distributions. +\code{ec1} is assumed not to vary by species.} \item{seed}{random seed. NULL uses (and updates) the existing RNG state. Other values do not update the global RNG state.} diff --git a/tests/testthat/test-simulate_flocker_data.R b/tests/testthat/test-simulate_flocker_data.R new file mode 100644 index 0000000..b02ff92 --- /dev/null +++ b/tests/testthat/test-simulate_flocker_data.R @@ -0,0 +1,233 @@ +expect_named <- function(x, nm) { + testthat::expect_true(setequal(names(x), nm)) +} + +expect_array_dim <- function(x, d) { + testthat::expect_true(is.array(x)) + testthat::expect_equal(dim(x), d) +} + +test_that("single-season rep_constant shapes are correct", { + fd <- simulate_flocker_data(n_pt = 7, n_sp = 3, n_rep = 4, rep_constant = TRUE, seed = 1) + + expect_true(is.matrix(fd$obs)) + expect_equal(dim(fd$obs), c(7*3, 4)) + + expect_true(is.data.frame(fd$unit_covs)) + expect_named(fd$unit_covs, c("uc1","species")) + expect_equal(nrow(fd$unit_covs), 7*3) + + expect_true(is.null(fd$event_covs)) + expect_true(is.list(fd$params)) + expect_true(is.list(fd$covariates)) +}) + +test_that("single-season rep_varying includes event covariates with correct shapes", { + fd <- simulate_flocker_data(n_pt = 5, n_sp = 2, n_rep = 3, rep_constant = FALSE, seed = 2) + + expect_true(is.matrix(fd$obs)) + expect_equal(dim(fd$obs), c(5*2, 3)) + + expect_true(is.list(fd$event_covs)) + expect_true("ec1" %in% names(fd$event_covs)) + expect_true(is.matrix(fd$event_covs$ec1)) + expect_equal(dim(fd$event_covs$ec1), c(5*2, 3)) +}) + +test_that("seed produces deterministic output", { + a <- simulate_flocker_data(n_pt=6, n_sp=2, n_rep=3, seed=123) + b <- simulate_flocker_data(n_pt=6, n_sp=2, n_rep=3, seed=123) + + expect_identical(a$obs, b$obs) + expect_identical(a$unit_covs, b$unit_covs) + expect_identical(a$event_covs, b$event_covs) + expect_identical(a$params$coefs, b$params$coefs) +}) + +test_that("non-NULL seed does not advance global RNG", { + set.seed(999) + x1 <- runif(1) + + simulate_flocker_data(n_pt=3, n_sp=1, n_rep=2, seed=1) + + x2 <- runif(1) + + set.seed(999) + y1 <- runif(1) + y2 <- runif(1) + + expect_identical(x1, y1) + expect_identical(x2, y2) +}) + +test_that("NULL seed advances global RNG", { + set.seed(999) + x1 <- runif(1) + + simulate_flocker_data(n_pt=3, n_sp=1, n_rep=2, seed=NULL) + + x2 <- runif(1) + + set.seed(999) + y1 <- runif(1) + y2 <- runif(1) + + expect_identical(x1, y1) + expect_false(identical(x2, y2)) +}) + +test_that("multiseason/multi_init constraints are enforced", { + expect_error(simulate_flocker_data(n_season=1, multiseason="colex", multi_init=NULL)) + expect_error(simulate_flocker_data(n_season=1, multiseason=NULL, multi_init="explicit")) + + expect_error(simulate_flocker_data(n_season=2, multiseason=NULL, multi_init="explicit")) + expect_error(simulate_flocker_data(n_season=2, multiseason="colex", multi_init=NULL)) +}) + +test_that("augmented constraints are enforced", { + expect_error(simulate_flocker_data(n_season=2, augmented=TRUE, multiseason="colex", multi_init="explicit")) +}) + +test_that("params rejects unknown keys", { + expect_error(simulate_flocker_data(params=list(zzz=1), seed=1)) +}) + +test_that("params$coefs validation works", { + expect_error(simulate_flocker_data(n_sp=2, params=list(coefs=matrix(1,2,2)), seed=1)) + + bad_rows <- data.frame(det_intercept=1, det_slope_unit=1, occ_intercept=1, occ_slope_unit=1) + expect_error(simulate_flocker_data(n_sp=2, params=list(coefs=bad_rows), seed=1)) + + bad_col <- data.frame(det_intercept=1, det_slope_unit="a", occ_intercept=1, occ_slope_unit=1) + expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=bad_col), seed=1)) + + bad_names <- data.frame(a=1, b=1, c=1, d=1) + expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=bad_names), seed=1)) +}) + +test_that("cannot supply coefs with coef_means or Sigma", { + cf <- data.frame(det_intercept=0, det_slope_unit=0, occ_intercept=0, occ_slope_unit=0) + expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=cf, coef_means=rep(0,4)), seed=1)) + expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=cf, Sigma=diag(4)), seed=1)) +}) + +test_that("covariates validation works", { + expect_error(simulate_flocker_data(covariates=1, seed=1)) + expect_error(simulate_flocker_data(covariates=list(), seed=1)) + expect_error(simulate_flocker_data(n_pt=3, covariates=list(uc1=1:2), seed=1)) + + # ec1 length check only when rep_constant=FALSE + expect_error(simulate_flocker_data(n_pt=3, n_rep=2, n_season=1, rep_constant=FALSE, + covariates=list(uc1=rnorm(3), ec1=rnorm(3)), seed=1)) +}) + + +test_that("multiseason output shapes are correct", { + fd <- simulate_flocker_data(n_pt=4, n_sp=2, n_rep=3, n_season=5, + multiseason="colex", multi_init="explicit", + rep_constant=FALSE, seed=1) + + expect_true(is.array(fd$obs)) + expect_equal(dim(fd$obs), c(4*2, 3, 5)) + + expect_true(is.list(fd$unit_covs)) + expect_length(fd$unit_covs, 5) + expect_true(all(vapply(fd$unit_covs, is.data.frame, logical(1)))) + expect_equal(nrow(fd$unit_covs[[1]]), 8) + + expect_true(is.list(fd$event_covs)) + expect_true(is.array(fd$event_covs$ec1)) + expect_equal(dim(fd$event_covs$ec1), c(8, 3, 5)) +}) + +test_that("ragged_rep introduces missing visits", { + fd <- simulate_flocker_data(n_pt=10, n_sp=1, n_rep=6, ragged_rep=TRUE, + rep_constant=FALSE, seed=1) + expect_true(any(is.na(fd$obs))) + expect_true(any(is.na(fd$event_covs$ec1))) +}) + +test_that("missing_seasons introduces missing seasons", { + fd <- simulate_flocker_data(n_pt=10, n_sp=1, n_rep=3, n_season=4, + multiseason="colex", multi_init="explicit", + rep_constant=FALSE, missing_seasons=TRUE, seed=1) + # Should be some NA in obs + expect_true(any(is.na(fd$obs))) +}) + +test_that("augmented output is trimmed and has expected shapes", { + fd <- simulate_flocker_data(n_pt=30, n_sp=20, n_rep=4, augmented=TRUE, + rep_constant=FALSE, seed=2) + + expect_true(is.array(fd$obs)) + expect_equal(dim(fd$obs)[1:2], c(30, 4)) + expect_true(dim(fd$obs)[3] <= 20) # trimmed, maybe equal but often less + expect_true(dim(fd$obs)[3] >= 1) + + expect_true(is.data.frame(fd$unit_covs)) + expect_named(fd$unit_covs, c("uc1")) + expect_equal(nrow(fd$unit_covs), 30) + + expect_true(is.list(fd$event_covs)) + expect_true(is.matrix(fd$event_covs$ec1)) + expect_equal(dim(fd$event_covs$ec1), c(30, 4)) +}) + +test_that("coef set is correct: single-season rep-constant", { + fd <- simulate_flocker_data(n_sp=2, rep_constant=TRUE, augmented=FALSE, seed=1) + expect_true(setequal(colnames(fd$params$coefs), + c("det_intercept","det_slope_unit","occ_intercept","occ_slope_unit"))) +}) + +test_that("coef set is correct: multiseason colex equilibrium", { + fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="colex", multi_init="equilibrium", + rep_constant=FALSE, seed=1) + expect_true(setequal(colnames(fd$params$coefs), + c("det_intercept","det_slope_unit","det_slope_visit", + "col_intercept","col_slope_unit","ex_intercept","ex_slope_unit"))) +}) + +test_that("coef set is correct: multiseason autologistic explicit", { + fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="autologistic", multi_init="explicit", + rep_constant=FALSE, seed=1) + expect_true(setequal(colnames(fd$params$coefs), + c("det_intercept","det_slope_unit","det_slope_visit", + "occ_intercept","occ_slope_unit", + "col_intercept","col_slope_unit", + "auto_intercept","auto_slope_unit"))) +}) + +test_that("coef set is correct: multiseason autologistic equilibrium", { + fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="autologistic", multi_init="equilibrium", + rep_constant=FALSE, seed=1) + expect_true(setequal(colnames(fd$params$coefs), + c("det_intercept","det_slope_unit","det_slope_visit", + "col_intercept","col_slope_unit", + "auto_intercept","auto_slope_unit"))) +}) + +test_that("sfd validates covariates$uc1", { + expect_error(flocker:::sfd(n_rep=2,n_pt=3,n_sp=1,n_season=1, + multiseason=NULL,multi_init=NULL,augmented=FALSE,rep_constant=TRUE, + params=list(), + covariates=list(uc1=1:2), + ragged_rep=FALSE,missing_seasons=FALSE)) +}) + +test_that("simulate_flocker_data runs across key configurations", { + configs <- list( + list(n_season=1, rep_constant=TRUE, augmented=FALSE), + list(n_season=1, rep_constant=FALSE, augmented=FALSE), + list(n_season=1, rep_constant=FALSE, augmented=TRUE), + list(n_season=3, rep_constant=FALSE, multiseason="colex", multi_init="explicit"), + list(n_season=3, rep_constant=FALSE, multiseason="colex", multi_init="equilibrium"), + list(n_season=3, rep_constant=FALSE, multiseason="autologistic", multi_init="explicit"), + list(n_season=3, rep_constant=FALSE, multiseason="autologistic", multi_init="equilibrium") + ) + + for (cfg in configs) { + fd <- do.call(simulate_flocker_data, c(list(n_pt=5,n_sp=3,n_rep=2,seed=1), cfg)) + expect_true(is.list(fd)) + expect_true("obs" %in% names(fd)) + } +}) \ No newline at end of file diff --git a/vignettes/articles/sbc.Rmd b/vignettes/articles/sbc.Rmd index 9fc07d8..ec3116f 100644 --- a/vignettes/articles/sbc.Rmd +++ b/vignettes/articles/sbc.Rmd @@ -1,32 +1,29 @@ --- -title: "SBC for flocker models" +title: "SBC for single-season flocker models" author: "Jacob Socolar" -date: "2023-10-22" +date: "2026-02-24" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{SBC for flocker models} + %\VignetteIndexEntry{SBC for single-season flocker models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: n_sims: 1000 n_sites: 200 - n_sims_augmented: 200 - n_sites_augmented: 50 - n_pseudospecies_augmented: 50 --- -```r -params <- list( - n_sims = 1000, - n_sites = 200, - n_sims_augmented = 200, - n_sites_augmented = 50, - n_pseudospecies_augmented = 50 -) + + +### Rmd parameters + +``` +## Parameter value +## 1 n_sims 1000 +## 2 n_sites 200 ``` -```r +``` r library(flocker) library(brms) library(SBC) @@ -35,8 +32,8 @@ set.seed(1) ## Overview -This document performs simulation-based calibration for the models -available in R package `flocker`. Here, our goal is to validate `flocker`'s +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s data formatting, decoding, and likelihood implementations, and not `brms`'s construction of the linear predictors. @@ -50,11 +47,14 @@ occupancy, colonization, extinction and/or autologistic terms as applicable, and one event covariate that affects detection only (for all models except the rep-constant). +In addition to this article on single-season models, articles are available +showing SBC for multi-season models and data-augmented multi-species models. + ## Single-season ### Rep-constant -```r +``` r # make the stancode model_name <- paste0(tempdir(), "/sbc_rep_constant_model.stan") fd <- simulate_flocker_data( @@ -139,23 +139,32 @@ rep_constant_results <- compute_SBC(rep_constant_dataset, rep_constant_backend) plot_ecdf(rep_constant_results) ``` -![plot of chunk rep-constant](../man/figures/sbc_vignette/rep-constant-1.png) +
+plot of chunk rep-constant +

plot of chunk rep-constant

+
-```r +``` r plot_rank_hist(rep_constant_results) ``` -![plot of chunk rep-constant](../man/figures/sbc_vignette/rep-constant-2.png) +
+plot of chunk rep-constant +

plot of chunk rep-constant

+
-```r +``` r plot_ecdf_diff(rep_constant_results) ``` -![plot of chunk rep-constant](../man/figures/sbc_vignette/rep-constant-3.png) +
+plot of chunk rep-constant +

plot of chunk rep-constant

+
### Rep-varying -```r +``` r # make the stancode model_name <- paste0(tempdir(), "/sbc_rep_varying_model.stan") fd <- simulate_flocker_data( @@ -241,7 +250,7 @@ rep_varying_results <- compute_SBC(rep_varying_dataset, rep_varying_backend) ``` ``` -## - 3 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.016. +## - 1 (0%) fits had maximum Rhat > 1.01. Maximum Rhat was 1.01. ``` ``` @@ -250,733 +259,30 @@ rep_varying_results <- compute_SBC(rep_varying_dataset, rep_varying_backend) ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. ``` -```r +``` r plot_ecdf(rep_varying_results) ``` -![plot of chunk rep-varying](../man/figures/sbc_vignette/rep-varying-1.png) +
+plot of chunk rep-varying +

plot of chunk rep-varying

+
-```r +``` r plot_rank_hist(rep_varying_results) ``` -![plot of chunk rep-varying](../man/figures/sbc_vignette/rep-varying-2.png) +
+plot of chunk rep-varying +

plot of chunk rep-varying

+
-```r +``` r plot_ecdf_diff(rep_varying_results) ``` -![plot of chunk rep-varying](../man/figures/sbc_vignette/rep-varying-3.png) - - -## Multi-season -`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize -the initial occupancy state using explicit and equilibrium parameterizations, -for a total of four classes of multi-season model. We validate each class. - -### Colonization-extinction, explicit initial occupancy - -```r -# make the stancode -model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - coefs = data.frame( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "explicit", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "ex"), - multiseason = "colex", - multi_init = "explicit", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -colex_ex_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "explicit", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_occ[1]` = fd$params$coefs$occ_intercept, - `b_occ[2]` = fd$params$coefs$occ_slope_unit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_ex[1]` = fd$params$coefs$ex_intercept, - `b_ex[2]` = fd$params$coefs$ex_slope_unit - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "colex", - multi_init = "explicit" - ) - ) -} - -colex_ex_gen <- SBC_generator_function( - colex_ex_generator, - N = params$n_sites - ) -colex_ex_dataset <- suppressMessages( - generate_datasets(colex_ex_gen, params$n_sims) -) - -colex_ex_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_colex_ex_model.stan") - ) - ) - -colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend) - -plot_ecdf(colex_ex_results) -``` - -![plot of chunk multi-colex-ex](../man/figures/sbc_vignette/multi-colex-ex-1.png) - -```r -plot_rank_hist(colex_ex_results) -``` - -![plot of chunk multi-colex-ex](../man/figures/sbc_vignette/multi-colex-ex-2.png) - -```r -plot_ecdf_diff(colex_ex_results) -``` - -![plot of chunk multi-colex-ex](../man/figures/sbc_vignette/multi-colex-ex-3.png) - -### Colonization-extinction, equilibrium initial occupancy - -```r -# make the stancode -model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "equilibrium", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "ex"), - multiseason = "colex", - multi_init = "equilibrium", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -colex_eq_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "equilibrium", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_ex[1]` = fd$params$coefs$ex_intercept, - `b_ex[2]` = fd$params$coefs$ex_slope_unit - ), - generated = flocker_standata( - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "colex", - multi_init = "equilibrium" - ) - ) -} - -colex_eq_gen <- SBC_generator_function( - colex_eq_generator, - N = params$n_sites - ) -colex_eq_dataset <- suppressMessages( - generate_datasets(colex_eq_gen, params$n_sims) -) - -colex_eq_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_colex_eq_model.stan") - ) - ) - -colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend) -``` - -``` -## - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.012. -``` - -``` -## - 853 (85%) fits had some steps rejected. Maximum number of rejections was 5. -``` - -``` -## Not all diagnostics are OK. -## You can learn more by inspecting $default_diagnostics, $backend_diagnostics -## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. -``` - -```r -plot_ecdf(colex_eq_results) -``` - -![plot of chunk multi-colex-eq](../man/figures/sbc_vignette/multi-colex-eq-1.png) - -```r -plot_rank_hist(colex_eq_results) -``` - -![plot of chunk multi-colex-eq](../man/figures/sbc_vignette/multi-colex-eq-2.png) - -```r -plot_ecdf_diff(colex_eq_results) -``` - -![plot of chunk multi-colex-eq](../man/figures/sbc_vignette/multi-colex-eq-3.png) - -### Autologistic, explicit initial occupancy - -```r -# make the stancode -model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "explicit", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "autologistic"), - multiseason = "autologistic", - multi_init = "explicit", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -auto_ex_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "explicit", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_occ[1]` = fd$params$coefs$occ_intercept, - `b_occ[2]` = fd$params$coefs$occ_slope_unit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_autologistic[1]` = fd$params$coefs$auto_intercept, - `b_autologistic[2]` = fd$params$coefs$auto_slope_unit - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "autologistic", - multi_init = "explicit" - ) - ) -} - -auto_ex_gen <- SBC_generator_function( - auto_ex_generator, - N = params$n_sites - ) -auto_ex_dataset <- suppressMessages( - generate_datasets(auto_ex_gen, params$n_sims) -) - -auto_ex_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_auto_ex_model.stan") - ) - ) - -auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend) - -plot_ecdf(auto_ex_results) -``` - -![plot of chunk multi-auto-ex](../man/figures/sbc_vignette/multi-auto-ex-1.png) - -```r -plot_rank_hist(auto_ex_results) -``` - -![plot of chunk multi-auto-ex](../man/figures/sbc_vignette/multi-auto-ex-2.png) - -```r -plot_ecdf_diff(auto_ex_results) -``` - -![plot of chunk multi-auto-ex](../man/figures/sbc_vignette/multi-auto-ex-3.png) - -### Autologistic, equilibrium initial occupancy - -```r -# make the stancode -model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "equilibrium", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "autologistic"), - multiseason = "autologistic", - multi_init = "equilibrium", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -auto_eq_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "equilibrium", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_autologistic[1]` = fd$params$coefs$auto_intercept, - `b_autologistic[2]` = fd$params$coefs$auto_slope_unit - ), - generated = flocker_standata( - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "autologistic", - multi_init = "equilibrium" - ) - ) -} - -auto_eq_gen <- SBC_generator_function( - auto_eq_generator, - N = params$n_sites - ) -auto_eq_dataset <- suppressMessages( - generate_datasets(auto_eq_gen, params$n_sims) -) - -auto_eq_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_auto_eq_model.stan") - ) - ) - -auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend) -``` - -``` -## - 302 (30%) fits had some steps rejected. Maximum number of rejections was 3. -``` - -``` -## Not all diagnostics are OK. -## You can learn more by inspecting $default_diagnostics, $backend_diagnostics -## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. -``` - -```r -plot_ecdf(auto_eq_results) -``` - -![plot of chunk multi-auto-eq](../man/figures/sbc_vignette/multi-auto-eq-1.png) - -```r -plot_rank_hist(auto_eq_results) -``` - -![plot of chunk multi-auto-eq](../man/figures/sbc_vignette/multi-auto-eq-2.png) - -```r -plot_ecdf_diff(auto_eq_results) -``` - -![plot of chunk multi-auto-eq](../man/figures/sbc_vignette/multi-auto-eq-3.png) - - -## Data-augmented - -```r -# make the stancode -model_name <- paste0(tempdir(), "/sbc_augmented_model.stan") - -omega <- boot::inv.logit(rnorm(1, 0, .1)) -available <- rbinom(1, params$n_pseudospecies_augmented, omega) -unavailable <- params$n_pseudospecies_augmented - available -coef_means <- rnorm(5) # normal prior on random effect means -sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds -Sigma <- diag(5) * sigma - -fd <- simulate_flocker_data( - n_pt = params$n_sites_augmented, n_sp = available, - params = list( - coef_means = coef_means, - Sigma = Sigma, - coefs = data.frame( - det_intercept = rnorm(available, coef_means[1], sigma[1]), - det_slope_unit = rnorm(available, coef_means[2], sigma[2]), - det_slope_visit = rnorm(available, coef_means[3], sigma[3]), - occ_intercept = rnorm(available, coef_means[4], sigma[4]), - occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) - ) - ), - seed = NULL, - rep_constant = FALSE, - ragged_rep = TRUE -) - -obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] - -for(i in 2:available){ - obs_aug <- abind::abind( - obs_aug, - fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], - along = 3 - ) -} - -event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) -unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) - -flocker_data = make_flocker_data( - obs_aug, unit_covs_aug, event_covs_aug, - type = "augmented", n_aug = unavailable, - quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), - f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", class = "sd") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", class = "sd", dpar = "occ") + - brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"), - backend = "cmdstanr", - augmented = TRUE - ) -writeLines(scode, model_name) - -aug_generator <- function(N){ - omega <- boot::inv.logit(rnorm(1, 0, .1)) - available <- rbinom(1, params$n_pseudospecies_augmented, omega) - unavailable <- params$n_pseudospecies_augmented - available - coef_means <- rnorm(5) # normal prior on random effect means - sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds - Sigma <- diag(5) * sigma - - fd <- simulate_flocker_data( - n_pt = params$n_sites_augmented, n_sp = available, - params = list( - coef_means = coef_means, - Sigma = Sigma, - coefs = data.frame( - det_intercept = rnorm(available, coef_means[1], sigma[1]), - det_slope_unit = rnorm(available, coef_means[2], sigma[2]), - det_slope_visit = rnorm(available, coef_means[3], sigma[3]), - occ_intercept = rnorm(available, coef_means[4], sigma[4]), - occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) - ) - ), - seed = NULL, - rep_constant = FALSE, - ragged_rep = TRUE - ) - - obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] - - for(i in 2:available){ - obs_aug <- abind::abind( - obs_aug, - fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], - along = 3 - ) - } - - event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) - unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) - - flocker_data = make_flocker_data( - obs_aug, unit_covs_aug, event_covs_aug, - type = "augmented", n_aug = unavailable, - quiet = TRUE) - # format for return - list( - variables = list( - `b[1]` = fd$params$coef_means[1], - `b[2]` = fd$params$coef_means[2], - `b[3]` = fd$params$coef_means[3], - `b_occ[1]` = fd$params$coef_means[4], - `b_occ[2]` = fd$params$coef_means[5], - `Intercept_Omega` = boot::logit(omega) - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), - f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), - flocker_data = flocker_data, - augmented = TRUE - ) - ) -} - -aug_gen <- SBC_generator_function( - aug_generator, - N = params$n_sites_augmented - ) -aug_dataset <- suppressMessages( - generate_datasets(aug_gen, params$n_sims_augmented) -) - -aug_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_augmented_model.stan") - ) - ) - -aug_results <- compute_SBC(aug_dataset, aug_backend) -``` - -``` -## - 4 (2%) fits had at least one Rhat > 1.01. Largest Rhat was 1.022. -``` - -``` -## - 2 (1%) fits had divergent transitions. Maximum number of divergences was 2. -``` - -``` -## - 7 (4%) fits had some steps rejected. Maximum number of rejections was 1. -``` - -``` -## Not all diagnostics are OK. -## You can learn more by inspecting $default_diagnostics, $backend_diagnostics -## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. -``` - -```r -plot_ecdf(aug_results) -``` - -![plot of chunk data-augmented](../man/figures/sbc_vignette/data-augmented-1.png) - -```r -plot_rank_hist(aug_results) -``` - -![plot of chunk data-augmented](../man/figures/sbc_vignette/data-augmented-2.png) - -```r -plot_ecdf_diff(aug_results) -``` +
+plot of chunk rep-varying +

plot of chunk rep-varying

+
-![plot of chunk data-augmented](../man/figures/sbc_vignette/data-augmented-3.png) diff --git a/vignettes/articles/sbc.Rmd.orig b/vignettes/articles/sbc.Rmd.orig index dc7859a..0fcda31 100644 --- a/vignettes/articles/sbc.Rmd.orig +++ b/vignettes/articles/sbc.Rmd.orig @@ -1,31 +1,33 @@ --- -title: "SBC for flocker models" +title: "SBC for single-season flocker models" author: "Jacob Socolar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{SBC for flocker models} + %\VignetteIndexEntry{SBC for single-season flocker models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: n_sims: 1000 n_sites: 200 - n_sims_augmented: 200 - n_sites_augmented: 50 - n_pseudospecies_augmented: 50 --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set( + echo = TRUE, + fig.path = "man/figures/sbc_vignette/", + fig.retina = 2 +) +dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE) + ``` -```{r display_params} -params <- list( - n_sims = 1000, - n_sites = 200, - n_sims_augmented = 200, - n_sites_augmented = 50, - n_pseudospecies_augmented = 50 +### Rmd parameters +```{r display_params, echo = FALSE} +data.frame( + Parameter = names(params), + value = unlist(params, use.names = FALSE), + row.names = NULL ) ``` @@ -39,8 +41,8 @@ set.seed(1) ## Overview -This document performs simulation-based calibration for the models -available in R package `flocker`. Here, our goal is to validate `flocker`'s +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s data formatting, decoding, and likelihood implementations, and not `brms`'s construction of the linear predictors. @@ -54,6 +56,9 @@ occupancy, colonization, extinction and/or autologistic terms as applicable, and one event covariate that affects detection only (for all models except the rep-constant). +In addition to this article on single-season models, articles are available +showing SBC for multi-season models and data-augmented multi-species models. + ## Single-season ### Rep-constant @@ -236,607 +241,3 @@ plot_ecdf_diff(rep_varying_results) ``` - -## Multi-season -`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize -the initial occupancy state using explicit and equilibrium parameterizations, -for a total of four classes of multi-season model. We validate each class. - -### Colonization-extinction, explicit initial occupancy -```{r multi-colex-ex} -# make the stancode -model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - coefs = data.frame( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "explicit", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "ex"), - multiseason = "colex", - multi_init = "explicit", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -colex_ex_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "explicit", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_occ[1]` = fd$params$coefs$occ_intercept, - `b_occ[2]` = fd$params$coefs$occ_slope_unit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_ex[1]` = fd$params$coefs$ex_intercept, - `b_ex[2]` = fd$params$coefs$ex_slope_unit - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "colex", - multi_init = "explicit" - ) - ) -} - -colex_ex_gen <- SBC_generator_function( - colex_ex_generator, - N = params$n_sites - ) -colex_ex_dataset <- suppressMessages( - generate_datasets(colex_ex_gen, params$n_sims) -) - -colex_ex_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_colex_ex_model.stan") - ) - ) - -colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend) - -plot_ecdf(colex_ex_results) -plot_rank_hist(colex_ex_results) -plot_ecdf_diff(colex_ex_results) - -``` - -### Colonization-extinction, equilibrium initial occupancy -```{r multi-colex-eq, eval = TRUE} -# make the stancode -model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "equilibrium", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "ex"), - multiseason = "colex", - multi_init = "equilibrium", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -colex_eq_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - ex_intercept = rnorm(1), - ex_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "colex", - multi_init = "equilibrium", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_ex[1]` = fd$params$coefs$ex_intercept, - `b_ex[2]` = fd$params$coefs$ex_slope_unit - ), - generated = flocker_standata( - f_col = ~ 0 + Intercept + uc1, - f_ex = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "colex", - multi_init = "equilibrium" - ) - ) -} - -colex_eq_gen <- SBC_generator_function( - colex_eq_generator, - N = params$n_sites - ) -colex_eq_dataset <- suppressMessages( - generate_datasets(colex_eq_gen, params$n_sims) -) - -colex_eq_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_colex_eq_model.stan") - ) - ) - -colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend) - -plot_ecdf(colex_eq_results) -plot_rank_hist(colex_eq_results) -plot_ecdf_diff(colex_eq_results) - -``` - -### Autologistic, explicit initial occupancy -```{r multi-auto-ex, eval = TRUE} -# make the stancode -model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "explicit", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "autologistic"), - multiseason = "autologistic", - multi_init = "explicit", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -auto_ex_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - occ_intercept = rnorm(1), - occ_slope_unit = rnorm(1), - colo_intercept = rnorm(1), - colo_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "explicit", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_occ[1]` = fd$params$coefs$occ_intercept, - `b_occ[2]` = fd$params$coefs$occ_slope_unit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_autologistic[1]` = fd$params$coefs$auto_intercept, - `b_autologistic[2]` = fd$params$coefs$auto_slope_unit - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1, - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "autologistic", - multi_init = "explicit" - ) - ) -} - -auto_ex_gen <- SBC_generator_function( - auto_ex_generator, - N = params$n_sites - ) -auto_ex_dataset <- suppressMessages( - generate_datasets(auto_ex_gen, params$n_sims) -) - -auto_ex_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_auto_ex_model.stan") - ) - ) - -auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend) - -plot_ecdf(auto_ex_results) -plot_rank_hist(auto_ex_results) -plot_ecdf_diff(auto_ex_results) - -``` - -### Autologistic, equilibrium initial occupancy -```{r multi-auto-eq, eval = TRUE} -# make the stancode -model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan") -fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "equilibrium", - ragged_rep = TRUE -) -flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - -scode <- flocker_stancode( - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", dpar = "colo") + - brms::set_prior("std_normal()", dpar = "autologistic"), - multiseason = "autologistic", - multi_init = "equilibrium", - backend = "cmdstanr" - ) -writeLines(scode, model_name) - -auto_eq_generator <- function(N){ - fd <- simulate_flocker_data( - n_pt = params$n_sites, n_sp = 1, n_season = 4, - params = list( - det_intercept = rnorm(1), - det_slope_unit = rnorm(1), - det_slope_visit = rnorm(1), - col_intercept = rnorm(1), - col_slope_unit = rnorm(1), - auto_intercept = rnorm(1), - auto_slope_unit = rnorm(1) - ), - seed = NULL, - rep_constant = FALSE, - multiseason = "autologistic", - multi_init = "equilibrium", - ragged_rep = TRUE - ) - - flocker_data = make_flocker_data( - fd$obs, fd$unit_covs, fd$event_covs, - type = "multi", quiet = TRUE) - - # format for return - list( - variables = list( - `b[1]` = fd$params$coefs$det_intercept, - `b[2]` = fd$params$coefs$det_slope_unit, - `b[3]` = fd$params$coefs$det_slope_visit, - `b_colo[1]` = fd$params$coefs$col_intercept, - `b_colo[2]` = fd$params$coefs$col_slope_unit, - `b_autologistic[1]` = fd$params$coefs$auto_intercept, - `b_autologistic[2]` = fd$params$coefs$auto_slope_unit - ), - generated = flocker_standata( - f_col = ~ 0 + Intercept + uc1, - f_auto = ~ 0 + Intercept + uc1, - f_det = ~ 0 + Intercept + uc1 + ec1, - flocker_data = flocker_data, - multiseason = "autologistic", - multi_init = "equilibrium" - ) - ) -} - -auto_eq_gen <- SBC_generator_function( - auto_eq_generator, - N = params$n_sites - ) -auto_eq_dataset <- suppressMessages( - generate_datasets(auto_eq_gen, params$n_sims) -) - -auto_eq_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_auto_eq_model.stan") - ) - ) - -auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend) - -plot_ecdf(auto_eq_results) -plot_rank_hist(auto_eq_results) -plot_ecdf_diff(auto_eq_results) - -``` - - -## Data-augmented -```{r data-augmented, eval = TRUE} -# make the stancode -model_name <- paste0(tempdir(), "/sbc_augmented_model.stan") - -omega <- boot::inv.logit(rnorm(1, 0, .1)) -available <- rbinom(1, params$n_pseudospecies_augmented, omega) -unavailable <- params$n_pseudospecies_augmented - available -coef_means <- rnorm(5) # normal prior on random effect means -sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds -Sigma <- diag(5) * sigma - -fd <- simulate_flocker_data( - n_pt = params$n_sites_augmented, n_sp = available, - params = list( - coef_means = coef_means, - Sigma = Sigma, - coefs = data.frame( - det_intercept = rnorm(available, coef_means[1], sigma[1]), - det_slope_unit = rnorm(available, coef_means[2], sigma[2]), - det_slope_visit = rnorm(available, coef_means[3], sigma[3]), - occ_intercept = rnorm(available, coef_means[4], sigma[4]), - occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) - ) - ), - seed = NULL, - rep_constant = FALSE, - ragged_rep = TRUE -) - -obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] - -for(i in 2:available){ - obs_aug <- abind::abind( - obs_aug, - fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], - along = 3 - ) -} - -event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) -unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) - -flocker_data = make_flocker_data( - obs_aug, unit_covs_aug, event_covs_aug, - type = "augmented", n_aug = unavailable, - quiet = TRUE) - -scode <- flocker_stancode( - f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), - f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), - flocker_data = flocker_data, - prior = - brms::set_prior("std_normal()") + - brms::set_prior("std_normal()", class = "sd") + - brms::set_prior("std_normal()", dpar = "occ") + - brms::set_prior("std_normal()", class = "sd", dpar = "occ") + - brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"), - backend = "cmdstanr", - augmented = TRUE - ) -writeLines(scode, model_name) - -aug_generator <- function(N){ - omega <- boot::inv.logit(rnorm(1, 0, .1)) - available <- rbinom(1, params$n_pseudospecies_augmented, omega) - unavailable <- params$n_pseudospecies_augmented - available - coef_means <- rnorm(5) # normal prior on random effect means - sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds - Sigma <- diag(5) * sigma - - fd <- simulate_flocker_data( - n_pt = params$n_sites_augmented, n_sp = available, - params = list( - coef_means = coef_means, - Sigma = Sigma, - coefs = data.frame( - det_intercept = rnorm(available, coef_means[1], sigma[1]), - det_slope_unit = rnorm(available, coef_means[2], sigma[2]), - det_slope_visit = rnorm(available, coef_means[3], sigma[3]), - occ_intercept = rnorm(available, coef_means[4], sigma[4]), - occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) - ) - ), - seed = NULL, - rep_constant = FALSE, - ragged_rep = TRUE - ) - - obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] - - for(i in 2:available){ - obs_aug <- abind::abind( - obs_aug, - fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], - along = 3 - ) - } - - event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) - unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) - - flocker_data = make_flocker_data( - obs_aug, unit_covs_aug, event_covs_aug, - type = "augmented", n_aug = unavailable, - quiet = TRUE) - # format for return - list( - variables = list( - `b[1]` = fd$params$coef_means[1], - `b[2]` = fd$params$coef_means[2], - `b[3]` = fd$params$coef_means[3], - `b_occ[1]` = fd$params$coef_means[4], - `b_occ[2]` = fd$params$coef_means[5], - `Intercept_Omega` = boot::logit(omega) - ), - generated = flocker_standata( - f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), - f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), - flocker_data = flocker_data, - augmented = TRUE - ) - ) -} - -aug_gen <- SBC_generator_function( - aug_generator, - N = params$n_sites_augmented - ) -aug_dataset <- suppressMessages( - generate_datasets(aug_gen, params$n_sims_augmented) -) - -aug_backend <- - SBC_backend_cmdstan_sample( - cmdstanr::cmdstan_model( - paste0(tempdir(), "/sbc_augmented_model.stan") - ) - ) - -aug_results <- compute_SBC(aug_dataset, aug_backend) - -plot_ecdf(aug_results) -plot_rank_hist(aug_results) -plot_ecdf_diff(aug_results) - -``` diff --git a/vignettes/articles/sbc_aug.Rmd b/vignettes/articles/sbc_aug.Rmd new file mode 100644 index 0000000..15566b3 --- /dev/null +++ b/vignettes/articles/sbc_aug.Rmd @@ -0,0 +1,238 @@ +--- +title: "SBC for data-augmented flocker models" +author: "Jacob Socolar" +date: "2026-02-26" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SBC for data-augmented flocker models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +params: + n_sims_augmented: 200 + n_sites_augmented: 50 + n_pseudospecies_augmented: 50 +--- + + + +### Rmd parameters + +``` +## Parameter value +## 1 n_sims_augmented 200 +## 2 n_sites_augmented 50 +## 3 n_pseudospecies_augmented 50 +``` + + +``` r +library(flocker) +library(brms) +library(SBC) +set.seed(1) +``` + +## Overview + +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s +data formatting, decoding, and likelihood implementations, and not `brms`'s +construction of the linear predictors. + +The encoding of the data for a `flocker` model tends to be more complex in the +presence of missing observations, and so we include missingness in the data +simulation wherever possible (some visits missing in all models, some time-steps +missing in multiseason models). + +In all models, we include one unit covariate that affects detection and +occupancy, colonization, extinction and/or autologistic terms as applicable, +and one event covariate that affects detection only (for all models except the +rep-constant). + + +## Data-augmented + +``` r +# make the stancode +model_name <- paste0(tempdir(), "/sbc_augmented_model.stan") + +omega <- boot::inv.logit(rnorm(1, 0, .1)) +available <- rbinom(1, params$n_pseudospecies_augmented, omega) +unavailable <- params$n_pseudospecies_augmented - available + +coef_means <- rnorm(5) # population-level effects (std_normal) +sigma <- abs(rnorm(5)) # half-normal(0,1) for sd parameters + +coefs_df <- data.frame( + det_intercept = rnorm(available, coef_means[1], sigma[1]), + det_slope_unit = rnorm(available, coef_means[2], sigma[2]), + det_slope_visit= rnorm(available, coef_means[3], sigma[3]), + occ_intercept = rnorm(available, coef_means[4], sigma[4]), + occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) +) + +fd <- simulate_flocker_data( + n_pt = params$n_sites_augmented, n_sp = available, + params = list(coefs = coefs_df), + seed = NULL, + rep_constant = FALSE, + ragged_rep = TRUE +) + +obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] + +for(i in 2:available){ + obs_aug <- abind::abind( + obs_aug, + fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], + along = 3 + ) +} + +event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) +unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) + +flocker_data = make_flocker_data( + obs_aug, unit_covs_aug, event_covs_aug, + type = "augmented", n_aug = unavailable, + quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), + f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", class = "sd") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", class = "sd", dpar = "occ") + + brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"), + backend = "cmdstanr", + augmented = TRUE + ) +writeLines(scode, model_name) + +aug_generator <- function(N){ + omega <- boot::inv.logit(rnorm(1, 0, .1)) + available <- rbinom(1, params$n_pseudospecies_augmented, omega) + unavailable <- params$n_pseudospecies_augmented - available + + coef_means <- rnorm(5) + sigma <- abs(rnorm(5)) + + coefs_df <- data.frame( + det_intercept = rnorm(available, coef_means[1], sigma[1]), + det_slope_unit = rnorm(available, coef_means[2], sigma[2]), + det_slope_visit = rnorm(available, coef_means[3], sigma[3]), + occ_intercept = rnorm(available, coef_means[4], sigma[4]), + occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) + ) + + fd <- simulate_flocker_data( + n_pt = params$n_sites_augmented, n_sp = available, + params = list(coefs = coefs_df), + seed = NULL, + rep_constant = FALSE, + ragged_rep = TRUE + ) + + obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] + for(i in 2:available){ + obs_aug <- abind::abind( + obs_aug, + fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], + along = 3 + ) + } + + event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) + unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) + + flocker_data <- make_flocker_data( + obs_aug, unit_covs_aug, event_covs_aug, + type = "augmented", n_aug = unavailable, + quiet = TRUE + ) + + list( + variables = list( + `b[1]` = coef_means[1], + `b[2]` = coef_means[2], + `b[3]` = coef_means[3], + `b_occ[1]` = coef_means[4], + `b_occ[2]` = coef_means[5], + `Intercept_Omega` = boot::logit(omega) + # Optional but recommended for “fuller” SBC on the augmented model: + # `sd_1[1]` = sigma[1], ... + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), + f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), + flocker_data = flocker_data, + augmented = TRUE + ) + ) +} + +aug_gen <- SBC_generator_function( + aug_generator, + N = params$n_sites_augmented + ) +aug_dataset <- suppressMessages( + generate_datasets(aug_gen, params$n_sims_augmented) +) + +aug_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_augmented_model.stan") + ) + ) + +aug_results <- compute_SBC(aug_dataset, aug_backend) +``` + +``` +## - 4 (2%) fits had divergences. Maximum number of divergences was 6. +``` + +``` +## - 10 (5%) fits had steps rejected. Maximum number of steps rejected was 1. +``` + +``` +## - 1 (0%) fits had maximum Rhat > 1.01. Maximum Rhat was 1.012. +``` + +``` +## Not all diagnostics are OK. +## You can learn more by inspecting $default_diagnostics, $backend_diagnostics +## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. +``` + +``` r +plot_ecdf(aug_results) +``` + +
+plot of chunk data-augmented +

plot of chunk data-augmented

+
+ +``` r +plot_rank_hist(aug_results) +``` + +
+plot of chunk data-augmented +

plot of chunk data-augmented

+
+ +``` r +plot_ecdf_diff(aug_results) +``` + +
+plot of chunk data-augmented +

plot of chunk data-augmented

+
diff --git a/vignettes/articles/sbc_aug.Rmd.orig b/vignettes/articles/sbc_aug.Rmd.orig new file mode 100644 index 0000000..51d2f56 --- /dev/null +++ b/vignettes/articles/sbc_aug.Rmd.orig @@ -0,0 +1,205 @@ +--- +title: "SBC for data-augmented flocker models" +author: "Jacob Socolar" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SBC for data-augmented flocker models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +params: + n_sims_augmented: 200 + n_sites_augmented: 50 + n_pseudospecies_augmented: 50 +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + fig.path = "man/figures/sbc_vignette/", + fig.retina = 2 +) +dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE) + +``` + +### Rmd parameters +```{r display_params, echo = FALSE} +data.frame( + Parameter = names(params), + value = unlist(params, use.names = FALSE), + row.names = NULL +) + +``` + +```{r setup2, message = FALSE} +library(flocker) +library(brms) +library(SBC) +set.seed(1) +``` + +## Overview + +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s +data formatting, decoding, and likelihood implementations, and not `brms`'s +construction of the linear predictors. + +The encoding of the data for a `flocker` model tends to be more complex in the +presence of missing observations, and so we include missingness in the data +simulation wherever possible (some visits missing in all models, some time-steps +missing in multiseason models). + +In all models, we include one unit covariate that affects detection and +occupancy, colonization, extinction and/or autologistic terms as applicable, +and one event covariate that affects detection only (for all models except the +rep-constant). + + +## Data-augmented +```{r data-augmented, eval = TRUE} +# make the stancode +model_name <- paste0(tempdir(), "/sbc_augmented_model.stan") + +omega <- boot::inv.logit(rnorm(1, 0, .1)) +available <- rbinom(1, params$n_pseudospecies_augmented, omega) +unavailable <- params$n_pseudospecies_augmented - available + +coef_means <- rnorm(5) # population-level effects (std_normal) +sigma <- abs(rnorm(5)) # half-normal(0,1) for sd parameters + +coefs_df <- data.frame( + det_intercept = rnorm(available, coef_means[1], sigma[1]), + det_slope_unit = rnorm(available, coef_means[2], sigma[2]), + det_slope_visit= rnorm(available, coef_means[3], sigma[3]), + occ_intercept = rnorm(available, coef_means[4], sigma[4]), + occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) +) + +fd <- simulate_flocker_data( + n_pt = params$n_sites_augmented, n_sp = available, + params = list(coefs = coefs_df), + seed = NULL, + rep_constant = FALSE, + ragged_rep = TRUE +) + +obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] + +for(i in 2:available){ + obs_aug <- abind::abind( + obs_aug, + fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], + along = 3 + ) +} + +event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) +unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) + +flocker_data = make_flocker_data( + obs_aug, unit_covs_aug, event_covs_aug, + type = "augmented", n_aug = unavailable, + quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), + f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", class = "sd") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", class = "sd", dpar = "occ") + + brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"), + backend = "cmdstanr", + augmented = TRUE + ) +writeLines(scode, model_name) + +aug_generator <- function(N){ + omega <- boot::inv.logit(rnorm(1, 0, .1)) + available <- rbinom(1, params$n_pseudospecies_augmented, omega) + unavailable <- params$n_pseudospecies_augmented - available + + coef_means <- rnorm(5) + sigma <- abs(rnorm(5)) + + coefs_df <- data.frame( + det_intercept = rnorm(available, coef_means[1], sigma[1]), + det_slope_unit = rnorm(available, coef_means[2], sigma[2]), + det_slope_visit = rnorm(available, coef_means[3], sigma[3]), + occ_intercept = rnorm(available, coef_means[4], sigma[4]), + occ_slope_unit = rnorm(available, coef_means[5], sigma[5]) + ) + + fd <- simulate_flocker_data( + n_pt = params$n_sites_augmented, n_sp = available, + params = list(coefs = coefs_df), + seed = NULL, + rep_constant = FALSE, + ragged_rep = TRUE + ) + + obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ] + for(i in 2:available){ + obs_aug <- abind::abind( + obs_aug, + fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ], + along = 3 + ) + } + + event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ]) + unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"]) + + flocker_data <- make_flocker_data( + obs_aug, unit_covs_aug, event_covs_aug, + type = "augmented", n_aug = unavailable, + quiet = TRUE + ) + + list( + variables = list( + `b[1]` = coef_means[1], + `b[2]` = coef_means[2], + `b[3]` = coef_means[3], + `b_occ[1]` = coef_means[4], + `b_occ[2]` = coef_means[5], + `Intercept_Omega` = boot::logit(omega) + # Optional but recommended for “fuller” SBC on the augmented model: + # `sd_1[1]` = sigma[1], ... + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species), + f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species), + flocker_data = flocker_data, + augmented = TRUE + ) + ) +} + +aug_gen <- SBC_generator_function( + aug_generator, + N = params$n_sites_augmented + ) +aug_dataset <- suppressMessages( + generate_datasets(aug_gen, params$n_sims_augmented) +) + +aug_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_augmented_model.stan") + ) + ) + +aug_results <- compute_SBC(aug_dataset, aug_backend) + +plot_ecdf(aug_results) +plot_rank_hist(aug_results) +plot_ecdf_diff(aug_results) + +``` diff --git a/vignettes/articles/sbc_multi.Rmd b/vignettes/articles/sbc_multi.Rmd new file mode 100644 index 0000000..012c811 --- /dev/null +++ b/vignettes/articles/sbc_multi.Rmd @@ -0,0 +1,635 @@ +--- +title: "SBC for multi-season flocker models" +author: "Jacob Socolar" +date: "2026-02-24" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SBC for multi-season flocker models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +params: + n_sims: 1000 + n_sites: 200 +--- + + + +### Rmd parameters + +``` +## Parameter value +## 1 n_sims 1000 +## 2 n_sites 200 +``` + + +``` r +library(flocker) +library(brms) +library(SBC) +set.seed(1) +``` + +## Overview + +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s +data formatting, decoding, and likelihood implementations, and not `brms`'s +construction of the linear predictors. + +The encoding of the data for a `flocker` model tends to be more complex in the +presence of missing observations, and so we include missingness in the data +simulation wherever possible (some visits missing in all models, some time-steps +missing in multiseason models). + +In all models, we include one unit covariate that affects detection and +occupancy, colonization, extinction and/or autologistic terms as applicable, +and one event covariate that affects detection only (for all models except the +rep-constant). + + +## Multi-season +`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize +the initial occupancy state using explicit and equilibrium parameterizations, +for a total of four classes of multi-season model. We validate each class. + +### Colonization-extinction, explicit initial occupancy + +``` r +# make the stancode +model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "explicit", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "ex"), + multiseason = "colex", + multi_init = "explicit", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +colex_ex_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "explicit", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_occ[1]` = fd$params$coefs$occ_intercept, + `b_occ[2]` = fd$params$coefs$occ_slope_unit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_ex[1]` = fd$params$coefs$ex_intercept, + `b_ex[2]` = fd$params$coefs$ex_slope_unit + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "colex", + multi_init = "explicit" + ) + ) +} + +colex_ex_gen <- SBC_generator_function( + colex_ex_generator, + N = params$n_sites + ) +colex_ex_dataset <- suppressMessages( + generate_datasets(colex_ex_gen, params$n_sims) +) + +colex_ex_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_colex_ex_model.stan") + ) + ) + +colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend) + +plot_ecdf(colex_ex_results) +``` + +
+plot of chunk multi-colex-ex +

plot of chunk multi-colex-ex

+
+ +``` r +plot_rank_hist(colex_ex_results) +``` + +
+plot of chunk multi-colex-ex +

plot of chunk multi-colex-ex

+
+ +``` r +plot_ecdf_diff(colex_ex_results) +``` + +
+plot of chunk multi-colex-ex +

plot of chunk multi-colex-ex

+
+ +### Colonization-extinction, equilibrium initial occupancy + +``` r +# make the stancode +model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "equilibrium", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "ex"), + multiseason = "colex", + multi_init = "equilibrium", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +colex_eq_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "equilibrium", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_ex[1]` = fd$params$coefs$ex_intercept, + `b_ex[2]` = fd$params$coefs$ex_slope_unit + ), + generated = flocker_standata( + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "colex", + multi_init = "equilibrium" + ) + ) +} + +colex_eq_gen <- SBC_generator_function( + colex_eq_generator, + N = params$n_sites + ) +colex_eq_dataset <- suppressMessages( + generate_datasets(colex_eq_gen, params$n_sims) +) + +colex_eq_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_colex_eq_model.stan") + ) + ) + +colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend) +``` + +``` +## - 843 (84%) fits had steps rejected. Maximum number of steps rejected was 4. +``` + +``` +## Not all diagnostics are OK. +## You can learn more by inspecting $default_diagnostics, $backend_diagnostics +## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. +``` + +``` r +plot_ecdf(colex_eq_results) +``` + +
+plot of chunk multi-colex-eq +

plot of chunk multi-colex-eq

+
+ +``` r +plot_rank_hist(colex_eq_results) +``` + +
+plot of chunk multi-colex-eq +

plot of chunk multi-colex-eq

+
+ +``` r +plot_ecdf_diff(colex_eq_results) +``` + +
+plot of chunk multi-colex-eq +

plot of chunk multi-colex-eq

+
+ +### Autologistic, explicit initial occupancy + +``` r +# make the stancode +model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "explicit", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "autologistic"), + multiseason = "autologistic", + multi_init = "explicit", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +auto_ex_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "explicit", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_occ[1]` = fd$params$coefs$occ_intercept, + `b_occ[2]` = fd$params$coefs$occ_slope_unit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_autologistic[1]` = fd$params$coefs$auto_intercept, + `b_autologistic[2]` = fd$params$coefs$auto_slope_unit + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "autologistic", + multi_init = "explicit" + ) + ) +} + +auto_ex_gen <- SBC_generator_function( + auto_ex_generator, + N = params$n_sites + ) +auto_ex_dataset <- suppressMessages( + generate_datasets(auto_ex_gen, params$n_sims) +) + +auto_ex_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_auto_ex_model.stan") + ) + ) + +auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend) + +plot_ecdf(auto_ex_results) +``` + +
+plot of chunk multi-auto-ex +

plot of chunk multi-auto-ex

+
+ +``` r +plot_rank_hist(auto_ex_results) +``` + +
+plot of chunk multi-auto-ex +

plot of chunk multi-auto-ex

+
+ +``` r +plot_ecdf_diff(auto_ex_results) +``` + +
+plot of chunk multi-auto-ex +

plot of chunk multi-auto-ex

+
+ +### Autologistic, equilibrium initial occupancy + +``` r +# make the stancode +model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "equilibrium", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "autologistic"), + multiseason = "autologistic", + multi_init = "equilibrium", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +auto_eq_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "equilibrium", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_autologistic[1]` = fd$params$coefs$auto_intercept, + `b_autologistic[2]` = fd$params$coefs$auto_slope_unit + ), + generated = flocker_standata( + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "autologistic", + multi_init = "equilibrium" + ) + ) +} + +auto_eq_gen <- SBC_generator_function( + auto_eq_generator, + N = params$n_sites + ) +auto_eq_dataset <- suppressMessages( + generate_datasets(auto_eq_gen, params$n_sims) +) + +auto_eq_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_auto_eq_model.stan") + ) + ) + +auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend) +``` + +``` +## - 346 (35%) fits had steps rejected. Maximum number of steps rejected was 3. +``` + +``` +## - 1 (0%) fits had tail ESS / maximum rank < 0.5. Minimum tail ESS / maximum rank was 0.45. This potentially skews the rank statistics. +## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) +## or number of posterior draws (by refitting) might help. +``` + +``` +## Not all diagnostics are OK. +## You can learn more by inspecting $default_diagnostics, $backend_diagnostics +## and/or investigating $outputs/$messages/$warnings for detailed output from the backend. +``` + +``` r +plot_ecdf(auto_eq_results) +``` + +
+plot of chunk multi-auto-eq +

plot of chunk multi-auto-eq

+
+ +``` r +plot_rank_hist(auto_eq_results) +``` + +
+plot of chunk multi-auto-eq +

plot of chunk multi-auto-eq

+
+ +``` r +plot_ecdf_diff(auto_eq_results) +``` + +
+plot of chunk multi-auto-eq +

plot of chunk multi-auto-eq

+
+ diff --git a/vignettes/articles/sbc_multi.Rmd.orig b/vignettes/articles/sbc_multi.Rmd.orig new file mode 100644 index 0000000..da7f2f6 --- /dev/null +++ b/vignettes/articles/sbc_multi.Rmd.orig @@ -0,0 +1,530 @@ +--- +title: "SBC for multi-season flocker models" +author: "Jacob Socolar" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SBC for multi-season flocker models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +params: + n_sims: 1000 + n_sites: 200 +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + fig.path = "man/figures/sbc_vignette/", + fig.retina = 2 +) +dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE) + +``` + +### Rmd parameters +```{r display_params, echo = FALSE} +data.frame( + Parameter = names(params), + value = unlist(params, use.names = FALSE), + row.names = NULL +) + +``` + +```{r setup2, message = FALSE} +library(flocker) +library(brms) +library(SBC) +set.seed(1) +``` + +## Overview + +This document is one of a series of simulation-based calibration exercieses for +models available in R package `flocker`. Here, our goal is to validate `flocker`'s +data formatting, decoding, and likelihood implementations, and not `brms`'s +construction of the linear predictors. + +The encoding of the data for a `flocker` model tends to be more complex in the +presence of missing observations, and so we include missingness in the data +simulation wherever possible (some visits missing in all models, some time-steps +missing in multiseason models). + +In all models, we include one unit covariate that affects detection and +occupancy, colonization, extinction and/or autologistic terms as applicable, +and one event covariate that affects detection only (for all models except the +rep-constant). + + +## Multi-season +`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize +the initial occupancy state using explicit and equilibrium parameterizations, +for a total of four classes of multi-season model. We validate each class. + +### Colonization-extinction, explicit initial occupancy +```{r multi-colex-ex} +# make the stancode +model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "explicit", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "ex"), + multiseason = "colex", + multi_init = "explicit", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +colex_ex_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "explicit", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_occ[1]` = fd$params$coefs$occ_intercept, + `b_occ[2]` = fd$params$coefs$occ_slope_unit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_ex[1]` = fd$params$coefs$ex_intercept, + `b_ex[2]` = fd$params$coefs$ex_slope_unit + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "colex", + multi_init = "explicit" + ) + ) +} + +colex_ex_gen <- SBC_generator_function( + colex_ex_generator, + N = params$n_sites + ) +colex_ex_dataset <- suppressMessages( + generate_datasets(colex_ex_gen, params$n_sims) +) + +colex_ex_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_colex_ex_model.stan") + ) + ) + +colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend) + +plot_ecdf(colex_ex_results) +plot_rank_hist(colex_ex_results) +plot_ecdf_diff(colex_ex_results) + +``` + +### Colonization-extinction, equilibrium initial occupancy +```{r multi-colex-eq, eval = TRUE} +# make the stancode +model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "equilibrium", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "ex"), + multiseason = "colex", + multi_init = "equilibrium", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +colex_eq_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + ex_intercept = rnorm(1), + ex_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "colex", + multi_init = "equilibrium", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_ex[1]` = fd$params$coefs$ex_intercept, + `b_ex[2]` = fd$params$coefs$ex_slope_unit + ), + generated = flocker_standata( + f_col = ~ 0 + Intercept + uc1, + f_ex = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "colex", + multi_init = "equilibrium" + ) + ) +} + +colex_eq_gen <- SBC_generator_function( + colex_eq_generator, + N = params$n_sites + ) +colex_eq_dataset <- suppressMessages( + generate_datasets(colex_eq_gen, params$n_sims) +) + +colex_eq_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_colex_eq_model.stan") + ) + ) + +colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend) + +plot_ecdf(colex_eq_results) +plot_rank_hist(colex_eq_results) +plot_ecdf_diff(colex_eq_results) + +``` + +### Autologistic, explicit initial occupancy +```{r multi-auto-ex, eval = TRUE} +# make the stancode +model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "explicit", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "occ") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "autologistic"), + multiseason = "autologistic", + multi_init = "explicit", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +auto_ex_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + occ_intercept = rnorm(1), + occ_slope_unit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "explicit", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_occ[1]` = fd$params$coefs$occ_intercept, + `b_occ[2]` = fd$params$coefs$occ_slope_unit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_autologistic[1]` = fd$params$coefs$auto_intercept, + `b_autologistic[2]` = fd$params$coefs$auto_slope_unit + ), + generated = flocker_standata( + f_occ = ~ 0 + Intercept + uc1, + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "autologistic", + multi_init = "explicit" + ) + ) +} + +auto_ex_gen <- SBC_generator_function( + auto_ex_generator, + N = params$n_sites + ) +auto_ex_dataset <- suppressMessages( + generate_datasets(auto_ex_gen, params$n_sims) +) + +auto_ex_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_auto_ex_model.stan") + ) + ) + +auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend) + +plot_ecdf(auto_ex_results) +plot_rank_hist(auto_ex_results) +plot_ecdf_diff(auto_ex_results) + +``` + +### Autologistic, equilibrium initial occupancy +```{r multi-auto-eq, eval = TRUE} +# make the stancode +model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan") +fd <- simulate_flocker_data( + n_pt = params$n_sites, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "equilibrium", + ragged_rep = TRUE +) +flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + +scode <- flocker_stancode( + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + prior = + brms::set_prior("std_normal()") + + brms::set_prior("std_normal()", dpar = "colo") + + brms::set_prior("std_normal()", dpar = "autologistic"), + multiseason = "autologistic", + multi_init = "equilibrium", + backend = "cmdstanr" + ) +writeLines(scode, model_name) + +auto_eq_generator <- function(N){ + fd <- simulate_flocker_data( + n_pt = N, n_sp = 1, n_season = 4, + params = list( + coefs = data.frame( + det_intercept = rnorm(1), + det_slope_unit = rnorm(1), + det_slope_visit = rnorm(1), + col_intercept = rnorm(1), + col_slope_unit = rnorm(1), + auto_intercept = rnorm(1), + auto_slope_unit = rnorm(1) + ) + ), + seed = NULL, + rep_constant = FALSE, + multiseason = "autologistic", + multi_init = "equilibrium", + ragged_rep = TRUE + ) + + flocker_data = make_flocker_data( + fd$obs, fd$unit_covs, fd$event_covs, + type = "multi", quiet = TRUE) + + # format for return + list( + variables = list( + `b[1]` = fd$params$coefs$det_intercept, + `b[2]` = fd$params$coefs$det_slope_unit, + `b[3]` = fd$params$coefs$det_slope_visit, + `b_colo[1]` = fd$params$coefs$col_intercept, + `b_colo[2]` = fd$params$coefs$col_slope_unit, + `b_autologistic[1]` = fd$params$coefs$auto_intercept, + `b_autologistic[2]` = fd$params$coefs$auto_slope_unit + ), + generated = flocker_standata( + f_col = ~ 0 + Intercept + uc1, + f_auto = ~ 0 + Intercept + uc1, + f_det = ~ 0 + Intercept + uc1 + ec1, + flocker_data = flocker_data, + multiseason = "autologistic", + multi_init = "equilibrium" + ) + ) +} + +auto_eq_gen <- SBC_generator_function( + auto_eq_generator, + N = params$n_sites + ) +auto_eq_dataset <- suppressMessages( + generate_datasets(auto_eq_gen, params$n_sims) +) + +auto_eq_backend <- + SBC_backend_cmdstan_sample( + cmdstanr::cmdstan_model( + paste0(tempdir(), "/sbc_auto_eq_model.stan") + ) + ) + +auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend) + +plot_ecdf(auto_eq_results) +plot_rank_hist(auto_eq_results) +plot_ecdf_diff(auto_eq_results) + +``` + diff --git a/vignettes/augmented_models.Rmd b/vignettes/augmented_models.Rmd index 4c03920..de1f639 100644 --- a/vignettes/augmented_models.Rmd +++ b/vignettes/augmented_models.Rmd @@ -1,7 +1,7 @@ --- title: "Data-augmented models in flocker" author: "Jacob Socolar" -date: "2023-10-20" +date: "2026-02-24" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Data-augmented models in flocker} @@ -27,366 +27,25 @@ where $s$ indexes the species, $\omega$ is the fitted availability probability o Fitting the data-augmented model in `flocker` requires passing the observed data as a three-dimensional array with sites along the first dimension, visits along the second, and species along the third. Additionally, we must supply the `n_aug` argument to `make_flocker_data()`, specifying how many all-zero pseudospecies to augment the data with. -``` -#> Formatting data for a single-season multispecies occupancy model with data augmentation for never-observed species. For details, see make_flocker_data_augmented. All warnings and error messages should be interpreted in the context of make_flocker_data_augmented -#> formatting rep indices -#> - | - | | 0% - | - | | 1% - | - |= | 1% - | - |= | 2% - | - |== | 2% - | - |== | 3% - | - |=== | 3% - | - |=== | 4% - | - |=== | 5% - | - |==== | 5% - | - |==== | 6% - | - |===== | 6% - | - |===== | 7% - | - |====== | 7% - | - |====== | 8% - | - |====== | 9% - | - |======= | 9% - | - |======= | 10% - | - |======== | 10% - | - |======== | 11% - | - |========= | 11% - | - |========= | 12% - | - |========== | 12% - | - |========== | 13% - | - |========== | 14% - | - |=========== | 14% - | - |=========== | 15% - | - |============ | 15% - | - |============ | 16% - | - |============= | 16% - | - |============= | 17% - | - |============= | 18% - | - |============== | 18% - | - |============== | 19% - | - |=============== | 19% - | - |=============== | 20% - | - |================ | 20% - | - |================ | 21% - | - |================ | 22% - | - |================= | 22% - | - |================= | 23% - | - |================== | 23% - | - |================== | 24% - | - |=================== | 24% - | - |=================== | 25% - | - |=================== | 26% - | - |==================== | 26% - | - |==================== | 27% - | - |===================== | 27% - | - |===================== | 28% - | - |====================== | 28% - | - |====================== | 29% - | - |====================== | 30% - | - |======================= | 30% - | - |======================= | 31% - | - |======================== | 31% - | - |======================== | 32% - | - |========================= | 32% - | - |========================= | 33% - | - |========================= | 34% - | - |========================== | 34% - | - |========================== | 35% - | - |=========================== | 35% - | - |=========================== | 36% - | - |============================ | 36% - | - |============================ | 37% - | - |============================ | 38% - | - |============================= | 38% - | - |============================= | 39% - | - |============================== | 39% - | - |============================== | 40% - | - |=============================== | 40% - | - |=============================== | 41% - | - |================================ | 41% - | - |================================ | 42% - | - |================================ | 43% - | - |================================= | 43% - | - |================================= | 44% - | - |================================== | 44% - | - |================================== | 45% - | - |=================================== | 45% - | - |=================================== | 46% - | - |=================================== | 47% - | - |==================================== | 47% - | - |==================================== | 48% - | - |===================================== | 48% - | - |===================================== | 49% - | - |====================================== | 49% - | - |====================================== | 50% - | - |====================================== | 51% - | - |======================================= | 51% - | - |======================================= | 52% - | - |======================================== | 52% - | - |======================================== | 53% - | - |========================================= | 53% - | - |========================================= | 54% - | - |========================================= | 55% - | - |========================================== | 55% - | - |========================================== | 56% - | - |=========================================== | 56% - | - |=========================================== | 57% - | - |============================================ | 57% - | - |============================================ | 58% - | - |============================================ | 59% - | - |============================================= | 59% - | - |============================================= | 60% - | - |============================================== | 60% - | - |============================================== | 61% - | - |=============================================== | 61% - | - |=============================================== | 62% - | - |================================================ | 62% - | - |================================================ | 63% - | - |================================================ | 64% - | - |================================================= | 64% - | - |================================================= | 65% - | - |================================================== | 65% - | - |================================================== | 66% - | - |=================================================== | 66% - | - |=================================================== | 67% - | - |=================================================== | 68% - | - |==================================================== | 68% - | - |==================================================== | 69% - | - |===================================================== | 69% - | - |===================================================== | 70% - | - |====================================================== | 70% - | - |====================================================== | 71% - | - |====================================================== | 72% - | - |======================================================= | 72% - | - |======================================================= | 73% - | - |======================================================== | 73% - | - |======================================================== | 74% - | - |========================================================= | 74% - | - |========================================================= | 75% - | - |========================================================= | 76% - | - |========================================================== | 76% - | - |========================================================== | 77% - | - |=========================================================== | 77% - | - |=========================================================== | 78% - | - |============================================================ | 78% - | - |============================================================ | 79% - | - |============================================================ | 80% - | - |============================================================= | 80% - | - |============================================================= | 81% - | - |============================================================== | 81% - | - |============================================================== | 82% - | - |=============================================================== | 82% - | - |=============================================================== | 83% - | - |=============================================================== | 84% - | - |================================================================ | 84% - | - |================================================================ | 85% - | - |================================================================= | 85% - | - |================================================================= | 86% - | - |================================================================== | 86% - | - |================================================================== | 87% - | - |================================================================== | 88% - | - |=================================================================== | 88% - | - |=================================================================== | 89% - | - |==================================================================== | 89% - | - |==================================================================== | 90% - | - |===================================================================== | 90% - | - |===================================================================== | 91% - | - |====================================================================== | 91% - | - |====================================================================== | 92% - | - |====================================================================== | 93% - | - |======================================================================= | 93% - | - |======================================================================= | 94% - | - |======================================================================== | 94% - | - |======================================================================== | 95% - | - |========================================================================= | 95% - | - |========================================================================= | 96% - | - |========================================================================= | 97% - | - |========================================================================== | 97% - | - |========================================================================== | 98% - | - |=========================================================================== | 98% - | - |=========================================================================== | 99% - | - |============================================================================| 99% - | - |============================================================================| 100% -#> Compiling Stan program... -#> Start sampling +``` r +set.seed(2) +library(flocker) +d <- simulate_flocker_data( + augmented = TRUE + ) +fd = make_flocker_data( + d$obs, d$unit_covs, d$event_covs, + type = "augmented", n_aug = 100, + quiet = TRUE + ) +fm <- flock( + f_occ = ~ (1 | ff_species), + f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | ff_species), + flocker_data = fd, + augmented = TRUE, + cores = 4, + refresh = 0 + ) #> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. #> Running the chains for more iterations may help. See #> https://mc-stan.org/misc/warnings.html#bulk-ess @@ -395,7 +54,7 @@ Fitting the data-augmented model in `flocker` requires passing the observed data Here, the random effect of species is specified using the special grouping keyword `ff_species` (names beginning with `ff_` are reserved in `flocker` and are not allowed as names for user-supplied covariates). -```r +``` r summary(fm) #> Family: occupancy_augmented #> Links: mu = identity; occ = identity; Omega = identity @@ -406,24 +65,24 @@ summary(fm) #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; #> total post-warmup draws = 4000 #> -#> Group-Level Effects: +#> Multilevel Hyperparameters: #> ~ff_species (Number of levels: 128) #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -#> sd(Intercept) 1.59 0.26 1.16 2.17 1.00 753 1572 -#> sd(uc1) 0.25 0.11 0.06 0.48 1.00 1303 1041 -#> sd(ec1) 0.39 0.09 0.23 0.60 1.00 2010 2878 -#> sd(occ_Intercept) 1.59 0.32 1.09 2.36 1.01 881 1406 -#> cor(Intercept,uc1) 0.74 0.22 0.14 0.97 1.00 2155 1983 -#> cor(Intercept,ec1) 0.57 0.19 0.11 0.85 1.00 2915 2617 -#> cor(uc1,ec1) 0.58 0.26 -0.05 0.94 1.01 913 1378 +#> sd(Intercept) 1.58 0.27 1.15 2.19 1.00 961 1454 +#> sd(uc1) 0.25 0.11 0.06 0.49 1.00 1244 1032 +#> sd(ec1) 0.39 0.09 0.23 0.58 1.00 2281 2648 +#> sd(occ_Intercept) 1.60 0.33 1.10 2.38 1.00 912 1233 +#> cor(Intercept,uc1) 0.74 0.22 0.15 0.98 1.00 2157 2522 +#> cor(Intercept,ec1) 0.56 0.20 0.10 0.86 1.00 2935 2678 +#> cor(uc1,ec1) 0.57 0.27 -0.07 0.94 1.00 793 1486 #> -#> Population-Level Effects: +#> Regression Coefficients: #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -#> Intercept 0.47 0.32 -0.18 1.09 1.01 368 818 -#> occ_Intercept 0.21 0.36 -0.52 0.87 1.01 454 983 -#> Omega_Intercept -1.26 0.22 -1.70 -0.85 1.00 6322 2762 -#> uc1 0.01 0.08 -0.14 0.16 1.00 1061 1985 -#> ec1 0.07 0.09 -0.11 0.25 1.00 1479 2479 +#> Intercept 0.48 0.31 -0.14 1.10 1.01 351 528 +#> occ_Intercept 0.24 0.34 -0.47 0.89 1.01 525 1094 +#> Omega_Intercept -1.27 0.22 -1.70 -0.85 1.00 7528 3124 +#> uc1 0.01 0.07 -0.14 0.16 1.00 797 1472 +#> ec1 0.07 0.09 -0.10 0.25 1.00 1082 1803 #> #> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS #> and Tail_ESS are effective sample size measures, and Rhat is the potential diff --git a/vignettes/augmented_models.Rmd.orig b/vignettes/augmented_models.Rmd.orig index 8c74705..1986389 100644 --- a/vignettes/augmented_models.Rmd.orig +++ b/vignettes/augmented_models.Rmd.orig @@ -39,14 +39,16 @@ d <- simulate_flocker_data( ) fd = make_flocker_data( d$obs, d$unit_covs, d$event_covs, - type = "augmented", n_aug = 100 + type = "augmented", n_aug = 100, + quiet = TRUE ) fm <- flock( f_occ = ~ (1 | ff_species), f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | ff_species), flocker_data = fd, augmented = TRUE, - cores = 4 + cores = 4, + refresh = 0 ) ``` diff --git a/vignettes/flocker_tutorial.Rmd b/vignettes/flocker_tutorial.Rmd index 5c2ab09..85b5967 100644 --- a/vignettes/flocker_tutorial.Rmd +++ b/vignettes/flocker_tutorial.Rmd @@ -1,7 +1,7 @@ --- title: "Fitting occupancy models with flocker" author: Jacob Socolar & Simon Mills -date: "2023-10-20" +date: "2026-02-24" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Fitting occupancy models with flocker} @@ -13,7 +13,7 @@ vignette: > -`flocker` is an R package for fitting [occupancy models](https://jsocolar.github.io/closureOccupancy/) that incorporate +`flocker` is an R package for fitting [occupancy models](https://jsocolar.github.io/closureOccupancy/) that incorportate sophisticated effects structures using simple formula-based syntax. `flocker` is built on R package `brms`, which in turn is a front-end for `Stan`. @@ -64,7 +64,7 @@ To request features or report bugs (much appreciated!), please [open an issue on To make `flocker` and `brms` functions globally available within an R session run: -```r +``` r library(flocker) library(brms) set.seed(1) @@ -78,7 +78,7 @@ arguments will simulate example data for other likelihoods, including multi-season and data-augmented occupancy models. -```r +``` r d <- simulate_flocker_data() ``` @@ -126,7 +126,7 @@ To pass data to `flocker`, we first pass the output from and apply the necessary formatting: -```r +``` r fd_rep_varying <- make_flocker_data( obs = d$obs, unit_covs = d$unit_covs, @@ -153,7 +153,7 @@ as one-sided formulas to the relevant arguments of `flock()` (`f_occ`, `f_det`, `f_col`, `f_ex`, and `f_auto` as applicable). -```r +``` r rep_varying <- flock( f_occ = ~ uc1 + (1 + uc1 | species), f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | species), @@ -183,7 +183,7 @@ formatting; it is insufficient to omit event covariates from the detection formula supplied to `flock()` after formatting the data for a rep-varying model. -```r +``` r fd_rep_constant <- make_flocker_data( obs = d$obs, unit_covs = d$unit_covs @@ -203,7 +203,7 @@ rep_constant <- flock( Note that within-chain parallelization is available (uniquely so) for the rep-constant mode: -```r +``` r rep_constant <- flock( f_occ = ~ uc1 + (1 + uc1 | species), f_det = ~ uc1 + (1 + uc1 | species), @@ -229,7 +229,7 @@ model with explicit inits, but we will be able to fit other models types). -```r +``` r multi_data <- simulate_flocker_data( n_season = 3, n_pt = 300, @@ -253,7 +253,7 @@ occupancy in the first timestep. Depending on hardware, fitting this model might take several minutes. -```r +``` r multi_colex <- flock( f_occ = ~ uc1, f_det = ~ uc1 + ec1, @@ -272,7 +272,7 @@ Here is the colonization-extinction model using equilibrium occupancy probabilities in the first timestep: -```r +``` r multi_colex_eq <- flock( f_det = ~ uc1 + ec1, f_col = ~ uc1, @@ -293,7 +293,7 @@ the formula `f_auto = ~ 1`, but it is fine to relax this constraint and use, e.g. `f_auto = ~ uc1`. -```r +``` r multi_auto <- flock( f_occ = ~ uc1, f_det = ~ uc1 + ec1, @@ -312,7 +312,7 @@ And the autologistic model with equilibrium occupancy probabilities in the first timestep: -```r +``` r multi_auto_eq <- flock( f_det = ~ uc1 + ec1, f_col = ~ uc1, @@ -338,7 +338,7 @@ along the second, and species along the third. Additionally, we must supply the pseudospecies to augment the data with. -```r +``` r augmented_data <- simulate_flocker_data( augmented = TRUE ) @@ -356,9 +356,6 @@ augmented <- flock( silent = 2, refresh = 0 ) -#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. -#> Running the chains for more iterations may help. See -#> https://mc-stan.org/misc/warnings.html#bulk-ess ``` Here, the random effect of species is specified using the special grouping @@ -382,7 +379,7 @@ All post-processing functions from `brms` work on single-season rep-constant models, but do not work on any other model types. For example: -```r +``` r predictions_rep_constant <- brms::posterior_predict(rep_constant) loo_rep_constant <- brms::loo(rep_constant, moment_match = TRUE) brms::conditional_effects(rep_constant) @@ -397,7 +394,7 @@ probability that a given (pseudo)species occurs in the metacommunity) are available via `fitted_flocker`. For example: -```r +``` r fitted_flocker(rep_constant) fitted_flocker(rep_varying) fitted_flocker(multi_colex) @@ -415,7 +412,7 @@ dimension. The function `get_Z()` returns the posterior distribution of occupancy probabilities across the closure-units. The shape of the output depends on the class of model, and is an array in the shape of the first visit in `obs` as passed to `make_flocker_data`, with posterior iterations stacked along the final dimension. Thus, for a single-season rep-varying model, the output is a matrix where rows are posterior iterations, columns are closure-units, and values are draws from the posterior distribution of occupancy probabilities: -```r +``` r get_Z(rep_varying) ``` @@ -443,7 +440,7 @@ third dimension is iterations, and values are `1`, `0`, or `NA`, representing detection, nondetection, and no corresponding sampling event. For example: -```r +``` r predict_flocker(rep_varying) ``` @@ -470,16 +467,15 @@ the binomial coefficient when computing the log-likelihood for the rep-constant model. -```r +``` r loo_compare_flocker( list(rep_constant, rep_varying) ) #> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details. - #> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details. #> elpd_diff se_diff #> model2 0.0 0.0 -#> model1 -171.1 17.2 +#> model1 -121.0 16.1 ``` Likewise, we can compare the four flavors of multi-season model that we fit @@ -489,15 +485,15 @@ probabilities (rather than equilibrium). As expected, the `multi_colex` model performs best: -```r +``` r loo_compare_flocker( list(multi_colex, multi_colex_eq, multi_auto, multi_auto_eq) ) #> elpd_diff se_diff #> model1 0.0 0.0 -#> model3 -7.8 4.1 -#> model2 -27.1 7.2 -#> model4 -32.9 7.9 +#> model2 -16.5 6.2 +#> model3 -41.7 9.5 +#> model4 -57.5 10.8 ``` Flocker also provides the function `loo_flocker()` to return a table of @@ -518,51 +514,51 @@ model fitting using `get_flocker_prior()` which is a drop-in replacement for `brms::get_prior()`. -```r +``` r get_flocker_prior( f_occ = ~ uc1 + (1 + uc1 | species), f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | species), flocker_data = fd_rep_varying ) -#> prior class coef group resp dpar nlpar lb ub source -#> (flat) b default -#> (flat) b ec1 (vectorized) -#> (flat) b uc1 (vectorized) -#> lkj(1) cor default -#> lkj(1) cor species (vectorized) -#> student_t(3, 0, 2.5) Intercept default -#> student_t(3, 0, 2.5) sd 0 default -#> student_t(3, 0, 2.5) sd species 0 (vectorized) -#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized) -#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized) -#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized) -#> (flat) b occ default -#> (flat) b uc1 occ (vectorized) -#> (flat) Intercept occ default -#> student_t(3, 0, 2.5) sd occ 0 default -#> student_t(3, 0, 2.5) sd species occ 0 (vectorized) -#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized) -#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized) +#> prior class coef group resp dpar nlpar lb ub tag source +#> (flat) b default +#> (flat) b ec1 (vectorized) +#> (flat) b uc1 (vectorized) +#> lkj(1) cor default +#> lkj(1) cor species (vectorized) +#> student_t(3, 0, 2.5) Intercept default +#> student_t(3, 0, 2.5) sd 0 default +#> student_t(3, 0, 2.5) sd species 0 (vectorized) +#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized) +#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized) +#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized) +#> (flat) b occ default +#> (flat) b uc1 occ (vectorized) +#> (flat) Intercept occ default +#> student_t(3, 0, 2.5) sd occ 0 default +#> student_t(3, 0, 2.5) sd species occ 0 (vectorized) +#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized) +#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized) brms::prior_summary(rep_varying) -#> prior class coef group resp dpar nlpar lb ub source -#> (flat) b default -#> (flat) b ec1 (vectorized) -#> (flat) b uc1 (vectorized) -#> (flat) b occ default -#> (flat) b uc1 occ (vectorized) -#> student_t(3, 0, 2.5) Intercept default -#> (flat) Intercept occ default -#> lkj_corr_cholesky(1) L default -#> lkj_corr_cholesky(1) L species (vectorized) -#> student_t(3, 0, 2.5) sd 0 default -#> student_t(3, 0, 2.5) sd occ 0 default -#> student_t(3, 0, 2.5) sd species 0 (vectorized) -#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized) -#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized) -#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized) -#> student_t(3, 0, 2.5) sd species occ 0 (vectorized) -#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized) -#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized) +#> prior class coef group resp dpar nlpar lb ub tag source +#> (flat) b default +#> (flat) b ec1 (vectorized) +#> (flat) b uc1 (vectorized) +#> (flat) b occ default +#> (flat) b uc1 occ (vectorized) +#> student_t(3, 0, 2.5) Intercept default +#> (flat) Intercept occ default +#> lkj_corr_cholesky(1) L default +#> lkj_corr_cholesky(1) L species (vectorized) +#> student_t(3, 0, 2.5) sd 0 default +#> student_t(3, 0, 2.5) sd occ 0 default +#> student_t(3, 0, 2.5) sd species 0 (vectorized) +#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized) +#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized) +#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized) +#> student_t(3, 0, 2.5) sd species occ 0 (vectorized) +#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized) +#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized) ``` Note that in examples like the above, with covariates shared between both the occupancy and detection model formulas (`uc1` in this example), then the prior table will contain two entries @@ -571,7 +567,7 @@ one for the parameter governing detection. Specifying priors for parameters in f other than detection can be done with reference to the `dpar` column, e.g.: -```r +``` r user_prior <- c(brms::set_prior("normal(0, 1)", coef = "uc1"), brms::set_prior("normal(0, 3)", coef = "uc1", dpar = "occ")) ``` @@ -589,7 +585,7 @@ the intercepts (flat on the probability scale) when all predictors are held at their means and a moderately regularizing prior on the coefficients: -```r +``` r rep_varying_prior1 <- flock( f_occ = ~ uc1, f_det = ~ ec1, @@ -604,19 +600,19 @@ rep_varying_prior1 <- flock( refresh = 0 ) brms::prior_summary(rep_varying_prior1) -#> prior class coef group resp dpar nlpar lb ub source -#> normal(0,2) b user -#> normal(0,2) b ec1 (vectorized) -#> normal(0,2) b occ user -#> normal(0,2) b uc1 occ (vectorized) -#> logistic(0,1) Intercept user -#> logistic(0,1) Intercept occ user +#> prior class coef group resp dpar nlpar lb ub tag source +#> normal(0,2) b user +#> normal(0,2) b ec1 (vectorized) +#> normal(0,2) b occ user +#> normal(0,2) b uc1 occ (vectorized) +#> logistic(0,1) Intercept user +#> logistic(0,1) Intercept occ user ``` Here is an example where we set informative priors on the intercepts when all covariates are fixed to zero and the same moderately regularizing prior on the coefficients: -```r +``` r rep_varying_prior2 <- flock( f_occ = ~ 0 + Intercept + uc1, f_det = ~ 0 + Intercept + ec1, @@ -631,19 +627,19 @@ rep_varying_prior2 <- flock( refresh = 0 ) brms::prior_summary(rep_varying_prior2) -#> prior class coef group resp dpar nlpar lb ub source -#> normal(0,2) b user -#> normal(0,2) b ec1 (vectorized) -#> normal(1, 1) b Intercept user -#> normal(0,2) b occ user -#> normal(-1, 1) b Intercept occ user -#> normal(0,2) b uc1 occ (vectorized) +#> prior class coef group resp dpar nlpar lb ub tag source +#> normal(0,2) b user +#> normal(0,2) b ec1 (vectorized) +#> normal(1, 1) b Intercept user +#> normal(0,2) b occ user +#> normal(-1, 1) b Intercept occ user +#> normal(0,2) b uc1 occ (vectorized) ``` ### Model formulas Simple formulas follow the same syntax as R's `lm()` function. For example: -```r +``` r mod1 <- flock( f_occ = ~ uc1 + (1|species), f_det = ~ 1, @@ -656,7 +652,7 @@ Simple random effects follow `lme4` syntax, including advanced `lme4` syntax like `||` for uncorrelated effects and `/` and `:` for expansion of multiple grouping terms. Here's a simple example: -```r +``` r mod2 <- flock( f_occ = ~ uc1 + (1|species), f_det = ~ 1, @@ -670,7 +666,7 @@ formulas, but as uncorrelated *between* formulas. For example, the code below estimates a single correlation for the intercept and slope in the occupancy sub-model. -```r +``` r mod3 <- flock( f_occ = ~ uc1 + (1 + uc1 | species), f_det = ~ ec1 + (1 | species), @@ -684,7 +680,7 @@ intercepts in the occupancy and detection sub-models, and correlated effects of `sc1` on occupancy and `vc1` on detection, but no correlations between the intercepts and the slopes in either sub-model: -```r +``` r mod4 <- flock( f_occ = ~ uc1 + (1 |g1| species) + (0 + uc1 |g2| species), f_det = ~ ec1 + (1 |g1| species) + (0 + ec1 |g2| species), @@ -700,7 +696,7 @@ Via `brms`, `flocker` supports Gaussian processes of arbitrary dimensionality monotonic effects of ordinal factors via `brms::mo()` ([see here](https://paul-buerkner.github.io/brms/articles/brms_monotonic.html)). For example: -```r +``` r mod5 <- flock( f_occ = ~ s(uc1), f_det = ~ t2(uc1, ec1), @@ -729,7 +725,7 @@ include multiple phylogenetic effects within a single occupancy model (see [Mills et al. 2022](https://doi.org/10.1002/ecy.3867)). -```r +``` r # simulate an example phylogeny phylogeny <- ape::rtree(30, tip.label = paste0("sp_", 1:30)) @@ -774,7 +770,7 @@ transect, yielding a one-dimensional analog of a spatial autologistic occupancy model. A second caution is to remind users that in multi-species models, users will -likely want to fit separate spatial terms by species ([Doser et al 2022](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.13897)). +likely want to fit separate spatial terms by species ([Doser et al 2022](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.13897)). For Gaussian processes, this can be achieved via the `by` argument to `brms::gp()`. For some conditional autoregressive structures (those that allow disconnected islands), this can be achieved by passing a block-diagonal diff --git a/vignettes/nonlinear_models.Rmd b/vignettes/nonlinear_models.Rmd index 144a63d..6c08786 100644 --- a/vignettes/nonlinear_models.Rmd +++ b/vignettes/nonlinear_models.Rmd @@ -1,7 +1,7 @@ --- title: "Nonlinear models in flocker" author: "Jacob Socolar" -date: "2023-10-21" +date: "2026-02-24" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Nonlinear models in flocker} @@ -23,10 +23,10 @@ details. This vignette focuses on more complicated nonlinear models that require the use of special nonlinear `brms` formulas. We showcase two models. The first fits -a parametric nonlinear predictor. The second fits a model with a spatially +a parameteric nonlinear predictor. The second fits a model with a spatially varying coefficient that is given a gaussian process prior. -## Parametric nonlinear predictor +## Parameteric nonlinear predictor In this scenario, we consider a model where the response is a specific nonlinear parametric function whose parameters are fitted and might or might not depend on covariates. Suppose for example that an expanding population of a territorial species undergoes logistic growth, and also that some unknown proportion of @@ -38,7 +38,7 @@ inflection point. At multiple discrete times, we randomly sample several sites to survey, and survey each of those sites over several repeat visits. -```r +``` r library(flocker); library(brms) set.seed(3) @@ -65,6 +65,7 @@ fd <- make_flocker_data( unit_covs = data.frame(t = data[,c("t")]), event_covs <- list(dummy = matrix(rnorm(n_visit*nrow(data)), ncol = 3)) ) + ``` We wish to fit an occupancy model that recovers the unknown parameters $L$, $k$, @@ -96,7 +97,7 @@ obligatory to pass the entire formula for all distributional parameters as a single `brmsformula` object. This means in turn that the user must be familiar with `flocker`'s internal naming conventions for all of the relevant distributional parameters (`det` and one or more of `occ`, `colo`, `ex`, -`autologistic`, `Omega`). If fitting a data-augmented model, it will be required +`autologistic`, `Omega`). If fitting a data-augmented model, it will be requried to pass the `Omega ~ 1` formula within the `brmsformula` (When passing the traditional one-sided formula to `f_det`, `flocker` includes the formula for `Omega` internally and automatically). @@ -109,7 +110,7 @@ they make no contribution to the likelihood. With all of that said, we can go ahead and fit this model! -```r +``` r fit <- flock(f_det = brms::bf( det ~ 1 + dummy, occ ~ log(L/(1 + exp(-k*(t - t0)) - L)), @@ -130,7 +131,7 @@ fit <- flock(f_det = brms::bf( ``` -```r +``` r summary(fit) #> Family: occupancy_single #> Links: mu = identity; occ = identity @@ -143,13 +144,13 @@ summary(fit) #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; #> total post-warmup draws = 4000 #> -#> Population-Level Effects: +#> Regression Coefficients: #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -#> Intercept -0.91 0.13 -1.18 -0.66 1.00 2535 2318 -#> L_Intercept 0.50 0.10 0.38 0.77 1.00 1241 864 -#> k_Intercept 0.19 0.08 0.07 0.36 1.00 1283 1433 -#> t0_Intercept -5.90 3.38 -10.34 3.17 1.00 1248 921 -#> dummy 0.00 0.08 -0.15 0.15 1.00 2620 2236 +#> Intercept -0.91 0.13 -1.15 -0.66 1.00 2377 2339 +#> L_Intercept 0.51 0.10 0.38 0.75 1.00 1624 1252 +#> k_Intercept 0.18 0.08 0.07 0.37 1.00 1576 1945 +#> t0_Intercept -5.75 3.44 -10.32 3.48 1.00 1466 1157 +#> dummy 0.00 0.08 -0.15 0.15 1.00 2417 2072 #> #> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS #> and Tail_ESS are effective sample size measures, and Rhat is the potential @@ -172,7 +173,7 @@ quite a few of data points to constrain the standard deviation of the Gaussian process, so we simulate with 2000 sites: -```r +``` r set.seed(1) n <- 2000 # sample size lscale <- 0.3 # square root of l of the gaussian kernel @@ -207,6 +208,7 @@ obs <- matrix(nrow = n, ncol = n_visit) for(j in 1:n_visit){ obs[,j] <- gp_data$Z * rbinom(n, 1, boot::inv.logit(det_intercept)) } + ``` @@ -214,7 +216,7 @@ And here's how we can fit this model in `flocker`! Because we have a large numbe computational efficiency. -```r +``` r fd2 <- make_flocker_data(obs = obs, unit_covs = gp_data[, c("x", "y", "covariate")]) svc_mod <- flock( f_det = brms::bf( @@ -227,10 +229,11 @@ svc_mod <- flock( flocker_data = fd2, cores = 4 ) + ``` -```r +``` r summary(svc_mod) #> Family: occupancy_single_C #> Links: mu = identity; occ = identity @@ -242,15 +245,15 @@ summary(svc_mod) #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; #> total post-warmup draws = 4000 #> -#> Gaussian Process Terms: +#> Gaussian Process Hyperparameters: #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -#> sdgp(g_gpxy) 1.66 0.75 0.73 3.66 1.00 2849 3317 -#> lscale(g_gpxy) 0.23 0.11 0.08 0.50 1.00 3850 3141 +#> sdgp(g_gpxy) 1.65 0.72 0.73 3.51 1.00 3024 3017 +#> lscale(g_gpxy) 0.23 0.11 0.08 0.50 1.00 4346 3149 #> -#> Population-Level Effects: +#> Regression Coefficients: #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -#> Intercept -1.04 0.06 -1.15 -0.94 1.00 5532 2810 -#> occint_Intercept 0.09 0.09 -0.08 0.27 1.00 5736 3182 +#> Intercept -1.04 0.06 -1.15 -0.93 1.00 6193 3413 +#> occint_Intercept 0.09 0.09 -0.08 0.26 1.00 6464 3223 #> #> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS #> and Tail_ESS are effective sample size measures, and Rhat is the potential