diff --git a/R/simulate_flocker_data.R b/R/simulate_flocker_data.R
index 513824c..4265aeb 100644
--- a/R/simulate_flocker_data.R
+++ b/R/simulate_flocker_data.R
@@ -28,14 +28,53 @@
#' be simulated without any covariate influence on occupancy.
#' @param rep_constant logical: create data with unit covariates only (TRUE)
#' or data that includes event covariates (FALSE)
-#' @param params a named list containing of parameter values to use in simulation.
-#' Any required parameters whose names are not in this list will be assigned their
-#' default values. To see the parameter names and structures required, run with
-#' `params = NULL` (the default) and examine the `$params` element of the output.
-#' @param covariates a dataframe of covariate values to use in simulation, or
-#' NULL to simulate values. To see the covariate names and structures required,
-#' run with `covariates = NULL` (the default) and examine the `$covariates` element
-#' of the output.
+#' @param params A named list controlling the coefficients used to generate the
+#' latent occupancy state and detections. Supported names are:
+#' \itemize{
+#' \item \code{coefs}: A numeric data.frame of per-species coefficient
+#' values. Must have \code{n_sp} rows and
+#' one column for each coefficient required by the requested model type.
+#' Column names must match the required set (order does not matter).
+#' If supplied, \code{coef_means} and \code{Sigma} must not be supplied.
+#' \item \code{coef_means}: Numeric vector of length \code{n_coef} giving the
+#' mean of the multivariate normal distribution used to simulate per-species
+#' coefficients when \code{coefs} is not supplied.
+#' \item \code{Sigma}: Numeric \code{n_coef x n_coef} symmetric covariance
+#' matrix for the multivariate normal distribution used to simulate per-species
+#' coefficients when \code{coefs} is not supplied.
+#' }
+#'
+#' If \code{params = NULL} (the default), coefficients are simulated from a
+#' multivariate normal prior with \code{coef_means = rep(0, n_coef)} and a
+#' default \code{Sigma}. The dimension \code{n_coef} and the required coefficient
+#' names depend on the requested model type:
+#' \describe{
+#' \item{Always}{\code{det_intercept}, \code{det_slope_unit}}
+#' \item{If \code{rep_constant = FALSE}}{\code{det_slope_visit}}
+#' \item{If \code{n_season == 1} or \code{multi_init == "explicit"}}{\code{occ_intercept}}
+#' \item{If \code{n_season == 1} or \code{multi_init == "explicit"}, and \code{augmented = FALSE}}{\code{occ_slope_unit}}
+#' \item{If \code{n_season > 1}}{\code{col_intercept}, \code{col_slope_unit}}
+#' \item{If \code{multiseason == "colex"}}{\code{ex_intercept}, \code{ex_slope_unit}}
+#' \item{If \code{multiseason == "autologistic"}}{\code{auto_intercept}, \code{auto_slope_unit}}
+#' }
+#'
+#' Notes:
+#' \itemize{
+#' \item When \code{augmented = TRUE}, \code{occ_slope_unit} is not used and
+#' occupancy is simulated with no covariate effect.
+#' \item Any names in \code{params} other than \code{coefs}, \code{coef_means},
+#' and \code{Sigma} are rejected with an error.
+#' }
+#' @param covariates A named list of realized covariate values to use in the
+#' simulation. Supported names are:
+#' \itemize{
+#' \item \code{uc1}: numeric vector of length \code{n_pt} giving the unit-level covariate.
+#' \item \code{ec1}: numeric vector of length \code{n_pt * n_season * n_rep}
+#' giving the event-level covariate (ignored if \code{rep_constant = TRUE}).
+#' }
+#' If \code{NULL} (the default), covariates are simulated internally from
+#' standard normal distributions.
+#' \code{ec1} is assumed not to vary by species.
#' @param seed random seed. NULL uses (and updates) the existing RNG state. Other values
#' do not update the global RNG state.
#' @param ragged_rep logical: create data with variable (TRUE) or constant
@@ -80,7 +119,7 @@ simulate_flocker_data <- function(
)
assertthat::assert_that(
is.null(multi_init) | isTRUE(multi_init %in% c("explicit", "equilibrium")),
- msg = "multiseason must be NULL, 'explicit', or 'equilibrium'"
+ msg = "multi_init must be NULL, 'explicit', or 'equilibrium'"
)
assertthat::assert_that(is_one_logical(augmented), msg = "augmented must be a single logical")
assertthat::assert_that(is_one_logical(rep_constant), msg = "rep_constant must be a single logical")
@@ -160,6 +199,72 @@ sfd <- function(
# If coefs not specified directly, simulate from the prior (either passed via
# params or otherwise the default prior).
n_coef <- length(coef_names)
+
+ np <- names(params)
+
+ # Disallow unknown params keys (they don't affect simulation)
+ allowed_keys <- c("coefs", "coef_means", "Sigma")
+ unknown_keys <- setdiff(np, allowed_keys)
+ if (length(unknown_keys) > 0) {
+ stop(
+ "simulate_flocker_data(): 'params' contains unsupported names that are ignored: ",
+ paste(unknown_keys, collapse = ", "),
+ call. = FALSE
+ )
+ }
+
+ if ("coefs" %in% np) {
+ if ("coef_means" %in% np || "Sigma" %in% np) {
+ stop(
+ "simulate_flocker_data(): if params$coefs is provided, do not also provide ",
+ "params$coef_means or params$Sigma (they would be ignored).",
+ call. = FALSE
+ )
+ }
+
+ assertthat::assert_that(
+ is.data.frame(params$coefs),
+ msg = "params$coefs, if provided, must be a data.frame"
+ )
+ assertthat::assert_that(
+ nrow(params$coefs) == n_sp,
+ msg = "params$coefs, if provided, must have n_sp rows (one per species)"
+ )
+ assertthat::assert_that(
+ all(vapply(params$coefs, is.numeric, logical(1))),
+ msg = "params$coefs must have all-numeric columns"
+ )
+ assertthat::assert_that(
+ setequal(colnames(params$coefs), coef_names),
+ msg = "params$coefs must contain exactly the required coefficient names"
+ )
+
+ params$coefs <- params$coefs[, coef_names, drop = FALSE]
+ } else {
+ # If coefs absent, validate coef_means/Sigma if present
+ if ("coef_means" %in% np) {
+ assertthat::assert_that(
+ length(params$coef_means) == length(coef_names),
+ msg = "params$coef_means must have length equal to the number of coefficients"
+ )
+ }
+ if ("Sigma" %in% np) {
+ assertthat::assert_that(
+ is.matrix(params$Sigma),
+ nrow(params$Sigma) == length(coef_names),
+ ncol(params$Sigma) == length(coef_names),
+ msg = "params$Sigma must be an n_coef x n_coef matrix"
+ )
+ assertthat::assert_that(
+ isTRUE(all.equal(params$Sigma, t(params$Sigma))),
+ msg = "params$Sigma must be symmetric"
+ )
+
+ # could add a check for PSD here, but for now we'll rely on errors from MASS::mvnorm
+ }
+ }
+
+
np <- names(params)
if (!("coefs" %in% np)) { # coef values not specified directly
if (!("coef_means" %in% np)) {
@@ -194,9 +299,15 @@ sfd <- function(
names(params$coefs) <- coef_names
}
+
+ # Ensure all required coefficient names are present (order irrelevant)
assertthat::assert_that(
- identical(names(params$coefs), coef_names)
+ setequal(names(params$coefs), coef_names),
+ msg = "params$coefs must contain exactly the required coefficient names"
)
+
+ # Reorder columns into canonical order
+ params$coefs <- params$coefs[, coef_names, drop = FALSE]
coefs <- params$coefs
# Create covariates (currently assume that unit covariates are the same
@@ -209,6 +320,13 @@ sfd <- function(
covariates <- list(
uc1 = stats::rnorm(n_pt)[unit_backbone$id_point]
)
+ } else {
+ assertthat::assert_that(is.list(covariates), msg = "covariates must be a named list")
+ assertthat::assert_that("uc1" %in% names(covariates), msg = "covariates must include uc1")
+ assertthat::assert_that(
+ is.numeric(covariates$uc1) && length(covariates$uc1) == n_pt,
+ msg = "covariates$uc1 must be numeric of length n_pt"
+ )
}
unit_backbone$uc1 <- covariates$uc1
@@ -285,7 +403,12 @@ sfd <- function(
coefs$det_slope_unit[as.integer(visit_full$species)] * visit_full$uc1
if (!rep_constant) {
- if (!("ec1" %in% names(covariates))) {
+ if ("ec1" %in% names(covariates)) {
+ assertthat::assert_that(
+ is.numeric(covariates$ec1) && length(covariates$ec1) == n_pt * n_season * n_rep,
+ msg = "covariates$ec1 must be numeric of length n_pt * n_season * n_rep"
+ )
+ } else {
covariates$ec1 <- stats::rnorm(n_pt*n_season*n_rep)[ # we don't allow ec1 to vary by species;
# this allows the covariate to play nicely with augmented models.
@@ -344,7 +467,7 @@ sfd <- function(
out <- list(obs = remove_rownames(obs),
unit_covs = remove_rownames(unit_covs),
- event_covs = lapply(event_covs, remove_rownames) # this yields an empty list if event_covs is NULL
+ event_covs = if (is.null(event_covs)) NULL else lapply(event_covs, remove_rownames)
)
} else if (augmented) {
visit_full$id_unit <- interaction(visit_full$id_point, visit_full$species)
@@ -379,7 +502,7 @@ sfd <- function(
out <- list(
obs = remove_rownames(abind::abind(obs_temp, along = 3)),
unit_covs = remove_rownames(unit_covs),
- event_covs = lapply(event_covs, remove_rownames)
+ event_covs = if (is.null(event_covs)) NULL else lapply(event_covs, remove_rownames)
)
} else if (n_season > 1) {
@@ -405,7 +528,7 @@ sfd <- function(
}
if(rep_constant){
- event_covs <- list()
+ event_covs <- NULL
} else {
event_covs <- list(ec1 = remove_rownames(abind::abind(events_temp, along = 3)))
}
diff --git a/inst/cache_vignettes.R b/inst/cache_vignettes.R
index 2ea51ff..7bd982e 100644
--- a/inst/cache_vignettes.R
+++ b/inst/cache_vignettes.R
@@ -1,37 +1,119 @@
-# This script creates .Rmd vignettes that include pre-computed R output so that
-# whent they get built on CI it's a lightweight operation.
+if (!file.exists("DESCRIPTION")) {
+ stop("Run this script from the package root (where DESCRIPTION lives).")
+}
-# This script should be run every time there is an update to the vignettes
-# or an update to the package for which the vignettes are a desirable
-# part of the test harness (but ideally the actual test harness should be
-# sufficient for testing, and we shouldn't rely on vignettes).
+suppressPackageStartupMessages({
+ library(withr)
+ library(knitr)
+ library(rmarkdown)
+})
-# Edits that touch only the text of the vignettes, and not the R computation,
-# can be made with extreme care by changing both the .Rmd.orig files and the
-# .Rmd files with computation cached.
+find_pkg_root <- function() {
+ path <- normalizePath(getwd(), winslash = "/", mustWork = TRUE)
+ repeat {
+ if (file.exists(file.path(path, "DESCRIPTION"))) return(path)
+ parent <- normalizePath(file.path(path, ".."), winslash = "/", mustWork = TRUE)
+ if (identical(parent, path)) stop("Could not find package root (no DESCRIPTION found).")
+ path <- parent
+ }
+}
-old_wd <- getwd()
+pkg_root <- find_pkg_root()
-setwd(paste0(dirname(rstudioapi::getActiveDocumentContext()$path), "/.."))
-
-knitr::knit(
- "vignettes/augmented_models.Rmd.orig",
- output = "vignettes/augmented_models.Rmd"
+vigs <- list(
+ c("vignettes/augmented_models.Rmd.orig", "vignettes/augmented_models.Rmd"),
+ c("vignettes/flocker_tutorial.Rmd.orig", "vignettes/flocker_tutorial.Rmd"),
+ c("vignettes/nonlinear_models.Rmd.orig", "vignettes/nonlinear_models.Rmd"),
+ c("vignettes/articles/sbc.Rmd.orig", "vignettes/articles/sbc.Rmd"),
+ c("vignettes/articles/sbc_multi.Rmd.orig", "vignettes/articles/sbc_multi.Rmd"),
+ c("vignettes/articles/sbc_aug.Rmd.orig", "vignettes/articles/sbc_aug.Rmd")
)
-knitr::knit(
- "vignettes/flocker_tutorial.Rmd.orig",
- output = "vignettes/flocker_tutorial.Rmd"
-)
+with_dir(pkg_root, {
+ # Ensure relative paths in chunks resolve from package root
+ old_root <- knitr::opts_knit$get("root.dir")
+ on.exit(knitr::opts_knit$set(root.dir = old_root), add = TRUE)
+ knitr::opts_knit$set(root.dir = pkg_root)
+
+ knit_one <- function(infile, outfile, fig_path = NULL) {
+ dir.create(dirname(outfile), recursive = TRUE, showWarnings = FALSE)
+
+ if (!is.null(fig_path)) {
+ dir.create(fig_path, recursive = TRUE, showWarnings = FALSE)
+ old_fig <- knitr::opts_chunk$get("fig.path")
+ on.exit(knitr::opts_chunk$set(fig.path = old_fig), add = TRUE)
+ knitr::opts_chunk$set(fig.path = paste0(fig_path, "/"))
+ }
+
+ message("Knitting ", infile, " -> ", outfile)
+
+ env <- new.env(parent = globalenv())
+ env$params <- rmarkdown::yaml_front_matter(infile)$params
+
+ knitr::knit(infile, output = outfile, envir = env)
+ }
+
+ for(i in 1:3){
+ knit_one(vigs[[i]][[1]], vigs[[i]][[2]])
+ }
+ for(i in 4:6){
+ # SBC: force figures into man/figures/sbc_vignette
+ knit_one(vigs[[i]][[1]], vigs[[i]][[2]], fig_path = "man/figures/sbc_vignette")
+ }
-knitr::knit(
- "vignettes/nonlinear_models.Rmd.orig",
- output = "vignettes/nonlinear_models.Rmd"
-)
+})
-knitr::knit(
- "vignettes/sbc.Rmd.orig",
- output = "vignettes/sbc.Rmd"
-)
+# ---- Post-process cached articles to fix figure paths (Pandoc resolves relative
+# paths from the input document directory, so vignettes/articles/* need ../man/...) ----
+
+fix_article_fig_paths <- function(rmd_path,
+ from = "man/figures/sbc_vignette/",
+ to = "../../man/figures/sbc_vignette/") {
+ # Only makes changes if the file exists and actually contains the "from" string,
+ # and avoids double-prepending if it already has "../".
+ stopifnot(length(rmd_path) == 1)
+
+ if (!file.exists(rmd_path)) {
+ warning("Skipping missing file: ", rmd_path)
+ return(invisible(FALSE))
+ }
+
+ lines <- readLines(rmd_path, warn = FALSE)
+
+ # 1) HTML:
+ # Replace src="man/figures/..." but NOT src="../man/figures/..."
+ lines2 <- gsub(
+ pattern = paste0('src="', from),
+ replacement = paste0('src="', to),
+ x = lines,
+ fixed = TRUE
+ )
+
+ # 2) Markdown: 
+ # Replace (man/figures/...) but NOT (../man/figures/...)
+ lines2 <- gsub(
+ pattern = paste0("](", from),
+ replacement = paste0("](", to),
+ x = lines2,
+ fixed = TRUE
+ )
+
+ changed <- !identical(lines, lines2)
+ if (changed) {
+ writeLines(lines2, rmd_path)
+ message("Rewrote figure paths in ", rmd_path)
+ } else {
+ message("No figure paths to rewrite in ", rmd_path)
+ }
+
+ invisible(changed)
+}
+
+# Apply to the cached article outputs (the .Rmd files, not the .orig)
+article_outfiles <- vapply(vigs[4:6], `[[`, character(1), 2)
+
+with_dir(pkg_root, {
+ for (f in article_outfiles) fix_article_fig_paths(f)
+})
-setwd(old_wd)
+message("Done.")
\ No newline at end of file
diff --git a/man/figures/sbc_vignette/data-augmented-1.png b/man/figures/sbc_vignette/data-augmented-1.png
index 16106a9..ca96d18 100644
Binary files a/man/figures/sbc_vignette/data-augmented-1.png and b/man/figures/sbc_vignette/data-augmented-1.png differ
diff --git a/man/figures/sbc_vignette/data-augmented-2.png b/man/figures/sbc_vignette/data-augmented-2.png
index e4760ca..5e1c03f 100644
Binary files a/man/figures/sbc_vignette/data-augmented-2.png and b/man/figures/sbc_vignette/data-augmented-2.png differ
diff --git a/man/figures/sbc_vignette/data-augmented-3.png b/man/figures/sbc_vignette/data-augmented-3.png
index 4dc8fb4..257aaf9 100644
Binary files a/man/figures/sbc_vignette/data-augmented-3.png and b/man/figures/sbc_vignette/data-augmented-3.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-eq-1.png b/man/figures/sbc_vignette/multi-auto-eq-1.png
index 754c9b9..21aeac7 100644
Binary files a/man/figures/sbc_vignette/multi-auto-eq-1.png and b/man/figures/sbc_vignette/multi-auto-eq-1.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-eq-2.png b/man/figures/sbc_vignette/multi-auto-eq-2.png
index d19971f..520bd8a 100644
Binary files a/man/figures/sbc_vignette/multi-auto-eq-2.png and b/man/figures/sbc_vignette/multi-auto-eq-2.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-eq-3.png b/man/figures/sbc_vignette/multi-auto-eq-3.png
index 4185898..ec61717 100644
Binary files a/man/figures/sbc_vignette/multi-auto-eq-3.png and b/man/figures/sbc_vignette/multi-auto-eq-3.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-ex-1.png b/man/figures/sbc_vignette/multi-auto-ex-1.png
index 2ecbfc5..2eed728 100644
Binary files a/man/figures/sbc_vignette/multi-auto-ex-1.png and b/man/figures/sbc_vignette/multi-auto-ex-1.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-ex-2.png b/man/figures/sbc_vignette/multi-auto-ex-2.png
index 85916d6..94ae7a7 100644
Binary files a/man/figures/sbc_vignette/multi-auto-ex-2.png and b/man/figures/sbc_vignette/multi-auto-ex-2.png differ
diff --git a/man/figures/sbc_vignette/multi-auto-ex-3.png b/man/figures/sbc_vignette/multi-auto-ex-3.png
index c4fd058..1f1d7a2 100644
Binary files a/man/figures/sbc_vignette/multi-auto-ex-3.png and b/man/figures/sbc_vignette/multi-auto-ex-3.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-eq-1.png b/man/figures/sbc_vignette/multi-colex-eq-1.png
index 4a4d038..e368a7a 100644
Binary files a/man/figures/sbc_vignette/multi-colex-eq-1.png and b/man/figures/sbc_vignette/multi-colex-eq-1.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-eq-2.png b/man/figures/sbc_vignette/multi-colex-eq-2.png
index e69677c..2b37ee1 100644
Binary files a/man/figures/sbc_vignette/multi-colex-eq-2.png and b/man/figures/sbc_vignette/multi-colex-eq-2.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-eq-3.png b/man/figures/sbc_vignette/multi-colex-eq-3.png
index c13d51f..916055f 100644
Binary files a/man/figures/sbc_vignette/multi-colex-eq-3.png and b/man/figures/sbc_vignette/multi-colex-eq-3.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-ex-1.png b/man/figures/sbc_vignette/multi-colex-ex-1.png
index c18217b..b23aea3 100644
Binary files a/man/figures/sbc_vignette/multi-colex-ex-1.png and b/man/figures/sbc_vignette/multi-colex-ex-1.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-ex-2.png b/man/figures/sbc_vignette/multi-colex-ex-2.png
index 47db5fe..5636b82 100644
Binary files a/man/figures/sbc_vignette/multi-colex-ex-2.png and b/man/figures/sbc_vignette/multi-colex-ex-2.png differ
diff --git a/man/figures/sbc_vignette/multi-colex-ex-3.png b/man/figures/sbc_vignette/multi-colex-ex-3.png
index 1b94ceb..7f5de85 100644
Binary files a/man/figures/sbc_vignette/multi-colex-ex-3.png and b/man/figures/sbc_vignette/multi-colex-ex-3.png differ
diff --git a/man/figures/sbc_vignette/rep-constant-1.png b/man/figures/sbc_vignette/rep-constant-1.png
index 363fead..7156645 100644
Binary files a/man/figures/sbc_vignette/rep-constant-1.png and b/man/figures/sbc_vignette/rep-constant-1.png differ
diff --git a/man/figures/sbc_vignette/rep-constant-2.png b/man/figures/sbc_vignette/rep-constant-2.png
index 8475447..4b31ea8 100644
Binary files a/man/figures/sbc_vignette/rep-constant-2.png and b/man/figures/sbc_vignette/rep-constant-2.png differ
diff --git a/man/figures/sbc_vignette/rep-constant-3.png b/man/figures/sbc_vignette/rep-constant-3.png
index 9fd124f..3f9ecfe 100644
Binary files a/man/figures/sbc_vignette/rep-constant-3.png and b/man/figures/sbc_vignette/rep-constant-3.png differ
diff --git a/man/figures/sbc_vignette/rep-varying-1.png b/man/figures/sbc_vignette/rep-varying-1.png
index c1294d3..ef1bd52 100644
Binary files a/man/figures/sbc_vignette/rep-varying-1.png and b/man/figures/sbc_vignette/rep-varying-1.png differ
diff --git a/man/figures/sbc_vignette/rep-varying-2.png b/man/figures/sbc_vignette/rep-varying-2.png
index c5c402e..650441a 100644
Binary files a/man/figures/sbc_vignette/rep-varying-2.png and b/man/figures/sbc_vignette/rep-varying-2.png differ
diff --git a/man/figures/sbc_vignette/rep-varying-3.png b/man/figures/sbc_vignette/rep-varying-3.png
index 98a0a69..794f800 100644
Binary files a/man/figures/sbc_vignette/rep-varying-3.png and b/man/figures/sbc_vignette/rep-varying-3.png differ
diff --git a/man/simulate_flocker_data.Rd b/man/simulate_flocker_data.Rd
index 37dc74f..9fcb2af 100644
--- a/man/simulate_flocker_data.Rd
+++ b/man/simulate_flocker_data.Rd
@@ -53,15 +53,54 @@ be simulated without any covariate influence on occupancy.}
\item{rep_constant}{logical: create data with unit covariates only (TRUE)
or data that includes event covariates (FALSE)}
-\item{params}{a named list containing of parameter values to use in simulation.
-Any required parameters whose names are not in this list will be assigned their
-default values. To see the parameter names and structures required, run with
-`params = NULL` (the default) and examine the `$params` element of the output.}
+\item{params}{A named list controlling the coefficients used to generate the
+ latent occupancy state and detections. Supported names are:
+ \itemize{
+ \item \code{coefs}: A numeric data.frame of per-species coefficient
+ values. Must have \code{n_sp} rows and
+ one column for each coefficient required by the requested model type.
+ Column names must match the required set (order does not matter).
+ If supplied, \code{coef_means} and \code{Sigma} must not be supplied.
+ \item \code{coef_means}: Numeric vector of length \code{n_coef} giving the
+ mean of the multivariate normal distribution used to simulate per-species
+ coefficients when \code{coefs} is not supplied.
+ \item \code{Sigma}: Numeric \code{n_coef x n_coef} symmetric covariance
+ matrix for the multivariate normal distribution used to simulate per-species
+ coefficients when \code{coefs} is not supplied.
+ }
-\item{covariates}{a dataframe of covariate values to use in simulation, or
-NULL to simulate values. To see the covariate names and structures required,
-run with `covariates = NULL` (the default) and examine the `$covariates` element
-of the output.}
+ If \code{params = NULL} (the default), coefficients are simulated from a
+ multivariate normal prior with \code{coef_means = rep(0, n_coef)} and a
+ default \code{Sigma}. The dimension \code{n_coef} and the required coefficient
+ names depend on the requested model type:
+ \describe{
+ \item{Always}{\code{det_intercept}, \code{det_slope_unit}}
+ \item{If \code{rep_constant = FALSE}}{\code{det_slope_visit}}
+ \item{If \code{n_season == 1} or \code{multi_init == "explicit"}}{\code{occ_intercept}}
+ \item{If \code{n_season == 1} or \code{multi_init == "explicit"}, and \code{augmented = FALSE}}{\code{occ_slope_unit}}
+ \item{If \code{n_season > 1}}{\code{col_intercept}, \code{col_slope_unit}}
+ \item{If \code{multiseason == "colex"}}{\code{ex_intercept}, \code{ex_slope_unit}}
+ \item{If \code{multiseason == "autologistic"}}{\code{auto_intercept}, \code{auto_slope_unit}}
+ }
+
+ Notes:
+ \itemize{
+ \item When \code{augmented = TRUE}, \code{occ_slope_unit} is not used and
+ occupancy is simulated with no covariate effect.
+ \item Any names in \code{params} other than \code{coefs}, \code{coef_means},
+ and \code{Sigma} are rejected with an error.
+ }}
+
+\item{covariates}{A named list of realized covariate values to use in the
+simulation. Supported names are:
+\itemize{
+ \item \code{uc1}: numeric vector of length \code{n_pt} giving the unit-level covariate.
+ \item \code{ec1}: numeric vector of length \code{n_pt * n_season * n_rep}
+ giving the event-level covariate (ignored if \code{rep_constant = TRUE}).
+}
+If \code{NULL} (the default), covariates are simulated internally from
+standard normal distributions.
+\code{ec1} is assumed not to vary by species.}
\item{seed}{random seed. NULL uses (and updates) the existing RNG state. Other values
do not update the global RNG state.}
diff --git a/tests/testthat/test-simulate_flocker_data.R b/tests/testthat/test-simulate_flocker_data.R
new file mode 100644
index 0000000..b02ff92
--- /dev/null
+++ b/tests/testthat/test-simulate_flocker_data.R
@@ -0,0 +1,233 @@
+expect_named <- function(x, nm) {
+ testthat::expect_true(setequal(names(x), nm))
+}
+
+expect_array_dim <- function(x, d) {
+ testthat::expect_true(is.array(x))
+ testthat::expect_equal(dim(x), d)
+}
+
+test_that("single-season rep_constant shapes are correct", {
+ fd <- simulate_flocker_data(n_pt = 7, n_sp = 3, n_rep = 4, rep_constant = TRUE, seed = 1)
+
+ expect_true(is.matrix(fd$obs))
+ expect_equal(dim(fd$obs), c(7*3, 4))
+
+ expect_true(is.data.frame(fd$unit_covs))
+ expect_named(fd$unit_covs, c("uc1","species"))
+ expect_equal(nrow(fd$unit_covs), 7*3)
+
+ expect_true(is.null(fd$event_covs))
+ expect_true(is.list(fd$params))
+ expect_true(is.list(fd$covariates))
+})
+
+test_that("single-season rep_varying includes event covariates with correct shapes", {
+ fd <- simulate_flocker_data(n_pt = 5, n_sp = 2, n_rep = 3, rep_constant = FALSE, seed = 2)
+
+ expect_true(is.matrix(fd$obs))
+ expect_equal(dim(fd$obs), c(5*2, 3))
+
+ expect_true(is.list(fd$event_covs))
+ expect_true("ec1" %in% names(fd$event_covs))
+ expect_true(is.matrix(fd$event_covs$ec1))
+ expect_equal(dim(fd$event_covs$ec1), c(5*2, 3))
+})
+
+test_that("seed produces deterministic output", {
+ a <- simulate_flocker_data(n_pt=6, n_sp=2, n_rep=3, seed=123)
+ b <- simulate_flocker_data(n_pt=6, n_sp=2, n_rep=3, seed=123)
+
+ expect_identical(a$obs, b$obs)
+ expect_identical(a$unit_covs, b$unit_covs)
+ expect_identical(a$event_covs, b$event_covs)
+ expect_identical(a$params$coefs, b$params$coefs)
+})
+
+test_that("non-NULL seed does not advance global RNG", {
+ set.seed(999)
+ x1 <- runif(1)
+
+ simulate_flocker_data(n_pt=3, n_sp=1, n_rep=2, seed=1)
+
+ x2 <- runif(1)
+
+ set.seed(999)
+ y1 <- runif(1)
+ y2 <- runif(1)
+
+ expect_identical(x1, y1)
+ expect_identical(x2, y2)
+})
+
+test_that("NULL seed advances global RNG", {
+ set.seed(999)
+ x1 <- runif(1)
+
+ simulate_flocker_data(n_pt=3, n_sp=1, n_rep=2, seed=NULL)
+
+ x2 <- runif(1)
+
+ set.seed(999)
+ y1 <- runif(1)
+ y2 <- runif(1)
+
+ expect_identical(x1, y1)
+ expect_false(identical(x2, y2))
+})
+
+test_that("multiseason/multi_init constraints are enforced", {
+ expect_error(simulate_flocker_data(n_season=1, multiseason="colex", multi_init=NULL))
+ expect_error(simulate_flocker_data(n_season=1, multiseason=NULL, multi_init="explicit"))
+
+ expect_error(simulate_flocker_data(n_season=2, multiseason=NULL, multi_init="explicit"))
+ expect_error(simulate_flocker_data(n_season=2, multiseason="colex", multi_init=NULL))
+})
+
+test_that("augmented constraints are enforced", {
+ expect_error(simulate_flocker_data(n_season=2, augmented=TRUE, multiseason="colex", multi_init="explicit"))
+})
+
+test_that("params rejects unknown keys", {
+ expect_error(simulate_flocker_data(params=list(zzz=1), seed=1))
+})
+
+test_that("params$coefs validation works", {
+ expect_error(simulate_flocker_data(n_sp=2, params=list(coefs=matrix(1,2,2)), seed=1))
+
+ bad_rows <- data.frame(det_intercept=1, det_slope_unit=1, occ_intercept=1, occ_slope_unit=1)
+ expect_error(simulate_flocker_data(n_sp=2, params=list(coefs=bad_rows), seed=1))
+
+ bad_col <- data.frame(det_intercept=1, det_slope_unit="a", occ_intercept=1, occ_slope_unit=1)
+ expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=bad_col), seed=1))
+
+ bad_names <- data.frame(a=1, b=1, c=1, d=1)
+ expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=bad_names), seed=1))
+})
+
+test_that("cannot supply coefs with coef_means or Sigma", {
+ cf <- data.frame(det_intercept=0, det_slope_unit=0, occ_intercept=0, occ_slope_unit=0)
+ expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=cf, coef_means=rep(0,4)), seed=1))
+ expect_error(simulate_flocker_data(n_sp=1, params=list(coefs=cf, Sigma=diag(4)), seed=1))
+})
+
+test_that("covariates validation works", {
+ expect_error(simulate_flocker_data(covariates=1, seed=1))
+ expect_error(simulate_flocker_data(covariates=list(), seed=1))
+ expect_error(simulate_flocker_data(n_pt=3, covariates=list(uc1=1:2), seed=1))
+
+ # ec1 length check only when rep_constant=FALSE
+ expect_error(simulate_flocker_data(n_pt=3, n_rep=2, n_season=1, rep_constant=FALSE,
+ covariates=list(uc1=rnorm(3), ec1=rnorm(3)), seed=1))
+})
+
+
+test_that("multiseason output shapes are correct", {
+ fd <- simulate_flocker_data(n_pt=4, n_sp=2, n_rep=3, n_season=5,
+ multiseason="colex", multi_init="explicit",
+ rep_constant=FALSE, seed=1)
+
+ expect_true(is.array(fd$obs))
+ expect_equal(dim(fd$obs), c(4*2, 3, 5))
+
+ expect_true(is.list(fd$unit_covs))
+ expect_length(fd$unit_covs, 5)
+ expect_true(all(vapply(fd$unit_covs, is.data.frame, logical(1))))
+ expect_equal(nrow(fd$unit_covs[[1]]), 8)
+
+ expect_true(is.list(fd$event_covs))
+ expect_true(is.array(fd$event_covs$ec1))
+ expect_equal(dim(fd$event_covs$ec1), c(8, 3, 5))
+})
+
+test_that("ragged_rep introduces missing visits", {
+ fd <- simulate_flocker_data(n_pt=10, n_sp=1, n_rep=6, ragged_rep=TRUE,
+ rep_constant=FALSE, seed=1)
+ expect_true(any(is.na(fd$obs)))
+ expect_true(any(is.na(fd$event_covs$ec1)))
+})
+
+test_that("missing_seasons introduces missing seasons", {
+ fd <- simulate_flocker_data(n_pt=10, n_sp=1, n_rep=3, n_season=4,
+ multiseason="colex", multi_init="explicit",
+ rep_constant=FALSE, missing_seasons=TRUE, seed=1)
+ # Should be some NA in obs
+ expect_true(any(is.na(fd$obs)))
+})
+
+test_that("augmented output is trimmed and has expected shapes", {
+ fd <- simulate_flocker_data(n_pt=30, n_sp=20, n_rep=4, augmented=TRUE,
+ rep_constant=FALSE, seed=2)
+
+ expect_true(is.array(fd$obs))
+ expect_equal(dim(fd$obs)[1:2], c(30, 4))
+ expect_true(dim(fd$obs)[3] <= 20) # trimmed, maybe equal but often less
+ expect_true(dim(fd$obs)[3] >= 1)
+
+ expect_true(is.data.frame(fd$unit_covs))
+ expect_named(fd$unit_covs, c("uc1"))
+ expect_equal(nrow(fd$unit_covs), 30)
+
+ expect_true(is.list(fd$event_covs))
+ expect_true(is.matrix(fd$event_covs$ec1))
+ expect_equal(dim(fd$event_covs$ec1), c(30, 4))
+})
+
+test_that("coef set is correct: single-season rep-constant", {
+ fd <- simulate_flocker_data(n_sp=2, rep_constant=TRUE, augmented=FALSE, seed=1)
+ expect_true(setequal(colnames(fd$params$coefs),
+ c("det_intercept","det_slope_unit","occ_intercept","occ_slope_unit")))
+})
+
+test_that("coef set is correct: multiseason colex equilibrium", {
+ fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="colex", multi_init="equilibrium",
+ rep_constant=FALSE, seed=1)
+ expect_true(setequal(colnames(fd$params$coefs),
+ c("det_intercept","det_slope_unit","det_slope_visit",
+ "col_intercept","col_slope_unit","ex_intercept","ex_slope_unit")))
+})
+
+test_that("coef set is correct: multiseason autologistic explicit", {
+ fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="autologistic", multi_init="explicit",
+ rep_constant=FALSE, seed=1)
+ expect_true(setequal(colnames(fd$params$coefs),
+ c("det_intercept","det_slope_unit","det_slope_visit",
+ "occ_intercept","occ_slope_unit",
+ "col_intercept","col_slope_unit",
+ "auto_intercept","auto_slope_unit")))
+})
+
+test_that("coef set is correct: multiseason autologistic equilibrium", {
+ fd <- simulate_flocker_data(n_sp=2, n_season=3, multiseason="autologistic", multi_init="equilibrium",
+ rep_constant=FALSE, seed=1)
+ expect_true(setequal(colnames(fd$params$coefs),
+ c("det_intercept","det_slope_unit","det_slope_visit",
+ "col_intercept","col_slope_unit",
+ "auto_intercept","auto_slope_unit")))
+})
+
+test_that("sfd validates covariates$uc1", {
+ expect_error(flocker:::sfd(n_rep=2,n_pt=3,n_sp=1,n_season=1,
+ multiseason=NULL,multi_init=NULL,augmented=FALSE,rep_constant=TRUE,
+ params=list(),
+ covariates=list(uc1=1:2),
+ ragged_rep=FALSE,missing_seasons=FALSE))
+})
+
+test_that("simulate_flocker_data runs across key configurations", {
+ configs <- list(
+ list(n_season=1, rep_constant=TRUE, augmented=FALSE),
+ list(n_season=1, rep_constant=FALSE, augmented=FALSE),
+ list(n_season=1, rep_constant=FALSE, augmented=TRUE),
+ list(n_season=3, rep_constant=FALSE, multiseason="colex", multi_init="explicit"),
+ list(n_season=3, rep_constant=FALSE, multiseason="colex", multi_init="equilibrium"),
+ list(n_season=3, rep_constant=FALSE, multiseason="autologistic", multi_init="explicit"),
+ list(n_season=3, rep_constant=FALSE, multiseason="autologistic", multi_init="equilibrium")
+ )
+
+ for (cfg in configs) {
+ fd <- do.call(simulate_flocker_data, c(list(n_pt=5,n_sp=3,n_rep=2,seed=1), cfg))
+ expect_true(is.list(fd))
+ expect_true("obs" %in% names(fd))
+ }
+})
\ No newline at end of file
diff --git a/vignettes/articles/sbc.Rmd b/vignettes/articles/sbc.Rmd
index 9fc07d8..ec3116f 100644
--- a/vignettes/articles/sbc.Rmd
+++ b/vignettes/articles/sbc.Rmd
@@ -1,32 +1,29 @@
---
-title: "SBC for flocker models"
+title: "SBC for single-season flocker models"
author: "Jacob Socolar"
-date: "2023-10-22"
+date: "2026-02-24"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{SBC for flocker models}
+ %\VignetteIndexEntry{SBC for single-season flocker models}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
params:
n_sims: 1000
n_sites: 200
- n_sims_augmented: 200
- n_sites_augmented: 50
- n_pseudospecies_augmented: 50
---
-```r
-params <- list(
- n_sims = 1000,
- n_sites = 200,
- n_sims_augmented = 200,
- n_sites_augmented = 50,
- n_pseudospecies_augmented = 50
-)
+
+
+### Rmd parameters
+
+```
+## Parameter value
+## 1 n_sims 1000
+## 2 n_sites 200
```
-```r
+``` r
library(flocker)
library(brms)
library(SBC)
@@ -35,8 +32,8 @@ set.seed(1)
## Overview
-This document performs simulation-based calibration for the models
-available in R package `flocker`. Here, our goal is to validate `flocker`'s
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
data formatting, decoding, and likelihood implementations, and not `brms`'s
construction of the linear predictors.
@@ -50,11 +47,14 @@ occupancy, colonization, extinction and/or autologistic terms as applicable,
and one event covariate that affects detection only (for all models except the
rep-constant).
+In addition to this article on single-season models, articles are available
+showing SBC for multi-season models and data-augmented multi-species models.
+
## Single-season
### Rep-constant
-```r
+``` r
# make the stancode
model_name <- paste0(tempdir(), "/sbc_rep_constant_model.stan")
fd <- simulate_flocker_data(
@@ -139,23 +139,32 @@ rep_constant_results <- compute_SBC(rep_constant_dataset, rep_constant_backend)
plot_ecdf(rep_constant_results)
```
-
+
-```r
+``` r
plot_rank_hist(rep_constant_results)
```
-
+
-```r
+``` r
plot_ecdf_diff(rep_constant_results)
```
-
+
### Rep-varying
-```r
+``` r
# make the stancode
model_name <- paste0(tempdir(), "/sbc_rep_varying_model.stan")
fd <- simulate_flocker_data(
@@ -241,7 +250,7 @@ rep_varying_results <- compute_SBC(rep_varying_dataset, rep_varying_backend)
```
```
-## - 3 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.016.
+## - 1 (0%) fits had maximum Rhat > 1.01. Maximum Rhat was 1.01.
```
```
@@ -250,733 +259,30 @@ rep_varying_results <- compute_SBC(rep_varying_dataset, rep_varying_backend)
## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
```
-```r
+``` r
plot_ecdf(rep_varying_results)
```
-
+
-```r
+``` r
plot_rank_hist(rep_varying_results)
```
-
+
-```r
+``` r
plot_ecdf_diff(rep_varying_results)
```
-
-
-
-## Multi-season
-`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize
-the initial occupancy state using explicit and equilibrium parameterizations,
-for a total of four classes of multi-season model. We validate each class.
-
-### Colonization-extinction, explicit initial occupancy
-
-```r
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- coefs = data.frame(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "explicit",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "ex"),
- multiseason = "colex",
- multi_init = "explicit",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-colex_ex_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "explicit",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_occ[1]` = fd$params$coefs$occ_intercept,
- `b_occ[2]` = fd$params$coefs$occ_slope_unit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_ex[1]` = fd$params$coefs$ex_intercept,
- `b_ex[2]` = fd$params$coefs$ex_slope_unit
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "colex",
- multi_init = "explicit"
- )
- )
-}
-
-colex_ex_gen <- SBC_generator_function(
- colex_ex_generator,
- N = params$n_sites
- )
-colex_ex_dataset <- suppressMessages(
- generate_datasets(colex_ex_gen, params$n_sims)
-)
-
-colex_ex_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_colex_ex_model.stan")
- )
- )
-
-colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend)
-
-plot_ecdf(colex_ex_results)
-```
-
-
-
-```r
-plot_rank_hist(colex_ex_results)
-```
-
-
-
-```r
-plot_ecdf_diff(colex_ex_results)
-```
-
-
-
-### Colonization-extinction, equilibrium initial occupancy
-
-```r
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "equilibrium",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "ex"),
- multiseason = "colex",
- multi_init = "equilibrium",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-colex_eq_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "equilibrium",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_ex[1]` = fd$params$coefs$ex_intercept,
- `b_ex[2]` = fd$params$coefs$ex_slope_unit
- ),
- generated = flocker_standata(
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "colex",
- multi_init = "equilibrium"
- )
- )
-}
-
-colex_eq_gen <- SBC_generator_function(
- colex_eq_generator,
- N = params$n_sites
- )
-colex_eq_dataset <- suppressMessages(
- generate_datasets(colex_eq_gen, params$n_sims)
-)
-
-colex_eq_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_colex_eq_model.stan")
- )
- )
-
-colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend)
-```
-
-```
-## - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.012.
-```
-
-```
-## - 853 (85%) fits had some steps rejected. Maximum number of rejections was 5.
-```
-
-```
-## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-```
-
-```r
-plot_ecdf(colex_eq_results)
-```
-
-
-
-```r
-plot_rank_hist(colex_eq_results)
-```
-
-
-
-```r
-plot_ecdf_diff(colex_eq_results)
-```
-
-
-
-### Autologistic, explicit initial occupancy
-
-```r
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "explicit",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "autologistic"),
- multiseason = "autologistic",
- multi_init = "explicit",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-auto_ex_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "explicit",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_occ[1]` = fd$params$coefs$occ_intercept,
- `b_occ[2]` = fd$params$coefs$occ_slope_unit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_autologistic[1]` = fd$params$coefs$auto_intercept,
- `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "autologistic",
- multi_init = "explicit"
- )
- )
-}
-
-auto_ex_gen <- SBC_generator_function(
- auto_ex_generator,
- N = params$n_sites
- )
-auto_ex_dataset <- suppressMessages(
- generate_datasets(auto_ex_gen, params$n_sims)
-)
-
-auto_ex_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_auto_ex_model.stan")
- )
- )
-
-auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend)
-
-plot_ecdf(auto_ex_results)
-```
-
-
-
-```r
-plot_rank_hist(auto_ex_results)
-```
-
-
-
-```r
-plot_ecdf_diff(auto_ex_results)
-```
-
-
-
-### Autologistic, equilibrium initial occupancy
-
-```r
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "equilibrium",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "autologistic"),
- multiseason = "autologistic",
- multi_init = "equilibrium",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-auto_eq_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "equilibrium",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_autologistic[1]` = fd$params$coefs$auto_intercept,
- `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
- ),
- generated = flocker_standata(
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "autologistic",
- multi_init = "equilibrium"
- )
- )
-}
-
-auto_eq_gen <- SBC_generator_function(
- auto_eq_generator,
- N = params$n_sites
- )
-auto_eq_dataset <- suppressMessages(
- generate_datasets(auto_eq_gen, params$n_sims)
-)
-
-auto_eq_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_auto_eq_model.stan")
- )
- )
-
-auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend)
-```
-
-```
-## - 302 (30%) fits had some steps rejected. Maximum number of rejections was 3.
-```
-
-```
-## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-```
-
-```r
-plot_ecdf(auto_eq_results)
-```
-
-
-
-```r
-plot_rank_hist(auto_eq_results)
-```
-
-
-
-```r
-plot_ecdf_diff(auto_eq_results)
-```
-
-
-
-
-## Data-augmented
-
-```r
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_augmented_model.stan")
-
-omega <- boot::inv.logit(rnorm(1, 0, .1))
-available <- rbinom(1, params$n_pseudospecies_augmented, omega)
-unavailable <- params$n_pseudospecies_augmented - available
-coef_means <- rnorm(5) # normal prior on random effect means
-sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds
-Sigma <- diag(5) * sigma
-
-fd <- simulate_flocker_data(
- n_pt = params$n_sites_augmented, n_sp = available,
- params = list(
- coef_means = coef_means,
- Sigma = Sigma,
- coefs = data.frame(
- det_intercept = rnorm(available, coef_means[1], sigma[1]),
- det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
- det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
- occ_intercept = rnorm(available, coef_means[4], sigma[4]),
- occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- ragged_rep = TRUE
-)
-
-obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
-
-for(i in 2:available){
- obs_aug <- abind::abind(
- obs_aug,
- fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
- along = 3
- )
-}
-
-event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
-unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
-
-flocker_data = make_flocker_data(
- obs_aug, unit_covs_aug, event_covs_aug,
- type = "augmented", n_aug = unavailable,
- quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
- f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", class = "sd") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", class = "sd", dpar = "occ") +
- brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"),
- backend = "cmdstanr",
- augmented = TRUE
- )
-writeLines(scode, model_name)
-
-aug_generator <- function(N){
- omega <- boot::inv.logit(rnorm(1, 0, .1))
- available <- rbinom(1, params$n_pseudospecies_augmented, omega)
- unavailable <- params$n_pseudospecies_augmented - available
- coef_means <- rnorm(5) # normal prior on random effect means
- sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds
- Sigma <- diag(5) * sigma
-
- fd <- simulate_flocker_data(
- n_pt = params$n_sites_augmented, n_sp = available,
- params = list(
- coef_means = coef_means,
- Sigma = Sigma,
- coefs = data.frame(
- det_intercept = rnorm(available, coef_means[1], sigma[1]),
- det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
- det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
- occ_intercept = rnorm(available, coef_means[4], sigma[4]),
- occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- ragged_rep = TRUE
- )
-
- obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
-
- for(i in 2:available){
- obs_aug <- abind::abind(
- obs_aug,
- fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
- along = 3
- )
- }
-
- event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
- unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
-
- flocker_data = make_flocker_data(
- obs_aug, unit_covs_aug, event_covs_aug,
- type = "augmented", n_aug = unavailable,
- quiet = TRUE)
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coef_means[1],
- `b[2]` = fd$params$coef_means[2],
- `b[3]` = fd$params$coef_means[3],
- `b_occ[1]` = fd$params$coef_means[4],
- `b_occ[2]` = fd$params$coef_means[5],
- `Intercept_Omega` = boot::logit(omega)
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
- f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
- flocker_data = flocker_data,
- augmented = TRUE
- )
- )
-}
-
-aug_gen <- SBC_generator_function(
- aug_generator,
- N = params$n_sites_augmented
- )
-aug_dataset <- suppressMessages(
- generate_datasets(aug_gen, params$n_sims_augmented)
-)
-
-aug_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_augmented_model.stan")
- )
- )
-
-aug_results <- compute_SBC(aug_dataset, aug_backend)
-```
-
-```
-## - 4 (2%) fits had at least one Rhat > 1.01. Largest Rhat was 1.022.
-```
-
-```
-## - 2 (1%) fits had divergent transitions. Maximum number of divergences was 2.
-```
-
-```
-## - 7 (4%) fits had some steps rejected. Maximum number of rejections was 1.
-```
-
-```
-## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-```
-
-```r
-plot_ecdf(aug_results)
-```
-
-
-
-```r
-plot_rank_hist(aug_results)
-```
-
-
-
-```r
-plot_ecdf_diff(aug_results)
-```
+
-
diff --git a/vignettes/articles/sbc.Rmd.orig b/vignettes/articles/sbc.Rmd.orig
index dc7859a..0fcda31 100644
--- a/vignettes/articles/sbc.Rmd.orig
+++ b/vignettes/articles/sbc.Rmd.orig
@@ -1,31 +1,33 @@
---
-title: "SBC for flocker models"
+title: "SBC for single-season flocker models"
author: "Jacob Socolar"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{SBC for flocker models}
+ %\VignetteIndexEntry{SBC for single-season flocker models}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
params:
n_sims: 1000
n_sites: 200
- n_sims_augmented: 200
- n_sites_augmented: 50
- n_pseudospecies_augmented: 50
---
```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
+knitr::opts_chunk$set(
+ echo = TRUE,
+ fig.path = "man/figures/sbc_vignette/",
+ fig.retina = 2
+)
+dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE)
+
```
-```{r display_params}
-params <- list(
- n_sims = 1000,
- n_sites = 200,
- n_sims_augmented = 200,
- n_sites_augmented = 50,
- n_pseudospecies_augmented = 50
+### Rmd parameters
+```{r display_params, echo = FALSE}
+data.frame(
+ Parameter = names(params),
+ value = unlist(params, use.names = FALSE),
+ row.names = NULL
)
```
@@ -39,8 +41,8 @@ set.seed(1)
## Overview
-This document performs simulation-based calibration for the models
-available in R package `flocker`. Here, our goal is to validate `flocker`'s
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
data formatting, decoding, and likelihood implementations, and not `brms`'s
construction of the linear predictors.
@@ -54,6 +56,9 @@ occupancy, colonization, extinction and/or autologistic terms as applicable,
and one event covariate that affects detection only (for all models except the
rep-constant).
+In addition to this article on single-season models, articles are available
+showing SBC for multi-season models and data-augmented multi-species models.
+
## Single-season
### Rep-constant
@@ -236,607 +241,3 @@ plot_ecdf_diff(rep_varying_results)
```
-
-## Multi-season
-`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize
-the initial occupancy state using explicit and equilibrium parameterizations,
-for a total of four classes of multi-season model. We validate each class.
-
-### Colonization-extinction, explicit initial occupancy
-```{r multi-colex-ex}
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- coefs = data.frame(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "explicit",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "ex"),
- multiseason = "colex",
- multi_init = "explicit",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-colex_ex_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "explicit",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_occ[1]` = fd$params$coefs$occ_intercept,
- `b_occ[2]` = fd$params$coefs$occ_slope_unit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_ex[1]` = fd$params$coefs$ex_intercept,
- `b_ex[2]` = fd$params$coefs$ex_slope_unit
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "colex",
- multi_init = "explicit"
- )
- )
-}
-
-colex_ex_gen <- SBC_generator_function(
- colex_ex_generator,
- N = params$n_sites
- )
-colex_ex_dataset <- suppressMessages(
- generate_datasets(colex_ex_gen, params$n_sims)
-)
-
-colex_ex_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_colex_ex_model.stan")
- )
- )
-
-colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend)
-
-plot_ecdf(colex_ex_results)
-plot_rank_hist(colex_ex_results)
-plot_ecdf_diff(colex_ex_results)
-
-```
-
-### Colonization-extinction, equilibrium initial occupancy
-```{r multi-colex-eq, eval = TRUE}
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "equilibrium",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "ex"),
- multiseason = "colex",
- multi_init = "equilibrium",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-colex_eq_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- ex_intercept = rnorm(1),
- ex_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "colex",
- multi_init = "equilibrium",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_ex[1]` = fd$params$coefs$ex_intercept,
- `b_ex[2]` = fd$params$coefs$ex_slope_unit
- ),
- generated = flocker_standata(
- f_col = ~ 0 + Intercept + uc1,
- f_ex = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "colex",
- multi_init = "equilibrium"
- )
- )
-}
-
-colex_eq_gen <- SBC_generator_function(
- colex_eq_generator,
- N = params$n_sites
- )
-colex_eq_dataset <- suppressMessages(
- generate_datasets(colex_eq_gen, params$n_sims)
-)
-
-colex_eq_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_colex_eq_model.stan")
- )
- )
-
-colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend)
-
-plot_ecdf(colex_eq_results)
-plot_rank_hist(colex_eq_results)
-plot_ecdf_diff(colex_eq_results)
-
-```
-
-### Autologistic, explicit initial occupancy
-```{r multi-auto-ex, eval = TRUE}
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "explicit",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "autologistic"),
- multiseason = "autologistic",
- multi_init = "explicit",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-auto_ex_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- occ_intercept = rnorm(1),
- occ_slope_unit = rnorm(1),
- colo_intercept = rnorm(1),
- colo_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "explicit",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_occ[1]` = fd$params$coefs$occ_intercept,
- `b_occ[2]` = fd$params$coefs$occ_slope_unit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_autologistic[1]` = fd$params$coefs$auto_intercept,
- `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1,
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "autologistic",
- multi_init = "explicit"
- )
- )
-}
-
-auto_ex_gen <- SBC_generator_function(
- auto_ex_generator,
- N = params$n_sites
- )
-auto_ex_dataset <- suppressMessages(
- generate_datasets(auto_ex_gen, params$n_sims)
-)
-
-auto_ex_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_auto_ex_model.stan")
- )
- )
-
-auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend)
-
-plot_ecdf(auto_ex_results)
-plot_rank_hist(auto_ex_results)
-plot_ecdf_diff(auto_ex_results)
-
-```
-
-### Autologistic, equilibrium initial occupancy
-```{r multi-auto-eq, eval = TRUE}
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan")
-fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "equilibrium",
- ragged_rep = TRUE
-)
-flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
-scode <- flocker_stancode(
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", dpar = "colo") +
- brms::set_prior("std_normal()", dpar = "autologistic"),
- multiseason = "autologistic",
- multi_init = "equilibrium",
- backend = "cmdstanr"
- )
-writeLines(scode, model_name)
-
-auto_eq_generator <- function(N){
- fd <- simulate_flocker_data(
- n_pt = params$n_sites, n_sp = 1, n_season = 4,
- params = list(
- det_intercept = rnorm(1),
- det_slope_unit = rnorm(1),
- det_slope_visit = rnorm(1),
- col_intercept = rnorm(1),
- col_slope_unit = rnorm(1),
- auto_intercept = rnorm(1),
- auto_slope_unit = rnorm(1)
- ),
- seed = NULL,
- rep_constant = FALSE,
- multiseason = "autologistic",
- multi_init = "equilibrium",
- ragged_rep = TRUE
- )
-
- flocker_data = make_flocker_data(
- fd$obs, fd$unit_covs, fd$event_covs,
- type = "multi", quiet = TRUE)
-
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coefs$det_intercept,
- `b[2]` = fd$params$coefs$det_slope_unit,
- `b[3]` = fd$params$coefs$det_slope_visit,
- `b_colo[1]` = fd$params$coefs$col_intercept,
- `b_colo[2]` = fd$params$coefs$col_slope_unit,
- `b_autologistic[1]` = fd$params$coefs$auto_intercept,
- `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
- ),
- generated = flocker_standata(
- f_col = ~ 0 + Intercept + uc1,
- f_auto = ~ 0 + Intercept + uc1,
- f_det = ~ 0 + Intercept + uc1 + ec1,
- flocker_data = flocker_data,
- multiseason = "autologistic",
- multi_init = "equilibrium"
- )
- )
-}
-
-auto_eq_gen <- SBC_generator_function(
- auto_eq_generator,
- N = params$n_sites
- )
-auto_eq_dataset <- suppressMessages(
- generate_datasets(auto_eq_gen, params$n_sims)
-)
-
-auto_eq_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_auto_eq_model.stan")
- )
- )
-
-auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend)
-
-plot_ecdf(auto_eq_results)
-plot_rank_hist(auto_eq_results)
-plot_ecdf_diff(auto_eq_results)
-
-```
-
-
-## Data-augmented
-```{r data-augmented, eval = TRUE}
-# make the stancode
-model_name <- paste0(tempdir(), "/sbc_augmented_model.stan")
-
-omega <- boot::inv.logit(rnorm(1, 0, .1))
-available <- rbinom(1, params$n_pseudospecies_augmented, omega)
-unavailable <- params$n_pseudospecies_augmented - available
-coef_means <- rnorm(5) # normal prior on random effect means
-sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds
-Sigma <- diag(5) * sigma
-
-fd <- simulate_flocker_data(
- n_pt = params$n_sites_augmented, n_sp = available,
- params = list(
- coef_means = coef_means,
- Sigma = Sigma,
- coefs = data.frame(
- det_intercept = rnorm(available, coef_means[1], sigma[1]),
- det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
- det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
- occ_intercept = rnorm(available, coef_means[4], sigma[4]),
- occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- ragged_rep = TRUE
-)
-
-obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
-
-for(i in 2:available){
- obs_aug <- abind::abind(
- obs_aug,
- fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
- along = 3
- )
-}
-
-event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
-unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
-
-flocker_data = make_flocker_data(
- obs_aug, unit_covs_aug, event_covs_aug,
- type = "augmented", n_aug = unavailable,
- quiet = TRUE)
-
-scode <- flocker_stancode(
- f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
- f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
- flocker_data = flocker_data,
- prior =
- brms::set_prior("std_normal()") +
- brms::set_prior("std_normal()", class = "sd") +
- brms::set_prior("std_normal()", dpar = "occ") +
- brms::set_prior("std_normal()", class = "sd", dpar = "occ") +
- brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"),
- backend = "cmdstanr",
- augmented = TRUE
- )
-writeLines(scode, model_name)
-
-aug_generator <- function(N){
- omega <- boot::inv.logit(rnorm(1, 0, .1))
- available <- rbinom(1, params$n_pseudospecies_augmented, omega)
- unavailable <- params$n_pseudospecies_augmented - available
- coef_means <- rnorm(5) # normal prior on random effect means
- sigma <- abs(rnorm(5)) # half-normal prior on all random effect sds
- Sigma <- diag(5) * sigma
-
- fd <- simulate_flocker_data(
- n_pt = params$n_sites_augmented, n_sp = available,
- params = list(
- coef_means = coef_means,
- Sigma = Sigma,
- coefs = data.frame(
- det_intercept = rnorm(available, coef_means[1], sigma[1]),
- det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
- det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
- occ_intercept = rnorm(available, coef_means[4], sigma[4]),
- occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
- )
- ),
- seed = NULL,
- rep_constant = FALSE,
- ragged_rep = TRUE
- )
-
- obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
-
- for(i in 2:available){
- obs_aug <- abind::abind(
- obs_aug,
- fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
- along = 3
- )
- }
-
- event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
- unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
-
- flocker_data = make_flocker_data(
- obs_aug, unit_covs_aug, event_covs_aug,
- type = "augmented", n_aug = unavailable,
- quiet = TRUE)
- # format for return
- list(
- variables = list(
- `b[1]` = fd$params$coef_means[1],
- `b[2]` = fd$params$coef_means[2],
- `b[3]` = fd$params$coef_means[3],
- `b_occ[1]` = fd$params$coef_means[4],
- `b_occ[2]` = fd$params$coef_means[5],
- `Intercept_Omega` = boot::logit(omega)
- ),
- generated = flocker_standata(
- f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
- f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
- flocker_data = flocker_data,
- augmented = TRUE
- )
- )
-}
-
-aug_gen <- SBC_generator_function(
- aug_generator,
- N = params$n_sites_augmented
- )
-aug_dataset <- suppressMessages(
- generate_datasets(aug_gen, params$n_sims_augmented)
-)
-
-aug_backend <-
- SBC_backend_cmdstan_sample(
- cmdstanr::cmdstan_model(
- paste0(tempdir(), "/sbc_augmented_model.stan")
- )
- )
-
-aug_results <- compute_SBC(aug_dataset, aug_backend)
-
-plot_ecdf(aug_results)
-plot_rank_hist(aug_results)
-plot_ecdf_diff(aug_results)
-
-```
diff --git a/vignettes/articles/sbc_aug.Rmd b/vignettes/articles/sbc_aug.Rmd
new file mode 100644
index 0000000..15566b3
--- /dev/null
+++ b/vignettes/articles/sbc_aug.Rmd
@@ -0,0 +1,238 @@
+---
+title: "SBC for data-augmented flocker models"
+author: "Jacob Socolar"
+date: "2026-02-26"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{SBC for data-augmented flocker models}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+params:
+ n_sims_augmented: 200
+ n_sites_augmented: 50
+ n_pseudospecies_augmented: 50
+---
+
+
+
+### Rmd parameters
+
+```
+## Parameter value
+## 1 n_sims_augmented 200
+## 2 n_sites_augmented 50
+## 3 n_pseudospecies_augmented 50
+```
+
+
+``` r
+library(flocker)
+library(brms)
+library(SBC)
+set.seed(1)
+```
+
+## Overview
+
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
+data formatting, decoding, and likelihood implementations, and not `brms`'s
+construction of the linear predictors.
+
+The encoding of the data for a `flocker` model tends to be more complex in the
+presence of missing observations, and so we include missingness in the data
+simulation wherever possible (some visits missing in all models, some time-steps
+missing in multiseason models).
+
+In all models, we include one unit covariate that affects detection and
+occupancy, colonization, extinction and/or autologistic terms as applicable,
+and one event covariate that affects detection only (for all models except the
+rep-constant).
+
+
+## Data-augmented
+
+``` r
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_augmented_model.stan")
+
+omega <- boot::inv.logit(rnorm(1, 0, .1))
+available <- rbinom(1, params$n_pseudospecies_augmented, omega)
+unavailable <- params$n_pseudospecies_augmented - available
+
+coef_means <- rnorm(5) # population-level effects (std_normal)
+sigma <- abs(rnorm(5)) # half-normal(0,1) for sd parameters
+
+coefs_df <- data.frame(
+ det_intercept = rnorm(available, coef_means[1], sigma[1]),
+ det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
+ det_slope_visit= rnorm(available, coef_means[3], sigma[3]),
+ occ_intercept = rnorm(available, coef_means[4], sigma[4]),
+ occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
+)
+
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites_augmented, n_sp = available,
+ params = list(coefs = coefs_df),
+ seed = NULL,
+ rep_constant = FALSE,
+ ragged_rep = TRUE
+)
+
+obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
+
+for(i in 2:available){
+ obs_aug <- abind::abind(
+ obs_aug,
+ fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
+ along = 3
+ )
+}
+
+event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
+unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
+
+flocker_data = make_flocker_data(
+ obs_aug, unit_covs_aug, event_covs_aug,
+ type = "augmented", n_aug = unavailable,
+ quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
+ f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", class = "sd") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", class = "sd", dpar = "occ") +
+ brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"),
+ backend = "cmdstanr",
+ augmented = TRUE
+ )
+writeLines(scode, model_name)
+
+aug_generator <- function(N){
+ omega <- boot::inv.logit(rnorm(1, 0, .1))
+ available <- rbinom(1, params$n_pseudospecies_augmented, omega)
+ unavailable <- params$n_pseudospecies_augmented - available
+
+ coef_means <- rnorm(5)
+ sigma <- abs(rnorm(5))
+
+ coefs_df <- data.frame(
+ det_intercept = rnorm(available, coef_means[1], sigma[1]),
+ det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
+ det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
+ occ_intercept = rnorm(available, coef_means[4], sigma[4]),
+ occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
+ )
+
+ fd <- simulate_flocker_data(
+ n_pt = params$n_sites_augmented, n_sp = available,
+ params = list(coefs = coefs_df),
+ seed = NULL,
+ rep_constant = FALSE,
+ ragged_rep = TRUE
+ )
+
+ obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
+ for(i in 2:available){
+ obs_aug <- abind::abind(
+ obs_aug,
+ fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
+ along = 3
+ )
+ }
+
+ event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
+ unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
+
+ flocker_data <- make_flocker_data(
+ obs_aug, unit_covs_aug, event_covs_aug,
+ type = "augmented", n_aug = unavailable,
+ quiet = TRUE
+ )
+
+ list(
+ variables = list(
+ `b[1]` = coef_means[1],
+ `b[2]` = coef_means[2],
+ `b[3]` = coef_means[3],
+ `b_occ[1]` = coef_means[4],
+ `b_occ[2]` = coef_means[5],
+ `Intercept_Omega` = boot::logit(omega)
+ # Optional but recommended for “fuller” SBC on the augmented model:
+ # `sd_1[1]` = sigma[1], ...
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
+ f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
+ flocker_data = flocker_data,
+ augmented = TRUE
+ )
+ )
+}
+
+aug_gen <- SBC_generator_function(
+ aug_generator,
+ N = params$n_sites_augmented
+ )
+aug_dataset <- suppressMessages(
+ generate_datasets(aug_gen, params$n_sims_augmented)
+)
+
+aug_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_augmented_model.stan")
+ )
+ )
+
+aug_results <- compute_SBC(aug_dataset, aug_backend)
+```
+
+```
+## - 4 (2%) fits had divergences. Maximum number of divergences was 6.
+```
+
+```
+## - 10 (5%) fits had steps rejected. Maximum number of steps rejected was 1.
+```
+
+```
+## - 1 (0%) fits had maximum Rhat > 1.01. Maximum Rhat was 1.012.
+```
+
+```
+## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+```
+
+``` r
+plot_ecdf(aug_results)
+```
+
+
+
+``` r
+plot_rank_hist(aug_results)
+```
+
+
+
+``` r
+plot_ecdf_diff(aug_results)
+```
+
+
diff --git a/vignettes/articles/sbc_aug.Rmd.orig b/vignettes/articles/sbc_aug.Rmd.orig
new file mode 100644
index 0000000..51d2f56
--- /dev/null
+++ b/vignettes/articles/sbc_aug.Rmd.orig
@@ -0,0 +1,205 @@
+---
+title: "SBC for data-augmented flocker models"
+author: "Jacob Socolar"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{SBC for data-augmented flocker models}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+params:
+ n_sims_augmented: 200
+ n_sites_augmented: 50
+ n_pseudospecies_augmented: 50
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+ echo = TRUE,
+ fig.path = "man/figures/sbc_vignette/",
+ fig.retina = 2
+)
+dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE)
+
+```
+
+### Rmd parameters
+```{r display_params, echo = FALSE}
+data.frame(
+ Parameter = names(params),
+ value = unlist(params, use.names = FALSE),
+ row.names = NULL
+)
+
+```
+
+```{r setup2, message = FALSE}
+library(flocker)
+library(brms)
+library(SBC)
+set.seed(1)
+```
+
+## Overview
+
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
+data formatting, decoding, and likelihood implementations, and not `brms`'s
+construction of the linear predictors.
+
+The encoding of the data for a `flocker` model tends to be more complex in the
+presence of missing observations, and so we include missingness in the data
+simulation wherever possible (some visits missing in all models, some time-steps
+missing in multiseason models).
+
+In all models, we include one unit covariate that affects detection and
+occupancy, colonization, extinction and/or autologistic terms as applicable,
+and one event covariate that affects detection only (for all models except the
+rep-constant).
+
+
+## Data-augmented
+```{r data-augmented, eval = TRUE}
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_augmented_model.stan")
+
+omega <- boot::inv.logit(rnorm(1, 0, .1))
+available <- rbinom(1, params$n_pseudospecies_augmented, omega)
+unavailable <- params$n_pseudospecies_augmented - available
+
+coef_means <- rnorm(5) # population-level effects (std_normal)
+sigma <- abs(rnorm(5)) # half-normal(0,1) for sd parameters
+
+coefs_df <- data.frame(
+ det_intercept = rnorm(available, coef_means[1], sigma[1]),
+ det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
+ det_slope_visit= rnorm(available, coef_means[3], sigma[3]),
+ occ_intercept = rnorm(available, coef_means[4], sigma[4]),
+ occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
+)
+
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites_augmented, n_sp = available,
+ params = list(coefs = coefs_df),
+ seed = NULL,
+ rep_constant = FALSE,
+ ragged_rep = TRUE
+)
+
+obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
+
+for(i in 2:available){
+ obs_aug <- abind::abind(
+ obs_aug,
+ fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
+ along = 3
+ )
+}
+
+event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
+unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
+
+flocker_data = make_flocker_data(
+ obs_aug, unit_covs_aug, event_covs_aug,
+ type = "augmented", n_aug = unavailable,
+ quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
+ f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", class = "sd") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", class = "sd", dpar = "occ") +
+ brms::set_prior("normal(0, 0.1)", class = "Intercept", dpar = "Omega"),
+ backend = "cmdstanr",
+ augmented = TRUE
+ )
+writeLines(scode, model_name)
+
+aug_generator <- function(N){
+ omega <- boot::inv.logit(rnorm(1, 0, .1))
+ available <- rbinom(1, params$n_pseudospecies_augmented, omega)
+ unavailable <- params$n_pseudospecies_augmented - available
+
+ coef_means <- rnorm(5)
+ sigma <- abs(rnorm(5))
+
+ coefs_df <- data.frame(
+ det_intercept = rnorm(available, coef_means[1], sigma[1]),
+ det_slope_unit = rnorm(available, coef_means[2], sigma[2]),
+ det_slope_visit = rnorm(available, coef_means[3], sigma[3]),
+ occ_intercept = rnorm(available, coef_means[4], sigma[4]),
+ occ_slope_unit = rnorm(available, coef_means[5], sigma[5])
+ )
+
+ fd <- simulate_flocker_data(
+ n_pt = params$n_sites_augmented, n_sp = available,
+ params = list(coefs = coefs_df),
+ seed = NULL,
+ rep_constant = FALSE,
+ ragged_rep = TRUE
+ )
+
+ obs_aug <- fd$obs[seq_len(params$n_sites_augmented), ]
+ for(i in 2:available){
+ obs_aug <- abind::abind(
+ obs_aug,
+ fd$obs[((i - 1) * params$n_sites_augmented) + seq_len(params$n_sites_augmented), ],
+ along = 3
+ )
+ }
+
+ event_covs_aug <- list(ec1 = fd$event_covs$ec1[seq_len(params$n_sites_augmented), ])
+ unit_covs_aug <- data.frame(uc1 = fd$unit_covs[seq_len(params$n_sites_augmented), "uc1"])
+
+ flocker_data <- make_flocker_data(
+ obs_aug, unit_covs_aug, event_covs_aug,
+ type = "augmented", n_aug = unavailable,
+ quiet = TRUE
+ )
+
+ list(
+ variables = list(
+ `b[1]` = coef_means[1],
+ `b[2]` = coef_means[2],
+ `b[3]` = coef_means[3],
+ `b_occ[1]` = coef_means[4],
+ `b_occ[2]` = coef_means[5],
+ `Intercept_Omega` = boot::logit(omega)
+ # Optional but recommended for “fuller” SBC on the augmented model:
+ # `sd_1[1]` = sigma[1], ...
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1 + (1 + uc1 || ff_species),
+ f_det = ~ 0 + Intercept + uc1 + ec1 + (1 + uc1 + ec1 || ff_species),
+ flocker_data = flocker_data,
+ augmented = TRUE
+ )
+ )
+}
+
+aug_gen <- SBC_generator_function(
+ aug_generator,
+ N = params$n_sites_augmented
+ )
+aug_dataset <- suppressMessages(
+ generate_datasets(aug_gen, params$n_sims_augmented)
+)
+
+aug_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_augmented_model.stan")
+ )
+ )
+
+aug_results <- compute_SBC(aug_dataset, aug_backend)
+
+plot_ecdf(aug_results)
+plot_rank_hist(aug_results)
+plot_ecdf_diff(aug_results)
+
+```
diff --git a/vignettes/articles/sbc_multi.Rmd b/vignettes/articles/sbc_multi.Rmd
new file mode 100644
index 0000000..012c811
--- /dev/null
+++ b/vignettes/articles/sbc_multi.Rmd
@@ -0,0 +1,635 @@
+---
+title: "SBC for multi-season flocker models"
+author: "Jacob Socolar"
+date: "2026-02-24"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{SBC for multi-season flocker models}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+params:
+ n_sims: 1000
+ n_sites: 200
+---
+
+
+
+### Rmd parameters
+
+```
+## Parameter value
+## 1 n_sims 1000
+## 2 n_sites 200
+```
+
+
+``` r
+library(flocker)
+library(brms)
+library(SBC)
+set.seed(1)
+```
+
+## Overview
+
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
+data formatting, decoding, and likelihood implementations, and not `brms`'s
+construction of the linear predictors.
+
+The encoding of the data for a `flocker` model tends to be more complex in the
+presence of missing observations, and so we include missingness in the data
+simulation wherever possible (some visits missing in all models, some time-steps
+missing in multiseason models).
+
+In all models, we include one unit covariate that affects detection and
+occupancy, colonization, extinction and/or autologistic terms as applicable,
+and one event covariate that affects detection only (for all models except the
+rep-constant).
+
+
+## Multi-season
+`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize
+the initial occupancy state using explicit and equilibrium parameterizations,
+for a total of four classes of multi-season model. We validate each class.
+
+### Colonization-extinction, explicit initial occupancy
+
+``` r
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "ex"),
+ multiseason = "colex",
+ multi_init = "explicit",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+colex_ex_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_occ[1]` = fd$params$coefs$occ_intercept,
+ `b_occ[2]` = fd$params$coefs$occ_slope_unit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_ex[1]` = fd$params$coefs$ex_intercept,
+ `b_ex[2]` = fd$params$coefs$ex_slope_unit
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "colex",
+ multi_init = "explicit"
+ )
+ )
+}
+
+colex_ex_gen <- SBC_generator_function(
+ colex_ex_generator,
+ N = params$n_sites
+ )
+colex_ex_dataset <- suppressMessages(
+ generate_datasets(colex_ex_gen, params$n_sims)
+)
+
+colex_ex_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_colex_ex_model.stan")
+ )
+ )
+
+colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend)
+
+plot_ecdf(colex_ex_results)
+```
+
+
+
+``` r
+plot_rank_hist(colex_ex_results)
+```
+
+
+
+``` r
+plot_ecdf_diff(colex_ex_results)
+```
+
+
+
+### Colonization-extinction, equilibrium initial occupancy
+
+``` r
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "ex"),
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+colex_eq_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_ex[1]` = fd$params$coefs$ex_intercept,
+ `b_ex[2]` = fd$params$coefs$ex_slope_unit
+ ),
+ generated = flocker_standata(
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "colex",
+ multi_init = "equilibrium"
+ )
+ )
+}
+
+colex_eq_gen <- SBC_generator_function(
+ colex_eq_generator,
+ N = params$n_sites
+ )
+colex_eq_dataset <- suppressMessages(
+ generate_datasets(colex_eq_gen, params$n_sims)
+)
+
+colex_eq_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_colex_eq_model.stan")
+ )
+ )
+
+colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend)
+```
+
+```
+## - 843 (84%) fits had steps rejected. Maximum number of steps rejected was 4.
+```
+
+```
+## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+```
+
+``` r
+plot_ecdf(colex_eq_results)
+```
+
+
+
+``` r
+plot_rank_hist(colex_eq_results)
+```
+
+
+
+``` r
+plot_ecdf_diff(colex_eq_results)
+```
+
+
+
+### Autologistic, explicit initial occupancy
+
+``` r
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "autologistic"),
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+auto_ex_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_occ[1]` = fd$params$coefs$occ_intercept,
+ `b_occ[2]` = fd$params$coefs$occ_slope_unit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_autologistic[1]` = fd$params$coefs$auto_intercept,
+ `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "autologistic",
+ multi_init = "explicit"
+ )
+ )
+}
+
+auto_ex_gen <- SBC_generator_function(
+ auto_ex_generator,
+ N = params$n_sites
+ )
+auto_ex_dataset <- suppressMessages(
+ generate_datasets(auto_ex_gen, params$n_sims)
+)
+
+auto_ex_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_auto_ex_model.stan")
+ )
+ )
+
+auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend)
+
+plot_ecdf(auto_ex_results)
+```
+
+
+
+``` r
+plot_rank_hist(auto_ex_results)
+```
+
+
+
+``` r
+plot_ecdf_diff(auto_ex_results)
+```
+
+
+
+### Autologistic, equilibrium initial occupancy
+
+``` r
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "autologistic"),
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+auto_eq_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_autologistic[1]` = fd$params$coefs$auto_intercept,
+ `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
+ ),
+ generated = flocker_standata(
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "autologistic",
+ multi_init = "equilibrium"
+ )
+ )
+}
+
+auto_eq_gen <- SBC_generator_function(
+ auto_eq_generator,
+ N = params$n_sites
+ )
+auto_eq_dataset <- suppressMessages(
+ generate_datasets(auto_eq_gen, params$n_sims)
+)
+
+auto_eq_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_auto_eq_model.stan")
+ )
+ )
+
+auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend)
+```
+
+```
+## - 346 (35%) fits had steps rejected. Maximum number of steps rejected was 3.
+```
+
+```
+## - 1 (0%) fits had tail ESS / maximum rank < 0.5. Minimum tail ESS / maximum rank was 0.45. This potentially skews the rank statistics.
+## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics)
+## or number of posterior draws (by refitting) might help.
+```
+
+```
+## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+```
+
+``` r
+plot_ecdf(auto_eq_results)
+```
+
+
+
+``` r
+plot_rank_hist(auto_eq_results)
+```
+
+
+
+``` r
+plot_ecdf_diff(auto_eq_results)
+```
+
+
+
diff --git a/vignettes/articles/sbc_multi.Rmd.orig b/vignettes/articles/sbc_multi.Rmd.orig
new file mode 100644
index 0000000..da7f2f6
--- /dev/null
+++ b/vignettes/articles/sbc_multi.Rmd.orig
@@ -0,0 +1,530 @@
+---
+title: "SBC for multi-season flocker models"
+author: "Jacob Socolar"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{SBC for multi-season flocker models}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+params:
+ n_sims: 1000
+ n_sites: 200
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+ echo = TRUE,
+ fig.path = "man/figures/sbc_vignette/",
+ fig.retina = 2
+)
+dir.create("man/figures/sbc_vignette", recursive = TRUE, showWarnings = FALSE)
+
+```
+
+### Rmd parameters
+```{r display_params, echo = FALSE}
+data.frame(
+ Parameter = names(params),
+ value = unlist(params, use.names = FALSE),
+ row.names = NULL
+)
+
+```
+
+```{r setup2, message = FALSE}
+library(flocker)
+library(brms)
+library(SBC)
+set.seed(1)
+```
+
+## Overview
+
+This document is one of a series of simulation-based calibration exercieses for
+models available in R package `flocker`. Here, our goal is to validate `flocker`'s
+data formatting, decoding, and likelihood implementations, and not `brms`'s
+construction of the linear predictors.
+
+The encoding of the data for a `flocker` model tends to be more complex in the
+presence of missing observations, and so we include missingness in the data
+simulation wherever possible (some visits missing in all models, some time-steps
+missing in multiseason models).
+
+In all models, we include one unit covariate that affects detection and
+occupancy, colonization, extinction and/or autologistic terms as applicable,
+and one event covariate that affects detection only (for all models except the
+rep-constant).
+
+
+## Multi-season
+`flocker` fits multi-season models that parameterize the dynamics using colonization/extinction or autologistic specifications, and that parameterize
+the initial occupancy state using explicit and equilibrium parameterizations,
+for a total of four classes of multi-season model. We validate each class.
+
+### Colonization-extinction, explicit initial occupancy
+```{r multi-colex-ex}
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_colex_ex_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "ex"),
+ multiseason = "colex",
+ multi_init = "explicit",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+colex_ex_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_occ[1]` = fd$params$coefs$occ_intercept,
+ `b_occ[2]` = fd$params$coefs$occ_slope_unit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_ex[1]` = fd$params$coefs$ex_intercept,
+ `b_ex[2]` = fd$params$coefs$ex_slope_unit
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "colex",
+ multi_init = "explicit"
+ )
+ )
+}
+
+colex_ex_gen <- SBC_generator_function(
+ colex_ex_generator,
+ N = params$n_sites
+ )
+colex_ex_dataset <- suppressMessages(
+ generate_datasets(colex_ex_gen, params$n_sims)
+)
+
+colex_ex_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_colex_ex_model.stan")
+ )
+ )
+
+colex_ex_results <- compute_SBC(colex_ex_dataset, colex_ex_backend)
+
+plot_ecdf(colex_ex_results)
+plot_rank_hist(colex_ex_results)
+plot_ecdf_diff(colex_ex_results)
+
+```
+
+### Colonization-extinction, equilibrium initial occupancy
+```{r multi-colex-eq, eval = TRUE}
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_colex_eq_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "ex"),
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+colex_eq_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ ex_intercept = rnorm(1),
+ ex_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "colex",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_ex[1]` = fd$params$coefs$ex_intercept,
+ `b_ex[2]` = fd$params$coefs$ex_slope_unit
+ ),
+ generated = flocker_standata(
+ f_col = ~ 0 + Intercept + uc1,
+ f_ex = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "colex",
+ multi_init = "equilibrium"
+ )
+ )
+}
+
+colex_eq_gen <- SBC_generator_function(
+ colex_eq_generator,
+ N = params$n_sites
+ )
+colex_eq_dataset <- suppressMessages(
+ generate_datasets(colex_eq_gen, params$n_sims)
+)
+
+colex_eq_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_colex_eq_model.stan")
+ )
+ )
+
+colex_eq_results <- compute_SBC(colex_eq_dataset, colex_eq_backend)
+
+plot_ecdf(colex_eq_results)
+plot_rank_hist(colex_eq_results)
+plot_ecdf_diff(colex_eq_results)
+
+```
+
+### Autologistic, explicit initial occupancy
+```{r multi-auto-ex, eval = TRUE}
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_auto_ex_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "occ") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "autologistic"),
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+auto_ex_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ occ_intercept = rnorm(1),
+ occ_slope_unit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "explicit",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_occ[1]` = fd$params$coefs$occ_intercept,
+ `b_occ[2]` = fd$params$coefs$occ_slope_unit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_autologistic[1]` = fd$params$coefs$auto_intercept,
+ `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
+ ),
+ generated = flocker_standata(
+ f_occ = ~ 0 + Intercept + uc1,
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "autologistic",
+ multi_init = "explicit"
+ )
+ )
+}
+
+auto_ex_gen <- SBC_generator_function(
+ auto_ex_generator,
+ N = params$n_sites
+ )
+auto_ex_dataset <- suppressMessages(
+ generate_datasets(auto_ex_gen, params$n_sims)
+)
+
+auto_ex_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_auto_ex_model.stan")
+ )
+ )
+
+auto_ex_results <- compute_SBC(auto_ex_dataset, auto_ex_backend)
+
+plot_ecdf(auto_ex_results)
+plot_rank_hist(auto_ex_results)
+plot_ecdf_diff(auto_ex_results)
+
+```
+
+### Autologistic, equilibrium initial occupancy
+```{r multi-auto-eq, eval = TRUE}
+# make the stancode
+model_name <- paste0(tempdir(), "/sbc_auto_eq_model.stan")
+fd <- simulate_flocker_data(
+ n_pt = params$n_sites, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+)
+flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+scode <- flocker_stancode(
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ prior =
+ brms::set_prior("std_normal()") +
+ brms::set_prior("std_normal()", dpar = "colo") +
+ brms::set_prior("std_normal()", dpar = "autologistic"),
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ backend = "cmdstanr"
+ )
+writeLines(scode, model_name)
+
+auto_eq_generator <- function(N){
+ fd <- simulate_flocker_data(
+ n_pt = N, n_sp = 1, n_season = 4,
+ params = list(
+ coefs = data.frame(
+ det_intercept = rnorm(1),
+ det_slope_unit = rnorm(1),
+ det_slope_visit = rnorm(1),
+ col_intercept = rnorm(1),
+ col_slope_unit = rnorm(1),
+ auto_intercept = rnorm(1),
+ auto_slope_unit = rnorm(1)
+ )
+ ),
+ seed = NULL,
+ rep_constant = FALSE,
+ multiseason = "autologistic",
+ multi_init = "equilibrium",
+ ragged_rep = TRUE
+ )
+
+ flocker_data = make_flocker_data(
+ fd$obs, fd$unit_covs, fd$event_covs,
+ type = "multi", quiet = TRUE)
+
+ # format for return
+ list(
+ variables = list(
+ `b[1]` = fd$params$coefs$det_intercept,
+ `b[2]` = fd$params$coefs$det_slope_unit,
+ `b[3]` = fd$params$coefs$det_slope_visit,
+ `b_colo[1]` = fd$params$coefs$col_intercept,
+ `b_colo[2]` = fd$params$coefs$col_slope_unit,
+ `b_autologistic[1]` = fd$params$coefs$auto_intercept,
+ `b_autologistic[2]` = fd$params$coefs$auto_slope_unit
+ ),
+ generated = flocker_standata(
+ f_col = ~ 0 + Intercept + uc1,
+ f_auto = ~ 0 + Intercept + uc1,
+ f_det = ~ 0 + Intercept + uc1 + ec1,
+ flocker_data = flocker_data,
+ multiseason = "autologistic",
+ multi_init = "equilibrium"
+ )
+ )
+}
+
+auto_eq_gen <- SBC_generator_function(
+ auto_eq_generator,
+ N = params$n_sites
+ )
+auto_eq_dataset <- suppressMessages(
+ generate_datasets(auto_eq_gen, params$n_sims)
+)
+
+auto_eq_backend <-
+ SBC_backend_cmdstan_sample(
+ cmdstanr::cmdstan_model(
+ paste0(tempdir(), "/sbc_auto_eq_model.stan")
+ )
+ )
+
+auto_eq_results <- compute_SBC(auto_eq_dataset, auto_eq_backend)
+
+plot_ecdf(auto_eq_results)
+plot_rank_hist(auto_eq_results)
+plot_ecdf_diff(auto_eq_results)
+
+```
+
diff --git a/vignettes/augmented_models.Rmd b/vignettes/augmented_models.Rmd
index 4c03920..de1f639 100644
--- a/vignettes/augmented_models.Rmd
+++ b/vignettes/augmented_models.Rmd
@@ -1,7 +1,7 @@
---
title: "Data-augmented models in flocker"
author: "Jacob Socolar"
-date: "2023-10-20"
+date: "2026-02-24"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Data-augmented models in flocker}
@@ -27,366 +27,25 @@ where $s$ indexes the species, $\omega$ is the fitted availability probability o
Fitting the data-augmented model in `flocker` requires passing the observed data as a three-dimensional array with sites along the first dimension, visits along the second, and species along the third. Additionally, we must supply the `n_aug` argument to `make_flocker_data()`, specifying how many all-zero pseudospecies to augment the data with.
-```
-#> Formatting data for a single-season multispecies occupancy model with data augmentation for never-observed species. For details, see make_flocker_data_augmented. All warnings and error messages should be interpreted in the context of make_flocker_data_augmented
-#> formatting rep indices
-#>
- |
- | | 0%
- |
- | | 1%
- |
- |= | 1%
- |
- |= | 2%
- |
- |== | 2%
- |
- |== | 3%
- |
- |=== | 3%
- |
- |=== | 4%
- |
- |=== | 5%
- |
- |==== | 5%
- |
- |==== | 6%
- |
- |===== | 6%
- |
- |===== | 7%
- |
- |====== | 7%
- |
- |====== | 8%
- |
- |====== | 9%
- |
- |======= | 9%
- |
- |======= | 10%
- |
- |======== | 10%
- |
- |======== | 11%
- |
- |========= | 11%
- |
- |========= | 12%
- |
- |========== | 12%
- |
- |========== | 13%
- |
- |========== | 14%
- |
- |=========== | 14%
- |
- |=========== | 15%
- |
- |============ | 15%
- |
- |============ | 16%
- |
- |============= | 16%
- |
- |============= | 17%
- |
- |============= | 18%
- |
- |============== | 18%
- |
- |============== | 19%
- |
- |=============== | 19%
- |
- |=============== | 20%
- |
- |================ | 20%
- |
- |================ | 21%
- |
- |================ | 22%
- |
- |================= | 22%
- |
- |================= | 23%
- |
- |================== | 23%
- |
- |================== | 24%
- |
- |=================== | 24%
- |
- |=================== | 25%
- |
- |=================== | 26%
- |
- |==================== | 26%
- |
- |==================== | 27%
- |
- |===================== | 27%
- |
- |===================== | 28%
- |
- |====================== | 28%
- |
- |====================== | 29%
- |
- |====================== | 30%
- |
- |======================= | 30%
- |
- |======================= | 31%
- |
- |======================== | 31%
- |
- |======================== | 32%
- |
- |========================= | 32%
- |
- |========================= | 33%
- |
- |========================= | 34%
- |
- |========================== | 34%
- |
- |========================== | 35%
- |
- |=========================== | 35%
- |
- |=========================== | 36%
- |
- |============================ | 36%
- |
- |============================ | 37%
- |
- |============================ | 38%
- |
- |============================= | 38%
- |
- |============================= | 39%
- |
- |============================== | 39%
- |
- |============================== | 40%
- |
- |=============================== | 40%
- |
- |=============================== | 41%
- |
- |================================ | 41%
- |
- |================================ | 42%
- |
- |================================ | 43%
- |
- |================================= | 43%
- |
- |================================= | 44%
- |
- |================================== | 44%
- |
- |================================== | 45%
- |
- |=================================== | 45%
- |
- |=================================== | 46%
- |
- |=================================== | 47%
- |
- |==================================== | 47%
- |
- |==================================== | 48%
- |
- |===================================== | 48%
- |
- |===================================== | 49%
- |
- |====================================== | 49%
- |
- |====================================== | 50%
- |
- |====================================== | 51%
- |
- |======================================= | 51%
- |
- |======================================= | 52%
- |
- |======================================== | 52%
- |
- |======================================== | 53%
- |
- |========================================= | 53%
- |
- |========================================= | 54%
- |
- |========================================= | 55%
- |
- |========================================== | 55%
- |
- |========================================== | 56%
- |
- |=========================================== | 56%
- |
- |=========================================== | 57%
- |
- |============================================ | 57%
- |
- |============================================ | 58%
- |
- |============================================ | 59%
- |
- |============================================= | 59%
- |
- |============================================= | 60%
- |
- |============================================== | 60%
- |
- |============================================== | 61%
- |
- |=============================================== | 61%
- |
- |=============================================== | 62%
- |
- |================================================ | 62%
- |
- |================================================ | 63%
- |
- |================================================ | 64%
- |
- |================================================= | 64%
- |
- |================================================= | 65%
- |
- |================================================== | 65%
- |
- |================================================== | 66%
- |
- |=================================================== | 66%
- |
- |=================================================== | 67%
- |
- |=================================================== | 68%
- |
- |==================================================== | 68%
- |
- |==================================================== | 69%
- |
- |===================================================== | 69%
- |
- |===================================================== | 70%
- |
- |====================================================== | 70%
- |
- |====================================================== | 71%
- |
- |====================================================== | 72%
- |
- |======================================================= | 72%
- |
- |======================================================= | 73%
- |
- |======================================================== | 73%
- |
- |======================================================== | 74%
- |
- |========================================================= | 74%
- |
- |========================================================= | 75%
- |
- |========================================================= | 76%
- |
- |========================================================== | 76%
- |
- |========================================================== | 77%
- |
- |=========================================================== | 77%
- |
- |=========================================================== | 78%
- |
- |============================================================ | 78%
- |
- |============================================================ | 79%
- |
- |============================================================ | 80%
- |
- |============================================================= | 80%
- |
- |============================================================= | 81%
- |
- |============================================================== | 81%
- |
- |============================================================== | 82%
- |
- |=============================================================== | 82%
- |
- |=============================================================== | 83%
- |
- |=============================================================== | 84%
- |
- |================================================================ | 84%
- |
- |================================================================ | 85%
- |
- |================================================================= | 85%
- |
- |================================================================= | 86%
- |
- |================================================================== | 86%
- |
- |================================================================== | 87%
- |
- |================================================================== | 88%
- |
- |=================================================================== | 88%
- |
- |=================================================================== | 89%
- |
- |==================================================================== | 89%
- |
- |==================================================================== | 90%
- |
- |===================================================================== | 90%
- |
- |===================================================================== | 91%
- |
- |====================================================================== | 91%
- |
- |====================================================================== | 92%
- |
- |====================================================================== | 93%
- |
- |======================================================================= | 93%
- |
- |======================================================================= | 94%
- |
- |======================================================================== | 94%
- |
- |======================================================================== | 95%
- |
- |========================================================================= | 95%
- |
- |========================================================================= | 96%
- |
- |========================================================================= | 97%
- |
- |========================================================================== | 97%
- |
- |========================================================================== | 98%
- |
- |=========================================================================== | 98%
- |
- |=========================================================================== | 99%
- |
- |============================================================================| 99%
- |
- |============================================================================| 100%
-#> Compiling Stan program...
-#> Start sampling
+``` r
+set.seed(2)
+library(flocker)
+d <- simulate_flocker_data(
+ augmented = TRUE
+ )
+fd = make_flocker_data(
+ d$obs, d$unit_covs, d$event_covs,
+ type = "augmented", n_aug = 100,
+ quiet = TRUE
+ )
+fm <- flock(
+ f_occ = ~ (1 | ff_species),
+ f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | ff_species),
+ flocker_data = fd,
+ augmented = TRUE,
+ cores = 4,
+ refresh = 0
+ )
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
@@ -395,7 +54,7 @@ Fitting the data-augmented model in `flocker` requires passing the observed data
Here, the random effect of species is specified using the special grouping keyword `ff_species` (names beginning with `ff_` are reserved in `flocker` and are not allowed as names for user-supplied covariates).
-```r
+``` r
summary(fm)
#> Family: occupancy_augmented
#> Links: mu = identity; occ = identity; Omega = identity
@@ -406,24 +65,24 @@ summary(fm)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
-#> Group-Level Effects:
+#> Multilevel Hyperparameters:
#> ~ff_species (Number of levels: 128)
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
-#> sd(Intercept) 1.59 0.26 1.16 2.17 1.00 753 1572
-#> sd(uc1) 0.25 0.11 0.06 0.48 1.00 1303 1041
-#> sd(ec1) 0.39 0.09 0.23 0.60 1.00 2010 2878
-#> sd(occ_Intercept) 1.59 0.32 1.09 2.36 1.01 881 1406
-#> cor(Intercept,uc1) 0.74 0.22 0.14 0.97 1.00 2155 1983
-#> cor(Intercept,ec1) 0.57 0.19 0.11 0.85 1.00 2915 2617
-#> cor(uc1,ec1) 0.58 0.26 -0.05 0.94 1.01 913 1378
+#> sd(Intercept) 1.58 0.27 1.15 2.19 1.00 961 1454
+#> sd(uc1) 0.25 0.11 0.06 0.49 1.00 1244 1032
+#> sd(ec1) 0.39 0.09 0.23 0.58 1.00 2281 2648
+#> sd(occ_Intercept) 1.60 0.33 1.10 2.38 1.00 912 1233
+#> cor(Intercept,uc1) 0.74 0.22 0.15 0.98 1.00 2157 2522
+#> cor(Intercept,ec1) 0.56 0.20 0.10 0.86 1.00 2935 2678
+#> cor(uc1,ec1) 0.57 0.27 -0.07 0.94 1.00 793 1486
#>
-#> Population-Level Effects:
+#> Regression Coefficients:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
-#> Intercept 0.47 0.32 -0.18 1.09 1.01 368 818
-#> occ_Intercept 0.21 0.36 -0.52 0.87 1.01 454 983
-#> Omega_Intercept -1.26 0.22 -1.70 -0.85 1.00 6322 2762
-#> uc1 0.01 0.08 -0.14 0.16 1.00 1061 1985
-#> ec1 0.07 0.09 -0.11 0.25 1.00 1479 2479
+#> Intercept 0.48 0.31 -0.14 1.10 1.01 351 528
+#> occ_Intercept 0.24 0.34 -0.47 0.89 1.01 525 1094
+#> Omega_Intercept -1.27 0.22 -1.70 -0.85 1.00 7528 3124
+#> uc1 0.01 0.07 -0.14 0.16 1.00 797 1472
+#> ec1 0.07 0.09 -0.10 0.25 1.00 1082 1803
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
diff --git a/vignettes/augmented_models.Rmd.orig b/vignettes/augmented_models.Rmd.orig
index 8c74705..1986389 100644
--- a/vignettes/augmented_models.Rmd.orig
+++ b/vignettes/augmented_models.Rmd.orig
@@ -39,14 +39,16 @@ d <- simulate_flocker_data(
)
fd = make_flocker_data(
d$obs, d$unit_covs, d$event_covs,
- type = "augmented", n_aug = 100
+ type = "augmented", n_aug = 100,
+ quiet = TRUE
)
fm <- flock(
f_occ = ~ (1 | ff_species),
f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | ff_species),
flocker_data = fd,
augmented = TRUE,
- cores = 4
+ cores = 4,
+ refresh = 0
)
```
diff --git a/vignettes/flocker_tutorial.Rmd b/vignettes/flocker_tutorial.Rmd
index 5c2ab09..85b5967 100644
--- a/vignettes/flocker_tutorial.Rmd
+++ b/vignettes/flocker_tutorial.Rmd
@@ -1,7 +1,7 @@
---
title: "Fitting occupancy models with flocker"
author: Jacob Socolar & Simon Mills
-date: "2023-10-20"
+date: "2026-02-24"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Fitting occupancy models with flocker}
@@ -13,7 +13,7 @@ vignette: >
-`flocker` is an R package for fitting [occupancy models](https://jsocolar.github.io/closureOccupancy/) that incorporate
+`flocker` is an R package for fitting [occupancy models](https://jsocolar.github.io/closureOccupancy/) that incorportate
sophisticated effects structures using simple formula-based syntax. `flocker` is
built on R package `brms`, which in turn is a front-end for `Stan`.
@@ -64,7 +64,7 @@ To request features or report bugs (much appreciated!), please [open an issue on
To make `flocker` and `brms` functions globally available within an R session
run:
-```r
+``` r
library(flocker)
library(brms)
set.seed(1)
@@ -78,7 +78,7 @@ arguments will simulate example data for other likelihoods, including
multi-season and data-augmented occupancy models.
-```r
+``` r
d <- simulate_flocker_data()
```
@@ -126,7 +126,7 @@ To pass data to `flocker`, we first pass the output from
and apply the necessary formatting:
-```r
+``` r
fd_rep_varying <- make_flocker_data(
obs = d$obs,
unit_covs = d$unit_covs,
@@ -153,7 +153,7 @@ as one-sided formulas to the relevant arguments of `flock()` (`f_occ`, `f_det`,
`f_col`, `f_ex`, and `f_auto` as applicable).
-```r
+``` r
rep_varying <- flock(
f_occ = ~ uc1 + (1 + uc1 | species),
f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | species),
@@ -183,7 +183,7 @@ formatting; it is insufficient to omit event covariates from the detection
formula supplied to `flock()` after formatting the data for a rep-varying model.
-```r
+``` r
fd_rep_constant <- make_flocker_data(
obs = d$obs,
unit_covs = d$unit_covs
@@ -203,7 +203,7 @@ rep_constant <- flock(
Note that within-chain parallelization is available (uniquely so) for the
rep-constant mode:
-```r
+``` r
rep_constant <- flock(
f_occ = ~ uc1 + (1 + uc1 | species),
f_det = ~ uc1 + (1 + uc1 | species),
@@ -229,7 +229,7 @@ model with explicit inits, but we will be able to fit other models
types).
-```r
+``` r
multi_data <- simulate_flocker_data(
n_season = 3,
n_pt = 300,
@@ -253,7 +253,7 @@ occupancy in the first timestep. Depending on hardware, fitting this model might
take several minutes.
-```r
+``` r
multi_colex <- flock(
f_occ = ~ uc1,
f_det = ~ uc1 + ec1,
@@ -272,7 +272,7 @@ Here is the colonization-extinction model using equilibrium occupancy
probabilities in the first timestep:
-```r
+``` r
multi_colex_eq <- flock(
f_det = ~ uc1 + ec1,
f_col = ~ uc1,
@@ -293,7 +293,7 @@ the formula `f_auto = ~ 1`, but it is fine to relax this constraint and use,
e.g. `f_auto = ~ uc1`.
-```r
+``` r
multi_auto <- flock(
f_occ = ~ uc1,
f_det = ~ uc1 + ec1,
@@ -312,7 +312,7 @@ And the autologistic model with equilibrium occupancy probabilities in the
first timestep:
-```r
+``` r
multi_auto_eq <- flock(
f_det = ~ uc1 + ec1,
f_col = ~ uc1,
@@ -338,7 +338,7 @@ along the second, and species along the third. Additionally, we must supply the
pseudospecies to augment the data with.
-```r
+``` r
augmented_data <- simulate_flocker_data(
augmented = TRUE
)
@@ -356,9 +356,6 @@ augmented <- flock(
silent = 2,
refresh = 0
)
-#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
-#> Running the chains for more iterations may help. See
-#> https://mc-stan.org/misc/warnings.html#bulk-ess
```
Here, the random effect of species is specified using the special grouping
@@ -382,7 +379,7 @@ All post-processing functions from `brms` work on single-season rep-constant
models, but do not work on any other model types. For example:
-```r
+``` r
predictions_rep_constant <- brms::posterior_predict(rep_constant)
loo_rep_constant <- brms::loo(rep_constant, moment_match = TRUE)
brms::conditional_effects(rep_constant)
@@ -397,7 +394,7 @@ probability that a given (pseudo)species occurs in the metacommunity) are
available via `fitted_flocker`. For example:
-```r
+``` r
fitted_flocker(rep_constant)
fitted_flocker(rep_varying)
fitted_flocker(multi_colex)
@@ -415,7 +412,7 @@ dimension.
The function `get_Z()` returns the posterior distribution of occupancy probabilities across the closure-units. The shape of the output depends on the class of model, and is an array in the shape of the first visit in `obs` as passed to `make_flocker_data`, with posterior iterations stacked along the final dimension. Thus, for a single-season rep-varying model, the output is a matrix where rows are posterior iterations, columns are closure-units, and values are draws from the posterior distribution of occupancy probabilities:
-```r
+``` r
get_Z(rep_varying)
```
@@ -443,7 +440,7 @@ third dimension is iterations, and values are `1`, `0`, or `NA`, representing
detection, nondetection, and no corresponding sampling event. For example:
-```r
+``` r
predict_flocker(rep_varying)
```
@@ -470,16 +467,15 @@ the binomial coefficient when computing the log-likelihood for the rep-constant
model.
-```r
+``` r
loo_compare_flocker(
list(rep_constant, rep_varying)
)
#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
-
#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
#> elpd_diff se_diff
#> model2 0.0 0.0
-#> model1 -171.1 17.2
+#> model1 -121.0 16.1
```
Likewise, we can compare the four flavors of multi-season model that we fit
@@ -489,15 +485,15 @@ probabilities (rather than equilibrium). As expected, the `multi_colex` model
performs best:
-```r
+``` r
loo_compare_flocker(
list(multi_colex, multi_colex_eq, multi_auto, multi_auto_eq)
)
#> elpd_diff se_diff
#> model1 0.0 0.0
-#> model3 -7.8 4.1
-#> model2 -27.1 7.2
-#> model4 -32.9 7.9
+#> model2 -16.5 6.2
+#> model3 -41.7 9.5
+#> model4 -57.5 10.8
```
Flocker also provides the function `loo_flocker()` to return a table of
@@ -518,51 +514,51 @@ model fitting using `get_flocker_prior()` which is a drop-in replacement for
`brms::get_prior()`.
-```r
+``` r
get_flocker_prior(
f_occ = ~ uc1 + (1 + uc1 | species),
f_det = ~ uc1 + ec1 + (1 + uc1 + ec1 | species),
flocker_data = fd_rep_varying
)
-#> prior class coef group resp dpar nlpar lb ub source
-#> (flat) b default
-#> (flat) b ec1 (vectorized)
-#> (flat) b uc1 (vectorized)
-#> lkj(1) cor default
-#> lkj(1) cor species (vectorized)
-#> student_t(3, 0, 2.5) Intercept default
-#> student_t(3, 0, 2.5) sd 0 default
-#> student_t(3, 0, 2.5) sd species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized)
-#> (flat) b occ default
-#> (flat) b uc1 occ (vectorized)
-#> (flat) Intercept occ default
-#> student_t(3, 0, 2.5) sd occ 0 default
-#> student_t(3, 0, 2.5) sd species occ 0 (vectorized)
-#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized)
-#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized)
+#> prior class coef group resp dpar nlpar lb ub tag source
+#> (flat) b default
+#> (flat) b ec1 (vectorized)
+#> (flat) b uc1 (vectorized)
+#> lkj(1) cor default
+#> lkj(1) cor species (vectorized)
+#> student_t(3, 0, 2.5) Intercept default
+#> student_t(3, 0, 2.5) sd 0 default
+#> student_t(3, 0, 2.5) sd species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized)
+#> (flat) b occ default
+#> (flat) b uc1 occ (vectorized)
+#> (flat) Intercept occ default
+#> student_t(3, 0, 2.5) sd occ 0 default
+#> student_t(3, 0, 2.5) sd species occ 0 (vectorized)
+#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized)
+#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized)
brms::prior_summary(rep_varying)
-#> prior class coef group resp dpar nlpar lb ub source
-#> (flat) b default
-#> (flat) b ec1 (vectorized)
-#> (flat) b uc1 (vectorized)
-#> (flat) b occ default
-#> (flat) b uc1 occ (vectorized)
-#> student_t(3, 0, 2.5) Intercept default
-#> (flat) Intercept occ default
-#> lkj_corr_cholesky(1) L default
-#> lkj_corr_cholesky(1) L species (vectorized)
-#> student_t(3, 0, 2.5) sd 0 default
-#> student_t(3, 0, 2.5) sd occ 0 default
-#> student_t(3, 0, 2.5) sd species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized)
-#> student_t(3, 0, 2.5) sd species occ 0 (vectorized)
-#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized)
-#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized)
+#> prior class coef group resp dpar nlpar lb ub tag source
+#> (flat) b default
+#> (flat) b ec1 (vectorized)
+#> (flat) b uc1 (vectorized)
+#> (flat) b occ default
+#> (flat) b uc1 occ (vectorized)
+#> student_t(3, 0, 2.5) Intercept default
+#> (flat) Intercept occ default
+#> lkj_corr_cholesky(1) L default
+#> lkj_corr_cholesky(1) L species (vectorized)
+#> student_t(3, 0, 2.5) sd 0 default
+#> student_t(3, 0, 2.5) sd occ 0 default
+#> student_t(3, 0, 2.5) sd species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd ec1 species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd Intercept species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd uc1 species 0 (vectorized)
+#> student_t(3, 0, 2.5) sd species occ 0 (vectorized)
+#> student_t(3, 0, 2.5) sd Intercept species occ 0 (vectorized)
+#> student_t(3, 0, 2.5) sd uc1 species occ 0 (vectorized)
```
Note that in examples like the above, with covariates shared between both the occupancy and detection model formulas (`uc1` in this example), then the prior table will contain two entries
@@ -571,7 +567,7 @@ one for the parameter governing detection. Specifying priors for parameters in f
other than detection can be done with reference to the `dpar` column, e.g.:
-```r
+``` r
user_prior <- c(brms::set_prior("normal(0, 1)", coef = "uc1"),
brms::set_prior("normal(0, 3)", coef = "uc1", dpar = "occ"))
```
@@ -589,7 +585,7 @@ the intercepts (flat on the probability scale) when all predictors are held at
their means and a moderately regularizing prior on the coefficients:
-```r
+``` r
rep_varying_prior1 <- flock(
f_occ = ~ uc1,
f_det = ~ ec1,
@@ -604,19 +600,19 @@ rep_varying_prior1 <- flock(
refresh = 0
)
brms::prior_summary(rep_varying_prior1)
-#> prior class coef group resp dpar nlpar lb ub source
-#> normal(0,2) b user
-#> normal(0,2) b ec1 (vectorized)
-#> normal(0,2) b occ user
-#> normal(0,2) b uc1 occ (vectorized)
-#> logistic(0,1) Intercept user
-#> logistic(0,1) Intercept occ user
+#> prior class coef group resp dpar nlpar lb ub tag source
+#> normal(0,2) b user
+#> normal(0,2) b ec1 (vectorized)
+#> normal(0,2) b occ user
+#> normal(0,2) b uc1 occ (vectorized)
+#> logistic(0,1) Intercept user
+#> logistic(0,1) Intercept occ user
```
Here is an example where we set informative priors on the intercepts when all covariates are fixed to zero and the same moderately regularizing prior on the coefficients:
-```r
+``` r
rep_varying_prior2 <- flock(
f_occ = ~ 0 + Intercept + uc1,
f_det = ~ 0 + Intercept + ec1,
@@ -631,19 +627,19 @@ rep_varying_prior2 <- flock(
refresh = 0
)
brms::prior_summary(rep_varying_prior2)
-#> prior class coef group resp dpar nlpar lb ub source
-#> normal(0,2) b user
-#> normal(0,2) b ec1 (vectorized)
-#> normal(1, 1) b Intercept user
-#> normal(0,2) b occ user
-#> normal(-1, 1) b Intercept occ user
-#> normal(0,2) b uc1 occ (vectorized)
+#> prior class coef group resp dpar nlpar lb ub tag source
+#> normal(0,2) b user
+#> normal(0,2) b ec1 (vectorized)
+#> normal(1, 1) b Intercept user
+#> normal(0,2) b occ user
+#> normal(-1, 1) b Intercept occ user
+#> normal(0,2) b uc1 occ (vectorized)
```
### Model formulas
Simple formulas follow the same syntax as R's `lm()` function. For example:
-```r
+``` r
mod1 <- flock(
f_occ = ~ uc1 + (1|species),
f_det = ~ 1,
@@ -656,7 +652,7 @@ Simple random effects follow `lme4` syntax, including advanced `lme4` syntax
like `||` for uncorrelated effects and `/` and `:` for expansion of multiple
grouping terms. Here's a simple example:
-```r
+``` r
mod2 <- flock(
f_occ = ~ uc1 + (1|species),
f_det = ~ 1,
@@ -670,7 +666,7 @@ formulas, but as uncorrelated *between* formulas. For example, the code below
estimates a single correlation for the intercept and slope in the occupancy
sub-model.
-```r
+``` r
mod3 <- flock(
f_occ = ~ uc1 + (1 + uc1 | species),
f_det = ~ ec1 + (1 | species),
@@ -684,7 +680,7 @@ intercepts in the occupancy and detection sub-models, and correlated effects of
`sc1` on occupancy and `vc1` on detection, but no correlations between the
intercepts and the slopes in either sub-model:
-```r
+``` r
mod4 <- flock(
f_occ = ~ uc1 + (1 |g1| species) + (0 + uc1 |g2| species),
f_det = ~ ec1 + (1 |g1| species) + (0 + ec1 |g2| species),
@@ -700,7 +696,7 @@ Via `brms`, `flocker` supports Gaussian processes of arbitrary dimensionality
monotonic effects of ordinal factors via `brms::mo()` ([see here](https://paul-buerkner.github.io/brms/articles/brms_monotonic.html)). For
example:
-```r
+``` r
mod5 <- flock(
f_occ = ~ s(uc1),
f_det = ~ t2(uc1, ec1),
@@ -729,7 +725,7 @@ include multiple phylogenetic effects within a single occupancy model (see
[Mills et al. 2022](https://doi.org/10.1002/ecy.3867)).
-```r
+``` r
# simulate an example phylogeny
phylogeny <- ape::rtree(30, tip.label = paste0("sp_", 1:30))
@@ -774,7 +770,7 @@ transect, yielding a one-dimensional analog of a spatial autologistic occupancy
model.
A second caution is to remind users that in multi-species models, users will
-likely want to fit separate spatial terms by species ([Doser et al 2022](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.13897)).
+likely want to fit separate spatial terms by species ([Doser et al 2022](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.13897)).
For Gaussian processes, this can be achieved via the `by` argument to
`brms::gp()`. For some conditional autoregressive structures (those that allow
disconnected islands), this can be achieved by passing a block-diagonal
diff --git a/vignettes/nonlinear_models.Rmd b/vignettes/nonlinear_models.Rmd
index 144a63d..6c08786 100644
--- a/vignettes/nonlinear_models.Rmd
+++ b/vignettes/nonlinear_models.Rmd
@@ -1,7 +1,7 @@
---
title: "Nonlinear models in flocker"
author: "Jacob Socolar"
-date: "2023-10-21"
+date: "2026-02-24"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Nonlinear models in flocker}
@@ -23,10 +23,10 @@ details.
This vignette focuses on more complicated nonlinear models that require the use
of special nonlinear `brms` formulas. We showcase two models. The first fits
-a parametric nonlinear predictor. The second fits a model with a spatially
+a parameteric nonlinear predictor. The second fits a model with a spatially
varying coefficient that is given a gaussian process prior.
-## Parametric nonlinear predictor
+## Parameteric nonlinear predictor
In this scenario, we consider a model where the response is a specific nonlinear
parametric function whose parameters are fitted and might or might not depend on covariates. Suppose for example that an expanding population of a territorial
species undergoes logistic growth, and also that some unknown proportion of
@@ -38,7 +38,7 @@ inflection point. At multiple discrete times, we randomly sample several sites
to survey, and survey each of those sites over several repeat visits.
-```r
+``` r
library(flocker); library(brms)
set.seed(3)
@@ -65,6 +65,7 @@ fd <- make_flocker_data(
unit_covs = data.frame(t = data[,c("t")]),
event_covs <- list(dummy = matrix(rnorm(n_visit*nrow(data)), ncol = 3))
)
+
```
We wish to fit an occupancy model that recovers the unknown parameters $L$, $k$,
@@ -96,7 +97,7 @@ obligatory to pass the entire formula for all distributional parameters as a
single `brmsformula` object. This means in turn that the user must be familiar
with `flocker`'s internal naming conventions for all of the relevant
distributional parameters (`det` and one or more of `occ`, `colo`, `ex`,
-`autologistic`, `Omega`). If fitting a data-augmented model, it will be required
+`autologistic`, `Omega`). If fitting a data-augmented model, it will be requried
to pass the `Omega ~ 1` formula within the `brmsformula` (When passing the
traditional one-sided formula to `f_det`, `flocker` includes the formula for
`Omega` internally and automatically).
@@ -109,7 +110,7 @@ they make no contribution to the likelihood.
With all of that said, we can go ahead and fit this model!
-```r
+``` r
fit <- flock(f_det = brms::bf(
det ~ 1 + dummy,
occ ~ log(L/(1 + exp(-k*(t - t0)) - L)),
@@ -130,7 +131,7 @@ fit <- flock(f_det = brms::bf(
```
-```r
+``` r
summary(fit)
#> Family: occupancy_single
#> Links: mu = identity; occ = identity
@@ -143,13 +144,13 @@ summary(fit)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
-#> Population-Level Effects:
+#> Regression Coefficients:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
-#> Intercept -0.91 0.13 -1.18 -0.66 1.00 2535 2318
-#> L_Intercept 0.50 0.10 0.38 0.77 1.00 1241 864
-#> k_Intercept 0.19 0.08 0.07 0.36 1.00 1283 1433
-#> t0_Intercept -5.90 3.38 -10.34 3.17 1.00 1248 921
-#> dummy 0.00 0.08 -0.15 0.15 1.00 2620 2236
+#> Intercept -0.91 0.13 -1.15 -0.66 1.00 2377 2339
+#> L_Intercept 0.51 0.10 0.38 0.75 1.00 1624 1252
+#> k_Intercept 0.18 0.08 0.07 0.37 1.00 1576 1945
+#> t0_Intercept -5.75 3.44 -10.32 3.48 1.00 1466 1157
+#> dummy 0.00 0.08 -0.15 0.15 1.00 2417 2072
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
@@ -172,7 +173,7 @@ quite a few of data points to constrain the standard deviation of the Gaussian
process, so we simulate with 2000 sites:
-```r
+``` r
set.seed(1)
n <- 2000 # sample size
lscale <- 0.3 # square root of l of the gaussian kernel
@@ -207,6 +208,7 @@ obs <- matrix(nrow = n, ncol = n_visit)
for(j in 1:n_visit){
obs[,j] <- gp_data$Z * rbinom(n, 1, boot::inv.logit(det_intercept))
}
+
```
@@ -214,7 +216,7 @@ And here's how we can fit this model in `flocker`! Because we have a large numbe
computational efficiency.
-```r
+``` r
fd2 <- make_flocker_data(obs = obs, unit_covs = gp_data[, c("x", "y", "covariate")])
svc_mod <- flock(
f_det = brms::bf(
@@ -227,10 +229,11 @@ svc_mod <- flock(
flocker_data = fd2,
cores = 4
)
+
```
-```r
+``` r
summary(svc_mod)
#> Family: occupancy_single_C
#> Links: mu = identity; occ = identity
@@ -242,15 +245,15 @@ summary(svc_mod)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
-#> Gaussian Process Terms:
+#> Gaussian Process Hyperparameters:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
-#> sdgp(g_gpxy) 1.66 0.75 0.73 3.66 1.00 2849 3317
-#> lscale(g_gpxy) 0.23 0.11 0.08 0.50 1.00 3850 3141
+#> sdgp(g_gpxy) 1.65 0.72 0.73 3.51 1.00 3024 3017
+#> lscale(g_gpxy) 0.23 0.11 0.08 0.50 1.00 4346 3149
#>
-#> Population-Level Effects:
+#> Regression Coefficients:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
-#> Intercept -1.04 0.06 -1.15 -0.94 1.00 5532 2810
-#> occint_Intercept 0.09 0.09 -0.08 0.27 1.00 5736 3182
+#> Intercept -1.04 0.06 -1.15 -0.93 1.00 6193 3413
+#> occint_Intercept 0.09 0.09 -0.08 0.26 1.00 6464 3223
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential