diff --git a/.Rbuildignore b/.Rbuildignore
index a8fb882..8721688 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,6 @@
 ^docs$
 ^pkgdown$
 ^\.github$
+^README\.Rmd$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index 457525e..54f27cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
 .DS_Store
 .quarto
 docs
+inst/doc
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
index 45a37b1..d99ccdd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: lighthouse.codebook
 Title: Summarize Datasets for Lighthouse Institute Projects
-Version: 0.2.3
+Version: 0.3.0
 Authors@R: c(
     person("Casey", "Sarapas", 
            email = "ccsarapas@chestnut.org", 
@@ -35,3 +35,7 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.3
 URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/
 BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues
+Suggests: 
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 7a19b66..af707b1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,7 +4,9 @@ S3method(nan_to_na,data.frame)
 S3method(nan_to_na,default)
 S3method(nan_to_na,list)
 export(cb_create)
+export(cb_create_options)
 export(cb_create_redcap)
+export(cb_create_redcap_options)
 export(cb_create_spss)
 export(cb_get_data)
 export(cb_summarize_categorical)
diff --git a/NEWS.md b/NEWS.md
index d928098..5e0ed4d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,63 @@
+# lighthouse.codebook 0.3.0
+
+## Added
+
+* Added options to `cb_write()` to show grouping variables for categorical summaries 
+  in rows (which was previously only possible for numeric summaries.) `cb_write()` 
+  now includes three arguments for showing some or all grouping variables in rows:
+  `group_rows` controls both numeric and categorical summaries, while `group_rows_numeric` 
+  and `group_rows_categorical` control numeric and categorical summaries, respectively.
+
+* Added an introductory vignette (see `vignette("lighthouse-codebook")`).
+
+* Expanded the README.
+
+## Changed
+
+* `cb_create()`, `cb_create_spss()`, and `cb_create_redcap()` now use a single `.options` 
+  argument for less commonly used settings. Arguments for those settings have been 
+  moved into an options object created with `cb_create_options()` or `cb_create_redcap_options()`.
+  ```r
+  # previously
+  cb <- cb_create(
+    dat, metadata = metadata, 
+    .rmv_html = FALSE, .include_r_classes = TRUE
+  )
+  
+  # now
+  cb <- cb_create(
+    dat, metadata = metadata, 
+    .options = cb_create_options(rmv_html = FALSE, include_r_classes = TRUE)
+  )
+  
+  ### `cb_create_spss()` also uses `cb_create_options()`
+  # previously
+  cb_spss <- cb_create_spss(dat_spss, .rmv_line_breaks = FALSE)
+
+  # now
+  cb_spss <- cb_create_spss(
+    dat_spss, 
+    .options = cb_create_options(rmv_line_breaks = FALSE)
+  )
+  
+  ### Note that `cb_create_redcap()` has its own options constructor
+  # previously
+  cb <- cb_create_redcap(
+    dat_rc, metadata = metadata_rc, 
+    .form = NULL, .rmv_html = FALSE
+  )
+  
+  # now
+  cb <- cb_create_redcap(
+    dat_rc, metadata = metadata_rc, 
+    .options = cb_create_redcap_options(form = NULL, rmv_html = FALSE)
+  )
+  ```
+
+## Removed
+
+* The `format = "values"` option in `cb_get_data()` has been removed (see #26).
+
 # lighthouse.codebook 0.2.3
 
 * `cb_create_spss()` now accepts `.rmv_html` and `.rmv_line_breaks` arguments, consistent
diff --git a/R/cb_create.r b/R/cb_create.r
index db26640..7e0fdab 100644
--- a/R/cb_create.r
+++ b/R/cb_create.r
@@ -8,8 +8,8 @@
 #' [`cb_summarize_text()`]).
 #'
 #' @param data A data frame.
-#' @param metadata A data frame containing metadata, such as variable labels and value
-#' labels.
+#' @param metadata A data frame containing metadata, such as variable labels and 
+#'   value labels.
 #' @param ... Additional columns from `metadata` to preserve in the final codebook.
 #'   New names can be assigned by passing named arguments. Columns for variable
 #'   name, form, variable label, and value labels are included by default.
@@ -23,47 +23,33 @@
 #'   right-hand side. If left-hand side is omitted, defaults to `tidyselect::everything()`.
 #'   See "Specifying user missing values" below for examples.
 #' @param .split_var_labels A [`tidyselect`][dplyr_tidy_select] expression or list of tidyselect
-#'   expressions, indicating (sets of) variable labels with a common stem that should 
+#'   expressions, indicating (sets of) variable labels with a common stem that should
 #'   be extracted into a separate column.
-#' @param .include_r_classes Include a column listing class(es) of each variable? 
-#'   (e.g., `"factor"`, `"POSIXct, POSIXt"`.)
-#' @param .include_types Include a column listing simplified type for each variable?
-#'   (e.g,. `"categorical"`, `"date-time"`.)
 #' @param .val_labs_sep1,.val_labs_sep2 Regex patterns separating value labels
-#'   in `metadata`. `.val_labs_sep1` separates values from labels, and `.val_labs_sep2` 
-#'   separates value/label pairs. e.g., if value labels are in format `"1, First label|2, Second label"`,
-#'   set `.val_labs_sep1` to `","` and `.val_labs_sep2` to `"\\|"`.
-#' @param .rmv_html Should HTML tags be removed from metadata (e.g., from variable 
-#'   and value labels)?
-#' @param .rmv_line_breaks Should line breaks be removed from metadata (e.g., from
-#'   variable and value labels)? If `TRUE`, line breaks will be replaced with `" / "`.
-#' @param .user_missing_col Include value labels for user missing values in a separate
-#'   column? The default, `"if_any"`, adds the column only if user missings are
-#'   specified for at least one variable.
-#' @param .user_missing_conflict If different labels for a value are provided in
-#'   metadata and user missings, which should be used?
-#' @param .user_missing_incompatible How to handle variables specified in `.user_missing` 
-#'   that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?
-#' 
+#'   in `metadata`. `.val_labs_sep1` separates values from labels, and `.val_labs_sep2`
+#'   separates value/label pairs from one another. e.g., if value labels are in 
+#'   the format `"1, First label|2, Second label"`, set `.val_labs_sep1` to `","` 
+#'   and `.val_labs_sep2` to `"\\|"`.
+#' @param .options Additional options to use for codebook creation. Must be the result 
+#'   from a call to `cb_create_options()`. See that function's help page for available 
+#'   options.
+#'
 #' @return
-#' An `"li_codebook"` object, consisting of (1) a tibble summarizing the passed
-#' dataset and (2) attributes containing the passed dataset (in several formats)
-#' and additional metadata. Specifically:
-#' - A tibble with columns:
-#'     - `name`: variable name
-#'     - `type`: optional column containing simplified variable type
-#'     - `class`: optional column containing class(es) of each variable
-#'     - `label_stem`: optional column containing variable label stems, if any variables 
-#'       are specified in `.split_var_labels`
-#'     - `label`: variable label
-#'     - `values`: values, with labels if applicable
-#'     - `user_missing`: optional column, depending on value of `.user_missing_col`,
-#'        showing user missing values, with labels if applicable
-#'     - `missing`: proportion missing
-#'     - additional columns if specified in `...`
-#' - Attributes:
-#'     - Transformed versions of the passed dataset. See [`cb_get_data()`]
-#'     - Lookup tables and other metadata used internally.
+#' An `"li_codebook"` object, consisting of a tibble summarizing the passed
+#' dataset and attributes containing additional metadata. The tibble includes columns:
+#' - `name`: variable name
+#' - `type`: column containing simplified variable type
+#' - `class`: optional column containing class(es) of each variable
+#' - `label_stem`: optional column containing variable label stems, if any variables
+#'   are specified in `.split_var_labels`
+#' - `label`: variable label
+#' - `values`: values, with labels if applicable
+#' - `user_missing`: optional column showing user missing values, with labels 
+#'   if applicable. By default, this column is included only if user missings 
+#'   are specified for at least one variable. This behavior can be changed using 
+#'   the `user_missing_col` argument to `cb_create_options()`.
+#' - `missing`: proportion missing
+#' - additional columns if specified in `...`
 #'
 #' @section Specifying user missing values:
 #' User missing values are defined by passing a formula or list of formulas to the
@@ -72,7 +58,7 @@
 #' \preformatted{
 #' cb <- cb_create(data, metadata, .user_missing = var1 ~ 99)
 #' }
-#' The same user missings can be applied to multiple variables using [tidyselect][dplyr_tidy_select] 
+#' The same user missings can be applied to multiple variables using [tidyselect][dplyr_tidy_select]
 #' expressions.
 #' \preformatted{
 #' # for variables `var1` through `var5`
@@ -80,7 +66,7 @@
 #'
 #' # for all numeric variables, plus `var6` and `var7`
 #' .user_missing = c(where(is.numeric), var6, var7) ~ c(-9, -8, -7)
-#' 
+#'
 #' # omitted left-hand side defaults to `tidyselect::everything()`
 #' .user_missing = ~ -99
 #' }
@@ -96,14 +82,16 @@
 #' \preformatted{
 #' .user_missing = ~ c(Declined = -98, "Not applicable" = -99)
 #' }
-#' If labels set in `.user_missing` conflict with those in `metadata`, `.user_missing_conflict`
-#' controls which labels are used.
-#' 
-#' User missing values are not compatible with logical, date, or datetime (POSIXt)
-#' variables. By default, these variables will be ignored if specified in `.user_missing`.
-#' (i.e., user missing values will be applied only to compatible variables.) This behavior
-#' can be changed using the `.user_missing_incompatible` argument.
-#' 
+#' If labels set in `.user_missing` conflict with those in `metadata`, the `user_missing_conflict`
+#' argument to `cb_create_options()` controls which labels are used.
+#'
+#' User missings may be set for numeric, character, factor/ordered factor, and haven_labelled/haven_labelled_spss
+#' vectors. For factors, user missings are set based on factor labels (not the underlying
+#' integer codes). For `"haven_labelled"` vectors, user missings are set based on
+#' values (not value labels). By default, variables with incompatible classes (e.g.,
+#' logical, Date, POSIXt) will be ignored if specified in `.user_missing`. This
+#' behavior can be changed using the `user_missing_incompatible` argument to `cb_create_options()`.
+#'
 #' @examples
 #' diamonds2 <- ggplot2::diamonds |>
 #'   transform(
@@ -115,18 +103,18 @@
 #'       right = FALSE
 #'     ))
 #'   )
-#' 
+#'
 #' # basic codebook
 #' cb_create(diamonds2)
-#' 
+#'
 #' # convert variables to factor to treat as categorical
 #' diamonds2 |>
 #'   transform(
 #'     carat_group = factor(carat_group),
 #'     price_group = factor(price_group)
-#'   ) |> 
+#'   ) |>
 #'   cb_create()
-#' 
+#'
 #' # provide metadata for variable and value labels
 #' diamonds_meta <- data.frame(
 #'   name = names(diamonds2),
@@ -151,7 +139,7 @@
 #'     "1 = <$500; 2 = $500-$999; 3 = $1,000-$1,999; 4 = $2,000-$4,999; 5 = $5,000-$9,999; 6 = $10,000+"
 #'   )
 #' )
-#' 
+#'
 #' cb_create(
 #'   diamonds2, diamonds_meta,
 #'   .val_labs_sep1 = " = ", .val_labs_sep2 = "; "
@@ -165,40 +153,80 @@ cb_create <- function(data,
                       .val_labels = val_labels,
                       .user_missing = NULL,
                       .split_var_labels = NULL,
-                      .include_types = !.include_r_classes,
-                      .include_r_classes = FALSE,
                       .val_labs_sep1 = NULL,
                       .val_labs_sep2 = NULL,
-                      .rmv_html = TRUE,
-                      .rmv_line_breaks = TRUE,
-                      .user_missing_col = c("if_any", "yes", "no"),
-                      .user_missing_conflict = c("metadata", "missing_label"),
-                      .user_missing_incompatible = c("ignore", "warn", "error")
-                      ) {
+                      .options = cb_create_options()) {
+  check_options(.options)
   data |>
     cb_init(
       metadata,
       meta_var_name = {{ .name }}, meta_var_label = {{ .var_label }},
       meta_val_labels = {{ .val_labels }}, ...
     ) |>
-    cb_clean_fields(rmv_html = .rmv_html, rmv_line_breaks = .rmv_line_breaks) |>
+    cb_clean_fields(
+      rmv_html = .options$rmv_html, 
+      rmv_line_breaks = .options$rmv_line_breaks
+    ) |>
     cb_user_missings(
       user_missing = .user_missing,
-      incompatible = .user_missing_incompatible
+      incompatible = .options$user_missing_incompatible
     ) |>
     cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |>
-    cb_label_data(conflict = .user_missing_conflict) |>
+    cb_label_data(conflict = .options$user_missing_conflict) |>
     cb_zap_data() |>
     cb_add_dims() |>
-    cb_add_val_labels_col(user_missing_col = .user_missing_col) |>
+    cb_add_val_labels_col(user_missing_col = .options$user_missing_col) |>
     cb_add_type_col(
-      include_r_classes = .include_r_classes,
-      include_types = .include_types
+      include_r_classes = .options$include_r_classes,
+      include_types = .options$include_types
     ) |>
     cb_add_missing_col() |>
     cb_split_labels_col(split_var_labels = rlang::enexpr(.split_var_labels))
 }
 
+#' Additional options for codebook creation
+#'
+#' @description
+#' Additional options for use by `cb_create()`, `cb_create_spss()`, and `cb_create_redcap()`.
+#'
+#' @inheritParams rlang::args_dots_empty
+#' @param include_types Include a column listing simplified type for each variable?
+#'   (e.g., `"categorical"`, `"date-time"`.)
+#' @param include_r_classes Include a column listing class(es) of each variable?
+#'   (e.g., `"factor"`, `"POSIXct, POSIXt"`.)
+#' @param rmv_html Should HTML tags be removed from metadata (e.g., from variable
+#'   and value labels)?
+#' @param rmv_line_breaks Should line breaks be removed from metadata (e.g., from
+#'   variable and value labels)? If `TRUE`, line breaks will be replaced with `" / "`.
+#' @param user_missing_col Include value labels for user missing values in a separate
+#'   column? The default, `"if_any"`, adds the column only if user missings are
+#'   specified for at least one variable.
+#' @param user_missing_conflict If labels passed to `.user_missing` conflict with
+#'   value labels in metadata, which should be used?
+#' @param user_missing_incompatible How to handle variables specified in `.user_missing`
+#'   that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?
+#' 
+#' @export
+cb_create_options <- function(
+    ...,
+    include_types = TRUE,
+    include_r_classes = FALSE,
+    rmv_html = TRUE,
+    rmv_line_breaks = TRUE,
+    user_missing_col = c("if_any", "yes", "no"),
+    user_missing_conflict = c("val_label", "missing_label"),
+    user_missing_incompatible = c("ignore", "warn", "error")) {
+  rlang::check_dots_empty()
+  out <- list( # enumerated choices are matched here so invalid values fail early
+    include_types = include_types, include_r_classes = include_r_classes,
+    rmv_html = rmv_html, rmv_line_breaks = rmv_line_breaks,
+    user_missing_col = rlang::arg_match(user_missing_col),
+    user_missing_conflict = rlang::arg_match(user_missing_conflict),
+    user_missing_incompatible = rlang::arg_match(user_missing_incompatible)
+  )
+  structure(out, class = "cb_create_options")
+}
+
 #' Extract data from a codebook object
 #'
 #' Codebook objects created by [`cb_create()`] and friends contain several transformed
@@ -206,27 +234,37 @@ cb_create <- function(data,
 #'
 #' @param cb An object of class `"li_codebook"` as produced by [`cb_create()`] or
 #'   a variant.
-#' @param format Format of the returned data; see below for details.
+#' @param format Format of the returned data, either `"factors"` or `"haven"`; 
+#'   see below for details.
 #'
 #' @return
 #' A tibble with variables formatted based on the `format` argument.
-#' - For `format = "values"`, all variables retain the same values as the original
-#'   dataset, including values for user missings. The data may reflect transformations
-#'   made by variants of [`cb_create()`] -- e.g., for [`cb_create_redcap()`], integer coercion 
-#'   and propagation of user missings across checkbox variables.
-#' - For `"haven"`, value labels and user missings are encoded using class 
-#'   [`"haven_labelled"`][haven::labelled]`
 #' - For `"factors"`, all variables with value labels are converted to factors, 
 #'   and all user missings are converted to `NA`.
+#' - For `"haven"`, variable labels, value labels, and user missings are encoded 
+#'   using class [`"haven_labelled_spss"`][haven::labelled_spss].
+#' 
+#' Both formats may also reflect transformations made by variants of [`cb_create()`].
+#' In particular, for codebooks created using [`cb_create_redcap()`], the data may reflect
+#' integer coercion and propagation of user missings across checkbox variables.
 #' 
 #' @export
-cb_get_data <- function(cb, format = c("factors", "haven", "values")) {
+cb_get_data <- function(cb, format = c("factors", "haven")) {
   check_codebook(cb)
-  switch(match.arg(format),
-    factors = attr(cb, "data_zapped"),
-    haven = attr(cb, "data_labelled"),
-    values = attr(cb, "data")
+  tryCatch(
+    format <- match.arg(format),
+    error = \(e) {
+      if (identical(format, "values")) { # `==` would itself error on NA / length > 1
+        cli::cli_abort(
+          '`format = "values"` is no longer supported.',
+          call = parent.frame(4)
+        )
+      }
+      stop(e)
+    }
   )
+  if (format == "factors") attr(cb, "data_zapped")
+  else attr(cb, "data_labelled")
 }
 
 cb_init <- function(data, 
@@ -254,8 +292,8 @@ cb_init <- function(data,
     out <- out |>
       dplyr::mutate(values = NA_character_)
   }
+  out <- structure(out, class = c("li_codebook", class(out)))
   out <- set_attrs(out, data = data)
-  class(out) <- c("li_codebook", class(out))
   out
 }
 
@@ -293,7 +331,7 @@ cb_user_missings_by_var <- function(cb,
     )
     if (length(bad_vars) > 4) bad_vars <- c(head(bad_vars, 3), "...")
     bad_vars <- paste(bad_vars, collapse = ", ")
-    msg <- "{n_bad} variable{?s} specified in {.arg .user_missing} are not compatible with user missing values"
+    msg <- "{n_bad} variable{?s} specified in {.arg .user_missing} {?is/are} not compatible with user missing values"
     if (incompatible == "error") cli::cli_abort(c("!" = msg, "*" = bad_vars))
     cli::cli_warn(c("!" = paste0(msg, " and will be ignored"), "*" = bad_vars))
   }
@@ -426,7 +464,7 @@ cb_add_lookups <- function(cb, sep1, sep2) {
 
 reconcile_missing_labels <- function(val_labs, 
                                      missings, 
-                                     conflict = c("metadata", "missing_label")) {
+                                     conflict = c("val_label", "missing_label")) {
   conflict <- match.arg(conflict)
   
   labs_in_missing <- val_labs[match(missings, val_labs)]
@@ -447,7 +485,7 @@ reconcile_missing_labels <- function(val_labs,
   names(missings)[label_miss] <- lab_name[label_miss]
   ### if na is labelled and in vals and labels don't match
   # relabel based on `conflict`
-  if (conflict == "metadata") {
+  if (conflict == "val_label") {
     names(missings)[mismatch] <- lab_name[mismatch]
   } else if (conflict == "missing_label") {
     names(val_labs)[match(lab_val[mismatch], val_labs)] <- miss_name[mismatch]
@@ -455,8 +493,7 @@ reconcile_missing_labels <- function(val_labs,
   list(val_labs = val_labs, missings = missings)
 }
 
-cb_label_data <- function(cb, conflict = c("metadata", "missing_label")) {
-  conflict <- match.arg(conflict)
+cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
   data <- attr(cb, "data")
   vals_by_label <- attr(cb, "vals_by_label")
   factors <- attr(cb, "factors")
diff --git a/R/cb_create_redcap.r b/R/cb_create_redcap.r
index a2d7c1c..ccb8c58 100644
--- a/R/cb_create_redcap.r
+++ b/R/cb_create_redcap.r
@@ -1,5 +1,5 @@
 #' Generate a codebook object from REDCap data
-#'
+#' 
 #' @description
 #' `cb_create_redcap()` builds an object of class `"li_codebook"` from a dataset and
 #' corresponding codebook exported from REDCap. The resulting object can be used
@@ -14,156 +14,199 @@
 #' - Unpacking, labelling, and optional missing propagation for checkbox data
 #' - Optional coercion for character variables marked as "integer" in `metedata$text_validation_type_or_show_slider_number`
 #' 
+#' All of these behaviors can be controlled using the `.options` argument.
+#' 
 #' @inheritParams cb_create
 #' @param data A data frame exported or retrieved from REDCap.
 #' @param metadata A data frame containing the REDCap codebook associated with `data`.
 #' @param ... Additional columns from `metadata` to preserve in the final codebook.
 #'   New names can be assigned by passing named arguments. Columns for variable
 #'   name, form, variable label, and value labels are included by default.
-#' @param .name,.var_label,.val_labels Columns in `metadata` containing variable
-#'   name, variable label, and value labels, respectively.
-#' @param .form Column in `metadata` containing form names. (Set to `NULL` to omit.)
 #' @param .user_missing A formula or list of formulas specifying user missing values.
 #'   Formulas should specify variables on the left-hand side (as variable names
 #'   or [tidyselect][dplyr_tidy_select] expressions), and missing values on the
 #'   right-hand side. If left-hand side is omitted, defaults to `tidyselect::everything()`.
 #'   See "Specifying user missing values" in [`cb_create()`] documentation  for examples.
-#' @param .coerce_integers Should variables listed as "integer" in `metedata$text_validation_type_or_show_slider_number` 
-#'   be coerced to integer?
-#' @param .checkbox_resp_values Should checkbox values use labels in `metadata` 
-#'   (`TRUE`) or "Yes" / "No" (`FALSE`)? See "Checkbox data handling" below.
-#' @param .propagate_checkbox_missings Should user missing values in a checkbox 
-#'   group be propagated across all variables in the group? See "Checkbox data handling" 
-#'   below.
+#' @param .options Additional options to use for codebook creation. Must be the result 
+#'   from a call to `cb_create_redcap_options()` or `cb_create_options()`. See `?cb_create_redcap_options` 
+#'   for available options.
 #'
 #' @return
-#' An `"li_codebook"` object, consisting of (1) a tibble summarizing the passed
-#' dataset and (2) attributes containing the passed dataset (in several formats)
-#' and additional metadata. Specifically:
-#' - A tibble with columns:
-#'     - `name`: variable name
-#'     - `form`: form name
-#'     - `type`: optional column containing simplified variable type
-#'     - `class`: optional column containing class(es) of each variable
-#'     - `label_stem`: optional column containing variable label stems, if any variables
-#'       are specified in `.split_var_labels`
-#'     - `label`: variable label
-#'     - `values`: values, with labels if applicable
-#'     - `user_missing`: optional column, depending on value of `.user_missing_col`,
-#'        showing user missing values, with labels if applicable
-#'     - `missing`: proportion missing
-#'     - additional columns if specified in `...`
-#' - Attributes:
-#'     - Transformed versions of the passed dataset. See [`cb_get_data()`].
-#'     - Lookup tables and other metadata used internally.
+#' An `"li_codebook"` object, consisting of a tibble summarizing the passed
+#' dataset and attributes containing additional metadata. The tibble includes columns:
+#' - `name`: variable name
+#' - `form`: form name
+#' - `type`: column containing simplified variable type
+#' - `class`: optional column containing class(es) of each variable
+#' - `label_stem`: optional column containing variable label stems, if any variables
+#'   are specified in `.split_var_labels`
+#' - `label`: variable label
+#' - `values`: values, with labels if applicable
+#' - `user_missing`: optional column showing user missing values, with labels 
+#'   if applicable. By default, this column is included only if user missings 
+#'   are specified for at least one variable. This behavior can be changed using 
+#'   the `user_missing_col` argument to `cb_create_options()`.
+#' - `missing`: proportion missing
+#' - additional columns if specified in `...`
 #'
 #' @section Checkbox data handling:
 #' ## Value labels
 #' Data from REDCap checkboxes yields one variable in the dataset for each response
-#' option. These will be labelled generically with `"Yes"` or `"No"`, unless `.checkbox_resp_values`
-#' is `TRUE`, in which case response-specific labels from `metadata` will be used.
-#' For example, if a checkbox group has options "In the past year," "More than a
+#' option. By default, these will be labelled generically with `"Yes"` or `"No"`.
+#' For example, consider a checkbox group with options "In the past year," "More than a
 #' year ago," and "Never," corresponding to variables `chk_var1___0`, `chk_var1___1`,
-#' and `chk_var1___2`: if `.checkbox_resp_values` is `FALSE`, all of these will
-#' have values:
+#' and `chk_var1___2`. By default, all of these will be given the same value labels:
 #'   - `chk_var1___0`, `chk_var1___1`, `chk_var1___2`: 0 = "No"; 1 =  "Yes". 
-#' 
-#' If `.checkbox_resp_values` is `TRUE`, each variable will have unique labels:
+#' This behavior can be changed by setting `checkbox_resp_values = TRUE` in `cb_create_redcap_options()`.
+#' In this case, response-specific labels from `metadata` will be used, so that 
+#' each variable will have unique labels:
 #'   - `chk_var1___0`: 0 = "Not selected," 1 = "In the past year"
 #'   - `chk_var1___1`: 0 = "Not selected," 1 = "More than a year ago"
 #'   - `chk_var1___2`: 0 = "Not selected," 0 = "Never"
 #'
 #' ## Missing value propagation
-#' If `.propagate_checkbox_missings` is `TRUE`, missing values in a checkbox group
-#' variable will be propagated to all variables in the group. For example, given
-#' a checkbox group with options "Pregnant," "Not pregnant," and "Not applicable,"
-#' corresponding to variables `chk_preg_0___0`, `chk_preg_0___1`, and `chk_preg_0____9`,
-#' and assuming that `-9` is specified as a user missing value. If `.propagate_checkbox_missings`
-#' is `TRUE`, `chk_preg_0___0` and `chk_preg_0___1` will be set to `-9` if `chk_preg_0____9`
-#' is `1`. Otherwise, these columns will remain as `0` where `chk_preg_0____9` is `1`.
+#' By default, missing values in a checkbox group will be propagated to all variables 
+#' in the group. For example, consider a checkbox group with options "Pregnant," 
+#' "Not pregnant," and "Not applicable," corresponding to variables `chk_preg_0___0`, 
+#' `chk_preg_0___1`, and `chk_preg_0____9`, and assuming that `-9` is specified 
+#' as a user missing value. By default, `chk_preg_0___0` and `chk_preg_0___1` will 
+#' be set to `-9` if `chk_preg_0____9` is `1`. This behavior can be overridden by 
+#' setting `propagate_checkbox_missings = FALSE` in `cb_create_redcap_options()`, in which
+#' case no values will be changed.
 #'
 #' @export
 cb_create_redcap <- function(data,
                              metadata,
                              ...,
-                             .name = field_name,
-                             .var_label = field_label,
-                             .val_labels = select_choices_or_calculations,
-                             .form = form_name,
                              .user_missing = NULL,
                              .split_var_labels = NULL,
-                             .include_types = !.include_r_classes,
-                             .include_r_classes = FALSE,
-                             .val_labs_sep1 = ", ",
-                             .val_labs_sep2 = "\\|",
-                             .rmv_html = TRUE,
-                             .rmv_line_breaks = TRUE,
-                             .coerce_integers = TRUE,
-                             .checkbox_resp_values = FALSE,
-                             .propagate_checkbox_missings = TRUE,
-                             .user_missing_col = c("if_any", "yes", "no"),
-                             .user_missing_conflict = c("metadata", "missing_label"),
-                             .user_missing_incompatible = c("ignore", "warn", "error")
-                             ) {
-  .user_missing_col <- match.arg(.user_missing_col)
-  .user_missing_conflict <- match.arg(.user_missing_conflict)
-  meta <- meta_expand_checkboxes_rc(metadata, data)
+                             .options = cb_create_redcap_options()) {
+  check_options(.options, redcap = TRUE)
+  meta <- meta_expand_checkboxes_rc(
+    metadata, data,
+    name = !!.options$name, type = !!.options$type
+  )
   cb <- data |>
     cb_init(
       meta,
-      meta_var_name = {{ .name }}, meta_var_label = {{ .var_label }},
-      meta_val_labels = {{ .val_labels }}, form = {{ .form }}, ...,
-      ..rc_type = field_type,
+      meta_var_name = !!.options$name, meta_var_label = !!.options$var_label,
+      meta_val_labels = !!.options$val_labels, form = !!.options$form, ...,
+      ..rc_type = !!.options$type,
       ..rc_validate_type = text_validation_type_or_show_slider_number,
     )
-  if (.coerce_integers) cb <- cb_coerce_integers_rc(cb)
+  if (.options$coerce_integers) cb <- cb_coerce_integers_rc(cb)
   cb$..rc_validate_type <- NULL
   cb <- cb |>
-    cb_clean_fields(rmv_html = .rmv_html, rmv_line_breaks = .rmv_line_breaks) |>
+    cb_clean_fields(
+      rmv_html = .options$rmv_html, 
+      rmv_line_breaks = .options$rmv_line_breaks
+    ) |>
     cb_user_missings(
       user_missing = .user_missing,
-      incompatible = .user_missing_incompatible
+      incompatible = .options$user_missing_incompatible
     ) |>
-    cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |>
-    cb_relabel_checkboxes_rc(use_resp_values = .checkbox_resp_values)
+    cb_add_lookups(
+      sep1 = .options$val_labs_sep1, 
+      sep2 = .options$val_labs_sep2
+    ) |>
+    cb_relabel_checkboxes_rc(use_resp_values = .options$checkbox_resp_values)
   if ("form" %in% names(cb)) cb <- cb_complete_label_rc(cb)
-  if (.propagate_checkbox_missings) {
+  if (.options$propagate_checkbox_missings) {
     cb <- cb_propagate_user_missing_checkboxes_rc(cb)
   }
   cb |>
-    cb_label_data(conflict = .user_missing_conflict) |>
+    cb_label_data(conflict = .options$user_missing_conflict) |>
     cb_zap_data() |>
     cb_add_dims() |>
-    cb_add_val_labels_col(user_missing_col = .user_missing_col) |>
+    cb_add_val_labels_col(user_missing_col = .options$user_missing_col) |>
     cb_add_type_col(
-      include_r_classes = .include_r_classes,
-      include_types = .include_types
+      include_r_classes = .options$include_r_classes,
+      include_types = .options$include_types
     ) |>
     cb_add_missing_col() |>
     cb_split_labels_col(split_var_labels = rlang::enexpr(.split_var_labels)) |> 
     dplyr::relocate(any_of(c("form", "type", "class")), .after = name)
 }
 
-## `field_name` and `field_type` are hard-coded -- do they always have these names?
-meta_expand_checkboxes_rc <- function(meta, data) {
-  if (!("checkbox" %in% meta$field_type)) return(meta)
+#' @rdname cb_create_options
+#' 
+#' @param name,var_label,val_labels,type For REDCap data, columns in `metadata` containing variable
+#'   name, variable label, value labels, and variable type, respectively.
+#' @param form For REDCap data, column in `metadata` containing form names. (Set to `NULL` to omit.)
+#' @param val_labs_sep1,val_labs_sep2 For REDCap data, regex patterns separating value labels
+#'   in `metadata`. `val_labs_sep1` separates values from labels, and `val_labs_sep2`
+#'   separates value/label pairs from one another. e.g., if value labels are in 
+#'   the format `"1, First label|2, Second label"`, set `val_labs_sep1` to `","` 
+#'   and `val_labs_sep2` to `"\\|"`.
+#' @param coerce_integers For REDCap data, should variables listed as "integer" in `metadata$text_validation_type_or_show_slider_number`
+#'   be coerced to integer?
+#' @param checkbox_resp_values For REDCap data, should checkbox values use labels in `metadata` (`TRUE`) 
+#'   or "Yes" / "No" (`FALSE`)? See "Checkbox data handling"  on the `cb_create_redcap()` 
+#'   help page.
+#' @param propagate_checkbox_missings For REDCap data, should user missing values in a checkbox group 
+#'   be propagated across all variables in the group? See "Checkbox data handling" 
+#'   on the `cb_create_redcap()` help page.
+#' 
+#' @export
+cb_create_redcap_options <- function(
+    ...,
+    include_types = TRUE,
+    include_r_classes = FALSE,
+    rmv_html = TRUE,
+    rmv_line_breaks = TRUE,
+    user_missing_col = c("if_any", "yes", "no"),
+    user_missing_conflict = c("val_label", "missing_label"),
+    user_missing_incompatible = c("ignore", "warn", "error"),
+    name = field_name,
+    var_label = field_label,
+    val_labels = select_choices_or_calculations,
+    type = field_type,
+    form = form_name,
+    val_labs_sep1 = ", ",
+    val_labs_sep2 = "\\|",
+    coerce_integers = TRUE,
+    checkbox_resp_values = FALSE,
+    propagate_checkbox_missings = TRUE) {
+  rlang::check_dots_empty()
+  out <- list( # enumerated choices are matched here so invalid values fail early
+    include_types = include_types, include_r_classes = include_r_classes,
+    rmv_html = rmv_html, rmv_line_breaks = rmv_line_breaks,
+    user_missing_col = rlang::arg_match(user_missing_col),
+    user_missing_conflict = rlang::arg_match(user_missing_conflict),
+    user_missing_incompatible = rlang::arg_match(user_missing_incompatible),
+    name = rlang::enquo(name), var_label = rlang::enquo(var_label),
+    val_labels = rlang::enquo(val_labels), type = rlang::enquo(type),
+    form = rlang::enquo(form), val_labs_sep1 = val_labs_sep1,
+    val_labs_sep2 = val_labs_sep2, coerce_integers = coerce_integers,
+    checkbox_resp_values = checkbox_resp_values,
+    propagate_checkbox_missings = propagate_checkbox_missings
+  )
+  structure(out, class = "cb_create_redcap_options")
+}
+
+meta_expand_checkboxes_rc <- function(meta, data, name, type) {
+  name_chr <- as.character(rlang::ensym(name))
+  type_chr <- as.character(rlang::ensym(type))
+  if (!("checkbox" %in% meta[[type_chr]])) return(meta)
   datanames <- names(data)
+  meta <- dplyr::rename(meta, ..name = {{ name }}, ..type = {{ type }})
   checkbox_names <- meta |>
-    dplyr::filter(field_type == "checkbox") |>
-    dplyr::select(field_name) |>
+    dplyr::filter(..type == "checkbox") |>
+    dplyr::select(..name) |>
     dplyr::reframe(
       .chk_name = datanames[
-        stringr::str_starts(datanames, stringr::str_c(field_name, "___"))
+        stringr::str_starts(datanames, stringr::str_c(..name, "___"))
       ],
-      .by = field_name
+      .by = ..name
     )
   meta |>
-    dplyr::left_join(checkbox_names, dplyr::join_by(field_name)) |>
+    dplyr::left_join(checkbox_names, dplyr::join_by(..name)) |>
     dplyr::mutate(
       # .chk_name_stem = ifelse(!is.na(.chk_name), field_name, NA),
-      field_name = dplyr::coalesce(.chk_name, field_name),
+      ..name = dplyr::coalesce(.chk_name, ..name),
       .keep = "unused"
-    )
+    ) |>
+    dplyr::rename("{name_chr}" := ..name, "{type_chr}" := ..type)
 }
 
 cb_coerce_integers_rc <- function(cb) {
diff --git a/R/cb_create_spss.r b/R/cb_create_spss.r
index b366a46..a4827cd 100644
--- a/R/cb_create_spss.r
+++ b/R/cb_create_spss.r
@@ -19,61 +19,47 @@
 #'   or [tidyselect][dplyr_tidy_select] expressions), and missing values on the
 #'   right-hand side. If left-hand side is omitted, defaults to `tidyselect::everything()`.
 #'   See "Specifying user missing values" in [`cb_create()`] documentation for examples.
-#' @param .rmv_html Should HTML tags be removed from variable and value labels?
-#' @param .rmv_line_breaks Should line breaks be removed from variable and value 
-#' labels? If `TRUE`, line breaks will be replaced with `" / "`.
-#' @param .user_missing_conflict If labels passed to `.user_missing` conflicts with
-#'   a value label in `data`, which should be used?
 #' 
 #' @return
-#' An `"li_codebook"` object, consisting of (1) a tibble summarizing the passed
-#' dataset and (2) attributes containing the passed dataset (in several formats)
-#' and additional metadata. Specifically:
-#' - A tibble with columns:
-#'     - `name`: variable name
-#'     - `type`: optional column containing simplified variable type
-#'     - `class`: optional column containing class(es) of each variable
-#'     - `label_stem`: optional column containing variable label stems, if any variables 
-#'       are specified in `.split_var_labels`
-#'     - `label`: variable label
-#'     - `values`: values, with labels if applicable
-#'     - `user_missing`: optional column, depending on value of `.user_missing_col`,
-#'        showing user missing values, with labels if applicable
-#'     - `missing`: proportion missing
-#' - Attributes:
-#'     - Transformed versions of the passed dataset. See [`cb_get_data()`].
-#'     - Lookup tables and other metadata used internally.
+#' An `"li_codebook"` object, consisting of a tibble summarizing the passed
+#' dataset and attributes containing additional metadata. The tibble includes columns:
+#' - `name`: variable name
+#' - `type`: column containing simplified variable type
+#' - `class`: optional column containing class(es) of each variable
+#' - `label_stem`: optional column containing variable label stems, if any variables
+#'   are specified in `.split_var_labels`
+#' - `label`: variable label
+#' - `values`: values, with labels if applicable
+#' - `user_missing`: optional column showing user missing values, with labels 
+#'   if applicable. By default, this column is included only if user missings 
+#'   are specified for at least one variable. This behavior can be changed using 
+#'   the `user_missing_col` argument to `cb_create_options()`.
+#' - `missing`: proportion missing
 #'
 #' @export
 cb_create_spss <- function(data,
                            .user_missing = NULL,
                            .split_var_labels = NULL,
-                           .include_types = !.include_r_classes,
-                           .include_r_classes = FALSE,
-                           .rmv_html = TRUE,
-                           .rmv_line_breaks = TRUE,
-                           .user_missing_col = c("if_any", "yes", "no"),
-                           .user_missing_conflict = c("val_label", "missing_label"),
-                           .user_missing_incompatible = c("ignore", "warn", "error")
-                           ) {
+                           .options = cb_create_options()) {
+  check_options(.options)
   data |>
     cb_init() |>
     cb_clean_fields_spss(
-      rmv_html = .rmv_html, 
-      rmv_line_breaks = .rmv_line_breaks
+      rmv_html = .options$rmv_html, 
+      rmv_line_breaks = .options$rmv_line_breaks
     ) |> 
     cb_add_label_col_spss() |>
     cb_update_labels_spss(
       user_missing = .user_missing,
-      user_missing_conflict = .user_missing_conflict,
-      user_missing_incompatible = .user_missing_incompatible
+      user_missing_conflict = .options$user_missing_conflict,
+      user_missing_incompatible = .options$user_missing_incompatible
     ) |>
     cb_zap_data_spss() |>
     cb_add_dims() |>
-    cb_add_val_labels_col(user_missing_col = .user_missing_col) |>
+    cb_add_val_labels_col(user_missing_col = .options$user_missing_col) |>
     cb_add_type_col(
-      include_r_classes = .include_r_classes,
-      include_types = .include_types
+      include_r_classes = .options$include_r_classes,
+      include_types = .options$include_types
     ) |>
     cb_add_missing_col() |>
     cb_split_labels_col(split_var_labels = rlang::enexpr(.split_var_labels))
@@ -114,7 +100,6 @@ cb_update_labels_spss <- function(cb,
       cb_add_lookups() |>
       set_attrs(data_labelled = data)
   } else {
-    conflict <- sub("val_label", "metadata", match.arg(user_missing_conflict))
     user_missing <- check_user_missing_arg(user_missing)
     user_missing_vars <- user_missing |>
       lapply(\(um) {
@@ -140,7 +125,7 @@ cb_update_labels_spss <- function(cb,
         user_missing = attr_user_missing[names(attr_user_missing) %in% user_missing_vars],
         vals_by_label = attr_vals_by_label[names(attr_vals_by_label) %in% user_missing_vars]
       ) |>
-      cb_label_data(conflict = conflict) |>
+      cb_label_data(conflict = user_missing_conflict) |>
       # then restore full missing and val attributes
       set_attrs(
         user_missing = attr_user_missing,
diff --git a/R/cb_summarize.r b/R/cb_summarize.r
index fb22b3d..d2c4650 100644
--- a/R/cb_summarize.r
+++ b/R/cb_summarize.r
@@ -152,7 +152,8 @@ cb_summarize_categorical_impl <- function(cb,
                                           prefixed = TRUE,
                                           detail_missing = missing(group_by),
                                           detail_na_label = "NA",
-                                          warn_if_none = FALSE) {
+                                          warn_if_none = FALSE,
+                                          group_rows = NULL) {
   force(detail_missing)
   data <- attr(cb, "data_labelled")
   data_dt <- data.table::as.data.table(data)
@@ -307,14 +308,18 @@ cb_summarize_categorical_impl <- function(cb,
   )
   freqs <- freqs[, cols_out, with = FALSE]
   
+  group_cols <- setdiff(group_by, group_rows)
+  if (!length(group_cols)) group_cols <- NULL
+  
   freqs |>
     tibble::as_tibble() |>
     set_attrs(
       detail_missing = detail_missing,
       id_cols = c("name", label_cols, "value"),
       group_by = group_by, 
-      group_cols = group_by,
-      group_counts = group_counts(cb, group_by)
+      group_rows = group_rows,
+      group_cols = group_cols,
+      group_counts = group_counts(cb, group_cols)
     )
 }
 
diff --git a/R/cb_write.r b/R/cb_write.r
index 748e140..a99c2f5 100644
--- a/R/cb_write.r
+++ b/R/cb_write.r
@@ -15,24 +15,28 @@
 #'   a variant.
 #' @param file Path to write to.
 #' @param dataset_name Name of the dataset to display in workbook headers.
-#' @param incl_date,incl_dims Should the date and/or dataset dimensions be included 
-#'   in the Overview tab header?
-#' @param hyperlinks If `TRUE`, variable names on the Overview sheet will link 
-#'   to corresponding rows on summary tabs and vice versa.
 #' @param group_by <[`tidy-select`][dplyr_tidy_select]> Column or columns to group
 #'   by. If specified, additional numeric and categorical summary tabs will be included
-#'   with grouped summaries. Subgroups are shown in columns by default. For the numeric 
-#'   summary tab, subgroups for some or all grouping variables can instead be shown 
-#'   in rows if specified in `group_rows_numeric`.
-#' @param group_rows_numeric <[`tidy-select`][dplyr_tidy_select]> Column or columns
-#'   to group by in rows on the grouped numeric summary tab. All columns must also 
-#'   be specified in `group_by`.
+#'   with grouped summaries. Subgroups are shown in columns by default. Some or all 
+#'   grouping variables can instead be shown in rows if specified in `group_rows`, 
+#'   `group_rows_numeric`, or `group_rows_categorical`.
+#' @param group_rows <[`tidy-select`][dplyr_tidy_select]> Column or columns to group 
+#'   by in rows on grouped summary tabs. All columns must also be specified in `group_by`. 
+#'   Will apply to both numeric and categorical summary tabs unless otherwise specified 
+#'   in `group_rows_numeric` or `group_rows_categorical`.
+#' @param group_rows_numeric,group_rows_categorical <[`tidy-select`][dplyr_tidy_select]> 
+#'   Column or columns to group by in rows on grouped numeric or categorical summary 
+#'   tab.
 #' @param detail_missing Include detailed missing value information on ungrouped 
 #'   categorical and text summary tabs? (Detailed missing information for grouped 
 #'   summary tabs is not currently supported.)
 #' @param n_text_vals On the text summary tab, how many unique non-missing values 
 #'   should be included for each variable? If there are more than `n_text_vals` + 1 
 #'   unique values, the `n_text_vals` most common non-missing values will be included. 
+#' @param incl_date,incl_dims Should the date and/or dataset dimensions be included 
+#'   in the Overview tab header?
+#' @param hyperlinks If `TRUE`, variable names on the Overview sheet will link 
+#'   to corresponding rows on summary tabs and vice versa.
 #' @param overwrite Overwrite existing file?
 #'
 #' @return 
@@ -54,13 +58,15 @@
 cb_write <- function(cb, 
                      file, 
                      dataset_name = NULL,
-                     incl_date = TRUE,
-                     incl_dims = TRUE,
-                     hyperlinks = TRUE,
                      group_by = NULL,
-                     group_rows_numeric = NULL,
+                     group_rows = NULL,
+                     group_rows_numeric = group_rows,
+                     group_rows_categorical = group_rows,
                      detail_missing = c("if_any_user_missing", "yes", "no"),
                      n_text_vals = 5,
+                     incl_date = TRUE,
+                     incl_dims = TRUE,
+                     hyperlinks = TRUE,
                      overwrite = TRUE) {
   check_codebook(cb)
   detail_missing <- match.arg(detail_missing)
@@ -71,32 +77,38 @@ cb_write <- function(cb,
     num = cb_summarize_numeric_impl(cb),
     cat = cb_summarize_categorical_impl(cb, detail_missing = detail_missing),
     txt = cb_summarize_text_impl(
-      cb, 
-      n_text_vals = n_text_vals, 
+      cb,
+      n_text_vals = n_text_vals,
       detail_missing = detail_missing
     )
   )
   group_by <- cb_untidyselect(cb, {{ group_by }})
-  group_rows_numeric <- cb_untidyselect(cb, {{ group_rows_numeric }})
-  if (!is.null(group_rows_numeric)) {
-    if (is.null(group_by)) {
-      cli::cli_abort(
-        "If `group_rows_numeric` is specified, `group_by` must also be specified."
-      )
-    }
-    if (length(setdiff(group_rows_numeric, group_by))) {
-      cli::cli_abort(
-        "All columns specified in `group_rows_numeric` must also be included in `group_by`."
-      )
-    }
+  group_rows <- cb_untidyselect(cb, {{ group_rows }})
+  if (missing(group_rows_numeric)) {
+    group_rows_numeric <- group_rows
+  } else {
+    group_rows_numeric <- cb_untidyselect(cb, {{ group_rows_numeric }})
+  }
+  if (missing(group_rows_categorical)) {
+    group_rows_categorical <- group_rows
+  } else {
+    group_rows_categorical <- cb_untidyselect(cb, {{ group_rows_categorical }})
   }
+  check_group_rows_arg(group_rows, group_by)
+  check_group_rows_arg(group_rows_numeric, group_by)
+  check_group_rows_arg(group_rows_categorical, group_by)
+  
   if (!is.null(group_by)) {
     summaries$num_grp <- cb_summarize_numeric_impl(
       cb, 
       group_by = group_by, 
       group_rows = group_rows_numeric
     )
-    summaries$cat_grp <- cb_summarize_categorical_impl(cb, group_by = group_by)
+    summaries$cat_grp <- cb_summarize_categorical_impl(
+      cb,
+      group_by = group_by,
+      group_rows = group_rows_categorical
+    )
   }
   cb_write_codebook(
     cb, summaries,
@@ -768,7 +780,13 @@ cb_write_codebook <- function(cb,
     summaries$cat_grp <- summaries$cat_grp |>
       cb_format_names(skip = group_by, attrs = "id_cols")
     cols_pct <- untidyselect(summaries$cat_grp, tidyselect::starts_with("%"))
-    clear_repeats <- setdiff(attr(summaries$cat_grp, "id_cols"), "Value")
+    group_rows <- attr(summaries$cat_grp, "group_rows")
+    id_cols <- attr(summaries$cat_grp, "id_cols")
+    clear_repeats <- c(setdiff(id_cols, "Value"), group_rows)
+    if (!is.null(group_rows)) {
+      attr(summaries$cat_grp, "group_rows") <- c(group_rows, "Value")
+      attr(summaries$cat_grp, "id_cols") <- setdiff(id_cols, "Value")
+    }
     sheet_nms$cat_grp <- paste0("Grouped ", sheet_nms$cat)
     headers$cat_grp <- c(headers$cat, paste("By ", toString(group_by)))
     params$cat_grp <- summaries$cat_grp |>
diff --git a/R/utils.r b/R/utils.r
index 22226cc..6f49877 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -1,20 +1,56 @@
-is_codebook <- function(x) "li_codebook" %in% class(x)
+is_codebook <- function(x) inherits(x, "li_codebook")
 check_codebook <- function(x) {
   arg <- as.character(rlang::ensym(x))
   if (!is_codebook(x)) {
     cli::cli_abort('{.arg {arg}} must be an object of class `"li_codebook"`.')
   }
 }
+check_options <- function(x, redcap = FALSE) {
+  if (redcap) {
+    opts_class <- "cb_create_redcap_options"
+    opts_class_wrong <- "cb_create_options"
+  } else {
+    opts_class <- "cb_create_options"
+    opts_class_wrong <- "cb_create_redcap_options"
+  }
+  if (inherits(x, opts_class_wrong)) {
+    msg <- c(
+      "!" = "`.options` must be created from `{opts_class}()`, not `{opts_class_wrong}()`."
+    )
+    if (!redcap) {
+      msg <- c(msg, "i" = "Did you mean to call `cb_create_redcap()`?")
+    }
+    cli::cli_abort(msg)
+  }
+  if (!inherits(x, opts_class)) {
+    cli::cli_abort("`.options` must be created from `{opts_class}()`")
+  }
+}
 check_user_missing_arg <- function(x) {
   arg <- as.character(rlang::ensym(x))
   if (!(
-      rlang::is_formula(x) || (is.list(x) && all(sapply(x, rlang::is_formula)))
-    )) {
+    rlang::is_formula(x) || (is.list(x) && all(sapply(x, rlang::is_formula)))
+  )) {
     cli::cli_abort("{.arg {arg}} must be a formula or list of formulas.")
   }
   if (rlang::is_formula(x)) x <- list(x)
   x
 }
+check_group_rows_arg <- function(group_rows, group_by) {
+  arg <- as.character(rlang::ensym(group_rows))
+  if (!is.null(group_rows)) {
+    if (is.null(group_by)) {
+      cli::cli_abort(
+        "If {.arg {arg}} is specified, {.arg group_by} must also be specified."
+      )
+    }
+    if (length(setdiff(group_rows, group_by))) {
+      cli::cli_abort(
+        "All columns specified in {.arg {arg}} must also be included in {.arg group_by}."
+      )
+    }
+  }
+}
 set_attrs <- function(x, ...) {
   dots <- rlang::list2(...)
   for (nm in names(dots)) attr(x, nm) <- dots[[nm]]
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..3173f0c
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,130 @@
+---
+output: github_document
+---
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#",
+  out.width = "100%",
+  fig.align = "center",
+  fig.path = "man/figures/",
+  eval = FALSE
+)
+```
+
+# lighthouse.codebook
+
+The lighthouse.codebook package includes tools to summarize a dataset into a formatted 
+Excel workbook, including a data dictionary and variable summaries. It incorporates external 
+metadata (such as variable labels, value labels, and user missing / non-response codes), 
+with functions for using metadata from SPSS and REDCap datasets. Codebooks can be 
+customized in a number of ways, including options for grouped summaries.
+
+## Installation
+
+You can install lighthouse.codebook by running:
+
+```r
+# install.packages("remotes")
+remotes::install_github("ccsarapas/lighthouse.codebook")
+```
+
+## Creating codebooks
+Creating a codebook involves two general steps:
+
+1. Create a "codebook" object in R from a data frame (and,
+   optionally, metadata) using `cb_create()` or a specialized variant
+   (such as `cb_create_spss()` or `cb_create_redcap()`).
+
+2. Write the codebook to disk using `cb_write()`.
+
+```r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |> 
+  cb_create() |> 
+  cb_write("cb.xlsx")
+
+# with metadata
+dat |> 
+  cb_create(metadata = dat1_metadata) |> 
+  cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |> 
+  cb_create_spss() |> 
+  cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |> 
+  cb_create_redcap(metadata = meta_rc$data) |> 
+  cb_write("cb_rc.xlsx")
+```
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted, summarized, and 
+presented. See `vignette("lighthouse-codebook")` for some of the most useful options, 
+including grouped data summaries and specifying user missing codes. Further options 
+are detailed in the help pages for `cb_create()` and `cb_write()`.
+
+<!-- - The "Creating Codebooks" vignette covers options for controlling how data and 
+metadata are _interpreted,_ such as by applying value labels, specifying user missing 
+or nonresponse codes, and taking advantage of specialized metadata (e.g., from SPSS 
+or REDCap data).
+- The "Writing Codebooks" vignette covers how data is _summarized and presented_ 
+in the codebook written to disk, including options for grouped summaries and missing 
+data. -->
+
+## Codebook contents
+
+The codebook written to disk will include an _overview_ tab listing all variables 
+in the dataset; _summary_ tabs for numeric, categorical, and text variables; and, 
+if grouping variables are specified, _grouped summary_ tabs for numeric and categorical 
+variables.
+
+The _overview_ tab includes one row for each variable in the dataset, with information 
+on variable types, labels, values, and missingness. By default, each variable is 
+hyperlinked to its location on the relevant summary tab.
+
+```{r, overview, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-overview.png")
+```
+
+The _numeric summary_ tab includes descriptive statistics for all numeric variables 
+in the dataset:
+
+```{r, numeric, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-numeric.png")
+```
+
+The _categorical summary_ tab includes frequencies for all categorical variables, 
+optionally with separate rows for user missing values:
+
+```{r, categorical, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-categorical.png")
+```
+
+Finally, the _text summary_ tab includes frequencies for the most common values for all 
+text variables in the dataset. (The number of values shown can be adjusted using 
+the `n_text_vals` argument to `cb_write()`.)
+
+```{r, text, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-text.png")
+```
+
+If `group_by` is specified in `cb_write()`, additional numeric and categorical summary
+tabs grouped by the specified variables will be included.
+
+## SPSS extension
+
+Functionality from this package is also available as an SPSS extension command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
diff --git a/README.md b/README.md
index 3e38f4e..c846ee9 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,115 @@
 
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
 # lighthouse.codebook
 
-The lighthouse.codebook package includes tools for summarizing datasets used by staff at the [Lighthouse Institute](https://www.chestnut.org/lighthouse-institute/), the research division of Chestnut Health Systems.
+The lighthouse.codebook package includes tools to summarize a dataset
+into a formatted Excel workbook, including a data dictionary and
+variable summaries. It incorporates external metadata (such as variable
+labels, value labels, and user missing / non-response codes), with
+functions for using metadata from SPSS and REDCap datasets. Codebooks
+can be customized in a number of ways, including options for grouped
+summaries.
 
 ## Installation
 
-Install lighthouse.codebook by running:
+You can install lighthouse.codebook by running:
 
 ``` r
 # install.packages("remotes")
 remotes::install_github("ccsarapas/lighthouse.codebook")
 ```
+
+## Creating codebooks
+
+Creating a codebook involves two general steps:
+
+1.  Create a “codebook” object in R from a data frame (and, optionally,
+    metadata) using `cb_create()` or a specialized variant (such as
+    `cb_create_spss()` or `cb_create_redcap()`).
+
+2.  Write the codebook to disk using `cb_write()`.
+
+``` r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |> 
+  cb_create() |> 
+  cb_write("cb.xlsx")
+
+# with metadata
+dat |> 
+  cb_create(metadata = dat1_metadata) |> 
+  cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |> 
+  cb_create_spss() |> 
+  cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |> 
+  cb_create_redcap(metadata = meta_rc$data) |> 
+  cb_write("cb_rc.xlsx")
+```
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted,
+summarized, and presented. See `vignette("lighthouse-codebook")` for
+some of the most useful options, including grouped data summaries and
+specifying user missing codes. Further options are detailed in the help
+pages for `cb_create()` and `cb_write()`.
+
+<!-- - The "Creating Codebooks" vignette covers options for controlling how data and 
+metadata are _interpreted,_ such as by applying value labels, specifying user missing 
+or nonresponse codes, and taking advantage of specialized metadata (e.g., from SPSS 
+or REDCap data).
+- The "Writing Codebooks" vignette covers how data is _summarized and presented_ 
+in the codebook written to disk, including options for grouped summaries and missing 
+data. -->
+
+## Codebook contents
+
+The codebook written to disk will include an *overview* tab listing all
+variables in the dataset; *summary* tabs for numeric, categorical, and
+text variables; and, if grouping variables are specified, *grouped
+summary* tabs for numeric and categorical variables.
+
+The *overview* tab includes one row for each variable in the dataset,
+with information on variable types, labels, values, and missingness. By
+default, each variable is hyperlinked to its location on the relevant
+summary tab.
+
+<img src="man/figures/README-overview.png" width="100%" style="display: block; margin: auto;" />
+
+The *numeric summary* tab includes descriptive statistics for all
+numeric variables in the dataset:
+
+<img src="man/figures/README-numeric.png" width="100%" style="display: block; margin: auto;" />
+
+The *categorical summary* tab includes frequencies for all categorical
+variables, optionally with separate rows for user missing values:
+
+<img src="man/figures/README-categorical.png" width="100%" style="display: block; margin: auto;" />
+
+Finally, the *text summary* tab includes frequencies for the most common
+values for all text variables in the dataset. (The number of values
+shown can be adjusted using the `n_text_vals` argument to `cb_write()`.)
+
+<img src="man/figures/README-text.png" width="100%" style="display: block; margin: auto;" />
+
+If `group_by` is specified in `cb_write()`, additional numeric and
+categorical summary tabs grouped by the specified variables will be
+included.
+
 ## SPSS extension
 
-Functionality from this package is also available as an SPSS extension command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
\ No newline at end of file
+Functionality from this package is also available as an SPSS extension
+command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
diff --git a/_pkgdown.yml b/_pkgdown.yml
index d98e4d4..961f921 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,7 +1,10 @@
 url: https://ccsarapas.github.io/lighthouse.codebook/
 template:
   bootstrap: 5
-  bootswatch: sandstone
+  bslib:
+    base_font: {google: "Roboto"}
+    code_font: {google: "Roboto Mono"}
+    font_scale: 1.2
 footer:
   structure:
     left: package
@@ -10,4 +13,4 @@ authors:
   Casey Sarapas:
     href: "https://chestnut.org/li/scientists-and-project-directors/category/research-scientists/profile/casey-sarapas-phd"
   Chestnut Health Systems:
-    href: "https://chestnut.org/"
\ No newline at end of file
+    href: "https://chestnut.org/"
diff --git a/man/cb_create.Rd b/man/cb_create.Rd
index c8872c6..0112709 100644
--- a/man/cb_create.Rd
+++ b/man/cb_create.Rd
@@ -13,22 +13,16 @@ cb_create(
   .val_labels = val_labels,
   .user_missing = NULL,
   .split_var_labels = NULL,
-  .include_types = !.include_r_classes,
-  .include_r_classes = FALSE,
   .val_labs_sep1 = NULL,
   .val_labs_sep2 = NULL,
-  .rmv_html = TRUE,
-  .rmv_line_breaks = TRUE,
-  .user_missing_col = c("if_any", "yes", "no"),
-  .user_missing_conflict = c("metadata", "missing_label"),
-  .user_missing_incompatible = c("ignore", "warn", "error")
+  .options = cb_create_options()
 )
 }
 \arguments{
 \item{data}{A data frame.}
 
-\item{metadata}{A data frame containing metadata, such as variable labels and value
-labels.}
+\item{metadata}{A data frame containing metadata, such as variable labels and
+value labels.}
 
 \item{...}{Additional columns from \code{metadata} to preserve in the final codebook.
 New names can be assigned by passing named arguments. Columns for variable
@@ -49,58 +43,34 @@ See "Specifying user missing values" below for examples.}
 expressions, indicating (sets of) variable labels with a common stem that should
 be extracted into a separate column.}
 
-\item{.include_types}{Include a column listing simplified type for each variable?
-(e.g,. \code{"categorical"}, \code{"date-time"}.)}
-
-\item{.include_r_classes}{Include a column listing class(es) of each variable?
-(e.g., \code{"factor"}, \code{"POSIXct, POSIXt"}.)}
-
 \item{.val_labs_sep1, .val_labs_sep2}{Regex patterns separating value labels
 in \code{metadata}. \code{.val_labs_sep1} separates values from labels, and \code{.val_labs_sep2}
-separates value/label pairs. e.g., if value labels are in format \code{"1, First label|2, Second label"},
-set \code{.val_labs_sep1} to \code{","} and \code{.val_labs_sep2} to \code{"\\\\|"}.}
-
-\item{.rmv_html}{Should HTML tags be removed from metadata (e.g., from variable
-and value labels)?}
-
-\item{.rmv_line_breaks}{Should line breaks be removed from metadata (e.g., from
-variable and value labels)? If \code{TRUE}, line breaks will be replaced with \code{" / "}.}
-
-\item{.user_missing_col}{Include value labels for user missing values in a separate
-column? The default, \code{"if_any"}, adds the column only if user missings are
-specified for at least one variable.}
-
-\item{.user_missing_conflict}{If different labels for a value are provided in
-metadata and user missings, which should be used?}
+separates value/label pairs from one another. e.g., if value labels are in
+the format \code{"1, First label|2, Second label"}, set \code{.val_labs_sep1} to \code{","}
+and \code{.val_labs_sep2} to \code{"\\\\|"}.}
 
-\item{.user_missing_incompatible}{How to handle variables specified in \code{.user_missing}
-that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?}
+\item{.options}{Additional options to use for codebook creation. Must be the result
+from a call to \code{cb_create_options()}. See that function's help page for available
+options.}
 }
 \value{
-An \code{"li_codebook"} object, consisting of (1) a tibble summarizing the passed
-dataset and (2) attributes containing the passed dataset (in several formats)
-and additional metadata. Specifically:
-\itemize{
-\item A tibble with columns:
+An \code{"li_codebook"} object, consisting of a tibble summarizing the passed
+dataset and attributes containing additional metadata. The tibble includes columns:
 \itemize{
 \item \code{name}: variable name
-\item \code{type}: optional column containing simplified variable type
+\item \code{type}: column containing simplified variable type
 \item \code{class}: optional column containing class(es) of each variable
 \item \code{label_stem}: optional column containing variable label stems, if any variables
 are specified in \code{.split_var_labels}
 \item \code{label}: variable label
 \item \code{values}: values, with labels if applicable
-\item \code{user_missing}: optional column, depending on value of \code{.user_missing_col},
-showing user missing values, with labels if applicable
+\item \code{user_missing}: optional column showing user missing values, with labels
+if applicable. By default, this column is included only if user missings
+are specified for at least one variable. This behavior can be changed using
+the \code{user_missing_col} argument to \code{cb_create_options()}.
 \item \code{missing}: proportion missing
 \item additional columns if specified in \code{...}
 }
-\item Attributes:
-\itemize{
-\item Transformed versions of the passed dataset. See \code{\link[=cb_get_data]{cb_get_data()}}
-\item Lookup tables and other metadata used internally.
-}
-}
 }
 \description{
 \code{cb_create()} builds an object of class \code{"li_codebook"} from a dataset and optional
@@ -141,13 +111,15 @@ User missing values may optionally be named to set value labels:
 \preformatted{
 .user_missing = ~ c(Declined = -98, "Not applicable" = -99)
 }
-If labels set in \code{.user_missing} conflict with those in \code{metadata}, \code{.user_missing_conflict}
-controls which labels are used.
-
-User missing values are not compatible with logical, date, or datetime (POSIXt)
-variables. By default, these variables will be ignored if specified in \code{.user_missing}.
-(i.e., user missing values will be applied only to compatible variables.) This behavior
-can be changed using the \code{.user_missing_incompatible} argument.
+If labels set in \code{.user_missing} conflict with those in \code{metadata}, the \code{user_missing_conflict}
+argument to \code{cb_create_options()} controls which labels are used.
+
+User missings may be set for numeric, character, factor/ordered factor, and haven_labelled/haven_labelled_spss
+vectors. For factors, user missings are set based on factor labels (not the underlying
+integer codes). For \code{"haven_labelled"} vectors, user missings are set based on
+values (not value labels). By default, variables with incompatible classes (e.g.,
+logical, Date, POSIXt) will be ignored if specified in \code{.user_missing}. This
+behavior can be changed using the \code{user_missing_incompatible} argument to \code{cb_create_options()}.
 }
 
 \examples{
@@ -170,7 +142,7 @@ diamonds2 |>
   transform(
     carat_group = factor(carat_group),
     price_group = factor(price_group)
-  ) |> 
+  ) |>
   cb_create()
 
 # provide metadata for variable and value labels
diff --git a/man/cb_create_options.Rd b/man/cb_create_options.Rd
new file mode 100644
index 0000000..33b114d
--- /dev/null
+++ b/man/cb_create_options.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cb_create.r, R/cb_create_redcap.r
+\name{cb_create_options}
+\alias{cb_create_options}
+\alias{cb_create_redcap_options}
+\title{Additional options for codebook creation}
+\usage{
+cb_create_options(
+  ...,
+  include_types = TRUE,
+  include_r_classes = FALSE,
+  rmv_html = TRUE,
+  rmv_line_breaks = TRUE,
+  user_missing_col = c("if_any", "yes", "no"),
+  user_missing_conflict = c("val_label", "missing_label"),
+  user_missing_incompatible = c("ignore", "warn", "error")
+)
+
+cb_create_redcap_options(
+  ...,
+  include_types = TRUE,
+  include_r_classes = FALSE,
+  rmv_html = TRUE,
+  rmv_line_breaks = TRUE,
+  user_missing_col = c("if_any", "yes", "no"),
+  user_missing_conflict = c("val_label", "missing_label"),
+  user_missing_incompatible = c("ignore", "warn", "error"),
+  name = field_name,
+  var_label = field_label,
+  val_labels = select_choices_or_calculations,
+  type = field_type,
+  form = form_name,
+  val_labs_sep1 = ", ",
+  val_labs_sep2 = "\\\\|",
+  coerce_integers = TRUE,
+  checkbox_resp_values = FALSE,
+  propagate_checkbox_missings = TRUE
+)
+}
+\arguments{
+\item{...}{These dots are for future extensions and must be empty.}
+
+\item{include_types}{Include a column listing simplified type for each variable?
+(e.g., \code{"categorical"}, \code{"date-time"}.)}
+
+\item{include_r_classes}{Include a column listing class(es) of each variable?
+(e.g., \code{"factor"}, \code{"POSIXct, POSIXt"}.)}
+
+\item{rmv_html}{Should HTML tags be removed from metadata (e.g., from variable
+and value labels)?}
+
+\item{rmv_line_breaks}{Should line breaks be removed from metadata (e.g., from
+variable and value labels)? If \code{TRUE}, line breaks will be replaced with \code{" / "}.}
+
+\item{user_missing_col}{Include value labels for user missing values in a separate
+column? The default, \code{"if_any"}, adds the column only if user missings are
+specified for at least one variable.}
+
+\item{user_missing_conflict}{If labels passed to \code{.user_missing} conflict with
+value labels in metadata, which should be used?}
+
+\item{user_missing_incompatible}{How to handle variables specified in \code{.user_missing}
+that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?}
+
+\item{name, var_label, val_labels, type}{For REDCap data, columns in \code{metadata} containing variable
+name, variable label, value labels, and variable type, respectively.}
+
+\item{form}{For REDCap data, column in \code{metadata} containing form names. (Set to \code{NULL} to omit.)}
+
+\item{val_labs_sep1, val_labs_sep2}{For REDCap data, regex patterns separating value labels
+in \code{metadata}. \code{val_labs_sep1} separates values from labels, and \code{val_labs_sep2}
+separates value/label pairs from one another. e.g., if value labels are in
+the format \code{"1, First label|2, Second label"}, set \code{val_labs_sep1} to \code{","}
+and \code{val_labs_sep2} to \code{"\\\\|"}.}
+
+\item{coerce_integers}{For REDCap data, should variables listed as "integer" in \code{metadata$text_validation_type_or_show_slider_number}
+be coerced to integer?}
+
+\item{checkbox_resp_values}{For REDCap data, should checkbox values use labels in \code{metadata} (\code{TRUE})
+or "Yes" / "No" (\code{FALSE})? See "Checkbox data handling"  on the \code{cb_create_redcap()}
+help page.}
+
+\item{propagate_checkbox_missings}{For REDCap data, should user missing values in a checkbox group
+be propagated across all variables in the group? See "Checkbox data handling"
+on the \code{cb_create_redcap()} help page.}
+}
+\description{
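+Additional options for use by \code{cb_create()} and its variants. \code{cb_create_redcap_options()} accepts the same options as \code{cb_create_options()}, plus options specific to \code{cb_create_redcap()}.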
+}
diff --git a/man/cb_create_redcap.Rd b/man/cb_create_redcap.Rd
index 9ced111..98cc49f 100644
--- a/man/cb_create_redcap.Rd
+++ b/man/cb_create_redcap.Rd
@@ -8,24 +8,9 @@ cb_create_redcap(
   data,
   metadata,
   ...,
-  .name = field_name,
-  .var_label = field_label,
-  .val_labels = select_choices_or_calculations,
-  .form = form_name,
   .user_missing = NULL,
   .split_var_labels = NULL,
-  .include_types = !.include_r_classes,
-  .include_r_classes = FALSE,
-  .val_labs_sep1 = ", ",
-  .val_labs_sep2 = "\\\\|",
-  .rmv_html = TRUE,
-  .rmv_line_breaks = TRUE,
-  .coerce_integers = TRUE,
-  .checkbox_resp_values = FALSE,
-  .propagate_checkbox_missings = TRUE,
-  .user_missing_col = c("if_any", "yes", "no"),
-  .user_missing_conflict = c("metadata", "missing_label"),
-  .user_missing_incompatible = c("ignore", "warn", "error")
+  .options = cb_create_redcap_options()
 )
 }
 \arguments{
@@ -37,11 +22,6 @@ cb_create_redcap(
 New names can be assigned by passing named arguments. Columns for variable
 name, form, variable label, and value labels are included by default.}
 
-\item{.name, .var_label, .val_labels}{Columns in \code{metadata} containing variable
-name, variable label, and value labels, respectively.}
-
-\item{.form}{Column in \code{metadata} containing form names. (Set to \code{NULL} to omit.)}
-
 \item{.user_missing}{A formula or list of formulas specifying user missing values.
 Formulas should specify variables on the left-hand side (as variable names
 or \link[dplyr:dplyr_tidy_select]{tidyselect} expressions), and missing values on the
@@ -52,69 +32,29 @@ See "Specifying user missing values" in \code{\link[=cb_create]{cb_create()}} do
 expressions, indicating (sets of) variable labels with a common stem that should
 be extracted into a separate column.}
 
-\item{.include_types}{Include a column listing simplified type for each variable?
-(e.g,. \code{"categorical"}, \code{"date-time"}.)}
-
-\item{.include_r_classes}{Include a column listing class(es) of each variable?
-(e.g., \code{"factor"}, \code{"POSIXct, POSIXt"}.)}
-
-\item{.val_labs_sep1, .val_labs_sep2}{Regex patterns separating value labels
-in \code{metadata}. \code{.val_labs_sep1} separates values from labels, and \code{.val_labs_sep2}
-separates value/label pairs. e.g., if value labels are in format \code{"1, First label|2, Second label"},
-set \code{.val_labs_sep1} to \code{","} and \code{.val_labs_sep2} to \code{"\\\\|"}.}
-
-\item{.rmv_html}{Should HTML tags be removed from metadata (e.g., from variable
-and value labels)?}
-
-\item{.rmv_line_breaks}{Should line breaks be removed from metadata (e.g., from
-variable and value labels)? If \code{TRUE}, line breaks will be replaced with \code{" / "}.}
-
-\item{.coerce_integers}{Should variables listed as "integer" in \code{metedata$text_validation_type_or_show_slider_number}
-be coerced to integer?}
-
-\item{.checkbox_resp_values}{Should checkbox values use labels in \code{metadata}
-(\code{TRUE}) or "Yes" / "No" (\code{FALSE})? See "Checkbox data handling" below.}
-
-\item{.propagate_checkbox_missings}{Should user missing values in a checkbox
-group be propagated across all variables in the group? See "Checkbox data handling"
-below.}
-
-\item{.user_missing_col}{Include value labels for user missing values in a separate
-column? The default, \code{"if_any"}, adds the column only if user missings are
-specified for at least one variable.}
-
-\item{.user_missing_conflict}{If different labels for a value are provided in
-metadata and user missings, which should be used?}
-
-\item{.user_missing_incompatible}{How to handle variables specified in \code{.user_missing}
-that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?}
+\item{.options}{Additional options to use for codebook creation. Must be the result
+from a call to \code{cb_create_redcap_options()} or \code{cb_create_options()}. See \code{?cb_create_redcap_options}
+for available options.}
 }
 \value{
-An \code{"li_codebook"} object, consisting of (1) a tibble summarizing the passed
-dataset and (2) attributes containing the passed dataset (in several formats)
-and additional metadata. Specifically:
-\itemize{
-\item A tibble with columns:
+An \code{"li_codebook"} object, consisting of a tibble summarizing the passed
+dataset and attributes containing additional metadata. The tibble includes columns:
 \itemize{
 \item \code{name}: variable name
 \item \code{form}: form name
-\item \code{type}: optional column containing simplified variable type
+\item \code{type}: column containing simplified variable type
 \item \code{class}: optional column containing class(es) of each variable
 \item \code{label_stem}: optional column containing variable label stems, if any variables
 are specified in \code{.split_var_labels}
 \item \code{label}: variable label
 \item \code{values}: values, with labels if applicable
-\item \code{user_missing}: optional column, depending on value of \code{.user_missing_col},
-showing user missing values, with labels if applicable
+\item \code{user_missing}: optional column showing user missing values, with labels
+if applicable. By default, this column is included only if user missings
+are specified for at least one variable. This behavior can be changed using
+the \code{user_missing_col} argument to \code{cb_create_options()}.
 \item \code{missing}: proportion missing
 \item additional columns if specified in \code{...}
 }
-\item Attributes:
-\itemize{
-\item Transformed versions of the passed dataset. See \code{\link[=cb_get_data]{cb_get_data()}}.
-\item Lookup tables and other metadata used internally.
-}
-}
 }
 \description{
 \code{cb_create_redcap()} builds an object of class \code{"li_codebook"} from a dataset and
@@ -131,24 +71,23 @@ and metadata, including:
 \item Unpacking, labelling, and optional missing propagation for checkbox data
 \item Optional coercion for character variables marked as "integer" in \code{metedata$text_validation_type_or_show_slider_number}
 }
+
+All of these behaviors can be controlled using the \code{.options} argument.
 }
 \section{Checkbox data handling}{
 
 \subsection{Value labels}{
 
 Data from REDCap checkboxes yields one variable in the dataset for each response
-option. These will be labelled generically with \code{"Yes"} or \code{"No"}, unless \code{.checkbox_resp_values}
-is \code{TRUE}, in which case response-specific labels from \code{metadata} will be used.
-For example, if a checkbox group has options "In the past year," "More than a
+option. By default, these will be labelled generically with \code{"Yes"} or \code{"No"}.
+For example, consider a checkbox group with options "In the past year," "More than a
 year ago," and "Never," corresponding to variables \code{chk_var1___0}, \code{chk_var1___1},
-and \code{chk_var1___2}: if \code{.checkbox_resp_values} is \code{FALSE}, all of these will
-have values:
+and \code{chk_var1___2}. By default, all of these will be given the same value labels:
 \itemize{
 \item \code{chk_var1___0}, \code{chk_var1___1}, \code{chk_var1___2}: 0 = "No"; 1 =  "Yes".
-}
-
-If \code{.checkbox_resp_values} is \code{TRUE}, each variable will have unique labels:
-\itemize{
+This behavior can be changed by setting \code{checkbox_resp_values = TRUE} in \code{cb_create_redcap_options()}.
+In this case, response-specific labels from \code{metadata} will be used, so that
+each variable will have unique labels:
 \item \code{chk_var1___0}: 0 = "Not selected," 1 = "In the past year"
 \item \code{chk_var1___1}: 0 = "Not selected," 1 = "More than a year ago"
 \item \code{chk_var1___2}: 0 = "Not selected," 0 = "Never"
@@ -157,13 +96,14 @@ If \code{.checkbox_resp_values} is \code{TRUE}, each variable will have unique l
 
 \subsection{Missing value propagation}{
 
-If \code{.propagate_checkbox_missings} is \code{TRUE}, missing values in a checkbox group
-variable will be propagated to all variables in the group. For example, given
-a checkbox group with options "Pregnant," "Not pregnant," and "Not applicable,"
-corresponding to variables \code{chk_preg_0___0}, \code{chk_preg_0___1}, and \code{chk_preg_0____9},
-and assuming that \code{-9} is specified as a user missing value. If \code{.propagate_checkbox_missings}
-is \code{TRUE}, \code{chk_preg_0___0} and \code{chk_preg_0___1} will be set to \code{-9} if \code{chk_preg_0____9}
-is \code{1}. Otherwise, these columns will remain as \code{0} where \code{chk_preg_0____9} is \code{1}.
+By default, missing values in a checkbox group will be propagated to all variables
+in the group. For example, consider a checkbox group with options "Pregnant,"
+"Not pregnant," and "Not applicable," corresponding to variables \code{chk_preg_0___0},
+\code{chk_preg_0___1}, and \code{chk_preg_0____9}, and assume that \code{-9} is specified
+as a user missing value. By default, \code{chk_preg_0___0} and \code{chk_preg_0___1} will
+be set to \code{-9} if \code{chk_preg_0____9} is \code{1}. This behavior can be overridden by
+setting \code{propagate_checkbox_missings = FALSE} in \code{cb_create_redcap_options()}, in which
+case no values will be changed.
 }
 }
 
diff --git a/man/cb_create_spss.Rd b/man/cb_create_spss.Rd
index b67dc25..f5ab4c8 100644
--- a/man/cb_create_spss.Rd
+++ b/man/cb_create_spss.Rd
@@ -8,13 +8,7 @@ cb_create_spss(
   data,
   .user_missing = NULL,
   .split_var_labels = NULL,
-  .include_types = !.include_r_classes,
-  .include_r_classes = FALSE,
-  .rmv_html = TRUE,
-  .rmv_line_breaks = TRUE,
-  .user_missing_col = c("if_any", "yes", "no"),
-  .user_missing_conflict = c("val_label", "missing_label"),
-  .user_missing_incompatible = c("ignore", "warn", "error")
+  .options = cb_create_options()
 )
 }
 \arguments{
@@ -31,51 +25,27 @@ See "Specifying user missing values" in \code{\link[=cb_create]{cb_create()}} do
 expressions, indicating (sets of) variable labels with a common stem that should
 be extracted into a separate column.}
 
-\item{.include_types}{Include a column listing simplified type for each variable?
-(e.g,. \code{"categorical"}, \code{"date-time"}.)}
-
-\item{.include_r_classes}{Include a column listing class(es) of each variable?
-(e.g., \code{"factor"}, \code{"POSIXct, POSIXt"}.)}
-
-\item{.rmv_html}{Should HTML tags be removed from variable and value labels?}
-
-\item{.rmv_line_breaks}{Should line breaks be removed from variable and value
-labels? If \code{TRUE}, line breaks will be replaced with \code{" / "}.}
-
-\item{.user_missing_col}{Include value labels for user missing values in a separate
-column? The default, \code{"if_any"}, adds the column only if user missings are
-specified for at least one variable.}
-
-\item{.user_missing_conflict}{If labels passed to \code{.user_missing} conflicts with
-a value label in \code{data}, which should be used?}
-
-\item{.user_missing_incompatible}{How to handle variables specified in \code{.user_missing}
-that aren't compatible with user missing values (e.g., logical, Date, or POSIXt)?}
+\item{.options}{Additional options to use for codebook creation. Must be the result
+from a call to \code{cb_create_options()}. See that function's help page for available
+options.}
 }
 \value{
-An \code{"li_codebook"} object, consisting of (1) a tibble summarizing the passed
-dataset and (2) attributes containing the passed dataset (in several formats)
-and additional metadata. Specifically:
-\itemize{
-\item A tibble with columns:
+An \code{"li_codebook"} object, consisting of a tibble summarizing the passed
+dataset and attributes containing additional metadata. The tibble includes columns:
 \itemize{
 \item \code{name}: variable name
-\item \code{type}: optional column containing simplified variable type
+\item \code{type}: column containing simplified variable type
 \item \code{class}: optional column containing class(es) of each variable
 \item \code{label_stem}: optional column containing variable label stems, if any variables
 are specified in \code{.split_var_labels}
 \item \code{label}: variable label
 \item \code{values}: values, with labels if applicable
-\item \code{user_missing}: optional column, depending on value of \code{.user_missing_col},
-showing user missing values, with labels if applicable
+\item \code{user_missing}: optional column showing user missing values, with labels
+if applicable. By default, this column is included only if user missings
+are specified for at least one variable. This behavior can be changed using
+the \code{user_missing_col} argument to \code{cb_create_options()}.
 \item \code{missing}: proportion missing
 }
-\item Attributes:
-\itemize{
-\item Transformed versions of the passed dataset. See \code{\link[=cb_get_data]{cb_get_data()}}.
-\item Lookup tables and other metadata used internally.
-}
-}
 }
 \description{
 \code{cb_create_spss()} builds an object of class \code{"li_codebook"} from an imported
diff --git a/man/cb_get_data.Rd b/man/cb_get_data.Rd
index 6cab0a9..d876368 100644
--- a/man/cb_get_data.Rd
+++ b/man/cb_get_data.Rd
@@ -4,26 +4,27 @@
 \alias{cb_get_data}
 \title{Extract data from a codebook object}
 \usage{
-cb_get_data(cb, format = c("factors", "haven", "values"))
+cb_get_data(cb, format = c("factors", "haven"))
 }
 \arguments{
 \item{cb}{An object of class \code{"li_codebook"} as produced by \code{\link[=cb_create]{cb_create()}} or
 a variant.}
 
-\item{format}{Format of the returned data; see below for details.}
+\item{format}{Format of the returned data, either \code{"factors"} or \code{"haven"};
+see below for details.}
 }
 \value{
 A tibble with variables formatted based on the \code{format} argument.
 \itemize{
-\item For \code{format = "values"}, all variables retain the same values as the original
-dataset, including values for user missings. The data may reflect transformations
-made by variants of \code{\link[=cb_create]{cb_create()}} -- e.g., for \code{\link[=cb_create_redcap]{cb_create_redcap()}}, integer coercion
-and propagation of user missings across checkbox variables.
-\item For \code{"haven"}, value labels and user missings are encoded using class
-\code{\link[haven:labelled]{"haven_labelled"}}`
 \item For \code{"factors"}, all variables with value labels are converted to factors,
 and all user missings are converted to \code{NA}.
+\item For \code{"haven"}, variable labels, value labels, and user missings are encoded
+using class \code{\link[haven:labelled]{"haven_labelled_spss"}}.
 }
+
+Both formats may also reflect transformations made by variants of \code{\link[=cb_create]{cb_create()}}.
+In particular, data from codebooks created using \code{\link[=cb_create_redcap]{cb_create_redcap()}} may reflect integer coercion
+and propagation of user missings across checkbox variables.
 }
 \description{
 Codebook objects created by \code{\link[=cb_create]{cb_create()}} and friends contain several transformed
diff --git a/man/cb_write.Rd b/man/cb_write.Rd
index 0018f5b..1699465 100644
--- a/man/cb_write.Rd
+++ b/man/cb_write.Rd
@@ -8,13 +8,15 @@ cb_write(
   cb,
   file,
   dataset_name = NULL,
-  incl_date = TRUE,
-  incl_dims = TRUE,
-  hyperlinks = TRUE,
   group_by = NULL,
-  group_rows_numeric = NULL,
+  group_rows = NULL,
+  group_rows_numeric = group_rows,
+  group_rows_categorical = group_rows,
   detail_missing = c("if_any_user_missing", "yes", "no"),
   n_text_vals = 5,
+  incl_date = TRUE,
+  incl_dims = TRUE,
+  hyperlinks = TRUE,
   overwrite = TRUE
 )
 }
@@ -26,21 +28,20 @@ a variant.}
 
 \item{dataset_name}{Name of the dataset to display in workbook headers.}
 
-\item{incl_date, incl_dims}{Should the date and/or dataset dimensions be included
-in the Overview tab header?}
-
-\item{hyperlinks}{If \code{TRUE}, variable names on the Overview sheet will link
-to corresponding rows on summary tabs and vice versa.}
-
 \item{group_by}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Column or columns to group
 by. If specified, additional numeric and categorical summary tabs will be included
-with grouped summaries. Subgroups are shown in columns by default. For the numeric
-summary tab, subgroups for some or all grouping variables can instead be shown
-in rows if specified in \code{group_rows_numeric}.}
+with grouped summaries. Subgroups are shown in columns by default. Some or all
+grouping variables can instead be shown in rows if specified in \code{group_rows},
+\code{group_rows_numeric}, or \code{group_rows_categorical}.}
+
+\item{group_rows}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Column or columns to group
+by in rows on grouped summary tabs. All columns must also be specified in \code{group_by}.
+Will apply to both numeric and categorical summary tabs unless otherwise specified
+in \code{group_rows_numeric} or \code{group_rows_categorical}.}
 
-\item{group_rows_numeric}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Column or columns
-to group by in rows on the grouped numeric summary tab. All columns must also
-be specified in \code{group_by}.}
+\item{group_rows_numeric, group_rows_categorical}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}>
+Column or columns to group by in rows on the grouped numeric or categorical summary
+tab, respectively.
 
 \item{detail_missing}{Include detailed missing value information on ungrouped
 categorical and text summary tabs? (Detailed missing information for grouped
@@ -50,6 +51,12 @@ summary tabs is not currently supported.)}
 should be included for each variable? If there are more than \code{n_text_vals} + 1
 unique values, the \code{n_text_vals} most common non-missing values will be included.}
 
+\item{incl_date, incl_dims}{Should the date and/or dataset dimensions be included
+in the Overview tab header?}
+
+\item{hyperlinks}{If \code{TRUE}, variable names on the Overview sheet will link
+to corresponding rows on summary tabs and vice versa.}
+
 \item{overwrite}{Overwrite existing file?}
 }
 \value{
diff --git a/man/figures/README-categorical.png b/man/figures/README-categorical.png
new file mode 100644
index 0000000..3766ac3
Binary files /dev/null and b/man/figures/README-categorical.png differ
diff --git a/man/figures/README-numeric.png b/man/figures/README-numeric.png
new file mode 100644
index 0000000..4599abe
Binary files /dev/null and b/man/figures/README-numeric.png differ
diff --git a/man/figures/README-overview.png b/man/figures/README-overview.png
new file mode 100644
index 0000000..dbad3da
Binary files /dev/null and b/man/figures/README-overview.png differ
diff --git a/man/figures/README-text.png b/man/figures/README-text.png
new file mode 100644
index 0000000..ccb966a
Binary files /dev/null and b/man/figures/README-text.png differ
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/lighthouse-codebook.Rmd b/vignettes/lighthouse-codebook.Rmd
new file mode 100644
index 0000000..678a4f8
--- /dev/null
+++ b/vignettes/lighthouse-codebook.Rmd
@@ -0,0 +1,406 @@
+---
+title: "Introduction to lighthouse.codebook"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Introduction to lighthouse.codebook}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  eval = FALSE,
+  comment = "#",
+  out.width = "100%"
+)
+```
+
+## Creating codebooks
+
+Creating a codebook involves two general steps:
+
+1. Create a “codebook” object in R from a data frame (and, optionally, metadata), 
+   using `cb_create()` or a specialized variant (such as `cb_create_spss()` or `cb_create_redcap()`).
+
+2. Write the codebook to disk using `cb_write()`.
+
+```r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |> 
+  cb_create() |> 
+  cb_write("cb.xlsx")
+
+# with metadata
+dat |> 
+  cb_create(metadata = dat1_metadata) |> 
+  cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |> 
+  cb_create_spss() |> 
+  cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |> 
+  cb_create_redcap(metadata = meta_rc$data) |> 
+  cb_write("cb_rc.xlsx")
+```
+The codebook written to disk will include an overview tab listing all variables 
+in the dataset; summary tabs for numeric, categorical, and text variables; and, 
+if grouping variables are specified, grouped summary tabs for numeric and categorical 
+variables.
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted, summarized, and 
+presented. This section shows a few of the most useful options. Further options 
+are detailed in the documentation for `cb_create()` and `cb_write()`. 
+
+### Grouped summaries
+
+Numeric and categorical data summaries can be grouped by one or more variables by 
+specifying them in the `group_by` argument to `cb_write()`.
+```r
+cb_create(data, metadata) |>
+  cb_write("cb.xlsx", group_by = treatment_group)
+
+cb_create(data, metadata) |>
+  cb_write("cb.xlsx", group_by = c(treatment_group, timepoint, age_group))
+```
+
+By default, values for each subgroup are shown in separate columns, with decked 
+heads if more than one grouping variable is specified. However, some or all grouping 
+variables can instead be shown in rows using the `group_rows` argument.
+```r
+# show `treatment_group` in columns and `timepoint` in rows
+cb_create(data, metadata) |>
+  cb_write(
+    "cb.xlsx", 
+    group_by = c(treatment_group, timepoint),
+    group_rows = timepoint
+  )
+```
+Different row grouping behavior can be specified for numeric versus categorical 
+summary tabs using the `group_rows_numeric` and `group_rows_categorical` arguments.
+```r
+# for numeric summary, show `treatment_group` in columns and `timepoint` in rows;
+# for categorical summary, show all grouping variables in columns
+cb_create(data, metadata) |>
+  cb_write(
+    "cb.xlsx", 
+    group_by = c(treatment_group, timepoint),
+    group_rows_numeric = timepoint
+  )
+
+# for numeric summary, show all grouping variables in rows; 
+# for categorical summary, show `treatment_group` in rows
+cb_create(data, metadata) |>
+  cb_write(
+    "cb.xlsx", 
+    group_by = c(treatment_group, timepoint),
+    group_rows_numeric = c(treatment_group, timepoint),
+    group_rows_categorical = treatment_group
+  )
+```
+
+### User missing values
+
+User missing values (also known as nonresponse codes, reserve codes, or special 
+values) can be specified using the `.user_missing` argument to `cb_create()`. 
+Missing values are specified using a formula or list of formulas, with variables 
+on the left-hand side (as names or [tidyselect](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html) 
+expressions) and values on the right-hand side. If the left-hand side is empty, 
+user missings will be set for all compatible variables in the dataset.
+
+```r
+# set a single missing value for a single variable
+cb <- cb_create(data, metadata, .user_missing = var1 ~ 99)
+
+# for variables `var1` through `var5`
+cb <- cb_create(data, metadata, .user_missing = var1:var5 ~ c(98, 99))
+
+# for all numeric variables, plus `var6` and `var7`
+cb <- cb_create(
+  data, 
+  metadata, 
+  .user_missing = c(where(is.numeric), var6, var7) ~ c(-9, -8, -7)
+)
+
+# for all compatible variables in dataset
+cb <- cb_create(data, metadata, .user_missing = ~ c(98, 99))
+```
+
+If the user missing values are named, the names will be treated as value labels 
+in data summaries.
+
+```r
+cb <- cb_create(
+  data, 
+  metadata, 
+  .user_missing = var1:var5 ~ c("Declined" = 98, "Not applicable" = 99)
+)
+```
+
+To apply different user missings for different variables, pass a list of formulas.
+```r
+cb <- cb_create(
+  data, 
+  metadata,
+  .user_missing = list(
+    starts_with("status") ~ c("Declined" = 98, "Not applicable" = 99),
+    var7:var10 ~ c("Don't know" = -4, "Not applicable" = -5)
+  )
+)
+```
+### Missing value handling
+
+On numeric summary tabs, missing values (including both user missing values and `NA`) 
+are dropped for computation of summary statistics. 
+
+On ungrouped categorical and text summary tabs, by default, user missing values 
+are individually tabulated. For example, if `.user_missing = ~ c("Declined" = 98, 
+"Not applicable" = 99)`, then categorical and text summary tabs will include rows 
+giving counts for `"[98] Declined"` and `"[99] Not applicable"`. All user missing 
+values and `NA` can instead be collapsed into a single `"(Missing)"` row using the 
+`detail_missing` argument to `cb_write()`.
+```r
+dat |>
+  cb_create(.user_missing = ~ c("Declined" = 98, "Not applicable" = 99)) |>
+  cb_write("cb.xlsx", detail_missing = "no")
+```
+Finally, user missing values are always collapsed (as though `detail_missing = "no"`) 
+on _grouped_ summary tabs. 
+
+### Splitting long variable labels
+
+Variable labels for sets of related variables sometimes share a common prefix. Using 
+the `.split_var_labels` argument to `cb_create()`, this prefix can be extracted 
+into a separate column, making it easier to see at a glance what is unique about 
+each variable. 
+
+For example, given a set of variable labels that all begin with `"What colors do 
+you like? Select all that apply: "`: 
+  
+| Name | Label |
+| ---- | ----- |
+| age | How old are you today? |
+| colors1 | What colors do you like? Select all that apply: Red |
+| colors2 | What colors do you like? Select all that apply: Green |
+| colors3 | What colors do you like? Select all that apply: Blue |
+| colors4 | What colors do you like? Select all that apply: Orange |
+| height | What is your height in inches? |
+  
+You can split the labels for these variables, specifying them using a [tidyselect](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html) 
+expression:
+
+```r
+cb_create(
+    data, 
+    metadata, 
+    .split_var_labels = starts_with("colors")
+  ) |>
+  cb_write("cb.xlsx")
+```
+
+| Name | Label Stem | Label |
+| ---- | ---------- | ----- |
+| age | | How old are you today? |
+| colors1 | What colors do you like? Select all that apply: | Red |
+| colors2 | What colors do you like? Select all that apply: | Green |
+| colors3 | What colors do you like? Select all that apply: | Blue |
+| colors4 | What colors do you like? Select all that apply: | Orange |
+| height | | What is your height in inches? |
+
+Multiple sets of variables with common prefixes can be specified by passing a 
+list of tidyselect expressions.
+
+```r
+cb_create(
+    data, 
+    metadata, 
+    .split_var_labels = list(
+      starts_with("colors"), 
+      fav_food2:fav_food9, 
+      c(rating1, rating4:rating7, rating9)
+    )
+  ) |>
+  cb_write("cb.xlsx")
+```
+
+## Variable typing
+Data summaries are produced for "numeric," "categorical," and "text" variables. For a given variable `x`,
+* `x` is treated as categorical if (1) it is a factor, ordered factor, or logical vector, _or_ (2) it has associated value labels other than missing value codes (specified in metadata or, for SPSS data, in a `"haven_labelled"` vector).
+* `x` is treated as numeric if (1) it is numeric (i.e., `is.numeric(x)` is `TRUE`) _and_ (2) it has no associated value labels other than missing value codes.
+* `x` is treated as text if (1) it is a character vector _and_ (2) it has no associated value labels other than missing value codes.
+
+Thus, you can change how a variable is summarized by changing its class. For 
+instance, to get complete frequencies for a numeric or character variable, convert 
+it to a factor; to get only the top frequencies for a factor with many levels, convert 
+it to character.
+
+Variables of other classes, such as dates, datetimes, and lists, are not currently 
+included on summary tabs. Summaries for dates and datetimes are planned for a future 
+release.
+
+## Other uses for the codebook object
+The `"li_codebook"` object created by `cb_create()` will most commonly be 
+used to write an Excel codebook to disk using `cb_write()`. However, it can also 
+be used to create other objects in R.
+
+```r
+# example data
+q4_subset <- gain_q4 |> 
+  subset(select = c(XPID, XOBS, XRA, B17, SU4a, SU4b, SU1f99v))
+
+# create codebook
+cb <- cb_create(
+  q4_subset,
+  metadata = q4_metadata,
+  .user_missing =  ~ c("Not Asked" = -3, 
+                      "Missing" = -4, 
+                      "Confidential" = -6, 
+                      "Refused" = -7,
+                      "Don't Know" = -8, 
+                      "Legitimate Skip" = -9)
+)
+
+cb
+# # A tibble: 7 × 6
+#   name    type        label                         values user_missings missing
+#   <chr>   <chr>       <chr>                         <chr>  <chr>           <dbl>
+# 1 XPID    text        Participant ID                NA     [-9] Legitim…   0
+# 2 XOBS    categorical Observation Wave              [0] I… [-9] Legitim…   0
+# 3 XRA     categorical Random assignment             [0] C… [-9] Legitim…   0
+# 4 B17     categorical Pregnant                      [0] N… [-9] Legitim…   0.221
+# 5 SU4a    numeric     PPS - P90 days alcohol use    NA     [-9] Legitim…   0.394
+# 6 SU4b    numeric     PPS - P90 days drunk or 5+ d… NA     [-9] Legitim…   0.442
+# 7 SU1f99v text        QCS - P90 Days Other AOD Tx … NA     [-9] Legitim…   0.923
+```
+### Extract transformed data
+Use `cb_get_data()` to extract transformed data in one of several formats. 
+`format = "factors"` yields a dataset in which all variables with value labels are 
+converted to factors and user missings are converted to `NA`.
+```r
+cb_get_data(cb, format = "factors")
+# # A tibble: 104 × 7
+#    XPID  XOBS    XRA       B17    SU4a  SU4b SU1f99v       
+#    <chr> <fct>   <fct>     <fct> <dbl> <dbl> <chr>
+#  1 001   Intake  Treatment No       NA    NA NA
+#  2 002   Intake  Control   No       10     4 NA
+#  3 003   Intake  Treatment No       10     1 Peer counselor
+#  4 003   3-month Treatment No       39    15 NA
+#  5 003   6-month Treatment No       NA    NA NA
+#  6 004   Intake  Control   No       10     2 NA
+#  7 004   3-month Control   No       55    63 NA
+#  8 004   6-month Control   No       10     1 NA
+#  9 005   Intake  Control   Yes      35     0 Social worker
+# 10 005   3-month Control   Yes      55    39 NA
+# # ℹ 94 more rows
+```
+In contrast, `format = "haven"` yields a dataset with SPSS-style variable labels, 
+value labels, and user missings encoded using the `"haven_labelled_spss"` class.
+```r
+cb_get_data(cb, format = "haven")
+# # A tibble: 104 × 7
+#    XPID      XOBS        XRA           B17       SU4a      SU4b      SU1f99v    
+#    <chr+lbl> <dbl+lbl>   <dbl+lbl>     <dbl+lbl> <dbl+lbl> <dbl+lbl> <chr+lbl>
+#  1 001       0 [Intake]  1 [Treatment] 0 [No]    -9 (NA)   -9 (NA)   -9 (NA)    
+#  2 002       0 [Intake]  0 [Control]   0 [No]    10         4        -9 (NA)    
+#  3 003       0 [Intake]  1 [Treatment] 0 [No]    10         1        Peer couns…
+#  4 003       1 [3-month] 1 [Treatment] 0 [No]    39        15        -4 (NA)
+#  5 003       2 [6-month] 1 [Treatment] 0 [No]    -4 (NA)   -4 (NA)   -4 (NA)
+#  6 004       0 [Intake]  0 [Control]   0 [No]    10         2        -9 (NA)    
+#  7 004       1 [3-month] 0 [Control]   0 [No]    55        63        -9 (NA)
+#  8 004       2 [6-month] 0 [Control]   0 [No]    10         1        -9 (NA)
+#  9 005       0 [Intake]  0 [Control]   1 [Yes]   35         0        Social wor…
+# 10 005       1 [3-month] 0 [Control]   1 [Yes]   55        39        -9 (NA)
+# # ℹ 94 more rows
+```
+### Get data summaries
+`cb_summarize_numeric()`, `cb_summarize_categorical()`, and `cb_summarize_text()`
+return summaries for all variables of their respective types. These are the basis 
+of the summary tabs generated by `cb_write()`.
+```r
+cb_summarize_numeric(cb)
+# # A tibble: 2 × 8
+#   name  label                         valid_n valid_pct  mean    SD median   MAD
+#   <chr> <chr>                           <int>     <dbl> <dbl> <dbl>  <dbl> <dbl>
+# 1 SU4a  PPS - P90 days alcohol use         63     0.606 23.4   21.2     20 25.2 
+# 2 SU4b  PPS - P90 days drunk or 5+ d…      58     0.558  8.09  13.6      2  2.97
+# # ℹ 5 more variables: min <dbl>, max <dbl>, range <dbl>, skew <dbl>, kurt <dbl>
+
+cb_summarize_categorical(cb)
+# # A tibble: 9 × 7
+#   name  label             is_missing value             n pct_of_all pct_of_valid
+#   <chr> <chr>             <lgl>      <chr>         <int>      <dbl>        <dbl>
+# 1 XOBS  Observation Wave  FALSE      [0] Intake       42    0.404         0.404
+# 2 XOBS  Observation Wave  FALSE      [1] 3-month      34    0.327         0.327
+# 3 XOBS  Observation Wave  FALSE      [2] 6-month      28    0.269         0.269
+# 4 XRA   Random assignment FALSE      [0] Control      50    0.481         0.481 
+# 5 XRA   Random assignment FALSE      [1] Treatment    54    0.519         0.519
+# 6 B17   Pregnant          FALSE      [1] Yes           6    0.0577        0.0741
+# 7 B17   Pregnant          FALSE      [0] No           75    0.721         0.926
+# 8 B17   Pregnant          TRUE       [-9] Legitim…    22    0.212        NA
+# 9 B17   Pregnant          TRUE       [-4] Missing      1    0.00962      NA     
+# # ℹ 1 more variable: pct_of_missing <dbl>
+
+cb_summarize_text(cb)
+# # A tibble: 14 × 7
+#    name    label                      is_missing unique_n value     n pct_of_all
+#    <chr>   <chr>                      <lgl>         <int> <chr> <int>     <dbl>
+#  1 XPID    Participant ID             FALSE            42 003       3    0.0288 
+#  2 XPID    Participant ID             FALSE            42 004       3    0.0288
+#  3 XPID    Participant ID             FALSE            42 005       3    0.0288
+#  4 XPID    Participant ID             FALSE            42 006       3    0.0288
+#  5 XPID    Participant ID             FALSE            42 010       3    0.0288
+#  6 XPID    Participant ID             FALSE            42 (37 …    89    0.856
+#  7 SU1f99v QCS - P90 Days Other AOD … FALSE             8 AA        1    0.00962
+#  8 SU1f99v QCS - P90 Days Other AOD … FALSE             8 Alco…     1    0.00962
+#  9 SU1f99v QCS - P90 Days Other AOD … FALSE             8 Case…     1    0.00962
+# 10 SU1f99v QCS - P90 Days Other AOD … FALSE             8 Group     1    0.00962
+# 11 SU1f99v QCS - P90 Days Other AOD … FALSE             8 NA        1    0.00962
+# 12 SU1f99v QCS - P90 Days Other AOD … FALSE             8 (3 o…     3    0.0288 
+# 13 SU1f99v QCS - P90 Days Other AOD … TRUE             NA [-4]…    44    0.423
+# 14 SU1f99v QCS - P90 Days Other AOD … TRUE             NA [-9]…    52    0.5
+# # ℹ 2 more variables: pct_of_valid <dbl>, pct_of_missing <dbl>
+```
+`cb_summarize_numeric()` and `cb_summarize_categorical()` can also return grouped 
+summaries:
+```r
+cb_summarize_numeric(cb, group_by = XOBS)
+# # A tibble: 6 × 8
+#   XOBS    name  label                      valid_n valid_pct  mean     SD median
+#   <fct>   <chr> <chr>                        <int>     <dbl> <dbl>  <dbl>  <dbl>
+# 1 Intake  SU4a  PPS - P90 days alcohol use      30     0.714 28.8  20.1     26.5
+# 2 3-month SU4a  PPS - P90 days alcohol use      20     0.588 21.4  21.0     12.5
+# 3 6-month SU4a  PPS - P90 days alcohol use      13     0.464 13.9  21.5      6
+# 4 Intake  SU4b  PPS - P90 days drunk or 5…      30     0.714  7.13 10.7      4
+# 5 3-month SU4b  PPS - P90 days drunk or 5…      15     0.441 16.1  20.0      3
+# 6 6-month SU4b  PPS - P90 days drunk or 5…      13     0.464  1     0.707    1
+# # ℹ 6 more variables: MAD <dbl>, min <dbl>, max <dbl>, range <dbl>, skew <dbl>, 
+# #   kurt <dbl>
+
+cb_summarize_categorical(cb, group_by = XRA)
+# # A tibble: 12 × 7
+#    XRA       name  label            value           n pct_of_all pct_of_valid
+#    <fct>     <chr> <chr>            <chr>       <int>      <dbl>        <dbl>
+#  1 Control   XOBS  Observation Wave [0] Intake     20     0.4          0.4
+#  2 Control   XOBS  Observation Wave [1] 3-month    16     0.32         0.32  
+#  3 Control   XOBS  Observation Wave [2] 6-month    14     0.28         0.28
+#  4 Treatment XOBS  Observation Wave [0] Intake     22     0.407        0.407
+#  5 Treatment XOBS  Observation Wave [1] 3-month    18     0.333        0.333
+#  6 Treatment XOBS  Observation Wave [2] 6-month    14     0.259        0.259
+#  7 Control   B17   Pregnant         [1] Yes         3     0.06         0.0811
+#  8 Control   B17   Pregnant         [0] No         34     0.68         0.919 
+#  9 Control   B17   Pregnant         (Missing)      13     0.26        NA
+# 10 Treatment B17   Pregnant         [1] Yes         3     0.0556       0.0682
+# 11 Treatment B17   Pregnant         [0] No         41     0.759        0.932
+# 12 Treatment B17   Pregnant         (Missing)      10     0.185       NA
+```