diff --git a/.Rbuildignore b/.Rbuildignore
index a8fb882..8721688 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,6 @@
^docs$
^pkgdown$
^\.github$
+^README\.Rmd$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index 457525e..54f27cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
.DS_Store
.quarto
docs
+inst/doc
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
index 2c73089..933f04d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,3 +35,7 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/
BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues
+Suggests:
+ knitr,
+ rmarkdown
+VignetteBuilder: knitr
diff --git a/R/cb_create.r b/R/cb_create.r
index 901e21e..0e77ec3 100644
--- a/R/cb_create.r
+++ b/R/cb_create.r
@@ -99,10 +99,12 @@
#' If labels set in `.user_missing` conflict with those in `metadata`, `.user_missing_conflict`
#' controls which labels are used.
#'
-#' User missing values are not compatible with logical, date, or datetime (POSIXt)
-#' variables. By default, these variables will be ignored if specified in `.user_missing`.
-#' (i.e., user missing values will be applied only to compatible variables.) This behavior
-#' can be changed using the `.user_missing_incompatible` argument.
+#' User missings may be set for numeric, character, factor/ordered factor, and haven_labelled/haven_labelled_spss
+#' vectors. For factors, user missings are set based on factor labels (not the underlying
+#' integer codes). For `"haven_labelled"` vectors, user missings are set based on
+#' values (not value labels). By default, variables with incompatible classes (e.g.,
+#' logical, Date, POSIXt) will be ignored if specified in `.user_missing`. This
+#' behavior can be changed using the `.user_missing_incompatible` argument.
#'
#' @examples
#' diamonds2 <- ggplot2::diamonds |>
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..9fb6fa8
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,130 @@
+---
+output: github_document
+---
+
+
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#",
+ out.width = "100%",
+ fig.align = "center",
+ fig.path = "man/figures/",
+ eval = FALSE
+)
+```
+
+# lighthouse.codebook
+
+The lighthouse.codebook package includes tools to summarize a dataset into a formatted
+Excel workbook, including a data dictionary and summaries. It incorporates external
+metadata (such as variable labels, value labels, and user missing / non-response codes),
+with functions for using metadata from SPSS and REDCap datasets. Codebooks can be
+customized in a number of ways, including options for grouped summaries.
+
+## Installation
+
+You can install lighthouse.codebook by running:
+
+```r
+# install.packages("remotes")
+remotes::install_github("ccsarapas/lighthouse.codebook")
+```
+
+## Creating codebooks
+Creating a codebook involves two general steps:
+
+1. Create a a “codebook” object in R from a data frame (and,
+ optionally, metadata), using `cb_create()` or a specialized variant
+ (such as `cb_create_spss()` or `cb_create_redcap()`).
+
+2. Write the codebook to disk using `cb_write()`.
+
+``` r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |>
+ cb_create() |>
+ cb_write("cb.xlsx")
+
+# with metadata
+dat |>
+ cb_create(metadata = dat1_metadata) |>
+ cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |>
+ cb_create_spss() |>
+ cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |>
+ cb_create_redcap(metadata = meta_rc$data) |>
+ cb_write("cb_rc.xlsx")
+```
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted, summarized, and
+presented. See [**Introduction to lighthouse.codebook**](lighthouse-codebook.html) for
+some of the most useful options, including grouped data summaries and specifying
+user missing codes. Further options are detailed in the documentation for `cb_create()`
+and `cb_write()`.
+
+
+## Codebook contents
+
+The codebook written to disk will include an _overview_ tab listing all variables
+in the dataset; _summary_ tabs for numeric, categorical, and text variables; and,
+if grouping variables are specified, _grouped summary_ tabs for numeric and categorical
+variables.
+
+The _overview_ tab includes one row for each variable in the dataset, with information
+on variable types, labels, values, and missingness. By default, each variable is
+hyperlinked to its location on the relevant summary tab.
+
+```{r, overview, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-overview.png")
+```
+
+The _numeric summary_ tab includes descriptive statistics for all numeric variables
+in the dataset:
+
+```{r, numeric, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-numeric.png")
+```
+
+The _categorical summary_ tab includes frequencies for all categorical variables,
+optionally with separate rows for user missing values:
+
+```{r, categorical, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-categorical.png")
+```
+
+Finally, the _text summary_ tab includes frequencies for the most common values for all
+text variables in the dataset. (The number of values shown can be adjusted using
+the `n_text_vals` argument to `cb_write()`.)
+
+```{r, text, echo = FALSE, eval = TRUE}
+knitr::include_graphics("man/figures/README-text.png")
+```
+
+If `group_by` is specified in `cb_write()`, additional numeric and categorical summary
+tabs will be included grouped by the specified variables.
+
+## SPSS extension
+
+Functionality from this package is also available as an SPSS extension command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
\ No newline at end of file
diff --git a/README.md b/README.md
index 3e38f4e..ffa5dec 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,114 @@
+
+
# lighthouse.codebook
-The lighthouse.codebook package includes tools for summarizing datasets used by staff at the [Lighthouse Institute](https://www.chestnut.org/lighthouse-institute/), the research division of Chestnut Health Systems.
+The lighthouse.codebook package includes tools to summarize a dataset
+into a formatted Excel workbook, including a data dictionary and
+summaries. It incorporates external metadata (such as variable labels,
+value labels, and user missing / non-response codes), with functions for
+using metadata from SPSS and REDCap datasets. Codebooks can be
+customized in a number of ways, including options for grouped summaries.
## Installation
-Install lighthouse.codebook by running:
+You can install lighthouse.codebook by running:
``` r
# install.packages("remotes")
remotes::install_github("ccsarapas/lighthouse.codebook")
```
+
+## Creating codebooks
+
+Creating a codebook involves two general steps:
+
+1. Create a a “codebook” object in R from a data frame (and,
+ optionally, metadata), using `cb_create()` or a specialized variant
+ (such as `cb_create_spss()` or `cb_create_redcap()`).
+
+2. Write the codebook to disk using `cb_write()`.
+
+``` r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |>
+ cb_create() |>
+ cb_write("cb.xlsx")
+
+# with metadata
+dat |>
+ cb_create(metadata = dat1_metadata) |>
+ cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |>
+ cb_create_spss() |>
+ cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |>
+ cb_create_redcap(metadata = meta_rc$data) |>
+ cb_write("cb_rc.xlsx")
+```
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted,
+summarized, and presented. See [**Introduction to
+lighthouse.codebook**](lighthouse-codebook.html) for some of the most
+useful options, including grouped data summaries and specifying user
+missing codes. Further options are detailed in the documentation for
+`cb_create()` and `cb_write()`.
+
+
+## Codebook contents
+
+The codebook written to disk will include an *overview* tab listing all
+variables in the dataset; *summary* tabs for numeric, categorical, and
+text variables; and, if grouping variables are specified, *grouped
+summary* tabs for numeric and categorical variables.
+
+The *overview* tab includes one row for each variable in the dataset,
+with information on variable types, labels, values, and missingness. By
+default, each variable is hyperlinked to its location on the relevant
+summary tab.
+
+
+
+The *numeric summary* tab includes descriptive statistics for all
+numeric variables in the dataset:
+
+
+
+The *categorical summary* tab includes frequencies for all categorical
+variables, optionally with separate rows for user missing values:
+
+
+
+Finally, the *text summary* tab includes frequencies for the most common
+values for all text variables in the dataset. (The number of values
+shown can be adjusted using the `n_text_vals` argument to `cb_write()`.)
+
+
+
+If `group_by` is specified in `cb_write()`, additional numeric and
+categorical summary tabs will be included grouped by the specified
+variables.
+
## SPSS extension
-Functionality from this package is also available as an SPSS extension command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
\ No newline at end of file
+Functionality from this package is also available as an SPSS extension
+command [here](https://github.com/ccsarapas/lighthouse.codebook.spss).
diff --git a/_pkgdown.yml b/_pkgdown.yml
index d98e4d4..8221a9a 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -10,4 +10,4 @@ authors:
Casey Sarapas:
href: "https://chestnut.org/li/scientists-and-project-directors/category/research-scientists/profile/casey-sarapas-phd"
Chestnut Health Systems:
- href: "https://chestnut.org/"
\ No newline at end of file
+ href: "https://chestnut.org/"
diff --git a/man/figures/README-categorical.png b/man/figures/README-categorical.png
new file mode 100644
index 0000000..3766ac3
Binary files /dev/null and b/man/figures/README-categorical.png differ
diff --git a/man/figures/README-numeric.png b/man/figures/README-numeric.png
new file mode 100644
index 0000000..4599abe
Binary files /dev/null and b/man/figures/README-numeric.png differ
diff --git a/man/figures/README-overview.png b/man/figures/README-overview.png
new file mode 100644
index 0000000..dbad3da
Binary files /dev/null and b/man/figures/README-overview.png differ
diff --git a/man/figures/README-text.png b/man/figures/README-text.png
new file mode 100644
index 0000000..ccb966a
Binary files /dev/null and b/man/figures/README-text.png differ
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/lighthouse-codebook.Rmd b/vignettes/lighthouse-codebook.Rmd
new file mode 100644
index 0000000..4c37a0f
--- /dev/null
+++ b/vignettes/lighthouse-codebook.Rmd
@@ -0,0 +1,407 @@
+---
+title: "Introduction to lighthouse.codebook"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Introduction to lighthouse.codebook}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ eval = FALSE,
+ comment = "#",
+ out.width = "100%"
+)
+```
+
+## Creating codebooks
+
+
+
+Creating a codebook involves two general steps:
+
+1. Create a a “codebook” object in R from a data frame (and,
+ optionally, metadata), using `cb_create()` or a specialized variant
+ (such as `cb_create_spss()` or `cb_create_redcap()`).
+
+2. Write the codebook to disk using `cb_write()`.
+
+``` r
+library(lighthouse.codebook)
+
+# create and write a codebook without metadata
+dat |>
+ cb_create() |>
+ cb_write("cb.xlsx")
+
+# with metadata
+dat |>
+ cb_create(metadata = dat1_metadata) |>
+ cb_write("cb.xlsx")
+
+# from SPSS data
+dat_spss <- haven::read_sav("dat_spss.sav", user_na = TRUE)
+
+dat_spss |>
+ cb_create_spss() |>
+ cb_write("cb_spss.xlsx")
+
+# from REDCap data
+dat_rc <- REDCapR::redcap_read(redcap_uri = rc_uri, token = rc_token)
+meta_rc <- REDCapR::redcap_metadata_read(redcap_uri = rc_uri, token = rc_token)
+
+dat_rc$data |>
+ cb_create_redcap(metadata = meta_rc$data) |>
+ cb_write("cb_rc.xlsx")
+```
+The codebook written to disk will include an Overview tab listing all variables
+in the dataset; Summary tabs for numeric, categorical, and text variables; and,
+if grouping variables are specified, Grouped Summary tabs for numeric and categorical
+variables.
+
+## Customizing codebooks
+
+There are many options for controlling how data is interpreted, summarized, and
+presented. This section shows a few of the most useful options. Further options
+are detailed in the documentation for `cb_create()` and `cb_write()`.
+
+### Grouped summaries
+
+Numeric and categorical data summaries can be grouped by one or more variables by
+specifying them in the `group_by` argument to `cb_write()`.
+```r
+cb_create(data, metadata) |>
+ cb_write("cb.xlsx", group_by = treatment_group)
+
+cb_create(data, metadata) |>
+ cb_write("cb.xlsx", group_by = c(treatment_group, timepoint, age_group))
+```
+
+By default, values for each subgroup are shown in separate columns, with decked
+heads if more than one grouping variables is specified. However, some or all grouping
+variables can instead be shown in rows using the `group_rows` argument.
+```r
+# show `treatment_group` in columns and `timepoint` in rows
+cb_create(data, metadata) |>
+ cb_write(
+ "cb.xlsx",
+ group_by = c(treatment_group, timepoint),
+ group_rows = timepoint
+ )
+```
+Different row grouping behavior can be specified for numeric versus categorical
+summary tabs using the `group_rows_numeric` and `group_rows_categorical` arguments.
+```r
+# for numeric summary, show `treatment_group` in columns and `timepoint` in rows;
+# for categorical summary, show all grouping variables in columns
+cb_create(data, metadata) |>
+ cb_write(
+ "cb.xlsx",
+ group_by = c(treatment_group, timepoint),
+ group_rows_numeric = timepoint
+ )
+
+# for numeric summary, show all grouping vars in rows for numeric summary;
+# for categorical summary, show `treatment_group` in rows
+cb_create(data, metadata) |>
+ cb_write(
+ "cb.xlsx",
+ group_by = c(treatment_group, timepoint),
+ group_rows_numeric = c(treatment_group, timepoint),
+ group_rows_categorical = treatment_group
+ )
+```
+
+### User missing values
+
+User missing values (also known as nonresponse codes, reserve codes, or special
+values) can be specified using the `.user_missing` argument to `cb_create()`.
+Missing values are specified using a formula or list of formulas, with variables
+on the left-hand side (as names or [tidyselect](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html)
+expressions) and values on the right-hand side. If the left-hand side is empty,
+user missings will be set for all compatible variables in the dataset.
+
+```r
+# set a single missing value for a single variable
+cb <- cb_create(data, metadata, .user_missing = var1 ~ 99)
+
+# for variables `var1` through `var5`
+cb <- cb_create(data, metadata, .user_missing = var1:var5 ~ c(98, 99))
+
+# for all numeric variables, plus `var6` and `var7`
+cb <- cb_create(
+ data,
+ metadata,
+ .user_missing = c(where(is.numeric), var6, var7) ~ c(-9, -8, -7)
+)
+
+# for all compatible variables in dataset
+cb <- cb_create(data, metadata, .user_missing = ~ c(98, 99))
+```
+
+If the user missing values are named, the names will be treated as value labels
+in data summaries.
+
+```r
+cb <- cb_create(
+ data,
+ metadata,
+ .user_missing = var1:var5 ~ c("Declined" = 98, "Not applicable" = 99)
+)
+```
+
+To apply different user missings for different variables, pass a list of formulas.
+```r
+cb <- cb_create(
+ data,
+ metadata,
+ .user_missing = list(
+ starts_with("status") ~ c("Declined" = 98, "Not applicable" = 99),
+ var7:var10 ~ c("Don't know" = -4, "Not applicable" = -5)
+ )
+)
+```
+### Missing value handling
+
+On numeric summary tabs, missing values (including both user missing values and `NA`)
+are dropped for computation of summary statistics.
+
+On ungrouped categorical and text summary tabs, by default, user missing values
+are individually tabulated. For example, if `.user_missing = ~ c("Declined" = 98,
+"Not applicable" = 99)`, then categorical and text summary tabs will include rows
+giving counts for `"[98] Declined"` and `"[99] Not applicable"`. All user missing
+values and `NA` can instead be collapsed into a single `"(Missing)"` row using the
+`detail_missing` argument to `cb_write()`.
+```r
+dat |>
+ cb_create(.user_missing = ~ ~ c("Declined" = 98, "Not applicable" = 99)) |>
+ cb_write("cb.xlsx", detail_missing = FALSE)
+```
+Finally, user missing values are always collapsed (as though `detail_missing = FALSE`)
+on _grouped_ summary tabs.
+
+### Splitting long variable labels
+
+Sets of related variables sometimes share a common prefix. Using the `.split_var_labels`
+argument to `cb_create()`, this prefix can be extracted into a separate column,
+making it easier to see at a glance what is unique about each variable.
+For example, given a set of variable labels that all begin with `"What colors do
+you like? Select all that apply: "`:
+
+| Name | Label |
+| ---- | ----- |
+| age | How old are you today? |
+| colors1 | What colors do you like? Select all that apply: Red |
+| colors2 | What colors do you like? Select all that apply: Green |
+| colors3 | What colors do you like? Select all that apply: Blue |
+| colors4 | What colors do you like? Select all that apply: Orange |
+| height | What is your height in inches? |
+
+You can split the labels for these variables, specifying them using a [tidyselect](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html)
+expression:
+
+```r
+cb_create(
+ data,
+ metadata,
+ .split_var_labels = starts_with("colors")
+ ) |>
+ cb_write("cb.xlsx")
+```
+
+| Name | Label Stem | Label |
+| ---- | ---------- | ----- |
+| age | | How old are you today? |
+| colors1 | What colors do you like? Select all that apply: | Red |
+| colors2 | What colors do you like? Select all that apply: | Green |
+| colors3 | What colors do you like? Select all that apply: | Blue |
+| colors4 | What colors do you like? Select all that apply: | Orange |
+| height | | What is your height in inches? |
+
+Multiple sets of variables with common prefixes can be specified by passing a
+list of tidyselect expressions.
+
+```r
+cb_create(
+ data,
+ metadata,
+ .split_var_labels = list(
+ starts_with("colors"),
+ fav_food2:fav_food9,
+ c(rating1, rating4:rating7, rating9)
+ )
+ ) |>
+ cb_write("cb.xlsx")
+```
+
+## Variable typing
+Data summaries are produced for "numeric," "categorical," and "text" variables. For a given variable `x`,
+- `x` is treated as categorical if (1) it is a factor, ordered factor, or logical vector, _or_ (2) it has associated value labels other than missing value codes (specified in metadata or, for SPSS data, in a `"haven_labelled"` vector).
+- `x` is treated as numeric if (1) it is numeric (i.e., `is.numeric(x)` is `TRUE`) _and_ (2) it has no associated value labels other than missing value codes.
+- `x` is treated as text if (1) it is a character vector _and_ (2) it has no associated value labels other than missing value codes.
+
+Thus, you can change the way a variable is summarized by changing its class. For
+instance, to get complete frequencies for a numeric or character variable, convert
+it to factor; to get only the top frequencies for a factor with many levels, convert
+it to character.
+
+Variables of other classes, such as dates, datetimes, and lists, are not currently
+included on summary tabs. Summaries for dates and datetimes are planned for a future
+release.
+
+## Other uses for the codebook object
+The `"lighthouse_codebook"` object created by `cb_create()` will most commonly be
+used to write an Excel codebook to disk using `cb_write()`. However, it can also
+be used to create other objects in R.
+
+```r
+# example data
+q4_subset <- gain_q4 |>
+ subset(select = c(XPID, XOBS, XRA, B17, SU4a, SU4b, SU1f99v))
+
+# create codebook
+cb <- cb_create(
+ q4_subset,
+ metadata = q4_metadata,
+ .user_missing = ~ c("Not Asked" = -3,
+ "Missing" = -4,
+ "Confidential" = -6,
+ "Refused" = -7,
+ "Don't Know" = -8,
+ "Legitimate Skip" = -9)
+)
+
+cb
+# # A tibble: 7 × 6
+# name type label values user_missings missing
+#
+# 1 XPID text Participant ID NA [-9] Legitim… 0
+# 2 XOBS categorical Observation Wave [0] I… [-9] Legitim… 0
+# 3 XRA categorical Random assignment [0] C… [-9] Legitim… 0
+# 4 B17 categorical Pregnant [0] N… [-9] Legitim… 0.221
+# 5 SU4a numeric PPS - P90 days alcohol use NA [-9] Legitim… 0.394
+# 6 SU4b numeric PPS - P90 days drunk or 5+ d… NA [-9] Legitim… 0.442
+# 7 SU1f99v text QCS - P90 Days Other AOD Tx … NA [-9] Legitim… 0.923
+```
+### Extract transformed data
+Use `cb_get_data()` to extract transformed data based in several formats. `format
+= "factors"` yields a dataset with all variables with value labels converted to
+factors and user missings converted to `NA`.
+```r
+cb_get_data(cb, format = "factors")
+# # A tibble: 104 × 7
+# XPID XOBS XRA B17 SU4a SU4b SU1f99v
+#
+# 1 001 Intake Treatment No NA NA NA
+# 2 002 Intake Control No 10 4 NA
+# 3 003 Intake Treatment No 10 1 Peer counselor
+# 4 003 3-month Treatment No 39 15 NA
+# 5 003 6-month Treatment No NA NA NA
+# 6 004 Intake Control No 10 2 NA
+# 7 004 3-month Control No 55 63 NA
+# 8 004 6-month Control No 10 1 NA
+# 9 005 Intake Control Yes 35 0 Social worker
+# 10 005 3-month Control Yes 55 39 NA
+# # ℹ 94 more rows
+```
+Whereas `format = "haven"` yields a dataset with SPSS-style variable labels, value
+labels, and user missings encoded using the `"haven_labelled_spss"` class.
+```r
+cb_get_data(cb, format = "haven")
+# # A tibble: 104 × 7
+# XPID XOBS XRA B17 SU4a SU4b SU1f99v
+#
+# 1 001 0 [Intake] 1 [Treatment] 0 [No] -9 (NA) -9 (NA) -9 (NA)
+# 2 002 0 [Intake] 0 [Control] 0 [No] 10 4 -9 (NA)
+# 3 003 0 [Intake] 1 [Treatment] 0 [No] 10 1 Peer couns…
+# 4 003 1 [3-month] 1 [Treatment] 0 [No] 39 15 -4 (NA)
+# 5 003 2 [6-month] 1 [Treatment] 0 [No] -4 (NA) -4 (NA) -4 (NA)
+# 6 004 0 [Intake] 0 [Control] 0 [No] 10 2 -9 (NA)
+# 7 004 1 [3-month] 0 [Control] 0 [No] 55 63 -9 (NA)
+# 8 004 2 [6-month] 0 [Control] 0 [No] 10 1 -9 (NA)
+# 9 005 0 [Intake] 0 [Control] 1 [Yes] 35 0 Social wor…
+# 10 005 1 [3-month] 0 [Control] 1 [Yes] 55 39 -9 (NA)
+# # ℹ 94 more rows
+```
+### Get data summaries
+`cb_summarize_numeric()`, `cb_summarize_categorical()`, and `cb_summarize_text()`
+return summaries for all variables of their respective types. These are the basis
+of the summary tabs generated by `cb_write()`.
+```r
+cb_summarize_numeric(cb)
+# # A tibble: 2 × 8
+# name label valid_n valid_pct mean SD median MAD
+#
+# 1 SU4a PPS - P90 days alcohol use 63 0.606 23.4 21.2 20 25.2
+# 2 SU4b PPS - P90 days drunk or 5+ d… 58 0.558 8.09 13.6 2 2.97
+# # ℹ 5 more variables: min , max , range , skew , kurt
+
+cb_summarize_categorical(cb) |>
+# # A tibble: 9 × 7
+# name label is_missing value n pct_of_all pct_of_valid
+#
+# 1 XOBS Observation Wave FALSE [0] Intake 42 0.404 0.404
+# 2 XOBS Observation Wave FALSE [1] 3-month 34 0.327 0.327
+# 3 XOBS Observation Wave FALSE [2] 6-month 28 0.269 0.269
+# 4 XRA Random assignment FALSE [0] Control 50 0.481 0.481
+# 5 XRA Random assignment FALSE [1] Treatment 54 0.519 0.519
+# 6 B17 Pregnant FALSE [1] Yes 6 0.0577 0.0741
+# 7 B17 Pregnant FALSE [0] No 75 0.721 0.926
+# 8 B17 Pregnant TRUE [-9] Legitim… 22 0.212 NA
+# 9 B17 Pregnant TRUE [-4] Missing 1 0.00962 NA
+# # ℹ 1 more variable: pct_of_missing
+
+cb_summarize_text(cb)
+# # A tibble: 14 × 7
+# name label is_missing unique_n value n pct_of_all
+#
+# 1 XPID Participant ID FALSE 42 003 3 0.0288
+# 2 XPID Participant ID FALSE 42 004 3 0.0288
+# 3 XPID Participant ID FALSE 42 005 3 0.0288
+# 4 XPID Participant ID FALSE 42 006 3 0.0288
+# 5 XPID Participant ID FALSE 42 010 3 0.0288
+# 6 XPID Participant ID FALSE 42 (37 … 89 0.856
+# 7 SU1f99v QCS - P90 Days Other AOD … FALSE 8 AA 1 0.00962
+# 8 SU1f99v QCS - P90 Days Other AOD … FALSE 8 Alco… 1 0.00962
+# 9 SU1f99v QCS - P90 Days Other AOD … FALSE 8 Case… 1 0.00962
+# 10 SU1f99v QCS - P90 Days Other AOD … FALSE 8 Group 1 0.00962
+# 11 SU1f99v QCS - P90 Days Other AOD … FALSE 8 NA 1 0.00962
+# 12 SU1f99v QCS - P90 Days Other AOD … FALSE 8 (3 o… 3 0.0288
+# 13 SU1f99v QCS - P90 Days Other AOD … TRUE NA [-4]… 44 0.423
+# 14 SU1f99v QCS - P90 Days Other AOD … TRUE NA [-9]… 52 0.5
+# # ℹ 2 more variables: pct_of_valid , pct_of_missing
+```
+`cb_summarize_numeric()` and `cb_summarize_categorical()` can also return grouped
+summaries:
+```r
+cb_summarize_numeric(cb, group_by = XOBS)
+# # A tibble: 6 × 8
+# XOBS name label valid_n valid_pct mean SD median
+#
+# 1 Intake SU4a PPS - P90 days alcohol use 30 0.714 28.8 20.1 26.5
+# 2 3-month SU4a PPS - P90 days alcohol use 20 0.588 21.4 21.0 12.5
+# 3 6-month SU4a PPS - P90 days alcohol use 13 0.464 13.9 21.5 6
+# 4 Intake SU4b PPS - P90 days drunk or 5… 30 0.714 7.13 10.7 4
+# 5 3-month SU4b PPS - P90 days drunk or 5… 15 0.441 16.1 20.0 3
+# 6 6-month SU4b PPS - P90 days drunk or 5… 13 0.464 1 0.707 1
+# # ℹ 6 more variables: MAD , min , max , range , skew ,
+# # kurt
+
+cb_summarize_categorical(cb, group_by = XRA)
+# # A tibble: 12 × 7
+# XRA name label value n pct_of_all pct_of_valid
+#
+# 1 Control XOBS Observation Wave [0] Intake 20 0.4 0.4
+# 2 Control XOBS Observation Wave [1] 3-month 16 0.32 0.32
+# 3 Control XOBS Observation Wave [2] 6-month 14 0.28 0.28
+# 4 Treatment XOBS Observation Wave [0] Intake 22 0.407 0.407
+# 5 Treatment XOBS Observation Wave [1] 3-month 18 0.333 0.333
+# 6 Treatment XOBS Observation Wave [2] 6-month 14 0.259 0.259
+# 7 Control B17 Pregnant [1] Yes 3 0.06 0.0811
+# 8 Control B17 Pregnant [0] No 34 0.68 0.919
+# 9 Control B17 Pregnant (Missing) 13 0.26 NA
+# 10 Treatment B17 Pregnant [1] Yes 3 0.0556 0.0682
+# 11 Treatment B17 Pregnant [0] No 41 0.759 0.932
+# 12 Treatment B17 Pregnant (Missing) 10 0.185 NA
+```