From 656ad7af52a7468dea5d10ed50a16fbdb3dccf50 Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 31 Mar 2026 18:37:37 +0200
Subject: [PATCH 1/7] feat: Add tidy-r skill for modern tidyverse R development

---
 .claude-plugin/marketplace.json               |   9 +
 tidyverse/README.md                           |   6 +-
 tidyverse/tidy-r/SKILL.md                     | 210 +++++++++++++++++
 .../tidy-r/references/grouping-examples.md    | 175 ++++++++++++++
 tidyverse/tidy-r/references/join-examples.md  | 123 ++++++++++
 .../tidy-r/references/migration-examples.md   | 165 ++++++++++++++
 .../references/recode-replace-examples.md     | 188 +++++++++++++++
 .../tidy-r/references/stringr-examples.md     | 102 +++++++++
 .../tidy-r/references/tidyverse-style.md      | 215 ++++++++++++++++++
 9 files changed, 1192 insertions(+), 1 deletion(-)
 create mode 100644 tidyverse/tidy-r/SKILL.md
 create mode 100644 tidyverse/tidy-r/references/grouping-examples.md
 create mode 100644 tidyverse/tidy-r/references/join-examples.md
 create mode 100644 tidyverse/tidy-r/references/migration-examples.md
 create mode 100644 tidyverse/tidy-r/references/recode-replace-examples.md
 create mode 100644 tidyverse/tidy-r/references/stringr-examples.md
 create mode 100644 tidyverse/tidy-r/references/tidyverse-style.md

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 4edc581..fb80891 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -76,6 +76,15 @@
         "./quarto/quarto-authoring",
         "./quarto/quarto-alt-text"
       ]
+    },
+    {
+      "name": "tidyverse",
+      "description": "Collection of skills for tidyverse-style R development",
+      "source": "./",
+      "strict": false,
+      "skills": [
+        "./tidyverse/tidy-r"
+      ]
     }
   ]
 }
diff --git a/tidyverse/README.md b/tidyverse/README.md
index e840b29..d58365d 100644
--- a/tidyverse/README.md
+++ b/tidyverse/README.md
@@ -1,6 +1,10 @@
 # Tidyverse Skills
 
-Skills specific to using tidyverse packages and tidyverse-specific package development patterns.
+Skills for tidyverse-style R development, covering modern patterns, style guidelines, and best practices.
+
+## Skills
+
+- **[tidy-r](./tidy-r/)** - Modern tidyverse patterns, style guide, and migration guidance for R development. Covers native pipe usage, join_by() syntax, .by grouping, pick/across/reframe operations, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidy selection, stringr patterns, naming conventions, and migration from base R or older tidyverse APIs.
 
 ## Potential Skills
 
diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
new file mode 100644
index 0000000..325d5eb
--- /dev/null
+++ b/tidyverse/tidy-r/SKILL.md
@@ -0,0 +1,210 @@
+---
+name: tidy-r
+description: |
+  Modern tidyverse patterns, style guide, and migration guidance for R development. Use when writing, reviewing, or refactoring tidyverse code. Covers native pipe, join_by(), .by grouping, pick/across/reframe, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidy selection, stringr, naming conventions, and migration from base R or older tidyverse APIs.
+metadata:
+  r_version: "4.5+"
+  tidyverse_version: "2.0+"
+  dplyr_version: "1.2+"
+allowed-tools: Read, Edit, Write, Grep, Glob, Bash, mcp__r-btw__*
+---
+
+# Writing Modern Tidyverse R
+
+This skill covers modern tidyverse patterns for R 4.5+ and tidyverse 2.0+, style guidelines, and migration from legacy patterns.
+
+## Core philosophy
+
+R's tidyverse evolves. Code from blog posts and StackOverflow often uses deprecated APIs, magrittr pipes, or base R patterns where a modern tidyverse function exists. This skill encodes the current recommended approach so the model writes code that experienced R developers would recognize as idiomatic.
+
+## When to use this skill
+
+- Writing new R code with dplyr, tidyr, stringr, purrr, or other tidyverse packages
+- Reviewing or refactoring existing R code for modern patterns
+- Migrating from base R, magrittr pipes, or older tidyverse APIs
+- Applying tidyverse style conventions (naming, spacing, error handling)
+- Choosing between similar functions (e.g., `case_when` vs `recode_values`)
+- Working with joins, grouping, recoding, or string manipulation in R
+
+## When NOT to use this skill
+
+- Writing data.table code (different paradigm)
+- Pure base R projects that intentionally avoid tidyverse
+- Shiny UI/server logic (use a Shiny-specific skill)
+- Package development internals (NAMESPACE, DESCRIPTION, roxygen)
+- ggplot2 visualization (use the socviz skill)
+- Statistical modeling or Bayesian analysis
+
+## Instructions
+
+When you receive a request, classify it and consult the appropriate reference:
+
+### Step 1: Classify the request
+
+| Category | Reference file | Trigger |
+|----------|---------------|---------|
+| **Joins** | [join-examples.md](references/join-examples.md) | Merging data, `*_join`, `join_by`, matching rows, lookup tables |
+| **Grouping & columns** | [grouping-examples.md](references/grouping-examples.md) | `.by`, `group_by`, `across`, `pick`, `reframe`, column operations |
+| **Recoding & replacing** | [recode-replace-examples.md](references/recode-replace-examples.md) | `case_when`, `recode_values`, `replace_values`, `replace_when`, `filter_out`, `when_any`, `when_all`, recoding, replacing, conditional updates |
+| **Strings** | [stringr-examples.md](references/stringr-examples.md) | String manipulation, regex, `str_*` functions, text processing |
+| **Style** | [tidyverse-style.md](references/tidyverse-style.md) | Naming, formatting, spacing, error messages, `cli::cli_abort` |
+| **Migration** | [migration-examples.md](references/migration-examples.md) | Updating old code, base R conversion, deprecated functions |
+
+### Step 2: Read the reference file(s)
+
+Use the Read tool to load the relevant reference. For requests that span multiple categories (e.g., "rewrite this old code" touches migration + style), read multiple files.
+
+### Step 3: Apply core principles
+
+1. **Use modern tidyverse patterns** - Prioritize dplyr 1.2+ features, native pipe, and current APIs
+2. **Write readable code first** - Optimize only when necessary
+3. **Follow tidyverse style guide** - Consistent naming, spacing, and structure
+4. **Use R MCP tools** - Automatically resolve function documentation and library references without being asked. If the `mcp__r-btw__*` tools are unavailable, fall back to running R help via Bash (see below)
+
+### R documentation lookup fallback
+
+When `mcp__r-btw__*` tools are available, use them to look up function signatures, help pages, and package docs. When they are not available (e.g., the r-btw MCP server is not configured), fall back to Bash:
+
+```bash
+# Help page for a function
+Rscript --vanilla -e '?dplyr::recode_values' 2>/dev/null || Rscript --vanilla -e 'utils::help("recode_values", package = "dplyr")'
+
+# Function signature / arguments
+Rscript --vanilla -e 'args(dplyr::recode_values)'
+
+# List exported functions in a package
+Rscript --vanilla -e 'getNamespaceExports("dplyr")'
+
+# Check if a package is installed
+Rscript --vanilla -e 'requireNamespace("dplyr", quietly = TRUE)'
+```
+
+### Step 4: Write the code
+
+Follow the quick reference and anti-patterns below. When in doubt, consult the reference files.
+
+## Quick reference
+
+### Pipe and lambda
+
+- Always `|>`, never `%>%`
+- Always `\(x)`, never `function(x)` or `~` in map/keep/etc.
+
+### Code organization
+
+Use newspaper style: high-level logic first, helpers below. Don't define functions inside other functions unless they are very brief.
+
+### Grouping
+
+- Use `.by` for per-operation grouping, never `group_by() |> ... |> ungroup()`
+- Place `.by` on its own line for readability
+
+### Joins
+
+- Use `join_by()`, never `c("a" = "b")`
+- Use `relationship`, `unmatched`, `na_matches` for quality control
+
+### Recoding and replacing (dplyr 1.2+)
+
+| Task | Function |
+|------|----------|
+| Recode values (new column) | `recode_values()` |
+| Replace values in place | `replace_values()` |
+| Conditional update in place | `replace_when()` |
+| Complex conditional (new column) | `case_when()` |
+| Drop rows (NA-safe) | `filter_out()` |
+| OR conditions | `when_any()` |
+| AND conditions | `when_all()` |
+
+### Error handling
+
+Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
+
+### R idioms
+
+- `TRUE`/`FALSE`, never `T`/`F`
+- `message()` for info, never `cat()`
+- `map_*()` over `sapply()` for type stability
+- `set.seed()` with date-time, never 42
+- `qs2::qs_save()`/`qs2::qs_read()`, never `qs`
+
+## Anti-patterns
+
+| Avoid | Use instead |
+|-------|-------------|
+| `%>%` | `\|>` |
+| `function(x)` or `~` | `\(x)` |
+| `by = c("a" = "b")` | `by = join_by(a == b)` |
+| `multiple = "error"` in joins | `relationship = "many-to-one"` (or `"one-to-one"`) |
+| `sapply()` | `map_*()` (type-stable) |
+| `group_by() \|> ... \|> ungroup()` | `.by` argument |
+| `cat()` for messages | `message()` or `cli::cli_inform()` |
+| `stop()` for errors | `cli::cli_abort()` |
+| `distinct(id)` | `distinct(id, .keep_all = TRUE)` |
+| `mean(x, na.rm = TRUE)` | `mean(x)` with tidyna loaded |
+| `case_match(x, ...)` | `recode_values(x, ...)` |
+| `recode(x, ...)` | `recode_values(x, ...)` or `replace_values(x, ...)` |
+| `filter(x != val \| is.na(x))` | `filter_out(x == val)` |
+| `coalesce(x, default)` | `replace_values(x, NA ~ default)` |
+| `na_if(x, val)` | `replace_values(x, val ~ NA)` |
+| `qs::qsave()` / `qs::qread()` | `qs2::qs_save()` / `qs2::qs_read()` |
+
+## Complete workflow example
+
+```r
+library(tidyverse)
+
+# Read and clean data
+sales <- read_csv("data/sales.csv") |>
+  rename(
+    region = Region,
+    product = Product,
+    revenue = Revenue,
+    date = Date
+  ) |>
+  mutate(
+    quarter = quarter(date),
+    product = product |>
+      replace_values(
+        c("Widget A", "WidgetA") ~ "Widget A",
+        c("Widget B", "WidgetB") ~ "Widget B"
+      )
+  ) |>
+  filter_out(is.na(revenue))
+
+# Enrich with lookup table
+sales_enriched <- sales |>
+  left_join(
+    regions,
+    by = join_by(region == region_code),
+    unmatched = "error"
+  )
+
+# Summarise by group
+quarterly <- sales_enriched |>
+  summarise(
+    total_revenue = sum(revenue),
+    avg_revenue = mean(revenue),
+    n_transactions = n(),
+    .by = c(region_name, quarter)
+  ) |>
+  mutate(
+    performance = case_when(
+      total_revenue > 100000 ~ "high",
+      total_revenue > 50000 ~ "medium",
+      .default = "low"
+    )
+  ) |>
+  arrange(region_name, quarter)
+```
+
+## Best practices
+
+1. **Use `.unmatched = "error"` in `case_when()` and `unmatched = "error"` in `recode_values()`** for defensive programming
+2. **Place `.by` on its own line** for readability
+3. **Prefer `filter_out()` over negated `filter()`** for NA-safe row removal
+4. **Use `recode_values()` over `case_match()`** (dplyr 1.2+ preferred API)
+5. **Use `replace_when()` over `case_when()` with `.default`** when updating a column in place
+6. **Name variables as nouns, functions as verbs** in snake_case
+7. **Explain "why" in comments**, not "what"
+8. **Use `qs2` for serialization** with `.qs2` extension
diff --git a/tidyverse/tidy-r/references/grouping-examples.md b/tidyverse/tidy-r/references/grouping-examples.md
new file mode 100644
index 0000000..d5ed53d
--- /dev/null
+++ b/tidyverse/tidy-r/references/grouping-examples.md
@@ -0,0 +1,175 @@
+# Modern Grouping and Column Operations (dplyr 1.1+)
+
+## Per-operation grouping with .by
+
+The `.by` argument replaces the old `group_by() |> ... |> ungroup()` pattern. Results are always ungrouped.
+
+### Basic usage
+
+```r
+data |>
+  summarise(
+    mean_value = mean(value),
+    .by = category
+  )
+```
+
+### Multiple grouping variables
+
+```r
+data |>
+  summarise(
+    total = sum(revenue),
+    .by = c(company, year)
+  )
+```
+
+### .by with mutate (window functions)
+
+```r
+data |>
+  mutate(
+    pct_of_group = revenue / sum(revenue),
+    rank = row_number(desc(revenue)),
+    .by = region
+  )
+```
+
+### .by with filter (group-level filtering)
+
+```r
+data |>
+  filter(
+    revenue == max(revenue),
+    .by = region
+  )
+```
+
+### Place .by on its own line
+
+```r
+# Good - readable
+data |>
+  summarise(
+    mean_value = mean(value),
+    .by = category
+  )
+
+# Avoid - crammed
+data |>
+  summarise(mean_value = mean(value), .by = category)
+```
+
+### Avoid - old persistent grouping pattern
+
+```r
+# Avoid
+data |>
+  group_by(category) |>
+  summarise(mean_value = mean(value)) |>
+  ungroup()
+```
+
+## pick() for column selection
+
+Use `pick()` inside data-masking functions to select columns by name or tidyselect helpers:
+
+```r
+data |>
+  summarise(
+    n_x_cols = ncol(pick(starts_with("x"))),
+    n_y_cols = ncol(pick(starts_with("y")))
+  )
+```
+
+### pick() to pass selected columns to functions
+
+```r
+data |>
+  mutate(
+    row_mean = rowMeans(pick(where(is.numeric)))
+  )
+```
+
+## across() for applying functions
+
+Apply one or more functions to multiple columns:
+
+### Single function
+
+```r
+data |>
+  summarise(
+    across(where(is.numeric), \(x) mean(x)),
+    .by = group
+  )
+```
+
+### Multiple functions with naming
+
+```r
+data |>
+  summarise(
+    across(
+      c(revenue, cost),
+      list(mean = \(x) mean(x), sd = \(x) sd(x)),
+      .names = "{.fn}_{.col}"
+    ),
+    .by = region
+  )
+```
+
+### Conditional transformation
+
+```r
+data |>
+  mutate(
+    across(where(is.character), str_to_lower)
+  )
+```
+
+## reframe() for multi-row results
+
+When a summary returns multiple rows per group, use `reframe()` instead of `summarise()`:
+
+```r
+data |>
+  reframe(
+    quantile = c(0.25, 0.50, 0.75),
+    value = quantile(x, c(0.25, 0.50, 0.75)),
+    .by = group
+  )
+```
+
+## Data masking vs tidy selection
+
+Understand the difference for writing functions:
+
+- **Data masking** (`arrange`, `filter`, `mutate`, `summarise`): expressions evaluated in data context
+- **Tidy selection** (`select`, `relocate`, `across`, `pick`): column selection helpers
+
+### Embrace with {{ }} for function arguments
+
+```r
+my_summary <- function(data, summary_var) {
+  data |>
+    summarise(mean_val = mean({{ summary_var }}))
+}
+```
+
+### Character vectors use .data[[]]
+
+```r
+for (var in names(mtcars)) {
+  mtcars |> count(.data[[var]]) |> print()
+}
+```
+
+### Multiple columns use across()
+
+```r
+my_summary <- function(data, summary_vars) {
+  data |>
+    summarise(across({{ summary_vars }}, \(x) mean(x)))
+}
+```
diff --git a/tidyverse/tidy-r/references/join-examples.md b/tidyverse/tidy-r/references/join-examples.md
new file mode 100644
index 0000000..ffb0a7a
--- /dev/null
+++ b/tidyverse/tidy-r/references/join-examples.md
@@ -0,0 +1,123 @@
+# Modern Join Syntax (dplyr 1.1+)
+
+## Use join_by() instead of character vectors
+
+### Equality joins
+
+```r
+transactions |>
+  inner_join(companies, by = join_by(company == id))
+```
+
+### Same-name columns
+
+```r
+# When both tables share a column name, use a single name
+orders |>
+  left_join(customers, by = join_by(customer_id))
+```
+
+### Inequality joins
+
+```r
+transactions |>
+  inner_join(companies, by = join_by(company == id, year >= since))
+```
+
+### Rolling joins (closest match)
+
+```r
+transactions |>
+  inner_join(companies, by = join_by(company == id, closest(year >= since)))
+```
+
+### Overlap joins
+
+```r
+# Find events during each interval
+intervals |>
+  inner_join(events, by = join_by(start <= time, end >= time))
+```
+
+### Avoid - Old character vector syntax
+
+```r
+# Avoid
+transactions |>
+  inner_join(companies, by = c("company" = "id"))
+```
+
+## Relationship and match handling
+
+### Enforce expected cardinality with relationship
+
+```r
+# 1:1 - each row matches at most one row in the other table
+inner_join(x, y, by = join_by(id), relationship = "one-to-one")
+
+# Many-to-one - many x rows can match one y row (lookup pattern)
+left_join(x, y, by = join_by(id), relationship = "many-to-one")
+
+# One-to-many
+inner_join(x, y, by = join_by(id), relationship = "one-to-many")
+```
+
+### Ensure all rows match
+
+```r
+inner_join(x, y, by = join_by(id), unmatched = "error")
+```
+
+### Prevent NA matching (recommended)
+
+```r
+# By default, NA matches NA in joins -- usually not desired
+left_join(x, y, by = join_by(id), na_matches = "never")
+```
+
+### Combining guards for production code
+
+```r
+sales |>
+  left_join(
+    products,
+    by = join_by(product_id),
+    relationship = "many-to-one",
+    unmatched = "error",
+    na_matches = "never"
+  )
+```
+
+## Logging joins with tidylog
+
+Use `tidylog::` prefix for joins to verify expected behavior. Call directly without loading the package.
+
+```r
+result <- transactions |>
+  tidylog::left_join(companies, by = join_by(company == id))
+
+# tidylog output:
+# left_join: added 2 columns (name, region)
+#            > rows only in x      12
+#            > rows only in y     (3)
+#            > matched rows       988
+#            > rows total        1000
+```
+
+### Interpreting join output
+
+| Output | Meaning |
+|--------|---------|
+| `rows only in x` | Rows in left table with no match (kept in left joins, with `NA` in the columns coming from `y`) |
+| `rows only in y` | Rows in right table with no match (in parentheses, dropped in left joins) |
+| `matched rows` | Rows that matched between tables |
+| `rows total` | Final row count after join |
+
+### When to use tidylog
+
+- **Always for joins** to see how many rows matched, duplicated, or were dropped
+- **Critical filters** with `tidylog::filter()` to verify expected row counts
+- **Critical mutates** with `tidylog::mutate()` to verify expected changes
+- **Any operation where silent data loss is a risk**
+
+Don't use tidylog in production code, inside functions, or loops where output would be too verbose. It's for interactive verification only.
diff --git a/tidyverse/tidy-r/references/migration-examples.md b/tidyverse/tidy-r/references/migration-examples.md
new file mode 100644
index 0000000..0cd6cf9
--- /dev/null
+++ b/tidyverse/tidy-r/references/migration-examples.md
@@ -0,0 +1,165 @@
+# Migration: Base R and Old Tidyverse to Modern Patterns
+
+## Base R to Modern Tidyverse
+
+### Data manipulation
+
+```r
+subset(data, condition)          # -> filter(data, condition)
+data[order(data$x), ]            # -> arrange(data, x)
+aggregate(x ~ y, data, mean)     # -> summarise(data, mean(x), .by = y)
+merge(x, y, by = "id")           # -> inner_join(x, y, by = join_by(id))
+```
+
+### Functional programming
+
+```r
+sapply(x, f)                     # -> map_*(x, f), e.g. map_dbl(x, f)  # type-stable
+lapply(x, f)                     # -> map(x, f)
+vapply(x, f, numeric(1))         # -> map_dbl(x, f)
+```
+
+### String manipulation
+
+```r
+grepl("pattern", text)           # -> str_detect(text, "pattern")
+gsub("old", "new", text)         # -> str_replace_all(text, "old", "new")
+substr(text, 1, 5)               # -> str_sub(text, 1, 5)
+nchar(text)                      # -> str_length(text)
+strsplit(text, ",")              # -> str_split(text, ",")
+tolower(text)                    # -> str_to_lower(text)
+sprintf("Hello %s", name)       # -> str_glue("Hello {name}")
+```
+
+## Old to New Tidyverse Patterns
+
+### Pipes
+
+```r
+data %>% f()                     # -> data |> f()
+```
+
+### Anonymous functions
+
+```r
+map(x, function(x) x + 1)       # -> map(x, \(x) x + 1)
+map(x, ~ .x + 1)                # -> map(x, \(x) x + 1)
+```
+
+### Grouping (dplyr 1.1+)
+
+```r
+group_by(data, x) |>
+  summarise(mean(y)) |>
+  ungroup()                      # -> summarise(data, mean(y), .by = x)
+```
+
+### Joins
+
+```r
+by = c("a" = "b")                # -> by = join_by(a == b)
+```
+
+### Column selection
+
+```r
+across(starts_with("x"))         # -> pick(starts_with("x"))  # for selection only
+```
+
+### Multi-row summaries
+
+```r
+summarise(data, x, .groups = "drop") # -> reframe(data, x)
+```
+
+### Data reshaping
+
+```r
+gather()/spread()                # -> pivot_longer()/pivot_wider()
+```
+
+### String separation (tidyr 1.3+)
+
+```r
+separate(col, into = c("a", "b"))
+# -> separate_wider_delim(col, delim = "_", names = c("a", "b"))
+
+extract(col, into = "x", regex)
+# -> separate_wider_regex(col, patterns = c(x = regex))
+```
+
+### Superseded purrr functions (purrr 1.0+)
+
+```r
+map_dfr(x, f)                    # -> map(x, f) |> list_rbind()
+map_dfc(x, f)                    # -> map(x, f) |> list_cbind()
+map2_dfr(x, y, f)                # -> map2(x, y, f) |> list_rbind()
+pmap_dfr(list, f)                # -> pmap(list, f) |> list_rbind()
+imap_dfr(x, f)                   # -> imap(x, f) |> list_rbind()
+```
+
+### Recoding and replacing (dplyr 1.2+)
+
+```r
+case_match(x, val ~ result)      # -> recode_values(x, val ~ result)
+recode(x, old = "new")           # -> recode_values(x, "old" ~ "new")
+                                 #    or replace_values(x, "old" ~ "new")
+
+# Conditional replacement: case_when with .default = x -> replace_when
+case_when(
+  cond1 ~ val1,
+  cond2 ~ val2,
+  .default = x
+)                                # -> x |> replace_when(cond1 ~ val1, cond2 ~ val2)
+
+# NA handling
+coalesce(x, default)             # -> replace_values(x, NA ~ default)
+na_if(x, val)                    # -> replace_values(x, val ~ NA)
+tidyr::replace_na(x, default)    # -> replace_values(x, NA ~ default)
+```
+
+### Filter family (dplyr 1.2+)
+
+```r
+# Dropping rows with NA-safe negation
+filter(x != val | is.na(x))      # -> filter_out(x == val)
+
+# Combining conditions with OR
+filter(cond1 | cond2 | cond3)    # -> filter(when_any(cond1, cond2, cond3))
+
+# Combining conditions with AND (explicit)
+filter(cond1 & cond2 & cond3)    # -> filter(when_all(cond1, cond2, cond3))
+```
+
+### Serialization
+
+```r
+qs::qsave(x, "file.qs")         # -> qs2::qs_save(x, "file.qs2")
+qs::qread("file.qs")            # -> qs2::qs_read("file.qs2")
+```
+
+### Defunct in dplyr 1.2 (now errors)
+
+```r
+# Underscored SE verbs (defunct since 1.2, deprecated since 0.7)
+mutate_()                        # -> mutate() with modern programming
+filter_()                        # -> filter()
+summarise_()                     # -> summarise()
+# ... all *_() variants
+
+# _each variants (defunct since 1.2, deprecated since 0.7)
+mutate_each()                    # -> mutate(across(...))
+summarise_each()                 # -> summarise(across(...))
+
+# Multi-row summarise (defunct since 1.2, deprecated since 1.1)
+summarise(data, x)               # -> reframe(data, x) for multi-row results
+```
+
+### For side effects
+
+```r
+for (x in xs) write_file(x)     # -> walk(xs, write_file)
+for (i in seq_along(data)) {
+  write_csv(data[[i]], paths[[i]])
+}                                # -> walk2(data, paths, write_csv)
+```
diff --git a/tidyverse/tidy-r/references/recode-replace-examples.md b/tidyverse/tidy-r/references/recode-replace-examples.md
new file mode 100644
index 0000000..6c95616
--- /dev/null
+++ b/tidyverse/tidy-r/references/recode-replace-examples.md
@@ -0,0 +1,188 @@
+# Recoding, Replacing, and Filtering (dplyr 1.2+)
+
+dplyr 1.2 introduced a family of functions for recoding and replacing values, and for NA-safe filtering. These replace older patterns (`case_match`, `recode`, `coalesce`, `na_if`, negated filters).
+
+## The recode/replace family
+
+|                           | **Recoding** (new column) | **Replacing** (update in place) |
+|---------------------------|---------------------------|---------------------------------|
+| **Match with conditions** | `case_when()`             | `replace_when()`                |
+| **Match with values**     | `recode_values()`         | `replace_values()`              |
+
+## recode_values()
+
+Use instead of `case_match()` or repetitive `case_when()` with `==`.
+
+### Formula interface
+
+```r
+score |>
+  recode_values(
+    1 ~ "Strongly disagree",
+    2 ~ "Disagree",
+    3 ~ "Neutral",
+    4 ~ "Agree",
+    5 ~ "Strongly agree"
+  )
+```
+
+### Lookup table interface
+
+```r
+likert |>
+  mutate(score = recode_values(score, from = lookup$from, to = lookup$to))
+```
+
+### With unmatched = "error" for safety
+
+```r
+# Errors if any value has no match
+score |>
+  recode_values(
+    1 ~ "Low",
+    2 ~ "Medium",
+    3 ~ "High",
+    unmatched = "error"
+  )
+```
+
+### Avoid
+
+```r
+# Avoid - repetitive case_when with ==
+case_when(score == 1 ~ "Strongly disagree", score == 2 ~ "Disagree", ...)
+
+# Avoid - case_match() is soft-deprecated in dplyr 1.2
+case_match(score, 1 ~ "Strongly disagree", 2 ~ "Disagree", ...)
+
+# Avoid - recode() is soft-deprecated
+recode(score, `1` = "Strongly disagree", `2` = "Disagree", ...)
+```
+
+## replace_values()
+
+Use for partial updates by value. Unmatched values pass through unchanged.
+
+### Replace specific values
+
+```r
+name |>
+  replace_values(
+    c("UNC", "Chapel Hill") ~ "UNC Chapel Hill",
+    c("Duke", "Duke University") ~ "Duke"
+  )
+```
+
+### Replace NA (replaces coalesce/tidyr::replace_na)
+
+```r
+x |> replace_values(NA ~ 0)
+```
+
+### Convert sentinel values to NA (replaces na_if)
+
+```r
+x |> replace_values(from = c(0, -99), to = NA)
+```
+
+## replace_when()
+
+Use for conditional updates. Type-stable on the input; unmatched values pass through unchanged.
+
+### Conditional updates
+
+```r
+racers |>
+  mutate(
+    time = time |>
+      replace_when(
+        id %in% id_banned ~ NA,
+        id %in% id_penalty ~ time + 1/3
+      )
+  )
+```
+
+### Avoid - case_when with .default
+
+```r
+# Avoid - buries the primary input, loses type info
+mutate(time = case_when(
+  id %in% id_banned ~ NA,
+  id %in% id_penalty ~ time + 1/3,
+  .default = time
+))
+```
+
+## case_when() with .unmatched = "error"
+
+Still the right choice for complex conditional recoding into a new column. Use `.unmatched = "error"` for safety:
+
+```r
+tier <- case_when(
+  time < 23 ~ "A",
+  time < 27 ~ "B",
+  time < 30 ~ "C",
+  .unmatched = "error"
+)
+```
+
+## filter_out()
+
+NA-safe row removal. Treats `NA` as `FALSE`, so you don't accidentally drop NA rows:
+
+```r
+# Good - clear intent, NA-safe
+data |> filter_out(deceased, date < 2012)
+
+# Avoid - easy to get wrong with NA
+data |> filter(!(deceased & date < 2012) | is.na(deceased) | is.na(date))
+```
+
+## when_any() and when_all()
+
+Combine conditions with comma-separated syntax instead of `|` and `&`:
+
+### OR conditions
+
+```r
+data |>
+  filter(when_any(
+    name %in% c("US", "CA") & between(score, 200, 300),
+    name %in% c("PR", "RU") & between(score, 100, 200)
+  ))
+```
+
+### Drop rows matching any condition
+
+```r
+data |>
+  filter_out(when_any(
+    is.na(value),
+    status == "invalid"
+  ))
+```
+
+### AND conditions
+
+```r
+data |>
+  filter(when_all(
+    score > 50,
+    !is.na(region),
+    status == "active"
+  ))
+```
+
+## Migration quick reference
+
+| Old pattern | New pattern |
+|-------------|-------------|
+| `case_match(x, val ~ result)` | `recode_values(x, val ~ result)` |
+| `recode(x, old = "new")` | `recode_values(x, "old" ~ "new")` |
+| `case_when(..., .default = x)` | `x \|> replace_when(...)` |
+| `coalesce(x, default)` | `replace_values(x, NA ~ default)` |
+| `na_if(x, val)` | `replace_values(x, val ~ NA)` |
+| `tidyr::replace_na(x, default)` | `replace_values(x, NA ~ default)` |
+| `filter(x != val \| is.na(x))` | `filter_out(x == val)` |
+| `filter(c1 \| c2 \| c3)` | `filter(when_any(c1, c2, c3))` |
+| `filter(c1 & c2 & c3)` | `filter(when_all(c1, c2, c3))` |
diff --git a/tidyverse/tidy-r/references/stringr-examples.md b/tidyverse/tidy-r/references/stringr-examples.md
new file mode 100644
index 0000000..a56c5a9
--- /dev/null
+++ b/tidyverse/tidy-r/references/stringr-examples.md
@@ -0,0 +1,102 @@
+# String Manipulation with stringr
+
+Use stringr over base R string functions. Benefits: consistent `str_` prefix, string-first argument order, pipe-friendly and vectorized.
+
+## Core patterns
+
+### Pipe-friendly chaining
+
+```r
+text |>
+  str_to_lower() |>
+  str_trim() |>
+  str_replace_all("pattern", "replacement") |>
+  str_extract("\\d+")
+```
+
+### Detection and extraction
+
+```r
+str_detect(text, "pattern")       # logical: does it match?
+str_which(text, "pattern")        # integer: which elements match?
+str_count(text, "pattern")        # integer: how many matches?
+str_extract(text, "pattern")      # first match
+str_extract_all(text, "pattern")  # all matches (returns list)
+str_match(text, "(\\w+)@(\\w+)")  # capture groups as matrix
+```
+
+### Replacement
+
+```r
+str_replace(text, "old", "new")       # first occurrence
+str_replace_all(text, "old", "new")   # all occurrences
+str_remove(text, "pattern")           # remove first match
+str_remove_all(text, "pattern")       # remove all matches
+```
+
+### Splitting and combining
+
+```r
+str_split(text, ",")                  # split into list
+str_split_fixed(text, ",", n = 3)     # split into matrix (fixed columns)
+str_split_i(text, ",", i = 2)         # extract ith piece directly
+str_c("a", "b", "c", sep = "-")      # combine with separator
+str_flatten(words, collapse = ", ")   # collapse vector to single string
+```
+
+### Substring operations
+
+```r
+str_sub(text, 1, 5)                   # extract positions 1-5
+str_sub(text, -3)                     # last 3 characters
+str_length(text)                      # character count
+str_trunc(text, 20)                   # truncate with ellipsis
+```
+
+### Formatting
+
+```r
+str_to_lower(text)                    # lowercase
+str_to_upper(text)                    # uppercase
+str_to_title(text)                    # title case
+str_to_sentence(text)                # sentence case
+str_trim(text)                        # remove leading/trailing whitespace
+str_squish(text)                      # trim + collapse internal whitespace
+str_pad(text, 10, side = "left")      # pad to fixed width
+str_wrap(text, width = 80)            # word wrap
+```
+
+### Interpolation
+
+```r
+str_glue("Hello {name}, you scored {score}!")
+str_glue_data(df, "{name}: {value}")
+```
+
+## Pattern helpers
+
+Use these for clarity about what kind of matching you intend:
+
+```r
+str_detect(text, fixed("$"))           # literal match (no regex)
+str_detect(text, regex("\\d+"))        # explicit regex (default)
+str_detect(text, regex("hello", ignore_case = TRUE))  # case-insensitive
+str_detect(text, coll("e", locale = "fr"))  # locale-aware collation
+str_count(text, boundary("word"))      # count words via word boundaries
+```
+
+## stringr vs base R
+
+| stringr | base R | Notes |
+|---------|--------|-------|
+| `str_detect(text, "pat")` | `grepl("pat", text)` | Argument order differs |
+| `str_extract(text, "pat")` | `regmatches(text, regexpr(...))` | Much simpler |
+| `str_replace_all(text, "a", "b")` | `gsub("a", "b", text)` | Argument order differs |
+| `str_split(text, ",")` | `strsplit(text, ",")` | |
+| `str_length(text)` | `nchar(text)` | |
+| `str_sub(text, 1, 5)` | `substr(text, 1, 5)` | |
+| `str_to_lower(text)` | `tolower(text)` | |
+| `str_to_upper(text)` | `toupper(text)` | |
+| `str_to_title(text)` | `tools::toTitleCase(text)` | |
+| `str_trim(text)` | `trimws(text)` | |
+| `str_glue("Hello {x}")` | `sprintf("Hello %s", x)` | More readable |
diff --git a/tidyverse/tidy-r/references/tidyverse-style.md b/tidyverse/tidy-r/references/tidyverse-style.md
new file mode 100644
index 0000000..304b5bc
--- /dev/null
+++ b/tidyverse/tidy-r/references/tidyverse-style.md
@@ -0,0 +1,215 @@
+# Tidyverse Style Guide Summary
+
+Based on https://style.tidyverse.org/
+
+## Object Names
+
+- Use **snake_case**: lowercase letters, numbers, underscores only
+- Variables = **nouns**, functions = **verbs**
+- Avoid reusing common function/variable names
+- Prefix non-standard function arguments with `.` (e.g., `.data`, `.by`)
+- Avoid dots in names except for S3 methods
+
+```r
+# Good
+day_one
+calculate_mean
+user_data
+
+# Bad
+DayOne
+calculateMean
+day.one
+```
+
+## Spacing
+
+**Commas**: space after, never before
+
+```r
+# Good
+x[, 1]
+mean(x, na.rm = TRUE)
+
+# Bad
+x[,1]
+mean(x ,na.rm = TRUE)
+```
+
+**Infix operators**: surround with spaces (`==`, `+`, `-`, `<-`, etc.)
+
+```r
+# Good
+x == y
+z <- 2 + 2
+
+# Bad
+x==y
+z<-2+2
+```
+
+**No spaces** for high-precedence operators: `::`, `$`, `@`, `[`, `[[`, `^`, `:`
+
+```r
+# Good
+sqrt(x^2 + y^2)
+x <- 1:10
+pkg::fun()
+```
+
+## Assignment
+
+Use `<-`, not `=`
+
+```r
+# Good
+x <- 5
+
+# Bad
+x = 5
+```
+
+## Quotes
+
+Use double quotes `"`; single `'` only when text contains double quotes
+
+```r
+# Good
+"Text here"
+'They said "hello"'
+```
+
+## Line Length
+
+Limit to **80 characters**. For long function calls, put each argument on its own line:
+
+```r
+# Good
+do_something(
+  arg1 = "value",
+  arg2 = "value",
+  arg3 = "value"
+)
+```
+
+## Braces
+
+- `{` ends a line
+- Contents indented by **2 spaces**
+- `}` starts a line
+- `else` on same line as `}`
+
+```r
+if (condition) {
+  do_this()
+} else {
+  do_that()
+}
+```
+
+## Functions
+
+**Anonymous functions**: use `\(x)` for short lambdas
+
+```r
+# Good
+map(x, \(x) x + 1)
+
+# Bad
+map(x, function(x) x + 1)
+```
+
+**Return**: use `return()` only for early returns; rely on implicit return otherwise
+
+```r
+# Good
+add_one <- function(x) {
+  x + 1
+}
+
+# Early return
+check_input <- function(x) {
+  if (is.null(x)) {
+    return(NULL)
+  }
+  process(x)
+}
+```
+
+**Multi-line definitions**: single-indent style preferred
+
+```r
+long_function_name <- function(
+  a = "argument",
+  b = "argument"
+) {
+  # body
+}
+```
+
+## Pipes
+
+- Use `|>` (not `%>%`)
+- Space before pipe, newline after
+- Indent continuation by 2 spaces
+
+```r
+# Good
+data |>
+  filter(x > 0) |>
+  mutate(y = x * 2) |>
+  summarise(mean(y))
+
+# Bad
+data |> filter(x > 0) |> mutate(y = x * 2)
+```
+
+**Avoid pipes when**:
+- Manipulating multiple objects
+- Meaningful intermediate objects deserve names
+
+## Comments
+
+- Start with `# ` (hash + space)
+- Explain **why**, not what
+- Use sentence case
+
+```r
+# Skip NA values because downstream analysis requires complete cases
+data <- data |> filter(!is.na(value))
+```
+
+## Control Flow
+
+- Use `&&` and `||` in conditions (not `&` and `|`)
+- Use `TRUE`/`FALSE` (not `T`/`F`)
+- Never use semicolons
+
+## Error Messages
+
+Use `cli::cli_abort()` for errors. See https://style.tidyverse.org/errors.html
+
+**Problem statement**:
+- Start with concise problem in sentence case, ending with `.`
+- Use **"must"** when cause is clear: `` `n` must be a numeric vector, not a character vector.``
+- Use **"can't"** when you cannot state what was expected: ``Can't find column `b` in `.data`.``
+
+**Bullets**:
+- `x` (cross) for problems
+- `i` (info) for context
+- `!` (warning) for warnings
+
+**Formatting**:
+- Surround argument names in backticks: `` `x` ``
+- Use "column" to disambiguate (avoid "variable")
+- Keep under 80 characters; let cli wrap
+- List up to 5 issues, truncate with `...`
+
+**Hints**: place last with `i` bullet, end with `?`
+
+```r
+cli::cli_abort(c(
+  "{.arg x} must be a numeric vector, not {.obj_type_friendly {x}}.",
+  "i" = "Did you mean to use {.fn as.numeric}?"
+))
+```

From 3740f7291a65a353a1e3ec617dd545a5ef881104 Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Thu, 2 Apr 2026 14:35:12 +0200
Subject: [PATCH 2/7] Add redundant groupings

---
 tidyverse/tidy-r/SKILL.md                     |  4 +
 .../tidy-r/references/grouping-examples.md    | 75 +++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 325d5eb..5d672ad 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -97,6 +97,8 @@ Use newspaper style: high-level logic first, helpers below. Don't define functio
 ### Grouping
 
 - Use `.by` for per-operation grouping, never `group_by() |> ... |> ungroup()`
+- Never add `ungroup()` before or after `.by` — it always returns ungrouped data
+- Consolidate multiple `mutate(.by = x)` calls into one when they share the same `.by`; keep separate only when `.by` differs or a later column depends on an earlier one
 - Place `.by` on its own line for readability
 
 ### Joins
@@ -138,6 +140,8 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 | `multiple = "error"` in joins | `relationship = "many-to-one"` (or `"one-to-one"`) |
 | `sapply()` | `map_*()` (type-stable) |
 | `group_by() \|> ... \|> ungroup()` | `.by` argument |
+| `ungroup() \|> mutate(..., .by = x)` | `mutate(..., .by = x)` (`.by` ignores existing groups) |
+| Repeated `mutate(.by = x)` with same `.by` | Single `mutate()` with all columns and one `.by` |
 | `cat()` for messages | `message()` or `cli::cli_inform()` |
 | `stop()` for errors | `cli::cli_abort()` |
 | `distinct(id)` | `distinct(id, .keep_all = TRUE)` |
diff --git a/tidyverse/tidy-r/references/grouping-examples.md b/tidyverse/tidy-r/references/grouping-examples.md
index d5ed53d..5310ece 100644
--- a/tidyverse/tidy-r/references/grouping-examples.md
+++ b/tidyverse/tidy-r/references/grouping-examples.md
@@ -70,6 +70,81 @@ data |>
   ungroup()
 ```
 
+### Avoid - redundant ungroup() around .by
+
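+`.by` always returns ungrouped data, so `ungroup()` after it is a no-op; `ungroup()` before it is only needed when the input is still grouped (`.by` errors on grouped data frames). Remove it otherwise.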
+
+```r
+# Avoid - ungroup() is redundant
+data |>
+  ungroup() |>
+  mutate(
+    centered = x - mean(x),
+    .by = group
+  )
+
+# Good
+data |>
+  mutate(
+    centered = x - mean(x),
+    .by = group
+  )
+```
+
+### Consolidating mutate() calls
+
+When multiple columns share the same `.by`, combine them in a single `mutate()`.
+
+```r
+# Avoid - repeating .by = year across separate mutate() calls
+data |>
+  mutate(
+    above_med_a = a > median(a),
+    .by = year
+  ) |>
+  mutate(
+    above_med_b = b > median(b),
+    .by = year
+  )
+
+# Good - one mutate(), one .by
+data |>
+  mutate(
+    above_med_a = a > median(a),
+    above_med_b = b > median(b),
+    .by = year
+  )
+```
+
+**When to keep separate `mutate()` calls:**
+
+- **Different `.by` variables** between the calls
+- **Sequential dependency** (optional): a later column uses a column created earlier within the same grouped context. `mutate()` evaluates its expressions in order, so a single call also works; split only when separate steps read more clearly
+
+```r
+# Separate calls needed: different .by variables
+data |>
+  mutate(
+    x_lag = dplyr::lag(x),
+    .by = id
+  ) |>
+  mutate(
+    above_med = x_lag > median(x_lag),
+    .by = year
+  )
+
+# Separate calls are optional here: a single mutate() also lets b_rank use b_centered
+data |>
+  mutate(
+    b_centered = b - mean(b),
+    .by = group
+  ) |>
+  mutate(
+    b_rank = row_number(desc(b_centered)),
+    .by = group
+  )
+```
+
 ## pick() for column selection
 
 Use `pick()` inside data-masking functions to select columns by name or tidyselect helpers:

From 2ece3a3e402b93647b90cb237aa8f7a30619b009 Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 7 Apr 2026 16:17:28 -0400
Subject: [PATCH 3/7] Fixes most comments

---
 tidyverse/tidy-r/SKILL.md                     | 116 +++++-------------
 .../{grouping-examples.md => grouping.md}     |   2 +-
 .../references/{join-examples.md => joins.md} |   2 +-
 .../{migration-examples.md => migration.md}   |  16 +--
 ...-replace-examples.md => recode-replace.md} |   2 +-
 .../{stringr-examples.md => stringr.md}       |   0
 6 files changed, 45 insertions(+), 93 deletions(-)
 rename tidyverse/tidy-r/references/{grouping-examples.md => grouping.md} (98%)
 rename tidyverse/tidy-r/references/{join-examples.md => joins.md} (98%)
 rename tidyverse/tidy-r/references/{migration-examples.md => migration.md} (91%)
 rename tidyverse/tidy-r/references/{recode-replace-examples.md => recode-replace.md} (98%)
 rename tidyverse/tidy-r/references/{stringr-examples.md => stringr.md} (100%)

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 5d672ad..918b3d0 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -1,87 +1,42 @@
 ---
 name: tidy-r
-description: |
-  Modern tidyverse patterns, style guide, and migration guidance for R development. Use when writing, reviewing, or refactoring tidyverse code. Covers native pipe, join_by(), .by grouping, pick/across/reframe, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidy selection, stringr, naming conventions, and migration from base R or older tidyverse APIs.
+description: >
+  Modern tidyverse patterns, style guide, and migration guidance for R development.
+  Use this skill when writing R code, reviewing tidyverse code, updating legacy R
+  code to modern patterns, or enforcing consistent style. Covers native pipe usage,
+  join_by() syntax, .by grouping, pick/across/reframe, filter_out/when_any/when_all,
+  recode_values/replace_values/replace_when, tidy selection, stringr, naming
+  conventions, and migration from base R or older tidyverse APIs.
 metadata:
-  r_version: "4.5+"
-  tidyverse_version: "2.0+"
-  dplyr_version: "1.2+"
-allowed-tools: Read, Edit, Write, Grep, Glob, Bash, mcp__r-btw__*
+  r_version: ">=4.5.0"
+  tidyverse_version: ">=2.0.0"
+  dplyr_version: ">=1.2.0"
 ---
 
-# Writing Modern Tidyverse R
+# Modern Tidyverse R Reference
 
-This skill covers modern tidyverse patterns for R 4.5+ and tidyverse 2.0+, style guidelines, and migration from legacy patterns.
+Code from blog posts and StackOverflow often uses deprecated APIs, magrittr pipes, or base R patterns where a modern tidyverse function exists. This guide encodes the current recommended approach.
 
-## Core philosophy
+## Reference files
 
-R's tidyverse evolves. Code from blog posts and StackOverflow often uses deprecated APIs, magrittr pipes, or base R patterns where a modern tidyverse function exists. This skill encodes the current recommended approach so the model writes code that experienced R developers would recognize as idiomatic.
+Consult the appropriate reference file for detailed patterns and examples:
 
-## When to use this skill
-
-- Writing new R code with dplyr, tidyr, stringr, purrr, or other tidyverse packages
-- Reviewing or refactoring existing R code for modern patterns
-- Migrating from base R, magrittr pipes, or older tidyverse APIs
-- Applying tidyverse style conventions (naming, spacing, error handling)
-- Choosing between similar functions (e.g., `case_when` vs `recode_values`)
-- Working with joins, grouping, recoding, or string manipulation in R
-
-## When NOT to use this skill
-
-- Writing data.table code (different paradigm)
-- Pure base R projects that intentionally avoid tidyverse
-- Shiny UI/server logic (use a Shiny-specific skill)
-- Package development internals (NAMESPACE, DESCRIPTION, roxygen)
-- ggplot2 visualization (use the socviz skill)
-- Statistical modeling or Bayesian analysis
-
-## Instructions
-
-When you receive a request, classify it and consult the appropriate reference:
-
-### Step 1: Classify the request
-
-| Category | Reference file | Trigger |
-|----------|---------------|---------|
-| **Joins** | [join-examples.md](references/join-examples.md) | Merging data, `*_join`, `join_by`, matching rows, lookup tables |
-| **Grouping & columns** | [grouping-examples.md](references/grouping-examples.md) | `.by`, `group_by`, `across`, `pick`, `reframe`, column operations |
-| **Recoding & replacing** | [recode-replace-examples.md](references/recode-replace-examples.md) | `case_when`, `recode_values`, `replace_values`, `replace_when`, `filter_out`, `when_any`, `when_all`, recoding, replacing, conditional updates |
-| **Strings** | [stringr-examples.md](references/stringr-examples.md) | String manipulation, regex, `str_*` functions, text processing |
+| Topic | Reference file | When to consult |
+|-------|---------------|-----------------|
+| **Joins** | [joins.md](references/joins.md) | Merging data, `*_join`, `join_by`, matching rows, lookup tables |
+| **Grouping & columns** | [grouping.md](references/grouping.md) | `.by`, `group_by`, `across`, `pick`, `reframe`, column operations |
+| **Recoding & replacing** | [recode-replace.md](references/recode-replace.md) | `recode_values`, `replace_values`, `replace_when`, `filter_out`, `when_any`, `when_all` |
+| **Strings** | [stringr.md](references/stringr.md) | String manipulation, regex, `str_*` functions, text processing |
 | **Style** | [tidyverse-style.md](references/tidyverse-style.md) | Naming, formatting, spacing, error messages, `cli::cli_abort` |
-| **Migration** | [migration-examples.md](references/migration-examples.md) | Updating old code, base R conversion, deprecated functions |
-
-### Step 2: Read the reference file(s)
-
-Use the Read tool to load the relevant reference. For requests that span multiple categories (e.g., "rewrite this old code" touches migration + style), read multiple files.
+| **Migration** | [migration.md](references/migration.md) | Updating old code, base R conversion, deprecated functions |
 
-### Step 3: Apply core principles
-
-1. **Use modern tidyverse patterns** - Prioritize dplyr 1.2+ features, native pipe, and current APIs
-2. **Write readable code first** - Optimize only when necessary
-3. **Follow tidyverse style guide** - Consistent naming, spacing, and structure
-4. **Use R MCP tools** - Automatically resolve function documentation and library references without being asked. If the `mcp__r-btw__*` tools are unavailable, fall back to running R help via Bash (see below)
-
-### R documentation lookup fallback
-
-When `mcp__r-btw__*` tools are available, use them to look up function signatures, help pages, and package docs. When they are not available (e.g., the r-btw MCP server is not configured), fall back to Bash:
-
-```bash
-# Help page for a function
-Rscript --vanilla -e '?dplyr::recode_values' 2>/dev/null || Rscript --vanilla -e 'utils::help("recode_values", package = "dplyr")'
-
-# Function signature / arguments
-Rscript --vanilla -e 'args(dplyr::recode_values)'
-
-# List exported functions in a package
-Rscript --vanilla -e 'ls("package:dplyr")'
-
-# Check if a package is installed
-Rscript --vanilla -e 'requireNamespace("dplyr", quietly = TRUE)'
-```
+For requests that span multiple topics (e.g., "rewrite this old code" touches migration + style), read multiple files.
 
-### Step 4: Write the code
+## Core principles
 
-Follow the quick reference and anti-patterns below. When in doubt, consult the reference files.
+1. **Use modern tidyverse patterns** -- Prioritize dplyr 1.2+ features, native pipe, and current APIs
+2. **Write readable code first** -- Optimize only when necessary
+3. **Follow tidyverse style guide** -- Consistent naming, spacing, and structure
 
 ## Quick reference
 
@@ -97,7 +52,7 @@ Use newspaper style: high-level logic first, helpers below. Don't define functio
 ### Grouping
 
 - Use `.by` for per-operation grouping, never `group_by() |> ... |> ungroup()`
-- Never add `ungroup()` before or after `.by` — it always returns ungrouped data
+- Never add `ungroup()` before or after `.by` -- it always returns ungrouped data
 - Consolidate multiple `mutate(.by = x)` calls into one when they share the same `.by`; keep separate only when `.by` differs or a later column depends on an earlier one
 - Place `.by` on its own line for readability
 
@@ -128,7 +83,6 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 - `message()` for info, never `cat()`
 - `map_*()` over `sapply()` for type stability
 - `set.seed()` with date-time, never 42
-- `qs2::qs_save()`/`qs2::qs_read()`, never `qs`
 
 ## Anti-patterns
 
@@ -151,9 +105,8 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 | `filter(x != val \| is.na(x))` | `filter_out(x == val)` |
 | `coalesce(x, default)` | `replace_values(x, NA ~ default)` |
 | `na_if(x, val)` | `replace_values(x, val ~ NA)` |
-| `qs::qsave()` / `qs::qread()` | `qs2::qs_save()` / `qs2::qs_read()` |
 
-## Complete workflow example
+## Example
 
 ```r
 library(tidyverse)
@@ -205,10 +158,9 @@ quarterly <- sales_enriched |>
 ## Best practices
 
 1. **Use `.unmatched = "error"`** in `case_when()` and `recode_values()` for defensive programming
-4. **Place `.by` on its own line** for readability
-5. **Prefer `filter_out()` over negated `filter()`** for NA-safe row removal
-6. **Use `recode_values()` over `case_match()`** (dplyr 1.2+ preferred API)
-7. **Use `replace_when()` over `case_when()` with `.default`** when updating a column in place
-8. **Name variables as nouns, functions as verbs** in snake_case
-9. **Explain "why" in comments**, not "what"
-10. **Use `qs2` for serialization** with `.qs2` extension
+2. **Place `.by` on its own line** for readability
+3. **Prefer `filter_out()` over negated `filter()`** for NA-safe row removal
+4. **Use `recode_values()` over `case_match()`** (dplyr 1.2+ preferred API)
+5. **Use `replace_when()` over `case_when()` with `.default`** when updating a column in place
+6. **Name variables as nouns, functions as verbs** in snake_case
+7. **Explain "why" in comments**, not "what"
diff --git a/tidyverse/tidy-r/references/grouping-examples.md b/tidyverse/tidy-r/references/grouping.md
similarity index 98%
rename from tidyverse/tidy-r/references/grouping-examples.md
rename to tidyverse/tidy-r/references/grouping.md
index 5310ece..633b1ee 100644
--- a/tidyverse/tidy-r/references/grouping-examples.md
+++ b/tidyverse/tidy-r/references/grouping.md
@@ -1,4 +1,4 @@
-# Modern Grouping and Column Operations (dplyr 1.1+)
+# Modern Grouping and Column Operations (dplyr >=1.2.0)
 
 ## Per-operation grouping with .by
 
diff --git a/tidyverse/tidy-r/references/join-examples.md b/tidyverse/tidy-r/references/joins.md
similarity index 98%
rename from tidyverse/tidy-r/references/join-examples.md
rename to tidyverse/tidy-r/references/joins.md
index ffb0a7a..5520ac4 100644
--- a/tidyverse/tidy-r/references/join-examples.md
+++ b/tidyverse/tidy-r/references/joins.md
@@ -1,4 +1,4 @@
-# Modern Join Syntax (dplyr 1.1+)
+# Modern Join Syntax (dplyr >=1.2.0)
 
 ## Use join_by() instead of character vectors
 
diff --git a/tidyverse/tidy-r/references/migration-examples.md b/tidyverse/tidy-r/references/migration.md
similarity index 91%
rename from tidyverse/tidy-r/references/migration-examples.md
rename to tidyverse/tidy-r/references/migration.md
index 0cd6cf9..41bf538 100644
--- a/tidyverse/tidy-r/references/migration-examples.md
+++ b/tidyverse/tidy-r/references/migration.md
@@ -1,4 +1,4 @@
-# Migration: Base R and Old Tidyverse to Modern Patterns
+# Migration: Base R and Old Tidyverse to Modern Patterns (dplyr >=1.2.0)
 
 ## Base R to Modern Tidyverse
 
@@ -28,7 +28,7 @@ substr(text, 1, 5)               # -> str_sub(text, 1, 5)
 nchar(text)                      # -> str_length(text)
 strsplit(text, ",")              # -> str_split(text, ",")
 tolower(text)                    # -> str_to_lower(text)
-sprintf("Hello %s", name)       # -> str_glue("Hello {name}")
+sprintf("Hello %s", name)        # -> str_glue("Hello {name}")
 ```
 
 ## Old to New Tidyverse Patterns
@@ -46,7 +46,7 @@ map(x, function(x) x + 1)       # -> map(x, \(x) x + 1)
 map(x, ~ .x + 1)                # -> map(x, \(x) x + 1)
 ```
 
-### Grouping (dplyr 1.1+)
+### Grouping (dplyr >=1.2.0)
 
 ```r
 group_by(data, x) |>
@@ -78,7 +78,7 @@ summarise(data, x, .groups = "drop") # -> reframe(data, x)
 gather()/spread()                # -> pivot_longer()/pivot_wider()
 ```
 
-### String separation (tidyr 1.3+)
+### String separation (tidyr >=1.3.0)
 
 ```r
 separate(col, into = c("a", "b"))
@@ -88,7 +88,7 @@ extract(col, into = "x", regex)
 # -> separate_wider_regex(col, patterns = c(x = regex))
 ```
 
-### Superseded purrr functions (purrr 1.0+)
+### Superseded purrr functions (purrr >=1.0.0)
 
 ```r
 map_dfr(x, f)                    # -> map(x, f) |> list_rbind()
@@ -98,7 +98,7 @@ pmap_dfr(list, f)                # -> pmap(list, f) |> list_rbind()
 imap_dfr(x, f)                   # -> imap(x, f) |> list_rbind()
 ```
 
-### Recoding and replacing (dplyr 1.2+)
+### Recoding and replacing (dplyr >=1.2.0)
 
 ```r
 case_match(x, val ~ result)      # -> recode_values(x, val ~ result)
@@ -118,7 +118,7 @@ na_if(x, val)                    # -> replace_values(x, val ~ NA)
 tidyr::replace_na(x, default)    # -> replace_values(x, NA ~ default)
 ```
 
-### Filter family (dplyr 1.2+)
+### Filter family (dplyr >=1.2.0)
 
 ```r
 # Dropping rows with NA-safe negation
@@ -138,7 +138,7 @@ qs::qsave(x, "file.qs")         # -> qs2::qs_save(x, "file.qs2")
 qs::qread("file.qs")            # -> qs2::qs_read("file.qs2")
 ```
 
-### Defunct in dplyr 1.2 (now errors)
+### Defunct in dplyr >=1.2.0 (now errors)
 
 ```r
 # Underscored SE verbs (defunct since 1.2, deprecated since 0.7)
diff --git a/tidyverse/tidy-r/references/recode-replace-examples.md b/tidyverse/tidy-r/references/recode-replace.md
similarity index 98%
rename from tidyverse/tidy-r/references/recode-replace-examples.md
rename to tidyverse/tidy-r/references/recode-replace.md
index 6c95616..9889a8b 100644
--- a/tidyverse/tidy-r/references/recode-replace-examples.md
+++ b/tidyverse/tidy-r/references/recode-replace.md
@@ -1,4 +1,4 @@
-# Recoding, Replacing, and Filtering (dplyr 1.2+)
+# Recoding, Replacing, and Filtering (dplyr >=1.2.0)
 
 dplyr 1.2 introduced a family of functions for recoding and replacing values, and for NA-safe filtering. These replace older patterns (`case_match`, `recode`, `coalesce`, `na_if`, negated filters).
 
diff --git a/tidyverse/tidy-r/references/stringr-examples.md b/tidyverse/tidy-r/references/stringr.md
similarity index 100%
rename from tidyverse/tidy-r/references/stringr-examples.md
rename to tidyverse/tidy-r/references/stringr.md

From 698c090445641a3916adc477bb57977d0cb6a470 Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 7 Apr 2026 16:22:19 -0400
Subject: [PATCH 4/7] Remove linebreaks

---
 tidyverse/tidy-r/SKILL.md | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 918b3d0..8e1fbea 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -1,12 +1,7 @@
 ---
 name: tidy-r
 description: >
-  Modern tidyverse patterns, style guide, and migration guidance for R development.
-  Use this skill when writing R code, reviewing tidyverse code, updating legacy R
-  code to modern patterns, or enforcing consistent style. Covers native pipe usage,
-  join_by() syntax, .by grouping, pick/across/reframe, filter_out/when_any/when_all,
-  recode_values/replace_values/replace_when, tidy selection, stringr, naming
-  conventions, and migration from base R or older tidyverse APIs.
+  Modern tidyverse patterns, style guide, and migration guidance for R development. Use this skill when writing R code, reviewing tidyverse code, updating legacy R code to modern patterns, or enforcing consistent style. Covers native pipe usage, join_by() syntax, .by grouping, pick/across/reframe, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidy selection, stringr, naming conventions, and migration from base R or older tidyverse APIs.
 metadata:
   r_version: ">=4.5.0"
   tidyverse_version: ">=2.0.0"
@@ -61,7 +56,7 @@ Use newspaper style: high-level logic first, helpers below. Don't define functio
 - Use `join_by()`, never `c("a" = "b")`
 - Use `relationship`, `unmatched`, `na_matches` for quality control
 
-### Recoding and replacing (dplyr 1.2+)
+### Recoding and replacing (dplyr >=1.2.0)
 
 | Task | Function |
 |------|----------|
@@ -160,7 +155,7 @@ quarterly <- sales_enriched |>
 1. **Use `.unmatched = "error"`** in `case_when()` and `recode_values()` for defensive programming
 2. **Place `.by` on its own line** for readability
 3. **Prefer `filter_out()` over negated `filter()`** for NA-safe row removal
-4. **Use `recode_values()` over `case_match()`** (dplyr 1.2+ preferred API)
+4. **Use `recode_values()` over `case_match()`** (dplyr >=1.2.0 preferred API)
 5. **Use `replace_when()` over `case_when()` with `.default`** when updating a column in place
 6. **Name variables as nouns, functions as verbs** in snake_case
 7. **Explain "why" in comments**, not "what"

From 73950f1ed18d943773c2ccf5a5e0387d1dc59fb9 Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 7 Apr 2026 17:04:05 -0400
Subject: [PATCH 5/7] Soften grouping stance

---
 tidyverse/tidy-r/SKILL.md                | 14 +++++++-------
 tidyverse/tidy-r/references/grouping.md  |  4 ++--
 tidyverse/tidy-r/references/migration.md |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 8e1fbea..228d2e7 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -46,7 +46,7 @@ Use newspaper style: high-level logic first, helpers below. Don't define functio
 
 ### Grouping
 
-- Use `.by` for per-operation grouping, never `group_by() |> ... |> ungroup()`
+- Prefer `.by` for per-operation grouping; use `group_by()` when grouping must persist across multiple operations
 - Never add `ungroup()` before or after `.by` -- it always returns ungrouped data
 - Consolidate multiple `mutate(.by = x)` calls into one when they share the same `.by`; keep separate only when `.by` differs or a later column depends on an earlier one
 - Place `.by` on its own line for readability
@@ -88,12 +88,11 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 | `by = c("a" = "b")` | `by = join_by(a == b)` |
 | `multiple = "error"` in joins | `relationship = "many-to-one"` (or `"one-to-one"`) |
 | `sapply()` | `map_*()` (type-stable) |
-| `group_by() \|> ... \|> ungroup()` | `.by` argument |
+| `group_by() \|> ... \|> ungroup()` for single operations | `.by` argument |
 | `ungroup() \|> mutate(..., .by = x)` | `mutate(..., .by = x)` (`.by` ignores existing groups) |
 | Repeated `mutate(.by = x)` with same `.by` | Single `mutate()` with all columns and one `.by` |
 | `cat()` for messages | `message()` or `cli::cli_inform()` |
 | `stop()` for errors | `cli::cli_abort()` |
-| `distinct(id)` | `distinct(id, .keep_all = TRUE)` |
 | `mean(x, na.rm = TRUE)` | `mean(x)` with tidyna loaded |
 | `case_match(x, ...)` | `recode_values(x, ...)` |
 | `recode(x, ...)` | `recode_values(x, ...)` or `replace_values(x, ...)` |
@@ -141,10 +140,11 @@ quarterly <- sales_enriched |>
     .by = c(region_name, quarter)
   ) |>
   mutate(
-    performance = revenue |>
-      replace_when(
-        total_revenue > 100000 ~ "high",
-        total_revenue > 50000 ~ "medium"
+    # Thresholds need case_when(); recode_values() only matches exact values
+    performance =
+      case_when(
+        total_revenue > 100000 ~ "high",
+        total_revenue > 50000 ~ "medium",
+        .default = "low"
   ) |>
   arrange(region_name, quarter)
diff --git a/tidyverse/tidy-r/references/grouping.md b/tidyverse/tidy-r/references/grouping.md
index 633b1ee..61830f9 100644
--- a/tidyverse/tidy-r/references/grouping.md
+++ b/tidyverse/tidy-r/references/grouping.md
@@ -2,7 +2,7 @@
 
 ## Per-operation grouping with .by
 
-The `.by` argument replaces the old `group_by() |> ... |> ungroup()` pattern. Results are always ungrouped.
+The `.by` argument is preferred for per-operation grouping. Use `group_by()` when grouping must persist across multiple operations. `.by` results are always ungrouped.
 
 ### Basic usage
 
@@ -60,7 +60,7 @@ data |>
   summarise(mean_value = mean(value), .by = category)
 ```
 
-### Avoid - old persistent grouping pattern
+### Avoid for single operations - use .by instead
 
 ```r
 # Avoid
diff --git a/tidyverse/tidy-r/references/migration.md b/tidyverse/tidy-r/references/migration.md
index 41bf538..fb3e52a 100644
--- a/tidyverse/tidy-r/references/migration.md
+++ b/tidyverse/tidy-r/references/migration.md
@@ -8,7 +8,7 @@
 subset(data, condition)          # -> filter(data, condition)
 data[order(data$x), ]            # -> arrange(data, x)
 aggregate(x ~ y, data, mean)     # -> summarise(data, mean(x), .by = y)
-merge(x, y, by = "id")           # -> left_join(x, y, by = join_by(id))
+merge(x, y, by = "id")           # -> inner_join(x, y, by = join_by(id))
 ```
 
 ### Functional programming

From 2de0d63bca290195f656555e5ec1d81a93e7981a Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 7 Apr 2026 17:22:45 -0400
Subject: [PATCH 6/7] Audit and improvements

---
 tidyverse/tidy-r/SKILL.md                     |  34 +----
 tidyverse/tidy-r/references/grouping.md       |  48 ++++++-
 tidyverse/tidy-r/references/joins.md          |  33 -----
 tidyverse/tidy-r/references/migration.md      |  10 ++
 tidyverse/tidy-r/references/stringr.md        |  17 ++-
 tidyverse/tidy-r/references/tidyselect.md     | 117 ++++++++++++++++++
 .../tidy-r/references/tidyverse-style.md      |   5 +-
 7 files changed, 196 insertions(+), 68 deletions(-)
 create mode 100644 tidyverse/tidy-r/references/tidyselect.md

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 228d2e7..8190087 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: tidy-r
 description: >
-  Modern tidyverse patterns, style guide, and migration guidance for R development. Use this skill when writing R code, reviewing tidyverse code, updating legacy R code to modern patterns, or enforcing consistent style. Covers native pipe usage, join_by() syntax, .by grouping, pick/across/reframe, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidy selection, stringr, naming conventions, and migration from base R or older tidyverse APIs.
+  Modern tidyverse patterns, style guide, and migration guidance for R development. Use this skill when writing R code, reviewing tidyverse code, updating legacy R code, or enforcing consistent style. Covers native pipe usage, join_by() syntax, .by grouping, pick/across/reframe, filter_out/when_any/when_all, recode_values/replace_values/replace_when, tidyselect helpers, .data/.env pronouns, stringr, naming conventions, and readr.
 metadata:
   r_version: ">=4.5.0"
   tidyverse_version: ">=2.0.0"
@@ -22,6 +22,7 @@ Consult the appropriate reference file for detailed patterns and examples:
 | **Grouping & columns** | [grouping.md](references/grouping.md) | `.by`, `group_by`, `across`, `pick`, `reframe`, column operations |
 | **Recoding & replacing** | [recode-replace.md](references/recode-replace.md) | `recode_values`, `replace_values`, `replace_when`, `filter_out`, `when_any`, `when_all` |
 | **Strings** | [stringr.md](references/stringr.md) | String manipulation, regex, `str_*` functions, text processing |
+| **Tidy selection** | [tidyselect.md](references/tidyselect.md) | Column selection helpers, `where()`, `all_of()`, `any_of()`, boolean ops, `.data`/`.env` pronouns |
 | **Style** | [tidyverse-style.md](references/tidyverse-style.md) | Naming, formatting, spacing, error messages, `cli::cli_abort` |
 | **Migration** | [migration.md](references/migration.md) | Updating old code, base R conversion, deprecated functions |
 
@@ -38,6 +39,7 @@ For requests that span multiple topics (e.g., "rewrite this old code" touches mi
 ### Pipe and lambda
 
 - Always `|>`, never `%>%`
+- Use `_` placeholder for non-first arguments: `x |> f(1, y = _)`. The placeholder must be passed to a named argument and may appear only once.
 - Always `\(x)`, never `function(x)` or `~` in map/keep/etc.
 
 ### Code organization
@@ -79,27 +81,6 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 - `map_*()` over `sapply()` for type stability
 - `set.seed()` with date-time, never 42
 
-## Anti-patterns
-
-| Avoid | Use instead |
-|-------|-------------|
-| `%>%` | `|>` |
-| `function(x)` or `~` | `\(x)` |
-| `by = c("a" = "b")` | `by = join_by(a == b)` |
-| `multiple = "error"` in joins | `relationship = "many-to-one"` (or `"one-to-one"`) |
-| `sapply()` | `map_*()` (type-stable) |
-| `group_by() \|> ... \|> ungroup()` for single operations | `.by` argument |
-| `ungroup() \|> mutate(..., .by = x)` | `mutate(..., .by = x)` (`.by` ignores existing groups) |
-| Repeated `mutate(.by = x)` with same `.by` | Single `mutate()` with all columns and one `.by` |
-| `cat()` for messages | `message()` or `cli::cli_inform()` |
-| `stop()` for errors | `cli::cli_abort()` |
-| `mean(x, na.rm = TRUE)` | `mean(x)` with tidyna loaded |
-| `case_match(x, ...)` | `recode_values(x, ...)` |
-| `recode(x, ...)` | `recode_values(x, ...)` or `replace_values(x, ...)` |
-| `filter(x != val \| is.na(x))` | `filter_out(x == val)` |
-| `coalesce(x, default)` | `replace_values(x, NA ~ default)` |
-| `na_if(x, val)` | `replace_values(x, val ~ NA)` |
-
 ## Example
 
 ```r
@@ -150,12 +131,3 @@ quarterly <- sales_enriched |>
   arrange(region_name, quarter)
 ```
 
-## Best practices
-
-1. **Use `.unmatched = "error"`** in `case_when()` and `recode_values()` for defensive programming
-2. **Place `.by` on its own line** for readability
-3. **Prefer `filter_out()` over negated `filter()`** for NA-safe row removal
-4. **Use `recode_values()` over `case_match()`** (dplyr >=1.2.0 preferred API)
-5. **Use `replace_when()` over `case_when()` with `.default`** when updating a column in place
-6. **Name variables as nouns, functions as verbs** in snake_case
-7. **Explain "why" in comments**, not "what"
diff --git a/tidyverse/tidy-r/references/grouping.md b/tidyverse/tidy-r/references/grouping.md
index 61830f9..c576c8d 100644
--- a/tidyverse/tidy-r/references/grouping.md
+++ b/tidyverse/tidy-r/references/grouping.md
@@ -145,6 +145,22 @@ data |>
   )
 ```
 
+## .by with tidyr::fill()
+
+tidyr supports `.by` in `fill()`, matching the dplyr pattern:
+
+```r
+# Good - per-operation grouping
+data |>
+  tidyr::fill(value, .by = group, .direction = "down")
+
+# Avoid - group_by/ungroup wrapper
+data |>
+  group_by(group) |>
+  tidyr::fill(value, .direction = "down") |>
+  ungroup()
+```
+
 ## pick() for column selection
 
 Use `pick()` inside data-masking functions to select columns by name or tidyselect helpers:
@@ -232,7 +248,7 @@ my_summary <- function(data, summary_var) {
 }
 ```
 
-### Character vectors use .data[[]]
+### Character vectors in data-masked contexts use .data[[]]
 
 ```r
 for (var in names(mtcars)) {
@@ -240,6 +256,36 @@ for (var in names(mtcars)) {
 }
 ```
 
+### Character vectors in tidy-select contexts use all_of()/any_of()
+
+The `across(all_of())` bridge is the canonical pattern for passing character vectors into tidy-select:
+
+```r
+vars <- c("mpg", "wt", "hp")
+
+# Good - across(all_of()) for character vectors
+mtcars |>
+  summarise(across(all_of(vars), mean))
+
+# Good - any_of() when some columns may not exist
+mtcars |>
+  select(any_of(vars))
+
+# Avoid - .data[[]] inside tidy-select (deprecated)
+mtcars |>
+  select(.data[["mpg"]], .data[["wt"]])
+```
+
+### Access calling-environment variables with .env
+
+Use `.env$var` to disambiguate when a local variable shares a name with a column:
+
+```r
+threshold <- 10
+data |>
+  filter(value > .env$threshold)
+```
+
 ### Multiple columns use across()
 
 ```r
diff --git a/tidyverse/tidy-r/references/joins.md b/tidyverse/tidy-r/references/joins.md
index 5520ac4..7ea7140 100644
--- a/tidyverse/tidy-r/references/joins.md
+++ b/tidyverse/tidy-r/references/joins.md
@@ -88,36 +88,3 @@ sales |>
   )
 ```
 
-## Logging joins with tidylog
-
-Use `tidylog::` prefix for joins to verify expected behavior. Call directly without loading the package.
-
-```r
-result <- transactions |>
-  tidylog::left_join(companies, by = join_by(company == id))
-
-# tidylog output:
-# left_join: added 2 columns (name, region)
-#            > rows only in x      12
-#            > rows only in y     (3)
-#            > matched rows       988
-#            > rows total        1000
-```
-
-### Interpreting join output
-
-| Output | Meaning |
-|--------|---------|
-| `rows only in x` | Rows in left table with no match (kept as NA in left joins) |
-| `rows only in y` | Rows in right table with no match (in parentheses, dropped in left joins) |
-| `matched rows` | Rows that matched between tables |
-| `rows total` | Final row count after join |
-
-### When to use tidylog
-
-- **Always for joins** to see how many rows matched, duplicated, or were dropped
-- **Critical filters** with `tidylog::filter()` to verify expected row counts
-- **Critical mutates** with `tidylog::mutate()` to verify expected changes
-- **Any operation where silent data loss is a risk**
-
-Don't use tidylog in production code, inside functions, or loops where output would be too verbose. It's for interactive verification only.
diff --git a/tidyverse/tidy-r/references/migration.md b/tidyverse/tidy-r/references/migration.md
index fb3e52a..65d732a 100644
--- a/tidyverse/tidy-r/references/migration.md
+++ b/tidyverse/tidy-r/references/migration.md
@@ -131,6 +131,16 @@ filter(cond1 | cond2 | cond3)    # -> filter(when_any(cond1, cond2, cond3))
 filter(cond1 & cond2 & cond3)    # -> filter(when_all(cond1, cond2, cond3))
 ```
 
+### Reading data
+
+```r
+read.csv("file.csv")                 # -> read_csv("file.csv")  # tibble, faster, better type detection
+read.csv("file.tsv", sep = "\t")     # -> read_tsv("file.tsv")
+read.csv2("file.csv")                # -> read_csv2("file.csv")  # semicolon-delimited
+```
+
+For large files (>100 MB), `vroom::vroom()` can be faster than `read_csv()` because it indexes the file and reads columns lazily (readr >=2.0 already uses vroom's parser). For small files the difference is negligible.
+
 ### Serialization
 
 ```r
diff --git a/tidyverse/tidy-r/references/stringr.md b/tidyverse/tidy-r/references/stringr.md
index a56c5a9..fecadfd 100644
--- a/tidyverse/tidy-r/references/stringr.md
+++ b/tidyverse/tidy-r/references/stringr.md
@@ -53,13 +53,16 @@ str_length(text)                      # character count
 str_trunc(text, 20)                   # truncate with ellipsis
 ```
 
-### Formatting
+### Formatting and case conversion
 
 ```r
 str_to_lower(text)                    # lowercase
 str_to_upper(text)                    # uppercase
 str_to_title(text)                    # title case
 str_to_sentence(text)                # sentence case
+str_to_snake(text)                    # snake_case (stringr >=1.6.0)
+str_to_camel(text)                    # camelCase (stringr >=1.6.0)
+str_to_kebab(text)                    # kebab-case (stringr >=1.6.0)
 str_trim(text)                        # remove leading/trailing whitespace
 str_squish(text)                      # trim + collapse internal whitespace
 str_pad(text, 10, side = "left")      # pad to fixed width
@@ -73,6 +76,14 @@ str_glue("Hello {name}, you scored {score}!")
 str_glue_data(df, "{name}: {value}")
 ```
 
+### Case-insensitive matching (stringr >=1.6.0)
+
+```r
+str_ilike(text, "hello%")              # SQL ILIKE-style, case-insensitive; `%` = any chars, `_` = one char
+# Replaces: str_like(text, "hello%", ignore_case = TRUE)
+# str_like() ignore_case argument is deprecated; use str_ilike() instead
+```
+
 ## Pattern helpers
 
 Use these for clarity about what kind of matching you intend:
@@ -98,5 +109,9 @@ str_detect(text, boundary("word"))     # word boundaries
 | `str_to_lower(text)` | `tolower(text)` | |
 | `str_to_upper(text)` | `toupper(text)` | |
 | `str_to_title(text)` | `tools::toTitleCase(text)` | |
+| `str_to_snake(text)` | — | stringr >=1.6.0 |
+| `str_to_camel(text)` | — | stringr >=1.6.0 |
+| `str_to_kebab(text)` | — | stringr >=1.6.0 |
+| `str_ilike(text, "pat%")` | — | case-insensitive SQL LIKE pattern, stringr >=1.6.0 |
 | `str_trim(text)` | `trimws(text)` | |
 | `str_glue("Hello {x}")` | `sprintf("Hello %s", x)` | More readable |
diff --git a/tidyverse/tidy-r/references/tidyselect.md b/tidyverse/tidy-r/references/tidyselect.md
new file mode 100644
index 0000000..64fbff6
--- /dev/null
+++ b/tidyverse/tidy-r/references/tidyselect.md
@@ -0,0 +1,117 @@
+# Tidy Selection
+
+Tidy selection is the column selection language used by `select()`, `relocate()`, `rename()`, `across()`, `pick()`, `pivot_longer()`, `pivot_wider()`, and other tidyverse functions that accept column specifications.
+
+## Selection helpers
+
+```r
+starts_with("x")          # columns starting with "x"
+ends_with("_id")           # columns ending with "_id"
+contains("score")          # columns containing "score"
+matches("^x\\d+$")        # columns matching a regex
+num_range("x", 1:5)       # x1, x2, x3, x4, x5
+last_col()                 # rightmost column
+everything()               # all columns
+where(is.numeric)          # columns satisfying a predicate
+```
+
+## Selecting by name
+
+```r
+data |> select(name, age)                      # by name
+data |> select(name:age)                       # range
+data |> select(!age)                           # exclude
+data |> select(where(is.numeric) & !id)        # boolean combination
+```
+
+## Boolean algebra on selections
+
+Selections support `!` (complement), `&` (intersection), and `|` (union):
+
+```r
+data |> select(where(is.numeric) & !c(id, year))
+data |> select(starts_with("x") | ends_with("_total"))
+data |> select(!where(is.character))
+```
+
+## Character vectors: all_of() and any_of()
+
+Use `all_of()` for strict matching (errors if a name is missing) and `any_of()` for permissive matching (silently ignores missing names):
+
+```r
+vars <- c("mpg", "wt", "hp")
+
+data |> select(all_of(vars))       # errors if any name absent
+data |> select(any_of(vars))       # ignores missing names
+```
+
+### The across(all_of()) bridge pattern
+
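+`across()` accepts tidy selection inside data-masking verbs such as `summarise()` and `mutate()`, so wrapping a character vector in `all_of()` is the canonical way to apply a function to the columns it names: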
+
+```r
+vars <- c("revenue", "cost")
+
+data |>
+  summarise(across(all_of(vars), mean))
+
+data |>
+  mutate(across(all_of(vars), \(x) x / 1000))
+```
+
+## .data and .env pronouns
+
+### .data in data-masked contexts
+
+Use `.data[[var]]` when the column name is a string variable inside data-masked functions (`filter`, `mutate`, `summarise`):
+
+```r
+var <- "mpg"
+mtcars |> filter(.data[[var]] > 20)
+```
+
+### .data is deprecated in tidy-select contexts
+
+Do NOT use `.data$col` or `.data[[var]]` inside tidy-select functions (`select`, `across`, `pick`). Use string names or `all_of()`/`any_of()` instead:
+
+```r
+var <- "mpg"
+
+# Good
+data |> select(all_of(var))
+data |> select(any_of(var))
+
+# Avoid (deprecated)
+data |> select(.data[[var]])
+```
+
+### .env for environment variables
+
+Use `.env$var` to access variables from the calling environment when they might collide with column names:
+
+```r
+threshold <- 10
+
+# Good - unambiguous
+data |> filter(value > .env$threshold)
+
+# Risky - if data has a "threshold" column, it shadows the local variable
+data |> filter(value > threshold)
+```
+
+`.env` is most useful inside functions where you cannot control what columns the data has:
+
+```r
+filter_above <- function(data, col, cutoff) {
+  data |> filter({{ col }} > .env$cutoff)
+}
+```
+
+## Tidy selection vs data masking
+
+| Context | Used by | Column selection | Character vector bridge |
+|---------|---------|-----------------|----------------------|
+| **Tidy selection** | `select`, `across`, `pick`, `relocate`, `pivot_*` | helpers like `where()`, `starts_with()` | `all_of(vars)` |
+| **Data masking** | `filter`, `mutate`, `summarise`, `arrange` | `.data[[var]]` | `across(all_of(vars))` |
+
+The two contexts have different rules. Tidy selection uses helper functions; data masking evaluates R expressions in the data frame environment. `{{ }}` (embrace) works in both contexts for forwarding a single function argument.
diff --git a/tidyverse/tidy-r/references/tidyverse-style.md b/tidyverse/tidy-r/references/tidyverse-style.md
index 304b5bc..95c098f 100644
--- a/tidyverse/tidy-r/references/tidyverse-style.md
+++ b/tidyverse/tidy-r/references/tidyverse-style.md
@@ -140,8 +140,8 @@ check_input <- function(x) {
 
 ```r
 long_function_name <- function(
-    a = "argument",
-    b = "argument"
+  a = "argument",
+  b = "argument"
 ) {
   # body
 }
@@ -184,6 +184,7 @@ data <- data |> filter(!is.na(value))
 - Use `&&` and `||` in conditions (not `&` and `|`)
 - Use `TRUE`/`FALSE` (not `T`/`F`)
 - Never use semicolons
+- With `tidyna` loaded, `na.rm = TRUE` is the default for common aggregation functions -- write `mean(x)` instead of `mean(x, na.rm = TRUE)`
 
 ## Error Messages
 

From 416ab0c22ef4ad173d7e3a1c5aba04ffdb448c4b Mon Sep 17 00:00:00 2001
From: statzhero <panoramadata@gmail.com>
Date: Tue, 7 Apr 2026 18:49:03 -0400
Subject: [PATCH 7/7] Fix example

---
 tidyverse/tidy-r/SKILL.md | 64 +++++++++++++++------------------------
 1 file changed, 25 insertions(+), 39 deletions(-)

diff --git a/tidyverse/tidy-r/SKILL.md b/tidyverse/tidy-r/SKILL.md
index 8190087..4340cac 100644
--- a/tidyverse/tidy-r/SKILL.md
+++ b/tidyverse/tidy-r/SKILL.md
@@ -86,48 +86,34 @@ Use `cli::cli_abort()` with problem statement + bullets, never `stop()`.
 ```r
 library(tidyverse)
 
-# Read and clean data
-sales <- read_csv("data/sales.csv") |>
-  rename(
-    region = Region,
-    product = Product,
-    revenue = Revenue,
-    date = Date
-  ) |>
-  mutate(
-    quarter = quarter(date),
-    product = product |>
-      replace_values(
-        c("Widget A", "WidgetA") ~ "Widget A",
-        c("Widget B", "WidgetB") ~ "Widget B"
-      )
+penguins <- penguins |>
+  filter_out(is.na(sex)) |>
+  mutate(size = case_when(
+    body_mass > 4500 ~ "large",
+    body_mass > 3500 ~ "medium",
+    .default = "small"
+  ))
+
+# Island latitudes for the lookup join below
+island_coords <- tribble(
+  ~island,      ~latitude,
+  "Biscoe",     -65.5,
+  "Dream",      -64.7,
+  "Torgersen",  -64.8
+)
+
+island_summary <- penguins |>
+  summarise(
+    mean_flipper = mean(flipper_len),
+    mean_mass = mean(body_mass),
+    n = n(),
+    .by = c(species, island)
   ) |>
-  filter_out(is.na(revenue))
-
-# Enrich with lookup table
-sales_enriched <- sales |>
   left_join(
-    regions,
-    by = join_by(region == region_code),
+    island_coords,
+    by = join_by(island),
     unmatched = "error"
-  )
-
-# Summarise by group
-quarterly <- sales_enriched |>
-  summarise(
-    total_revenue = sum(revenue),
-    avg_revenue = mean(revenue),
-    n_transactions = n(),
-    .by = c(region_name, quarter)
-  ) |>
-  mutate(
-    performance = total_revenue |>
-      recode_values(
-        \(x) x > 100000 ~ "high",
-        \(x) x > 50000 ~ "medium",
-        .default = "low"
-      )
   ) |>
-  arrange(region_name, quarter)
+  arrange(species, island)
 ```