Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Editors
.vscode/

# History files
.Rhistory
.Rapp.history
Expand Down
1 change: 1 addition & 0 deletions Syndemics/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
^LICENSE\.md$
^\.github$
inst/data
.air.toml
31 changes: 31 additions & 0 deletions Syndemics/.lintr
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
linters: all_linters(
assignment_linter = assignment_linter(operator = c("<-"), allow_trailing = FALSE),
backport_linter = backport_linter("4.0.0"),
brace_linter = brace_linter(allow_single_line = TRUE),
indentation_linter = indentation_linter(indent = 4L),
line_length_linter = line_length_linter(length = 80L),
object_name_linter = object_name_linter(styles = c("snake_case", "lowercase", "SNAKE_CASE")),
return_linter = return_linter(return_style = "explicit", allow_implicit_else = FALSE),
cyclocomp_linter = cyclocomp_linter(complexity_limit = 10L),
pipe_consistency_linter = pipe_consistency_linter("auto"),
unused_import_linter = unused_import_linter(interpret_glue = TRUE),
undesirable_function_linter = undesirable_function_linter(
fun = modify_defaults(
defaults = default_undesirable_functions,
source = NULL,
library = NULL,
require = NULL)),
object_usage_linter = NULL, # This doesn't work well with dplyr
condition_call_linter = NULL,
condition_message_linter = NULL,
consecutive_mutate_linter = NULL,
expect_identical_linter = NULL,
implicit_integer_linter = NULL,
library_call_linter = NULL,
literal_coercion_linter = NULL,
print_linter = NULL, # This one is the opposite of our style guide
sample_int_linter = NULL, # I disagree with this one in terms of clarity
strings_as_factors_linter = NULL, # Not relevant given > R 4.0.0
unnecessary_placeholder_linter = NULL # I disagree with the style
)
exclusions: list()
4 changes: 3 additions & 1 deletion Syndemics/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ Suggests:
doParallel,
foreach,
knitr,
rmarkdown
rmarkdown,
testthat (>= 3.0.0)
VignetteBuilder: knitr
URL: https://github.com/SyndemicsLab/Syndemics
BugReports: https://github.com/SyndemicsLab/Syndemics/issues
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions Syndemics/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ importFrom(data.table,dcast)
importFrom(data.table,fread)
importFrom(data.table,rbindlist)
importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
Expand Down
108 changes: 0 additions & 108 deletions Syndemics/R/buildLifeTables.R

This file was deleted.

142 changes: 142 additions & 0 deletions Syndemics/R/build_life_tables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#' Taking in the CDC NVSS Yearly life tables, extract and build the background mortality table expected by RESPOND
#'
#' Input: CDC NVSS Life Tables, output file name, stratifications
#' Output: Resulting background mortality compatible with RESPOND
#'
#' @param files A set of files to extract the background mortality out of
#' @param outputfile The name of the file to output the background mortality
#' @param races A list of races
#' @param sexes A list of sexes
#' @param age_groups A list of age groups
#' @param bin_size The size ages are grouped by
#'
#' @import data.table
#' @importFrom utils write.csv
#' @export

build_background_mortality_file <- function(
files,
outputfile,
races = c("black", "hispanic", "white"),
sexes = c("female", "male"),
age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100"),
bin_size = 20
) {
background_mortality <- lapply(
files,
extract_background_mortality,
bin_size = bin_size,
age_groups = age_groups
)
result_table <- create_and_fill_table(
background_mortality,
races,
sexes,
age_groups
)
if (!missing(outputfile)) {
write.csv(result_table, outputfile, row.names = FALSE)
}

return(result_table)
}

#' Function used to extract background mortality values based on age from a single yearly CDC NVSS life table
#'
#' @param file_path The path to the CDC NVSS Life Table
#' @param bin_size The size ages are grouped by
#' @param age_groups A list of age groups
#'
#' @import data.table
#' @importFrom readxl read_excel
#' @keywords internal

extract_background_mortality <- function(
file_path,
bin_size = 20,
age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100")
) {
data <- readxl::read_excel(file_path, skip = 1)
dt <- as.data.table(data)[(2:101)]

# Rename columns to standard names
setnames(
dt,
"Probability of dying between ages x and x + 1",
"year_prob",
skip_absent = TRUE
)
setnames(
dt,
"Number dying between ages x and x + 1",
"year_deaths",
skip_absent = TRUE
)

#Data table bindings
year_prob <- year_deaths <- V1 <- NULL

dt[, year_prob := as.numeric(year_prob)][,
year_deaths := as.numeric(year_deaths)
]

bin_groups <- (seq_len(nrow(dt)) - 1) %/% bin_size
deaths_by_group <- dt[, sum(year_deaths), by = bin_groups][, V1]

# 100k originates from the CDC NVSS data - reported in rates per 100,000 persons
weekly_rates <- (deaths_by_group / 100000) / 52
weekly_probs <- 1 - exp(-weekly_rates)

result <- data.table(
agegrp = age_groups,
weekly_probability = weekly_probs
)

return(result)
}

#' Create and fill the table with mortality values for all demographic combinations
#'
#' @param background_mortality List of extracted background mortality data.tables
#' @param races A list of races
#' @param sexes A list of sexes
#' @param age_groups A list of age groups
#'
#' @import data.table
#' @keywords internal

create_and_fill_table <- function(
background_mortality,
races = c("black", "hispanic", "white"),
sexes = c("female", "male"),
age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100")
) {
#Data table bindings
agegrp <- NULL

combinations <- expand.grid(
races = races,
sexes = sexes,
stringsAsFactors = FALSE
)
combinations <- as.data.table(combinations)
result_table <- combinations[rep(
seq_len(nrow(combinations)),
each = length(age_groups)
)]
result_table[, agegrp := rep(age_groups, times = nrow(combinations))]
n_race_sex_combos <- length(races) * length(sexes)

mortality_data <- data.table()
for (i in seq_along(background_mortality)) {
group_index <- ((i - 1) %% n_race_sex_combos) + 1
bg_mort <- background_mortality[[i]]
demo_info <- combinations[group_index]
mortality_group <- cbind(demo_info[rep(1, nrow(bg_mort))], bg_mort)
mortality_data <- rbind(mortality_data, mortality_group)
}

setorder(mortality_data, races, sexes, agegrp)

return(mortality_data)
}
Loading
Loading