diff --git a/.gitignore b/.gitignore index ff7e71b..84662a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /doc/ /Meta/ +Rplots.pdf diff --git a/CLAUDE.md b/CLAUDE.md index 98639e4..ddc6353 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -250,7 +250,6 @@ Cox-Snell residuals should follow Exp(1) if model is correct. - `tests/testthat/test-diagnostics.R`: Tests for diagnostic methods - `tests/testthat/test-derivatives.R`: Tests for score/Hessian computation - `tests/testthat/test-likelihood_model.R`: Likelihood interface tests -- `vignettes/getting_started.Rmd`: 5-minute quick start - `vignettes/failure_rate.Rmd`: Hazard-based modeling deep dive - `vignettes/custom_distributions.Rmd`: How to create custom distributions - `vignettes/reliability_engineering.Rmd`: Real-world applications diff --git a/DESCRIPTION b/DESCRIPTION index 313bbe3..b6e9f09 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,8 @@ Description: Flexible framework for specifying survival distributions through curves, proportional hazards with covariates, and other non-standard hazard behaviors. Provides automatic computation of survival, CDF, PDF, quantiles, and sampling. Implements the likelihood model interface for - maximum likelihood estimation with right-censored survival data. + maximum likelihood estimation with right-censored and left-censored + survival data. License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/R/dfr_dist.R b/R/dfr_dist.R index 9277d97..0e29b6a 100644 --- a/R/dfr_dist.R +++ b/R/dfr_dist.R @@ -11,17 +11,28 @@ #' Defaults to "t". #' @param delta_col The column name for event indicators in data frames. #' Uses standard survival analysis convention: 1 = event -#' observed (exact), 0 = right-censored. Defaults to "delta". +#' observed (exact), 0 = right-censored, -1 = left-censored. +#' Defaults to "delta". #' @param cum_haz_rate Optional analytical cumulative hazard function H(t, par). #' If provided, used for faster exact cumulative hazard #' computation instead of numerical integration. #' Should return the integral of rate from 0 to t. #' @param score_fn Optional score function (gradient of log-likelihood). -#' Signature: score_fn(df, par, ...) returning a numeric vector. -#' If NULL, falls back to numerical gradient via numDeriv::grad. +#' Signature: score_fn(df, par, ob_col, delta_col, ...) +#' returning a numeric vector. The ob_col and delta_col arguments +#' indicate which columns in df contain observation times and event +#' indicators. If NULL, falls back to numerical gradient via +#' numDeriv::grad. Analytical score functions that only handle +#' delta in \{0, 1\} are automatically bypassed when left-censored +#' data (delta = -1) is present. #' @param hess_fn Optional Hessian function (second derivatives of log-likelihood). -#' Signature: hess_fn(df, par, ...) returning a matrix. +#' Signature: hess_fn(df, par, ob_col, delta_col, ...) returning +#' a matrix. The ob_col and delta_col arguments indicate which +#' columns in df contain observation times and event indicators. #' If NULL, falls back to numerical Hessian via numDeriv::hessian. +#' Analytical Hessian functions that only handle delta in \{0, 1\} +#' are automatically bypassed when left-censored data (delta = -1) +#' is present. #' @return A `dfr_dist` object that inherits from `likelihood_model`. #' @export dfr_dist <- function(rate, par = NULL, @@ -85,11 +96,13 @@ inv_cdf.dfr_dist <- function(x, ...) { cdf_fn <- cdf(x, ...) function(p, par = NULL, ...) { par <- get_params(par, x$par) - uniroot( - f = function(t) cdf_fn(t, par, ...) - p, - interval = c(0, 1e3), - extendInt = "upX" - )$root + sapply(p, function(pi) { + uniroot( + f = function(t) cdf_fn(t, par, ...) - pi, + interval = c(0, 1e3), + extendInt = "upX" + )$root + }) } } @@ -225,17 +238,19 @@ cum_haz.dfr_dist <- function(x, ...) { function(t, par = NULL, ...) { par <- get_params(par, x$par) - res <- do.call(integrate, - modifyList(integrator, list( - upper = t, - f = function(u) x$rate(u, par, ...)))) - if (res$message != "OK") { - warning(res$message) - } - if (res$abs.error > integrator$abs.tol) { - warning("Absolute error in cumulative hazard is greater than tolerance") - } - res$value + sapply(t, function(ti) { + res <- do.call(integrate, + modifyList(integrator, list( + upper = ti, + f = function(u) x$rate(u, par, ...)))) + if (res$message != "OK") { + warning(res$message) + } + if (res$abs.error > integrator$abs.tol) { + warning("Absolute error in cumulative hazard is greater than tolerance") + } + res$value + }) } } @@ -333,6 +348,7 @@ loglik.dfr_dist <- function(model, ...) { ll <- ll + sum(log1p(-exp(-H_left))) } + if (is.nan(ll) || is.na(ll)) return(-Inf) ll } } @@ -354,7 +370,11 @@ score.dfr_dist <- function(model, ...) { function(df, par = NULL, ...) { par <- get_params(par, model$par) if (!is.null(model$score_fn)) { - return(model$score_fn(df, par, ...)) + delta <- get_delta(df, model$delta_col) + if (!any(delta == -1)) { + return(model$score_fn(df, par, + ob_col = model$ob_col, delta_col = model$delta_col, ...)) + } } numDeriv::grad(function(p) ll_fn(df, par = p, ...), par) } @@ -377,7 +397,11 @@ hess_loglik.dfr_dist <- function(model, ...) { function(df, par = NULL, ...) { par <- get_params(par, model$par) if (!is.null(model$hess_fn)) { - return(model$hess_fn(df, par, ...)) + delta <- get_delta(df, model$delta_col) + if (!any(delta == -1)) { + return(model$hess_fn(df, par, + ob_col = model$ob_col, delta_col = model$delta_col, ...)) + } } numDeriv::hessian(function(p) ll_fn(df, par = p, ...), par) } diff --git a/R/distributions.R b/R/distributions.R index 48248f7..cae9a44 100644 --- a/R/distributions.R +++ b/R/distributions.R @@ -24,7 +24,6 @@ NULL #' #' Creates a DFR distribution with constant failure rate (exponential). #' The exponential distribution is "memoryless" - the hazard does not depend - #' on time, making it appropriate for random failures unrelated to age. #' #' @param lambda Rate parameter (failure rate). If NULL, must be provided @@ -74,12 +73,12 @@ dfr_exponential <- function(lambda = NULL) { cum_haz_rate = function(t, par, ...) { par[[1]] * t }, - score_fn = function(df, par, ...) { - delta <- get_delta(df) - c(sum(delta == 1) / par[[1]] - sum(df$t)) + score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { + delta <- get_delta(df, delta_col) + c(sum(delta == 1) / par[[1]] - sum(df[[ob_col]])) }, - hess_fn = function(df, par, ...) { - delta <- get_delta(df) + hess_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { + delta <- get_delta(df, delta_col) matrix(-sum(delta == 1) / par[[1]]^2, nrow = 1, ncol = 1) }, par = lambda @@ -162,11 +161,11 @@ dfr_weibull <- function(shape = NULL, scale = NULL) { cum_haz_rate = function(t, par, ...) { (t / par[[2]])^par[[1]] }, - score_fn = function(df, par, ...) { + score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { k <- par[[1]] sigma <- par[[2]] - t <- df$t - delta <- get_delta(df) + t <- df[[ob_col]] + delta <- get_delta(df, delta_col) n_events <- sum(delta == 1) t_ratio <- t / sigma @@ -179,11 +178,11 @@ dfr_weibull <- function(shape = NULL, scale = NULL) { c(dk, dsigma) }, - hess_fn = function(df, par, ...) { + hess_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { k <- par[[1]] sigma <- par[[2]] - t <- df$t - delta <- get_delta(df) + t <- df[[ob_col]] + delta <- get_delta(df, delta_col) n_events <- sum(delta == 1) t_ratio <- t / sigma @@ -264,11 +263,11 @@ dfr_gompertz <- function(a = NULL, b = NULL) { cum_haz_rate = function(t, par, ...) { (par[[1]] / par[[2]]) * (exp(par[[2]] * t) - 1) }, - score_fn = function(df, par, ...) { + score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { a <- par[[1]] b <- par[[2]] - t <- df$t - delta <- get_delta(df) + t <- df[[ob_col]] + delta <- get_delta(df, delta_col) exp_bt <- exp(b * t) n_events <- sum(delta == 1) @@ -302,6 +301,7 @@ dfr_gompertz <- function(a = NULL, b = NULL) { #' The log-logistic distribution has: #' \itemize{ #' \item Hazard: \eqn{h(t) = \frac{(\beta/\alpha)(t/\alpha)^{\beta-1}}{1 + (t/\alpha)^\beta}} +#' \item Cumulative hazard: \eqn{H(t) = \log(1 + (t/\alpha)^\beta)} #' \item Survival: \eqn{S(t) = \frac{1}{1 + (t/\alpha)^\beta}} #' \item Median: \eqn{\alpha} (when beta > 1) #' } @@ -314,12 +314,10 @@ dfr_gompertz <- function(a = NULL, b = NULL) { #' \item Hazard is not monotonic throughout lifetime #' } #' -#' Note: The cumulative hazard has no closed form and is computed numerically. -#' For efficiency with large datasets, consider providing `cum_haz_rate` -#' using numerical integration cached appropriately. +#' The cumulative hazard has a closed form and is provided analytically. #' -#' @return A `dfr_dist` object with analytical rate function. -#' Cumulative hazard uses numerical integration. +#' @return A `dfr_dist` object with analytical rate, cumulative hazard, +#' and score function. #' #' @examples #' # Component with peak hazard around t = alpha @@ -349,11 +347,11 @@ dfr_loglogistic <- function(alpha = NULL, beta = NULL) { cum_haz_rate = function(t, par, ...) { log(1 + (t / par[[1]])^par[[2]]) }, - score_fn = function(df, par, ...) { + score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) { alpha <- par[[1]] beta <- par[[2]] - t <- df$t - delta <- get_delta(df) + t <- df[[ob_col]] + delta <- get_delta(df, delta_col) t_ratio <- t / alpha t_ratio_beta <- t_ratio^beta diff --git a/README.Rmd b/README.Rmd index a8fc8c4..e9fc92c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -176,8 +176,7 @@ ll(df, par = c(0.5)) **Start Here:** -- [Package Overview](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and audience guide -- [Quick Start Guide](https://queelius.github.io/flexhaz/articles/getting_started.html) - 5-minute introduction +- [Package Overview & Quick Start](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and quick start guide **Real-World Applications:** diff --git a/README.md b/README.md index 826a84d..9002afd 100644 --- a/README.md +++ b/README.md @@ -190,8 +190,7 @@ ll(df, par = c(0.5)) **Start Here:** -- [Package Overview](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and audience guide -- [Quick Start Guide](https://queelius.github.io/flexhaz/articles/getting_started.html) - 5-minute introduction +- [Package Overview & Quick Start](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and quick start guide **Real-World Applications:** diff --git a/_pkgdown.yml b/_pkgdown.yml index 7fe2d28..301fc68 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -9,15 +9,13 @@ navbar: components: intro: text: Get Started - href: articles/getting_started.html + href: articles/flexhaz-package.html articles: text: Articles menu: - text: "Motivation & Applications" - - text: "Package Overview" + - text: "Package Overview & Quick Start" href: articles/flexhaz-package.html - - text: Getting Started - href: articles/getting_started.html - text: Reliability Engineering href: articles/reliability_engineering.html - text: "---" @@ -36,7 +34,6 @@ articles: navbar: ~ contents: - flexhaz-package - - getting_started - reliability_engineering - title: Theory contents: diff --git a/docs/404.html b/docs/404.html index dca807b..08fb61d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -29,14 +29,13 @@