diff --git a/.gitignore b/.gitignore
index ff7e71b..84662a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
/doc/
/Meta/
+Rplots.pdf
diff --git a/CLAUDE.md b/CLAUDE.md
index 98639e4..ddc6353 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -250,7 +250,6 @@ Cox-Snell residuals should follow Exp(1) if model is correct.
- `tests/testthat/test-diagnostics.R`: Tests for diagnostic methods
- `tests/testthat/test-derivatives.R`: Tests for score/Hessian computation
- `tests/testthat/test-likelihood_model.R`: Likelihood interface tests
-- `vignettes/getting_started.Rmd`: 5-minute quick start
- `vignettes/failure_rate.Rmd`: Hazard-based modeling deep dive
- `vignettes/custom_distributions.Rmd`: How to create custom distributions
- `vignettes/reliability_engineering.Rmd`: Real-world applications
diff --git a/DESCRIPTION b/DESCRIPTION
index 313bbe3..b6e9f09 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,7 +10,8 @@ Description: Flexible framework for specifying survival distributions through
curves, proportional hazards with covariates, and other non-standard
hazard behaviors. Provides automatic computation of survival, CDF, PDF,
quantiles, and sampling. Implements the likelihood model interface for
- maximum likelihood estimation with right-censored survival data.
+ maximum likelihood estimation with right-censored and left-censored
+ survival data.
License: GPL (>= 3)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
diff --git a/R/dfr_dist.R b/R/dfr_dist.R
index 9277d97..0e29b6a 100644
--- a/R/dfr_dist.R
+++ b/R/dfr_dist.R
@@ -11,17 +11,28 @@
#' Defaults to "t".
#' @param delta_col The column name for event indicators in data frames.
#' Uses standard survival analysis convention: 1 = event
-#' observed (exact), 0 = right-censored. Defaults to "delta".
+#' observed (exact), 0 = right-censored, -1 = left-censored.
+#' Defaults to "delta".
#' @param cum_haz_rate Optional analytical cumulative hazard function H(t, par).
#' If provided, used for faster exact cumulative hazard
#' computation instead of numerical integration.
#' Should return the integral of rate from 0 to t.
#' @param score_fn Optional score function (gradient of log-likelihood).
-#' Signature: score_fn(df, par, ...) returning a numeric vector.
-#' If NULL, falls back to numerical gradient via numDeriv::grad.
+#' Signature: score_fn(df, par, ob_col, delta_col, ...)
+#' returning a numeric vector. The ob_col and delta_col arguments
+#' indicate which columns in df contain observation times and event
+#' indicators. If NULL, falls back to numerical gradient via
+#' numDeriv::grad. Analytical score functions that only handle
+#' delta in \{0, 1\} are automatically bypassed when left-censored
+#' data (delta = -1) is present.
#' @param hess_fn Optional Hessian function (second derivatives of log-likelihood).
-#' Signature: hess_fn(df, par, ...) returning a matrix.
+#' Signature: hess_fn(df, par, ob_col, delta_col, ...) returning
+#' a matrix. The ob_col and delta_col arguments indicate which
+#' columns in df contain observation times and event indicators.
#' If NULL, falls back to numerical Hessian via numDeriv::hessian.
+#' Analytical Hessian functions that only handle delta in \{0, 1\}
+#' are automatically bypassed when left-censored data (delta = -1)
+#' is present.
#' @return A `dfr_dist` object that inherits from `likelihood_model`.
#' @export
dfr_dist <- function(rate, par = NULL,
@@ -85,11 +96,13 @@ inv_cdf.dfr_dist <- function(x, ...) {
cdf_fn <- cdf(x, ...)
function(p, par = NULL, ...) {
par <- get_params(par, x$par)
- uniroot(
- f = function(t) cdf_fn(t, par, ...) - p,
- interval = c(0, 1e3),
- extendInt = "upX"
- )$root
+ sapply(p, function(pi) {
+ uniroot(
+ f = function(t) cdf_fn(t, par, ...) - pi,
+ interval = c(0, 1e3),
+ extendInt = "upX"
+ )$root
+ })
}
}
@@ -225,17 +238,19 @@ cum_haz.dfr_dist <- function(x, ...) {
function(t, par = NULL, ...) {
par <- get_params(par, x$par)
- res <- do.call(integrate,
- modifyList(integrator, list(
- upper = t,
- f = function(u) x$rate(u, par, ...))))
- if (res$message != "OK") {
- warning(res$message)
- }
- if (res$abs.error > integrator$abs.tol) {
- warning("Absolute error in cumulative hazard is greater than tolerance")
- }
- res$value
+ sapply(t, function(ti) {
+ res <- do.call(integrate,
+ modifyList(integrator, list(
+ upper = ti,
+ f = function(u) x$rate(u, par, ...))))
+ if (res$message != "OK") {
+ warning(res$message)
+ }
+ if (res$abs.error > integrator$abs.tol) {
+ warning("Absolute error in cumulative hazard is greater than tolerance")
+ }
+ res$value
+ })
}
}
@@ -333,6 +348,7 @@ loglik.dfr_dist <- function(model, ...) {
ll <- ll + sum(log1p(-exp(-H_left)))
}
+ if (is.nan(ll) || is.na(ll)) return(-Inf)
ll
}
}
@@ -354,7 +370,11 @@ score.dfr_dist <- function(model, ...) {
function(df, par = NULL, ...) {
par <- get_params(par, model$par)
if (!is.null(model$score_fn)) {
- return(model$score_fn(df, par, ...))
+ delta <- get_delta(df, model$delta_col)
+ if (!any(delta == -1)) {
+ return(model$score_fn(df, par,
+ ob_col = model$ob_col, delta_col = model$delta_col, ...))
+ }
}
numDeriv::grad(function(p) ll_fn(df, par = p, ...), par)
}
@@ -377,7 +397,11 @@ hess_loglik.dfr_dist <- function(model, ...) {
function(df, par = NULL, ...) {
par <- get_params(par, model$par)
if (!is.null(model$hess_fn)) {
- return(model$hess_fn(df, par, ...))
+ delta <- get_delta(df, model$delta_col)
+ if (!any(delta == -1)) {
+ return(model$hess_fn(df, par,
+ ob_col = model$ob_col, delta_col = model$delta_col, ...))
+ }
}
numDeriv::hessian(function(p) ll_fn(df, par = p, ...), par)
}
diff --git a/R/distributions.R b/R/distributions.R
index 48248f7..cae9a44 100644
--- a/R/distributions.R
+++ b/R/distributions.R
@@ -24,7 +24,6 @@ NULL
#'
#' Creates a DFR distribution with constant failure rate (exponential).
#' The exponential distribution is "memoryless" - the hazard does not depend
-
#' on time, making it appropriate for random failures unrelated to age.
#'
#' @param lambda Rate parameter (failure rate). If NULL, must be provided
@@ -74,12 +73,12 @@ dfr_exponential <- function(lambda = NULL) {
cum_haz_rate = function(t, par, ...) {
par[[1]] * t
},
- score_fn = function(df, par, ...) {
- delta <- get_delta(df)
- c(sum(delta == 1) / par[[1]] - sum(df$t))
+ score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
+ delta <- get_delta(df, delta_col)
+ c(sum(delta == 1) / par[[1]] - sum(df[[ob_col]]))
},
- hess_fn = function(df, par, ...) {
- delta <- get_delta(df)
+ hess_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
+ delta <- get_delta(df, delta_col)
matrix(-sum(delta == 1) / par[[1]]^2, nrow = 1, ncol = 1)
},
par = lambda
@@ -162,11 +161,11 @@ dfr_weibull <- function(shape = NULL, scale = NULL) {
cum_haz_rate = function(t, par, ...) {
(t / par[[2]])^par[[1]]
},
- score_fn = function(df, par, ...) {
+ score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
k <- par[[1]]
sigma <- par[[2]]
- t <- df$t
- delta <- get_delta(df)
+ t <- df[[ob_col]]
+ delta <- get_delta(df, delta_col)
n_events <- sum(delta == 1)
t_ratio <- t / sigma
@@ -179,11 +178,11 @@ dfr_weibull <- function(shape = NULL, scale = NULL) {
c(dk, dsigma)
},
- hess_fn = function(df, par, ...) {
+ hess_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
k <- par[[1]]
sigma <- par[[2]]
- t <- df$t
- delta <- get_delta(df)
+ t <- df[[ob_col]]
+ delta <- get_delta(df, delta_col)
n_events <- sum(delta == 1)
t_ratio <- t / sigma
@@ -264,11 +263,11 @@ dfr_gompertz <- function(a = NULL, b = NULL) {
cum_haz_rate = function(t, par, ...) {
(par[[1]] / par[[2]]) * (exp(par[[2]] * t) - 1)
},
- score_fn = function(df, par, ...) {
+ score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
a <- par[[1]]
b <- par[[2]]
- t <- df$t
- delta <- get_delta(df)
+ t <- df[[ob_col]]
+ delta <- get_delta(df, delta_col)
exp_bt <- exp(b * t)
n_events <- sum(delta == 1)
@@ -302,6 +301,7 @@ dfr_gompertz <- function(a = NULL, b = NULL) {
#' The log-logistic distribution has:
#' \itemize{
#' \item Hazard: \eqn{h(t) = \frac{(\beta/\alpha)(t/\alpha)^{\beta-1}}{1 + (t/\alpha)^\beta}}
+#' \item Cumulative hazard: \eqn{H(t) = \log(1 + (t/\alpha)^\beta)}
#' \item Survival: \eqn{S(t) = \frac{1}{1 + (t/\alpha)^\beta}}
#' \item Median: \eqn{\alpha} (when beta > 1)
#' }
@@ -314,12 +314,10 @@ dfr_gompertz <- function(a = NULL, b = NULL) {
#' \item Hazard is not monotonic throughout lifetime
#' }
#'
-#' Note: The cumulative hazard has no closed form and is computed numerically.
-#' For efficiency with large datasets, consider providing `cum_haz_rate`
-#' using numerical integration cached appropriately.
+#' The cumulative hazard has a closed form and is provided analytically.
#'
-#' @return A `dfr_dist` object with analytical rate function.
-#' Cumulative hazard uses numerical integration.
+#' @return A `dfr_dist` object with analytical rate, cumulative hazard,
+#' and score function.
#'
#' @examples
#' # Component with peak hazard around t = alpha
@@ -349,11 +347,11 @@ dfr_loglogistic <- function(alpha = NULL, beta = NULL) {
cum_haz_rate = function(t, par, ...) {
log(1 + (t / par[[1]])^par[[2]])
},
- score_fn = function(df, par, ...) {
+ score_fn = function(df, par, ob_col = "t", delta_col = "delta", ...) {
alpha <- par[[1]]
beta <- par[[2]]
- t <- df$t
- delta <- get_delta(df)
+ t <- df[[ob_col]]
+ delta <- get_delta(df, delta_col)
t_ratio <- t / alpha
t_ratio_beta <- t_ratio^beta
diff --git a/README.Rmd b/README.Rmd
index a8fc8c4..e9fc92c 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -176,8 +176,7 @@ ll(df, par = c(0.5))
**Start Here:**
-- [Package Overview](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and audience guide
-- [Quick Start Guide](https://queelius.github.io/flexhaz/articles/getting_started.html) - 5-minute introduction
+- [Package Overview & Quick Start](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and quick start guide
**Real-World Applications:**
diff --git a/README.md b/README.md
index 826a84d..9002afd 100644
--- a/README.md
+++ b/README.md
@@ -190,8 +190,7 @@ ll(df, par = c(0.5))
**Start Here:**
-- [Package Overview](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and audience guide
-- [Quick Start Guide](https://queelius.github.io/flexhaz/articles/getting_started.html) - 5-minute introduction
+- [Package Overview & Quick Start](https://queelius.github.io/flexhaz/articles/flexhaz-package.html) - Motivation, complete example, and quick start guide
**Real-World Applications:**
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 7fe2d28..301fc68 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -9,15 +9,13 @@ navbar:
components:
intro:
text: Get Started
- href: articles/getting_started.html
+ href: articles/flexhaz-package.html
articles:
text: Articles
menu:
- text: "Motivation & Applications"
- - text: "Package Overview"
+ - text: "Package Overview & Quick Start"
href: articles/flexhaz-package.html
- - text: Getting Started
- href: articles/getting_started.html
- text: Reliability Engineering
href: articles/reliability_engineering.html
- text: "---"
@@ -36,7 +34,6 @@ articles:
navbar: ~
contents:
- flexhaz-package
- - getting_started
- reliability_engineering
- title: Theory
contents:
diff --git a/docs/404.html b/docs/404.html
index dca807b..08fb61d 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -29,14 +29,13 @@