diff --git a/R/D_matrix_contribution.R b/R/D_matrix_contribution.R index e3ba4b6..f545cef 100644 --- a/R/D_matrix_contribution.R +++ b/R/D_matrix_contribution.R @@ -22,5 +22,9 @@ D_matrix_contribution <- function(indices, model_fits, yy, xx, family, link) { D_i <- t(xx[indices, , drop = FALSE]) } + if (family == "gaussian" & link == "log") { + D_i <- t(model_fits[indices]*xx[indices, , drop = FALSE]) + } + return (D_i) } diff --git a/R/S_matrix_contribution.R b/R/S_matrix_contribution.R index 26ed302..c53b66a 100644 --- a/R/S_matrix_contribution.R +++ b/R/S_matrix_contribution.R @@ -9,8 +9,8 @@ #' S_matrix_contribution <- function(indices, model_fits, yy, xx, family, link) { - + S_i <- matrix(yy[indices] - model_fits[indices], ncol = 1) - + return (S_i) } diff --git a/R/V_matrix_contribution.R b/R/V_matrix_contribution.R index 781764c..e9fb043 100644 --- a/R/V_matrix_contribution.R +++ b/R/V_matrix_contribution.R @@ -30,7 +30,7 @@ V_matrix_contribution <- function(indices, model_fits, yy, xx, pp0 = 1, corr_mat } } - if (family == "gaussian" & link == "identity") { + if (family == "gaussian") { n <- length(yy) m <- ncol(xx) - pp0 # note, while sigma^2_tilde should in theory have a denominator, this would cancel out @@ -42,7 +42,6 @@ V_matrix_contribution <- function(indices, model_fits, yy, xx, pp0 = 1, corr_mat } else { V_i <- sigma2_tilde * corr_mat } - } return (V_i) diff --git a/R/fisher_info_contribution.R b/R/fisher_info_contribution.R index ffa2d64..4a88202 100644 --- a/R/fisher_info_contribution.R +++ b/R/fisher_info_contribution.R @@ -4,11 +4,12 @@ #' @param model_fits The fitted glm under the null hypothesis. #' @param yy Vector of observed responses. #' @param xx Design matrix for model. +#' @param m Number of parameters fixed under null hypothesis. #' @param family The model family for the fitted glm. #' @param link The link function utilized in the fitted glm. #' -fisher_info_contribution <- function(i, model_fits, yy, xx, family, link) { ### output is p x p... Di^T Vi Di +fisher_info_contribution <- function(i, model_fits, yy, xx, m = 1, family, link) { ### output is p x p... Di^T Vi Di fisher_info_mat <- matrix(NA, nrow = length(model_fits), ncol = 1) if (family == "poisson" & link == "log") { fisher_info_mat <- model_fits[i] * xx[i, ] %*% t(xx[i, ]) @@ -21,9 +22,18 @@ fisher_info_contribution <- function(i, model_fits, yy, xx, family, link) { ### if (family == "gaussian" & link == "identity") { n <- length(yy) p <- ncol(xx) - sigma2_tilde <- sum(yy - model_fits)/(n - p) + mod_df <- p-m + sigma2_tilde <- sum(yy - model_fits)/(n - mod_df) fisher_info_mat <- (1/sigma2_tilde)*xx[i, ] %*% t(xx[i, ]) } + if (family == "gaussian" & link == "log") { + n <- length(yy) + p <- ncol(xx) + mod_df <- p-m + sigma2_tilde <- sum(yy - model_fits)/(n - mod_df) + fisher_info_mat <- ((model_fits[i]^2)/sigma2_tilde)*xx[i, ] %*% t(xx[i, ]) + } + return(fisher_info_mat) } \ No newline at end of file diff --git a/R/gee_test.R b/R/gee_test.R index 7bd7576..3e167a4 100644 --- a/R/gee_test.R +++ b/R/gee_test.R @@ -92,7 +92,7 @@ gee_test <- function(..., use_geeasy = TRUE, use_jack_se = FALSE, cluster_corr_c gee_fails <- TRUE new_cl <- cl new_cl[1] <- call("glm") - new_cl <- call_modify(new_cl, corstr = zap(), id = zap()) + new_cl <- call_modify(new_cl, corstr = zap(), id = zap(), control = zap(), start = cl$control$init.beta) withCallingHandlers({ glm_result <- try({eval(new_cl, envir = rlang::caller_env())}) }, warning=function(w) { @@ -107,7 +107,7 @@ gee_test <- function(..., use_geeasy = TRUE, use_jack_se = FALSE, cluster_corr_c gee_family <- gee_result$family$family gee_link <- gee_result$family$link - if ((gee_family != "poisson" | gee_link != "log") & (gee_family != "binomial" | gee_link != "logit") & (gee_family != "gaussian" | gee_link != "identity")) { + if ((gee_family != "poisson" | gee_link != "log") & (gee_family != "binomial" | gee_link != "logit") & (gee_family != "gaussian" | gee_link != "identity") & (gee_family != "gaussian" | gee_link != "log")) { stop(paste("This is only implemented this for Poisson family with log link, Binomial family with logit link, and Gaussian family with identity link.\n", "You requested", gee_family, "family and", gee_link, "link \n", "Please open a GitHub issue if you're interested in other families.")) diff --git a/R/glm_test.R b/R/glm_test.R index 3343ebc..8d6fc18 100644 --- a/R/glm_test.R +++ b/R/glm_test.R @@ -16,10 +16,9 @@ glm_test <- function(...) { # https://stackoverflow.com/questions/20680399/how-to-wrap-glm-in-a-function-pass-dotdotdot-directly-to-another-function-fail cl <- match.call() cl[1] <- call("glm") - ### fit the glm, ignoring warnings about non integer inputs, as we take an estimating equations mindset withCallingHandlers({ - glm_result <- eval(cl, envir = rlang::caller_env()) ### is the issue here?? + glm_result <- eval(cl, envir = rlang::caller_env()) ### is the issue here?? }, warning=function(w) { if (startsWith(conditionMessage(w), "non-integer x")) invokeRestart("muffleWarning") @@ -32,8 +31,9 @@ glm_test <- function(...) { glm_family <- glm_result$family$family glm_link <- glm_result$family$link - if ((glm_family != "poisson" | glm_link != "log") & (glm_family != "binomial" | glm_link != "logit") & (glm_family != "gaussian" | glm_link != "identity")) { - stop(paste("This is only implemented this for Poisson family with log link, Binomial family with logit link, and Gaussian family with identity link.\n", + if ((glm_family != "poisson" | glm_link != "log") & (glm_family != "binomial" | glm_link != "logit") + & (glm_family != "gaussian" | glm_link != "identity") & (glm_family != "gaussian" | glm_link != "log")) { + stop(paste("This is only implemented this for Poisson family with log link, Binomial family with logit link, and Gaussian family with identity or log link.\n", "You requested", glm_family, "family and", glm_link, "link \n", "Please open a GitHub issue if you're interested in other families.")) } diff --git a/R/robust_score_test.R b/R/robust_score_test.R index cb48bf7..27ae203 100644 --- a/R/robust_score_test.R +++ b/R/robust_score_test.R @@ -32,19 +32,30 @@ robust_score_test <- function(glm_object, call_to_model, param = 1, yy <- model1$y nn <- nrow(xx) pp <- ncol(xx) - xx0 <- xx[ , -param] + xx0 <- xx[ , -param, drop = FALSE] pp0 <- length(param) # stop("no") withCallingHandlers({ - model0 <- glm.fit(x = xx0, - y = yy, - intercept = FALSE, - offset = with(model1$data, eval(call_to_model$offset)), - weights = with(model1$data, eval(call_to_model$weights)), - family = eval(call_to_model$family)) + if (model1family == "gaussian" & model1link == "log") { + model0 <- glm.fit(x = xx0, + y = yy, + intercept = FALSE, + offset = with(model1$data, eval(call_to_model$offset)), + weights = with(model1$data, eval(call_to_model$weights)), + family = eval(call_to_model$family), + start = rep(1, ncol(xx0))) + } else { + model0 <- glm.fit(x = xx0, + y = yy, + intercept = FALSE, + offset = with(model1$data, eval(call_to_model$offset)), + weights = with(model1$data, eval(call_to_model$weights)), + family = eval(call_to_model$family)) + } + }, warning = function(w) { if (startsWith(conditionMessage(w), "non-integer x")) @@ -52,7 +63,7 @@ robust_score_test <- function(glm_object, call_to_model, param = 1, }) model0_fits <- model0$fitted.values - if (is.factor(id)) { + if (length(id) == nn) { ## assume exchangeable @@ -149,16 +160,17 @@ robust_score_test <- function(glm_object, call_to_model, param = 1, solve(c_tilde %*% cov_U_hat %*% t(c_tilde), # (1 x p) x ((p x n) x (n x p)) x (p x 1) (c_tilde %*% u_tilde_sum))) - } else if (is.na(id)) { + } else if (length(id) != nn) { u_tilde <- sapply(1:nn, score_contribution, model_fits = model0_fits, family = model1family, link = model1link, xx = xx, yy = yy) ### p x n ## Reorder u tilde to match a u_tilde <- rbind(u_tilde[-param,], u_tilde[param,]) - + aa0 <- Reduce("+", sapply(1:nn, fisher_info_contribution, simplify=F, model_fits = model0_fits, - family = model1family, link = model1link, xx = xx, yy = yy)) ### p x p + family = model1family, link = model1link, xx = xx, yy = yy, m = pp0)) ### p x p + aa0_11 <- aa0[setdiff(seq_len(pp), param), setdiff(seq_len(pp), param), drop = FALSE] aa0_21 <- aa0[param, setdiff(seq_len(pp), param), drop = FALSE] # c_tilde <- cbind(-aa0_21 %*% solve(aa0_11), diag(rep(1, pp0))) # 1 x p diff --git a/R/score_contribution.R b/R/score_contribution.R index 372f9e2..4294d50 100644 --- a/R/score_contribution.R +++ b/R/score_contribution.R @@ -4,11 +4,12 @@ #' @param model_fits The fitted glm under the null hypothesis. #' @param yy Vector of observed responses. #' @param xx Design matrix for model. +#' @param m Number of parameters fixed under null hypothesis. #' @param family The model family for the fitted glm. #' @param link The link function utilized in the fitted glm. #' -score_contribution <- function(i, model_fits, yy, xx, family, link) { ### output is p x 1 +score_contribution <- function(i, model_fits, yy, xx, m = 1, family, link) { ### output is p x 1 score_mat <- matrix(NA, nrow = length(model_fits), ncol = 1) if (family == "poisson" & link == "log") { @@ -23,9 +24,18 @@ score_contribution <- function(i, model_fits, yy, xx, family, link) { ### output if (family == "gaussian" & link == "identity") { n <- length(yy) p <- ncol(xx) - sigma2_tilde <- sum(yy - model_fits)/(n - p) + mod_df <- p-m + sigma2_tilde <- sum(yy - model_fits)/(n - mod_df) score_vec <- matrix((1/sigma2_tilde)*(yy[i] - model_fits[i])*xx[i,], ncol = 1) } + + if (family == "gaussian" & link == "log") { + n <- length(yy) + p <- ncol(xx) + mod_df <- p-m + sigma2_tilde <- sum(yy - model_fits)/(n - mod_df) + score_vec <- matrix((1/sigma2_tilde)*model_fits[i]*(yy[i] - model_fits[i])*xx[i,], ncol = 1) + } return(score_vec) } \ No newline at end of file diff --git a/tests/testthat/test-basic.R b/tests/testthat/test-basic.R index 6900b15..49e4035 100644 --- a/tests/testthat/test-basic.R +++ b/tests/testthat/test-basic.R @@ -64,6 +64,9 @@ test_that("test gaussian example", { expect_type(glm_test(dist ~ speed, data = cars, family=gaussian(link = "identity"))$coef_tab, "double") + + expect_type(glm_test(dist ~ speed, data = cars, family=gaussian(link = "log"))$coef_tab, + "double") cars$id <- c(rep(1:5, 9), 6:10) cars$bin <- rep(0:1, each = 25) @@ -76,7 +79,7 @@ test_that("test gaussian example", { test_that("error with other model", { - expect_error(glm_test(dist ~ speed, data = cars, family=gaussian(link = "log"))) + expect_error(glm_test(dist ~ speed, data = cars, family=gaussian(link = "logit"))) })