From 3ff5a2ead9a5a45a1c7d16d9a5825349fb6083c3 Mon Sep 17 00:00:00 2001
From: DANIELA RODRIGUES <d.rodrigues@imperial.ac.uk>
Date: Fri, 24 Mar 2023 20:19:17 +0000
Subject: [PATCH 1/3] Included the option to personalise adjustment set for
 each candidate variable

---
 R/varimpact.R                      | 14 ++++++----
 R/vim-numerics.R                   | 28 +++++++++++++++----
 tests/testthat/test-vim-numerics.R | 44 ++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 11 deletions(-)
 create mode 100644 tests/testthat/test-vim-numerics.R

diff --git a/R/varimpact.R b/R/varimpact.R
index 4c71501..47a7367 100644
--- a/R/varimpact.R
+++ b/R/varimpact.R
@@ -71,6 +71,8 @@
 #' @param verbose_reduction Boolean - if TRUE, will display more detail during
 #'   variable reduction step (clustering).
 #' @param digits Number of digits to round the value labels.
+#' @param adjustment_exclusions List of variables to be removed from the adjustment set
+#' of each variable for which we want to estimate importance.
 #'
 #' @return Results object. TODO: add more detail here.
 #'
@@ -209,7 +211,8 @@ varimpact =
            verbose_tmle = FALSE,
            verbose_reduction = FALSE,
            parallel = TRUE,
-           digits = 4L) {
+           digits = 4L,
+           adjustment_exclusions = list()) {
 
   # Time the full function execution.
   time_start = proc.time()
@@ -255,9 +258,9 @@ varimpact =
 
   ########
   # Applied to Explanatory (X) data frame
-  sna = sapply(data, sum_na)
+  sna = sapply(X, sum_na)
 
-  n = nrow(data)
+  n = nrow(X)
 
   #######
   # Missing proportion by variable.
@@ -330,7 +333,8 @@ varimpact =
                  adjust_cutoff = adjust_cutoff,
                  verbose = verbose,
                  verbose_tmle = verbose_tmle,
-                 verbose_reduction = verbose_reduction)
+                 verbose_reduction = verbose_reduction,
+                 adjustment_exclusions= adjustment_exclusions)
 
   # Combine the separate continuous and factor results.
   results =
@@ -365,6 +369,6 @@ varimpact =
 
   # Set a custom class so that we can override print and summary.
   class(results) = "varimpact"
-
+  
   invisible(results)
 }
diff --git a/R/vim-numerics.R b/R/vim-numerics.R
index 374945b..bf7b341 100644
--- a/R/vim-numerics.R
+++ b/R/vim-numerics.R
@@ -13,6 +13,7 @@ vim_numerics =
            Qbounds,
            corthres,
            adjust_cutoff,
+           adjustment_exclusions,
            verbose = FALSE,
            verbose_tmle = FALSE,
            verbose_reduction = FALSE) {
@@ -230,12 +231,26 @@ vim_numerics =
                          numerics$miss.cont,
                          factors$datafac.dumW,
                          factors$miss.fac)
-
+          
+          #print(nameA)
+          #print(var_i)
+          #print(colnames(W))
+          
           # Remove any columns in which all values are NA.
           # CK: but we're using imputed data, so there should be no NAs actually.
           # (With the exception of the NA vectors possibly added above.
           W = W[, !apply(is.na(W), 2, all), drop = FALSE]
 
+          #print(nameA)
+          
+          # Remove variables from the adjustment set of each variable for which we want to measure importance
+          if (nameA %in% names(adjustment_exclusions) && length(adjustment_exclusions[[nameA]]) > 0) {
+            W = W[, setdiff(colnames(W), adjustment_exclusions[[nameA]]), drop = FALSE]
+          }
+          
+          #print(colnames(W))
+          #print(class(W))
+          
           # Separate adjustment matrix into the training and test folds.
           Wt = W[folds != fold_k, , drop = FALSE]
           Wv = W[folds == fold_k, , drop = FALSE]
@@ -344,6 +359,7 @@ vim_numerics =
               # Create a list to hold the results for this level.
               bin_result = list(
                 name = nameA,
+                W_names = colnames(W),
                 cv_fold = fold_k,
                 level = bin_j,
                 level_label = At_bin_labels[bin_j],
@@ -386,7 +402,7 @@ vim_numerics =
 
               # Save how many obs have this level/bin in this training fold.
               bin_result$train_cell_size = sum(IA)
-
+              
               ###############################################
               # TODO: send to calculate_estimates first, which would calculate
               # TMLE, IPTW, G-Comp, and Unadj estimates.
@@ -466,18 +482,18 @@ vim_numerics =
               }
 
               bin_result$test_msg = "success"
-
+              
               # Save to the main list.
               bin_results[[bin_j]] = bin_result
             }
-
+            
             # Finished looping over each level of the assignment variable
             # (primarily training fold, but now also some val fold work).
             if (verbose) cat(" done.\n")
 
             # Save individual bin results.
             fold_result$bin_results = bin_results
-
+            
             # Create a dataframe version of the bin results.
             fold_result$bin_df =
               do.call(rbind, lapply(bin_results, function(result) {
@@ -578,7 +594,7 @@ vim_numerics =
                                 "label = ", training_estimates[[minj]]$label, ")")
               }
               fold_result$message = message
-
+              
               if (verbose) {
                 cat(message, "\n")
               }
diff --git a/tests/testthat/test-vim-numerics.R b/tests/testthat/test-vim-numerics.R
new file mode 100644
index 0000000..b83f7b2
--- /dev/null
+++ b/tests/testthat/test-vim-numerics.R
@@ -0,0 +1,44 @@
+## Test 
+
+test_that("adjustment set functionality works appropriately", {
+  
+          library(varimpact)
+          library(SuperLearner)
+          library(devtools)
+          library(testthat)
+          
+          # Data setup taken from Chris Kennedy Github page (https://github.com/ck37/varimpact)
+          set.seed(1, "L'Ecuyer-CMRG")
+          N <- 300
+          num_normal <- 5
+          X <- as.data.frame(matrix(rnorm(N * num_normal), N, num_normal))
+          Y <- rbinom(N, 1, plogis(.2*X[, 1] + .1*X[, 2] - .2*X[, 3] + .1*X[, 3]*X[, 4] - .2*abs(X[, 4])))
+          # Add some missing data to X so we can test imputation.
+          for (i in 1:10) X[sample(nrow(X), 1), sample(ncol(X), 1)] <- NA
+          
+          Q_lib = c("SL.mean", "SL.glmnet", "SL.ranger", "SL.rpartPrune")
+          g_lib = c("SL.mean", "SL.glmnet")
+          
+          #vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib)
+          
+          vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib, adjustment_exclusions = list("V1" = c("V2","V3"), "V3" = c("V1"), "V5" = c("V1","V2","V3")))
+          
+          # V1
+          expect_equal(vim$all_vims$V1$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+          
+          # V2
+          expect_equal(vim$all_vims$V2$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V3","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+          
+          # V3
+          expect_equal(vim$all_vims$V3$fold_results[[1]]$bin_results[[1]]$W_names, c("V2","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+          
+          # V4
+          expect_equal(vim$all_vims$V4$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V2","V3","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+          
+          # V5
+          expect_equal(vim$all_vims$V5$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+          
+          })
+
+
+

From 7d1f85b3afa376e0b8e352626be9678449555625 Mon Sep 17 00:00:00 2001
From: DANIELA RODRIGUES <d.rodrigues@imperial.ac.uk>
Date: Fri, 24 Mar 2023 21:17:48 +0000
Subject: [PATCH 2/3] test

---
 R/varimpact.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/varimpact.R b/R/varimpact.R
index 47a7367..8a43628 100644
--- a/R/varimpact.R
+++ b/R/varimpact.R
@@ -257,7 +257,7 @@ varimpact =
   }
 
   ########
-  # Applied to Explanatory (X) data frame
+  # Applied to Explanatory (X) data frame.
   sna = sapply(X, sum_na)
 
   n = nrow(X)

From 7b6f8f25d43ce37b818bb856f042c0af96940843 Mon Sep 17 00:00:00 2001
From: DANIELA RODRIGUES <d.rodrigues@imperial.ac.uk>
Date: Fri, 24 Mar 2023 21:23:27 +0000
Subject: [PATCH 3/3] changed "X" back to "data" in varimpact (initially
 changed it by mistake)

---
 R/varimpact.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/varimpact.R b/R/varimpact.R
index 8a43628..685f20b 100644
--- a/R/varimpact.R
+++ b/R/varimpact.R
@@ -258,9 +258,9 @@ varimpact =
 
   ########
   # Applied to Explanatory (X) data frame.
-  sna = sapply(X, sum_na)
+  sna = sapply(data, sum_na)
 
-  n = nrow(X)
+  n = nrow(data)
 
   #######
   # Missing proportion by variable.