From 5d7f12dfe9e575b02df30b0365a926d821b1e076 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 13 Aug 2025 02:38:07 +0000 Subject: [PATCH] Fix Issue #6: Add support for single column data - Remove requirement for at least 2 columns in varimpact() - Add vector input support by converting to data frame - Add warning when using single variable for analysis - Fix character column conversion in separate_factors_numerics() - Add comprehensive tests for single column scenarios - Maintain backward compatibility with existing functionality Addresses issue #6: 'Single column in data' where users reported 'Error: invalid type (list) of argument' when using single variables. --- R/separate_factors_numerics.R | 7 +- R/varimpact.R | 13 +++- tests/testthat/test-single-column.R | 106 ++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-single-column.R diff --git a/R/separate_factors_numerics.R b/R/separate_factors_numerics.R index 2c6b20b..53e00c1 100644 --- a/R/separate_factors_numerics.R +++ b/R/separate_factors_numerics.R @@ -19,7 +19,12 @@ separate_factors_numerics = is_char = sapply(data, is.character) # Convert strings to factors. - data[, is_char] = sapply(data[, is_char], as.factor) + if (any(is_char)) { + # Convert each character column to factor individually + for (col_name in names(data)[is_char]) { + data[[col_name]] = as.factor(data[[col_name]]) + } + } } # Identify factors. diff --git a/R/varimpact.R b/R/varimpact.R index 1874d62..792d1dd 100644 --- a/R/varimpact.R +++ b/R/varimpact.R @@ -218,9 +218,16 @@ varimpact = ###################### # Argument checks. - # Confirm that data has at least two columns. - if (ncol(data) < 2L) { - stop("Data argument must have at least two columns.") + # Handle vector input by converting to data frame + if (is.vector(data) && !is.list(data)) { + if (verbose) cat("Converting vector input to data frame.\n") + data <- data.frame(X1 = data) + } + + # Handle single column case with warning + if (ncol(data) == 1L) { + warning("Using single variable for variable importance analysis. Results may be limited.") + if (verbose) cat("Single variable detected in data.\n") } # Ensure that Y is numeric; e.g. can't be a factor. diff --git a/tests/testthat/test-single-column.R b/tests/testthat/test-single-column.R new file mode 100644 index 0000000..bfb7d0f --- /dev/null +++ b/tests/testthat/test-single-column.R @@ -0,0 +1,106 @@ +# Test single column support in varimpact +# This addresses issue #6: "Single column in data" + +library(testthat) +library(varimpact) + +context("Single column support") + +test_that("separate_factors_numerics handles single columns", { + # Test single numeric column + data_numeric <- data.frame(x1 = c(1.1, 2.2, 3.3, 4.4, 5.5)) + result <- separate_factors_numerics(data_numeric) + expect_equal(ncol(result$df_factors), 0) + expect_equal(ncol(result$df_numerics), 1) + expect_equal(colnames(result$df_numerics), "x1") + + # Test single factor column + data_factor <- data.frame(x1 = factor(c("A", "B", "C", "A", "B"))) + result <- separate_factors_numerics(data_factor) + expect_equal(ncol(result$df_factors), 1) + expect_equal(ncol(result$df_numerics), 0) + expect_equal(colnames(result$df_factors), "x1") + + # Test single character column (should be converted to factor) + data_char <- data.frame(x1 = c("A", "B", "C", "A", "B"), stringsAsFactors = FALSE) + result <- separate_factors_numerics(data_char) + expect_equal(ncol(result$df_factors), 1) + expect_equal(ncol(result$df_numerics), 0) + expect_equal(colnames(result$df_factors), "x1") + expect_true(is.factor(result$df_factors$x1)) +}) + +test_that("varimpact handles vector input", { + # Create test data + set.seed(1) + N <- 50 + X_vector <- rnorm(N) + Y <- rbinom(N, 1, plogis(0.2 * X_vector)) + + # Test that vector input is converted to data frame and processed + expect_warning( + vim <- varimpact(Y = Y, data = X_vector, verbose = FALSE, V = 2L), + "Using single variable for variable importance analysis" + ) + + # Check that the result is valid + expect_s3_class(vim, "varimpact") + expect_true(is.list(vim$results_all)) +}) + +test_that("varimpact handles single column data frame", { + # Create test data + set.seed(1) + N <- 50 + X_single <- data.frame(x1 = rnorm(N)) + Y <- rbinom(N, 1, plogis(0.2 * X_single$x1)) + + # Test that single column data frame is processed with warning + expect_warning( + vim <- varimpact(Y = Y, data = X_single, verbose = FALSE, V = 2L), + "Using single variable for variable importance analysis" + ) + + # Check that the result is valid + expect_s3_class(vim, "varimpact") + expect_true(is.list(vim$results_all)) +}) + +test_that("varimpact handles single factor column", { + # Create test data + set.seed(1) + N <- 50 + X_factor <- data.frame(x1 = factor(sample(c("A", "B", "C"), N, replace = TRUE))) + Y <- rbinom(N, 1, plogis(0.2 * as.numeric(X_factor$x1))) + + # Test that single factor column is processed with warning + expect_warning( + vim <- varimpact(Y = Y, data = X_factor, verbose = FALSE, V = 2L), + "Using single variable for variable importance analysis" + ) + + # Check that the result is valid + expect_s3_class(vim, "varimpact") + expect_true(is.list(vim$results_all)) +}) + +test_that("varimpact still works with multiple columns (regression test)", { + # Create test data + set.seed(1) + N <- 50 + X_multi <- data.frame( + x1 = rnorm(N), + x2 = rnorm(N), + x3 = factor(sample(c("A", "B"), N, replace = TRUE)) + ) + Y <- rbinom(N, 1, plogis(0.2 * X_multi$x1 + 0.1 * X_multi$x2)) + + # Test that multiple columns work without warning + expect_silent( + vim <- varimpact(Y = Y, data = X_multi, verbose = FALSE, V = 2L) + ) + + # Check that the result is valid + expect_s3_class(vim, "varimpact") + expect_true(is.list(vim$results_all)) +}) \ No newline at end of file