Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DIMS/preprocessing/fill_missing_functions.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
fill_missing_intensities <- function(peakgroup_list, repl_pattern, thresh, not_random = FALSE) {
fill_missing_intensities <- function(peakgroup_list, repl_pattern, thresh, disable_randomness = FALSE) {
#' Replace intensities that are zero with random value
#'
#' @param peakgroup_list: Peak groups (matrix)
#' @param repl_pattern: Replication pattern (list of strings)
#' @param thresh: Value for threshold between noise and signal (integer)
#' @param disable_randomness: Variable which indicates whether randomness should be disabled (boolean)
#'
#' @return final_outlist: peak groups with filled-in intensities (matrix)

# for unit tests, fix the random seed so the filled-in values are reproducible
if (not_random) {
if (disable_randomness) {
set.seed(123)
}

Expand Down
6 changes: 6 additions & 0 deletions DIMS/tests/testthat/fixtures/test_peakgroup_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"mzmed.pgrp" "nrsamples" "ppmdev" "assi_HMDB" "all_hmdb_names" "iso_HMDB" "HMDB_code" "all_hmdb_ids" "sec_hmdb_ids" "theormz_HMDB" "C101.1" "C102.1" "P2.1" "P3.1" "avg.int" "assi_noise" "theormz_noise" "avg.ctrls" "sd.ctrls" "C101.1_Zscore" "C102.1_Zscore" "P2.1_Zscore" "P3.1_Zscore"
"1" 300.199680958642 0.451108327135444 0.111112214857712 NA "NA;NA" NA "HMDB1234567" "HMDB1234567;HMDB1234567" NA NA 1000 5000 10000 50000 NA NA NA NA NA 9000 13000 90000 130000
"2" 300.000315890415 0.498603057814762 0.473299680976197 NA "NA;NA" NA "HMDB1234567_1" "HMDB1234567_1;HMDB1234567_1" NA NA 2000 6000 20000 60000 NA NA NA NA NA 10000 14000 1e+05 140000
"3" 300.254185894039 0.589562055887654 0.17426158930175 NA "NA;NA" NA "HMDB1234567_2" "HMDB1234567_2;HMDB1234567_2" NA NA 3000 7000 30000 70000 NA NA NA NA NA 11000 15000 110000 150000
"4" 300.755745105678 0.277923040557653 0.186787674436346 NA "NA;NA" NA "HMDB1234567_7" "HMDB1234567_7;HMDB1234567_7" NA NA 4000 8000 40000 80000 NA NA NA NA NA 12000 16000 120000 160000

35 changes: 13 additions & 22 deletions DIMS/tests/testthat/test_fill_missing.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,18 @@ source("../../preprocessing/fill_missing_functions.R")

# test fill_missing_intensities
testthat::test_that("missing values are corretly filled with random values", {
# create peakgroup_list to test on in diagnostics setting
test_peakgroup_list <- data.frame(matrix(NA, nrow = 4, ncol = 23))
colnames(test_peakgroup_list) <- c("mzmed.pgrp", "nrsamples", "ppmdev", "assi_HMDB", "all_hmdb_names",
"iso_HMDB", "HMDB_code", "all_hmdb_ids", "sec_hmdb_ids", "theormz_HMDB",
"C101.1", "C102.1", "P2.1", "P3.1",
"avg.int", "assi_noise", "theormz_noise", "avg.ctrls", "sd.ctrls",
"C101.1_Zscore", "C102.1_Zscore", "P2.1_Zscore", "P3.1_Zscore")
test_peakgroup_list[, c(1)] <- 300 + runif(4)
test_peakgroup_list[, c(2, 3)] <- runif(8)
test_peakgroup_list[, "HMDB_code"] <- c("HMDB1234567", "HMDB1234567_1", "HMDB1234567_2", "HMDB1234567_7")
test_peakgroup_list[, "all_hmdb_ids"] <- paste(test_peakgroup_list[, "HMDB_code"],
test_peakgroup_list[, "HMDB_code"], sep = ";")
test_peakgroup_list[, "all_hmdb_names"] <- paste(test_peakgroup_list[, "assi_HMDB"],
test_peakgroup_list[, "assi_HMDB"], sep = ";")
test_peakgroup_list[, grep("C", colnames(test_peakgroup_list))] <- 1000 * (1:16)
test_peakgroup_list[, grep("P", colnames(test_peakgroup_list))] <- 0
# It's necessary to copy/symlink the files to the current location for the fill_missing_intensities function
test_files <- list.files("fixtures/", "test_peakgroup_list", full.names = TRUE)
file.symlink(file.path(test_files), getwd())

# create replication pattern of technical replicates
test_repl_pattern <- c(list(1), list(2), list(3), list(4))
names(test_repl_pattern) <- c("C101.1", "C102.1", "P2.1", "P3.1")

# read in peakgroup_list, set intensities for patient columns to zero
test_peakgroup_list <- read.table("./test_peakgroup_list.txt", sep= "\t")
test_peakgroup_list[, grep("P", colnames(test_peakgroup_list))] <- 0

test_thresh <- 2000

# create a large peak group list to test for negative values
Expand All @@ -32,14 +26,11 @@ testthat::test_that("missing values are corretly filled with random values", {
# for the sake of time, leave only one intensity column with zeros
test_large_peakgroup_list$P2.1 <- 1

expect_equal(round(fill_missing_intensities(test_peakgroup_list, test_repl_pattern, test_thresh, not_random = TRUE)$P2.1),
expect_equal(round(fill_missing_intensities(test_peakgroup_list, test_repl_pattern, test_thresh, disable_randomness = TRUE)$P2.1),
c(1944, 1977, 2156, 2007), TRUE, tolerance = 0.1)
# fill_missing_intensities should not produce any negative values, even if a large quantity of numbers are filled in
start.time <- Sys.time()
expect_gt(min(fill_missing_intensities(test_large_peakgroup_list, test_repl_pattern, test_thresh, not_random = FALSE)$P3.1),
expect_gt(min(fill_missing_intensities(test_large_peakgroup_list, test_repl_pattern, test_thresh, disable_randomness = FALSE)$P3.1),
0, TRUE)
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

})

Loading