Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 0 additions & 31 deletions DIMS/MakeInit.R

This file was deleted.

18 changes: 0 additions & 18 deletions DIMS/MakeInit.nf

This file was deleted.

19 changes: 19 additions & 0 deletions DIMS/ParseSamplesheet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# define parameters
args <- commandArgs(trailingOnly = TRUE)

sample_sheet <- as.data.frame(read.csv(args[1], sep = "\t"))
preprocessing_scripts_dir <- args[2]

# load in function script
source(paste0(preprocessing_scripts_dir, "parse_samplesheet_functions.R"))

# generate the replication pattern
repl_pattern <- generate_repl_pattern(sample_sheet)

# write the replication pattern to text file for troubleshooting purposes
sink("replication_pattern.txt")
print(repl_pattern)
sink()

# save replication pattern to file
save(repl_pattern, file = "init.RData")
18 changes: 18 additions & 0 deletions DIMS/ParseSamplesheet.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
process ParseSamplesheet {
tag "DIMS ParseSamplesheet"
label 'ParseSamplesheet'
container = 'docker://umcugenbioinf/dims:1.3'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
path(samplesheet)

output:
path('init.RData')
path('replication_pattern.txt')

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/ParseSamplesheet.R $samplesheet $params.preprocessing_scripts_dir
"""
}
29 changes: 29 additions & 0 deletions DIMS/preprocessing/parse_samplesheet_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# function for parse_samplesheet
generate_repl_pattern <- function(sample_sheet) {
#' Generate replication pattern list based on information in sample_sheet
#'
#' @param sample_names: vector of sample names (vector of strings)
#' @param sample_sheet: matrix of file names and sample names
#'
#' @return ints_sorted: list of sample names with corresponding file names (technical replicates)

# get the right columns from the samplesheet
file_name_col <- grep("File_Name|File Name", colnames(sample_sheet))
sample_name_col <- grep("Sample_Name|Sample Name", colnames(sample_sheet))
# get the unique sample names from the samplesheet
sample_names <- sort(unique(trimws(as.vector(unlist(sample_sheet[sample_name_col])))))
# remove all characters from sample_names which are not letters, numbers, hyphens and periods
sample_names <- gsub("[^-.[:alnum:]]", "_", sample_names)

# create replication pattern (which technical replicates belong to which sample)
repl_pattern <- c()
for (sample_group in sample_names) {
file_indices <- which(sample_sheet[, sample_name_col] == sample_group)
file_names <- sample_sheet[file_indices, file_name_col]
repl_pattern <- c(repl_pattern, list(file_names))
}
names(repl_pattern) <- sample_names

return(repl_pattern)
}

24 changes: 24 additions & 0 deletions DIMS/tests/testthat/parse_samplesheet_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# unit tests for ParseSamplesheet
# function: generate_repl_pattern

# source all functions for PeakGrouping
source("../../preprocessing/parse_samplesheet_functions.R")

# test generate_repl_pattern
testthat::test_that("replication pattern is correctly generated", {
# create sample sheet tot test on:
test_file_names <- paste0(rep("RES_20260101_", 6), sprintf("%03d", 1:6))
test_sample_names <- sort(rep(c("C1", "P2", "P3"), 2))
test_sample_sheet <- as.data.frame(cbind(File_Name = test_file_names, Sample_Name = test_sample_names))

# test that a list of length 3 is generated
expect_length(generate_repl_pattern(test_sample_sheet), 3)
# test list names
expect_equal(names(generate_repl_pattern(test_sample_sheet)), unique(test_sample_names), TRUE)

# test what happens if any sample name is used twice
test_sample_names <- gsub("P3", "P2", test_sample_names)
test_sample_sheet <- as.data.frame(cbind(File_Name = test_file_names, Sample_Name = test_sample_names))
expect_length(generate_repl_pattern(test_sample_sheet), 2)
expect_length(generate_repl_pattern(test_sample_sheet)$P2, 4)
})
Loading