Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 8 additions & 46 deletions DIMS/GenerateBreaks.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,23 @@
## adapted from 1-generateBreaksFwhm.HPC.R ##

# load required package
suppressPackageStartupMessages(library("xcms"))

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)

filepath <- cmd_args[1]
outdir <- cmd_args[2]
trim <- as.numeric(cmd_args[3])
resol <- as.numeric(cmd_args[4])

# initialize
trim_left_pos <- NULL
trim_right_pos <- NULL
trim_left_neg <- NULL
trim_right_neg <- NULL
breaks_fwhm <- NULL
breaks_fwhm_avg <- NULL
bins <- NULL
trim <- as.numeric(cmd_args[2])
resol <- as.numeric(cmd_args[3])

# read in mzML file
raw_data <- suppressMessages(xcms::xcmsRaw(filepath))

# Get time values for positive and negative scans
pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
neg_times <- raw_data@scantime[raw_data@polarity == "negative"]
# get trim parameters and save them to file
get_trim_parameters(raw_data@scantime, raw_data@polarity, trim)

# trim (remove) scans at the start and end for positive
trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)]) # 15% aan het begin
trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) # 5% aan het eind
# create breaks of bins for intensities. Bin size is a function of fwhm which is a function of m/z
get_breaks_for_bins(raw_data$mzrange, resol)

# trim (remove) scans at the start and end for negative
trim_left_neg <- round(neg_times[length(neg_times) * trim])
trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)])

# Mass range m/z
low_mz <- raw_data@mzrange[1]
# Determine maximum m/z and save to file
high_mz <- raw_data@mzrange[2]

# determine number of segments (bins)
nr_segments <- 2 * (high_mz - low_mz)
segment <- seq(from = low_mz, to = high_mz, length.out = nr_segments + 1)

# determine start and end of each bin.
for (i in 1:nr_segments) {
start_segment <- segment[i]
end_segment <- segment[i+1]
resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200)))
fwhm_segment <- start_segment / resol_mz
breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment))
# average the m/z instead of start value
range <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)
delta_mz <- range[2] - range[1]
breaks_fwhm_avg <- c(breaks_fwhm_avg, range + 0.5 * delta_mz)
}

# generate output file
save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData")
save(high_mz, file = "highest_mz.RData")

4 changes: 2 additions & 2 deletions DIMS/GenerateBreaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ process GenerateBreaks {
input:
tuple(val(file_id), path(mzML_file))


output:
path('breaks.fwhm.RData'), emit: breaks
path('trim_params.RData'), emit: trim_params
path('highest_mz.RData'), emit: highest_mz

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/GenerateBreaks.R $mzML_file ./ $params.trim $params.resolution
Rscript ${baseDir}/CustomModules/DIMS/GenerateBreaks.R $mzML_file $params.trim $params.resolution
"""
}
58 changes: 58 additions & 0 deletions DIMS/preprocessing/generate_breaks_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# GenerateBreaks functions
get_trim_parameters <- function(scantimes, polarities, trim = 0.1) {
#' determine the scans per scanmode which are trimmed off; save trim parameters to file
#'
#' @param scantimes: vector of scan times in seconds
#' @param polarities: vector of polarities (positive or negative)

# Get time values for positive and negative scans
pos_times <- scantimes[polarities == "positive"]
neg_times <- scantimes[polarities == "negative"]

# trim (remove) scans at the start (15%) and end (5%) for positive
trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)])
trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))])

# trim (remove) scans at the start and end (10%) for negative
trim_left_neg <- round(neg_times[length(neg_times) * trim])
trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)])

# save trim parameters to file
save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData")
}

get_breaks_for_bins <- function(mzrange, resol = 140000) {
#' create a vector with the breaks in m/z of bins for intensities
#'
#' @param mzrange: vector of minimum and maximum m/z values (integeers)
#' @param resol: value for resolution (integer)

# initialize
breaks_fwhm <- NULL
breaks_fwhm_avg <- NULL

# determine number of segments used to create bins
nr_segments <- 2 * (mzrange[2] - mzrange[1])
segments <- seq(from = mzrange[1], to = mzrange[2], length.out = nr_segments + 1)

# determine start and end of each bin. fwhm (width of peaks) is assumed to be constant within a segment
for (segment_index in 1:nr_segments) {
start_segment <- segments[segment_index]
end_segment <- segments[segment_index + 1]
# determine resolution at given m/z value
resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200)))
# determine fwhm (full width at half maximum) of the peaks in this segment
fwhm_segment <- start_segment / resol_mz
# determine the breaks within this segment
breaks_segment <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)
# add breaks for this segment to vector with all breaks
breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment))
# get a vector of average m/z instead of start value
delta_mz <- breaks_segment[2] - breaks_segment[1]
avg_breaks_segment <- breaks_segment + 0.5 * delta_mz
breaks_fwhm_avg <- c(breaks_fwhm_avg, avg_breaks_segment)
}

# save breaks to file
save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData")
}
Binary file not shown.
Binary file added DIMS/tests/testthat/fixtures/test_trim_params.RData
Binary file not shown.
54 changes: 54 additions & 0 deletions DIMS/tests/testthat/test_generate_breaks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# unit tests for GenerateBreaks
# functions: get_trim_parameters and get_breaks_for_bins

# source all functions for GenerateBreaks
source("../../preprocessing/generate_breaks_functions.R")

# test get_trim_parameters
testthat::test_that("trim parameters are correctly calculated", {
# create list of scan times to test on:
test_scantimes <- seq(0, 100, length = 168)
test_polarities <- c(rep("positive", 84), rep("negative", 84))
test_trim <- 0.1

# test that the function produces no output except trim_params.RData file
expect_silent(get_trim_parameters(test_scantimes, test_polarities, test_trim))
expect_true(file.exists("./trim_params.RData"))

# test the parameters in the output RData file
load("./trim_params.RData")
test_trim_left_pos <- trim_left_pos
test_trim_left_neg <- trim_left_neg
test_trim_right_pos <- trim_right_pos
test_trim_right_neg <- trim_right_neg
rm(trim_left_pos, trim_left_neg, trim_right_pos, trim_right_neg)

# load previously stored values from fixtures
load("fixtures/test_trim_params.RData")
expect_equal(test_trim_left_pos, trim_left_pos)
expect_equal(test_trim_left_neg, trim_left_neg)
expect_equal(test_trim_right_pos, trim_right_pos)
expect_equal(test_trim_right_neg, trim_right_neg)
})

# test get_breaks_for_bins
testthat::test_that("breaks are correctly calculated", {
# create list of scan times to test on:
test_mzrange <- c(300, 400)
test_resol <- 140000

# test that the function produces no output except breaks.fwhm.RData file
expect_silent(get_breaks_for_bins(test_mzrange, test_resol))
expect_true(file.exists("./breaks.fwhm.RData"))

# test the vectors in the output RData file
load("./breaks.fwhm.RData")
test_breaks_fwhm <- breaks_fwhm
test_breaks_fwhm_avg <- breaks_fwhm_avg
rm(breaks_fwhm, breaks_fwhm_avg)

# load breaks from fixtures and compare vectors
load("fixtures/breaks.fwhm.RData")
expect_equal(test_breaks_fwhm, breaks_fwhm)
expect_equal(test_breaks_fwhm_avg, breaks_fwhm_avg)
})
Loading