-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTtesting.R
More file actions
127 lines (96 loc) · 3.95 KB
/
Copy pathTtesting.R
File metadata and controls
127 lines (96 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#################################
# Load data
#################################
# Imputed dataset
metlData <- read.csv("~/Roselab/Metabolite/data/data_for_analysis/metabolite_data_74.csv", row.names = "ID")
# Human metadata
metaData <- read.csv("~/Roselab/Metabolite/data/data_for_analysis/meta_data_74.csv")
# Metabolite metadata
metabolite_meta_data <- read.csv("~/Roselab/Metabolite/data/data_for_analysis/metabolite_metadata.csv")
# Metabolites to remove
metabolites_to_remove <- scan(
"~/Roselab/Metabolite/data/data_for_analysis/metabolites_to_remove.txt",
what = character(),
sep = "\n",
quiet= TRUE
)
#################################
# Function
#################################
process_results <- function(results_df, metabolite_meta_data) {
# Split row names (assumed to be "Index|Compound")
split_names <- strsplit(rownames(results_df), "\\|")
index <- sapply(split_names, `[`, 1)
compound <- sapply(split_names, function(x) paste(x[-1], collapse="|"))
results_df <- cbind(Index = index, Compound = compound, results_df)
# Merge with metabolite metadata (assuming metabolite_meta_data has a column named "Index")
merged_df <- merge(metabolite_meta_data, results_df, by = "Index", all.y = TRUE)
return(merged_df)
}
#################################
# T testing
# TODO: Need to add the main testing into a larger loop to test all time points
#################################
intra_ttest_results <- list()
timepoints_of_interest_ttest <- c("D5", "F3", "F6")
meta_tp <- metaSubset[metaSubset$Time == "F6", ]
metlData_tp <- metlData_log2_filtered[, meta_tp$Sample_ID, drop = FALSE]
group_vec <- factor(as.character(meta_tp$Arm), levels = c("NIF", "IF"))
# Extract metabolite values (row 1) and make it numeric
met_values <- as.numeric(metlData_tp[1, ])
names(met_values) <- colnames(metlData_tp) # keep sample IDs
# Split into two groups manually
group1 <- met_values[group_vec == "NIF"]
group2 <- met_values[group_vec == "IF"]
# Run the unpaired t-test
tt <- t.test(group1, group2, paired = FALSE)
# This is the prefered way to do a t.test, but because some metabolites cause a failure.,
# The loop helps identify which metabolites are causing the problems.
# pvals <- sapply(1:nrow(metlData_tp), function(i) {
# t.test( metlData_tp[i, group_vec=="NIF"],
# metlData_tp[i, group_vec=="IF"],
# paired = FALSE )$p.value
# })
results <- data.frame(
Metabolite = character(),
t.stat = numeric(),
p.value = numeric(),
stringsAsFactors = FALSE
)
# loop over each metabolite (row)
for (i in seq_len(nrow(metlData_tp))) {
met_name <- rownames(metlData_tp)[i]
# Skip if in the removal list
if (met_name %in% metabolites_to_remove) {
cat("Skipping:", met_name, "(in removal list)\n")
next
}
group1 <- metlData_tp[i, group_vec == "NIF"]
group2 <- metlData_tp[i, group_vec == "IF"]
# print the metabolite name
cat("----- Testing:", met_name, "-----\n")
# print the raw values
cat(" NIF:", paste0(round(group1, 3), collapse = ", "), "\n")
cat(" IF:", paste0(round(group2, 3), collapse = ", "), "\n")
# run the t-test
tt <- t.test(group1, group2, paired = FALSE)
cat(sprintf(" t = %.3f, p = %.4g\n\n", tt$statistic, tt$p.value))
# append to results
results <- rbind(
results,
data.frame(
Metabolite = met_name,
t.stat = unname(tt$statistic),
p.value = tt$p.value,
stringsAsFactors = FALSE
#TODO: Need to use the process_results function here to add the results back to metabolite metadata
#TODO: results should be saved in appropriate folder, depending on loop iteration
)
)
}
# TODO: FDR‐corrected column - Should test different methods, but nothing seems significant in terms
# of adjusted p-value.
results$FDR <- p.adjust(results$p.value, method = "fdr")
#################################
# TODO: Need to add the a within arm testing (e.g. D5 vs F3; another loop section - see Statistical_testing.R )
#################################