-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplot_map_sizes.R
More file actions
executable file
·35 lines (33 loc) · 1.47 KB
/
plot_map_sizes.R
File metadata and controls
executable file
·35 lines (33 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env Rscript
filesizes <- read.table("filesizes", header=FALSE, sep='\t')
filesizes$name <- filesizes[,1]
filesizes$read1 <- filesizes[,2]
filesizes$read2 <- filesizes[,3]
filesizes$mapped <- filesizes[,4]
quantiles <- quantile(filesizes$mapped)
lowerq <- quantiles[2]
upperq <- quantiles[4]
iqr <- IQR(filesizes$mapped)
mild.threshold.upper <- (iqr * 1.5) + upperq
mild.threshold.lower <- lowerq - (iqr * 1.5)
extreme.threshold.upper <- (iqr * 3) + upperq
extreme.threshold.lower <- lowerq - (iqr * 3)
outliers.minor <- filesizes$name[filesizes$mapped > mild.threshold.upper | filesizes$mapped < mild.threshold.lower]
outliers.extreme <- filesizes$name[filesizes$mapped > extreme.threshold.upper | filesizes$mapped < extreme.threshold.lower]
cat("Minor Outliers:", sprintf("%s", outliers.minor), "\n", sep=" ")
cat("Extreme Outliers:", sprintf("%s", outliers.extreme), "\n", sep=" ")
library(scatterplot3d)
png("3D_Filesizes.png", width=1000, height=1000)
scatterplot3d(filesizes$read1, filesizes$read2, filesizes$mapped, main="Filesizes")
filesizes$pe <- filesizes$read1 + filesizes$read2
png("2D_Filesizes.png", width=1000, height=1000)
plot(filesizes$pe, filesizes$mapped)
A = log2(filesizes$pe)
B = log2(filesizes$pe)
model <- lm(A ~ B)
filesizes$residuals <- residuals(model)
outliers.threshold.regression <- 0.4
outliers.regression <- filesizes$name[filesizes$residuals > outliers.threshold.regression]
png("2D_log2_filesizes.png", width=1000, height=1000)
plot(A ~ B)
abline(model)