-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsplit_ragoo.R
More file actions
executable file
·58 lines (37 loc) · 1.15 KB
/
split_ragoo.R
File metadata and controls
executable file
·58 lines (37 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/Rscript
args <- commandArgs(TRUE)
if (length(args) == 0L || any(c('-h', '--help') %in% args)) {
message('usage: path/to/split_ragoo.R input output
input Ragoo output.
output Name of outfile
-h, --help to print help messages')
q('no')
}
library(Biostrings)
library(tidyverse)
ragoo <- readDNAStringSet(args[1])
t <- Biostrings::strsplit(x = ragoo, split = "N")
chr1_split <- t$Chr0_RaGOO
chr_rest <- ragoo[-1]
chr_all <- c(chr_rest, chr1_split)
names(chr_all) <- seq(1, length(chr_all))
sort_df <- data.frame(names = names(chr_all), width = width(chr_all)) %>% arrange(-width)
chr_sort <- chr_all[sort_df$names]
names(chr_sort) <- paste0('node_', seq(1, length(chr_sort)), "_length_", width(chr_sort))
### remove sequences with only Ns
all_n_seq <- sapply(seq(1, length(chr_sort)), function(y){
x <- chr_sort[y]
t <- alphabetFrequency(x)
if(t[,'N'] == width(x)){
return(y)
} else {
return(NA)
}
})
all_ns <- na.omit(all_n_seq)
if(length(all_ns) >0){
chr_final <- chr_sort[-all_ns]
} else {
chr_final <- chr_sort
}
writeXStringSet(chr_final, args[2])