-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakeBoxplot.R
More file actions
320 lines (306 loc) · 14.8 KB
/
MakeBoxplot.R
File metadata and controls
320 lines (306 loc) · 14.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#' MakeBoxplot.R
#'
#' Makes a boxplot from a set of flat file data,
#' created by the flat data making utility (name TBD), and writes it to an
#' SVG file.
#'
#' @param input Path to the .csv file holding the flat data. The columns read
#'   directly in this function are Xpos, Max, X11_color, units and Label; the
#'   whole data frame is also passed to gfdl_bxp_stats().
#' @param out.file Path of the .svg file to write.
#' @param args List of plotting options. Elements read here: ylab, title,
#'   xlim, ylim, in_margins, pixel_margins, xdim, ydim, plot_horizontal,
#'   x_axis_scale and show_subtitle. The string 'na' marks ylab, title, the
#'   margin settings and the dimensions as "not set"; xlim is treated as unset
#'   when it is NULL.
#' @return Called for its side effects (writes out.file, prints status lines).
#'   Invisibly returns the result of removing a stray Rplots.pdf, or NULL when
#'   no such file exists.
MakeBoxplot <- function(input, out.file, args){
  # Read in flat data as a data frame
  print(paste("Reading from", input))
  in.data <- read.csv(input, stringsAsFactors = FALSE)
  # Sort by position so boxes, colours and labels line up with at.vec
  in.data <- in.data[order(in.data$Xpos), ]
  at.vec <- in.data$Xpos
  box.col <- as.character(in.data$X11_color)

  if (args$ylab == 'na') {
    # No label given: use the first unit string that is not 'missing'
    box.y <- unique(in.data$units)
    box.y <- box.y[!(box.y %in% c('missing'))][1]
  } else {
    box.y <- args$ylab
  }
  if (args$title == 'na') {
    # Derive the title from this function's own argument, not from a global
    in.main <- paste("Boxplot derived from", input)
  } else {
    in.main <- args$title
  }

  # Calculate modified statistics
  # NOTE(review): gfdl_bxp_stats() and gfdl_bxp() are defined outside this
  # file section (presumably in s8_utils/gfdl_bplt.R) -- confirm.
  box.stats <- gfdl_bxp_stats(in.data)
  # Largest plotted value; its printed width offsets the value-axis label below
  ymax <- max(in.data$Max)

  if (!is.null(args$xlim)) {
    if (max(args$xlim) < max(in.data$Xpos)) {
      message("Warning: Max of xlim less than highest ",
              "position value. Using highest position instead.")
      args$xlim[which.max(args$xlim)] <- max(in.data$Xpos)
    }
  }

  # Command-line argument margin setting
  # format is bottom,left,top,right
  if (args$in_margins[1] != 'na') {
    mai.set <- args$in_margins
    print("Margin settings in inches:")
    print(mai.set)
  } else if (args$pixel_margins[1] != 'na') {
    mai.set <- args$pixel_margins / 128
    print("Margin settings in pixels:")
    print(args$pixel_margins)
    print("Please note: Assumed DPI is 128")
  } else {
    mai.set <- c(1.5, 1.5, 0.75, 0.5)
    print("Margin settings in inches:")
    print(mai.set)
  }

  # Set the dimensions of the output image (plot + margins), if needed
  if (args$xdim == 'na' && args$ydim == 'na') {
    print('setting x and y dimensions of output image')
    # Number of box spaces plus the x margins, L and R. The two margins are
    # summed so the result is a single width rather than a length-2 vector.
    args$xdim <- (max(at.vec) / 2) + sum(mai.set[c(2, 4)])
    # Length of y range of plot, plus y margins, bottom and top
    args$ydim <- 6 + sum(mai.set[c(1, 3)])
  }

  # Whisker, staple and box outlines are drawn at twice the current line width.
  # NOTE(review): querying par() with no device open starts the default
  # device, which is presumably where the Rplots.pdf removed below comes from.
  box.wlwd <- par()[['lwd']] * 2

  ###############
  ### Start setting plotting parameters
  print(paste("Writing to", out.file))
  print(paste0("Dimensions of output image, in inches: ",
               args$xdim, ",", args$ydim))
  svg(out.file, onefile = TRUE,
      width = args$xdim, height = args$ydim,
      pointsize = 10)
  # Note: par() must be called WHILE the svg device is open, otherwise the
  # device resets the parameters when it starts.
  # cex.lab affects the y, but not x, labels
  par(mai = mai.set, cex.main = 1.2, cex.lab = 1)
  gfdl_bxp(box.stats,
           main = in.main,
           ylim = args$ylim,
           xlim = args$xlim,
           horizontal = args$plot_horizontal,
           show.names = FALSE,
           pars = list(boxfill = box.col,
                       mai = mai.set,
                       whisklwd = box.wlwd, whisklty = 'dotted',
                       staplelwd = box.wlwd,
                       boxlwd = box.wlwd),
           at = at.vec,
           las = 2)

  if (args$plot_horizontal) {
    # Bins run along the left axis; ticks without labels go on the top axis
    axis(2, at = at.vec, tick = TRUE, labels = in.data$Label,
         cex.axis = args$x_axis_scale, las = 2)
    axis(3, labels = FALSE, tick = TRUE)
    # Add x label, pushed out according to the printed width of ymax
    mtext_x <- (nchar(ymax) / 2 + 1)
    mtext(box.y, side = 1, line = mtext_x)
    if (args$show_subtitle) {
      mtext(side = 3,
            "From left to right: min,5p,25p,50p,75p,95p,max *AVG", line = 3)
    }
  } else {
    # Bins run along the bottom axis; ticks without labels go on the right axis
    axis(1, at = at.vec, tick = TRUE, labels = in.data$Label,
         cex.axis = args$x_axis_scale, las = 2)
    axis(4, labels = FALSE, tick = TRUE)
    # Add y label, pushed out according to the printed width of ymax
    mtext_y <- (nchar(ymax) / 2 + 1)
    mtext(box.y, side = 2, line = mtext_y)
    if (args$show_subtitle) {
      mtext(side = 3,
            "From bottom to top: min,5p,25p,50p,75p,95p,max *AVG", line = 3)
    }
  }
  dev.off() # setting for single file

  if (file.exists("Rplots.pdf")) {
    # Removes the empty Rplots.pdf in the current working dir
    out.status <- file.remove("Rplots.pdf")
  }
}
################################################################################
## Bootstrap: locate the toolkit root and load its helper scripts

# S8_PATH names the toolkit root; when it is not set, assume the current
# working directory is the correct one.
base.path <- Sys.getenv("S8_PATH")
if (!nzchar(base.path)) {
  base.path <- getwd()
}

# Note: if you use this structure, it is VITAL that you do not include ANY
# scripts in the directories that you are sourcing. Every .R file found there
# is executed, and you will be frustrated.
out <- source(paste0(base.path, '/s8_utils/LoadRPackages.R'))
out <- sapply(
  list.files(path = paste0(base.path, '/FudgeIO/'),
             pattern = "[.]R$", full.names = TRUE),
  source
)
out <- sapply(
  list.files(path = paste0(base.path, '/s8_utils/'),
             pattern = "[.]R$", full.names = TRUE),
  source, .GlobalEnv
)
out <- sapply(
  list.files(path = paste0(base.path, '/shared_code/'),
             pattern = "[.]R$", full.names = TRUE),
  source, .GlobalEnv
)
source(paste0(base.path, "/s8_utils/gfdl_bplt.R"))

### Main method of the script
# Raw trailing command-line arguments, kept as a list
args <- as.list(commandArgs(trailingOnly = TRUE))
#' Parse the command-line options understood by the boxplot script.
#'
#' Builds the optparse option list, usage string, description and epilogue,
#' then parses the supplied arguments.
#'
#' @param arg.list List or character vector of command-line arguments, e.g.
#'   as.list(commandArgs(trailingOnly = TRUE)). An empty list yields the
#'   defaults of every option.
#' @return The value of optparse's parse_args(): a list with one element per
#'   option (input, output, title, ylab, ylim, xlim, pixel_dim, in_dim,
#'   pixel_margins, in_margins, x_axis_scale, plot_horizontal, show_subtitle,
#'   verbose) plus the help flag optparse adds itself.
ParseBoxArgs <- function(arg.list){
  option_list <- list(
    # Input and output options first
    make_option(c("-i", "--input"), action="store", default='na',
                dest='input',
                help=paste("Input to the script in the form of a single",
                           ".csv file.")),
    make_option(c("-o", "--output"), action="store", default="",
                help=paste("Image output from the script; will be in the form",
                           "of a .svg file")),
    make_option(c("--title"), action="store", default='na',
                help=paste("The title for the output plot. If absent,",
                           "title will be derived from the input file.")),
    make_option(c("--ylab"), action="store", default='na',
                help=paste("The label for the axis of the plot along which the",
                           "boxplots extend (usually the y-axis). If not",
                           "set, y-label will be derived from the units",
                           "in the input file.")),
    make_option(c("--ylim"), action="store", default='na',
                help=paste("The limits of the y-axis, set as a comma-separated",
                           "string ('10,20').",
                           "If not set, R will set limits from input data.")),
    # The plotting code replaces an upper limit that falls below the highest
    # position in the input file, so the help text says exactly that.
    make_option(c("--xlim"), action="store", default='na',
                help=paste("The limits of the x-axis, set as a comma-separated",
                           "string ('0,8'), and used with the position data",
                           "in the input file. If upper limit is lower than the",
                           "highest position in input file, the highest",
                           "position will be used instead.",
                           "Please note that for normal xlim setting, the first value is 0.")),
    make_option(c("--pixel_dim"), action="store", default='na',
                help=paste("The dimensions of the output image, in pixels,",
                           "of the form '${xdim}x${ydim}'",
                           "Assumed output is 128 dpi; this may not be the",
                           "case for all systems, but seems to work for now.",
                           "Script will throw an error if set along with --in_dim;",
                           "If not set and no other dimension param is set,",
                           "the script deduces the likely dimensions of the plot.")),
    make_option(c("--in_dim"), action="store", default='na',
                help=paste("The dimensions of the output image, in inches,",
                           "of the form 'xinches,yinches'",
                           "This feeds directly into the display dimensions for",
                           "R's svg graphics device.",
                           "Script will throw an error if set along with --pixel_dim;",
                           "If not set and no other dimension param is set,",
                           "the script deduces the likely dimensions of the plot.")),
    make_option(c("--pixel_margins"), action='store', default='na',
                help=paste("The margins of the image, in pixels, in the form",
                           "bottom,left,top,right (4 comma-separated vals).",
                           "Assumed output is 128 dpi; if set along with",
                           "--in_margins, the script throws an error.",
                           "If neither margins option is set, script uses",
                           "defaults (in pixels) of 192,192,96,64")),
    make_option(c("--in_margins"), action='store', default='na',
                help=paste("The margins of the image, in inches, in the form",
                           "bottom,left,top,right (4 comma-separated values).",
                           "If set along with --pixel_margins, the script throws",
                           "an error.",
                           "If not set, uses defaults (in inches) of",
                           "1.5,1.5,0.75,0.5")),
    make_option(c("--x_axis_scale"), action='store', default=1,
                help=paste("The scaling to use for the labels on the axis",
                           "along which the bins lie/are spaced",
                           "(usually the x-axis)",
                           "as a percentage of the default font size.",
                           "Default is 1; 0.5 gives half the font size,",
                           "and 2 gives twice the font size.")),
    make_option(c("--plot_horizontal"), action='store_true', default=FALSE,
                help=paste("Whether to plot the boxplots horizontally,",
                           "rather than vertically. Note that regardless of",
                           "orientation, x_axis_scale refers to the axis",
                           "along which the bins lie, while ylab refers to the",
                           "axis along which the box plots extend (in units of",
                           "the original climate var).")),
    make_option(c("--show_subtitle"), action='store_true', default=FALSE,
                help=paste("Whether to plot a key describing the parts of the",
                           "boxplot beneath the title. Going from top to bottom",
                           "(and excluding the *), we have the max, 95th, 75th,",
                           "50th, 25th and 5th percentiles and min; the * is",
                           "the average. Defaults to FALSE.")),
    # store_true makes this a bare flag, so the help no longer claims that it
    # takes a boolean argument.
    make_option(c("--verbose"), action="store_true", default=FALSE,
                help=paste("Whether to print extra status messages to facilitate debugging.",
                           "Defaults to FALSE; this is a flag and takes no argument."))
  )
  description <- paste('Given a file of comma-separated data as produced by CSVBoxStatsScraper.py,',
                       'creates a series of boxplots from the results and saves the result as a .svg image.',
                       "These boxplots have hollow circles for the min and the",
                       "max, whiskers for the 5th and 95th percentiles, a box",
                       "over the 25th to 75th percentiles, an asterisk for the",
                       "mean and a bar for the median.")
  epilogue <- "Please note: flags may be specified in any order, and '=' not required to specify strings."
  usage <- paste("usage: %prog -i input -o output [--title title] [--ylab ylab]",
                 "[--ylim '10,20'] [--xlim '0,8']",
                 "[--pixel_dim '1024x800' | --in_dim '5,7' ]",
                 "[--pixel_margins 128,128,96,64 | --in_margins 1,1,0.7,0.5]",
                 "[--x_axis_scale 0.5]",
                 "[--plot_horizontal] [--show_subtitle]",
                 "[--verbose] [-h --help]")
  parser <- OptionParser(option_list = option_list, usage = usage,
                         description = description, epilogue = epilogue)
  # Parse the arguments that were actually passed in. as.character() turns an
  # empty list into character(0), the same value commandArgs() gives when no
  # arguments were supplied.
  parse_args(parser, args = as.character(unlist(arg.list)))
}
# Build input.params either from the command line or, when the script is run
# without arguments (presumably from inside R), from hard-coded development
# defaults.
if (length(args) > 0) {
  input.params <- ParseBoxArgs(args)
  #parsed.args <- CheckStatArgs(parsed.args)
} else {
  # Start from the option defaults, then override them one by one
  arg.list <- list()
  input.params <- ParseBoxArgs(arg.list)
  input.params$output <- "/home/cew/Pictures/boxplot_ii.svg"
  input.params$input <- "/home/cew/Code/section_8_tools/dev/modified_scraper_example.csv"
  input.params$title <- "Degree Days by dataset" #'na'
  input.params$ylab <- "Total Degree Days Farenheight"
  input.params$ylim <- 'na' #'2000,4000'
  input.params$xlim <- 'na' #"-1,5"
  input.params$in_dim <- "8,5" #"9,12"
  input.params$pixel_dim <- 'na'
  input.params$in_margins <- "na"
  input.params$pixel_margins <- "na"
  input.params$x_axis_scale <- 1
  input.params$plot_horizontal <- FALSE
  input.params$show_subtitle <- TRUE
  input.params$verbose <- TRUE
}

# Check for the correct suffix on output. The pattern is anchored with $ so
# that ".svg" has to be the end of the name, not just appear somewhere in it.
if (!grepl("\\.svg$", input.params$output)) {
  stop(paste("Error in MakeBoxplot: output should have a .svg suffix!"))
}
if (!file.exists(dirname(input.params$output))) {
  stop(paste("Error in MakeBoxplot: output dir",
             dirname(input.params$output), 'does not exist!'))
}

# Turn the comma-separated axis limits into numeric vectors. gsub() strips
# every space, not only the first. Assigning NULL drops the element, which is
# how MakeBoxplot recognises an unset limit.
if (input.params$ylim != 'na') {
  input.params$ylim <- as.numeric(
    unlist(strsplit(gsub(" ", "", input.params$ylim), ",")))
} else {
  input.params$ylim <- NULL
}
if (input.params$xlim != 'na') {
  input.params$xlim <- as.numeric(
    unlist(strsplit(gsub(" ", "", input.params$xlim), ",")))
} else {
  input.params$xlim <- NULL
}

# Adjust input dimensions, if applicable. xdim/ydim stay 'na' when neither
# option is given, so that MakeBoxplot works the size out itself.
if (input.params$in_dim == 'na' && input.params$pixel_dim == 'na') {
  input.params[c('xdim', 'ydim')] <- 'na'
} else if (input.params$in_dim != 'na' && input.params$pixel_dim != 'na') {
  stop(paste("Error in MakeBoxplot: only one dimension parameter",
             "should be specified!"))
} else if (input.params$pixel_dim != 'na') {
  # Pixels are converted to inches at the assumed 128 dpi
  pix_to_in <- unlist(strsplit(input.params$pixel_dim, 'x'))
  input.params[c('xdim', 'ydim')] <- as.numeric(pix_to_in) / 128
} else {
  in_dim <- unlist(strsplit(gsub(" ", "", input.params$in_dim), ','))
  input.params[c('xdim', 'ydim')] <- as.numeric(in_dim)
}

# Check margins (the pixel-to-inch adjustment happens inside MakeBoxplot).
# && is used because these are scalar tests inside if().
if (input.params$in_margins != 'na' && input.params$pixel_margins != 'na') {
  stop(paste("Error in MakeBoxplot: Only one of in_margins and",
             "pixel_margins should be specified!"))
} else if (input.params$in_margins != 'na') {
  input.params$in_margins <- as.numeric(
    unlist(strsplit(gsub(" ", "", input.params$in_margins), split = ",")))
} else if (input.params$pixel_margins != 'na') {
  input.params$pixel_margins <- as.numeric(
    unlist(strsplit(gsub(" ", "", input.params$pixel_margins), split = ",")))
}

MakeBoxplot(input.params$input, input.params$output, input.params)