Skip to content

Commit 2562bc6

Browse files
committed
create SortGFF command line version
This tool did not implement JFrame elements in the script class, so the CLI implementation did not need to create a src/window_interface/*/SortGFFOutput class. There is a quirk with the BEDtoGFF tool: it adds a semicolon to the end of the last column, which holds the feature ID. To make this tool compatible with that GFF output, I split the last GFF column on the semicolon and took the first token as the ID. I also moved the parseCDT method, originally in the window_interface/Coordinate_Manipulation/*/Sort* classes, to the util/CDTUtilities class. The parseCDT method is used to get the number of items in a row of the input CDT file and to check the formatting for consistent row sizes. Since both the SortBED and SortGFF tools use the same method, this commit merges them into a single copy that can be referenced from a general utilities class. This will make updates easier if this method needs to be adjusted in the future. util/CDTUtilities: - move parseCDT method from SortGFFWindow class to the CDTUtilities class; - add properties to the class to save information on consistent row sizes; - add methods to retrieve these values. NOTE: static methods of this class appear to be unused or copied into another class; consider retiring. window_interface/*Window: - adjust output argument when initializing script class to be the full file basename and not just the directory; - lots of differences in this commit are a matter of re-indentation (tabs); - rename BED-related variable names to be consistent with GFF variable naming; - update CDT file loading to use the CDTUtilities validation method; - remove the local parseCDTFile() method since the CDTUtilities version is now used. script/*: - use output as file basename rather than directory name; - temporary fix using String's split() method on the semicolon. cli/*: - call appropriate script class; - write up picocli parsing objects; - write up validateInput() method to check inputs and write messages for the user; - import CDTUtilities and parse CDT input to check consistent rows as a part of validateInput().
1 parent a09b9a0 commit 2562bc6

4 files changed

Lines changed: 181 additions & 82 deletions

File tree

src/cli/Coordinate_Manipulation/GFF_Manipulation/SortGFFCLI.java

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
import java.io.FileNotFoundException;
1313

1414
import objects.ToolDescriptions;
15+
import util.CDTUtilities;
1516
import util.ExtensionFileFilter;
16-
//import scripts.Coordinate_Manipulation.GFF_Manipulation.SortGFF;
17+
import scripts.Coordinate_Manipulation.GFF_Manipulation.SortGFF;
1718

1819
/**
1920
Coordinate_ManipulationCLI/SortGFFCLI
@@ -25,6 +26,22 @@
2526
exitCodeOnExecutionException = 1)
2627
public class SortGFFCLI implements Callable<Integer> {
2728

29+
@Parameters( index = "0", description = "the GFF file to sort")
30+
private File gffFile;
31+
@Parameters( index = "1", description = "the reference CDT file to sort the input by")
32+
private File cdtFile;
33+
34+
@Option(names = {"-o", "--output"}, description = "specify output file basename with no .cdt/.gff/.jtv extension (default=<gffFile>_SORT")
35+
private String outputBasename = null;
36+
@Option(names = {"-c", "--center"}, description = "sort by center on the input size of expansion in bins (default=100)")
37+
private int center = -999;
38+
@Option(names = {"-x", "--index"}, description = "sort by index from the specified start to the specified stop (0-indexed and half-open interval)",
39+
arity = "2")
40+
private int[] index = {-999, -999};
41+
42+
private int CDT_SIZE = -999;
43+
private boolean byCenter = false;
44+
2845
@Override
2946
public Integer call() throws Exception {
3047
System.err.println( ">SortGFFCLI.call()" );
@@ -35,16 +52,73 @@ public Integer call() throws Exception {
3552
System.exit(1);
3653
}
3754

38-
//SEStats.getSEStats( output, bamFile, null );
55+
if( byCenter ){
56+
index[0] = (CDT_SIZE / 2) - (center / 2);
57+
index[1] = (CDT_SIZE / 2) + (center / 2);
58+
}
59+
60+
SortGFF.sortGFFbyCDT(outputBasename, gffFile, cdtFile, index[0], index[1]);
3961

40-
//System.err.println("Calculations Complete");
62+
System.err.println("Sort Complete");
4163
return(0);
4264
}
4365

4466
private String validateInput() throws IOException {
4567
String r = "";
46-
//validate input here
47-
//append messages to the user to `r`
68+
69+
//check inputs exist
70+
if(!gffFile.exists()){
71+
r += "(!)GFF file does not exist: " + gffFile.getName() + "\n";
72+
}
73+
if(!cdtFile.exists()){
74+
r += "(!)CDT file does not exist: " + cdtFile.getName() + "\n";
75+
}
76+
if(!"".equals(r)){ return(r); }
77+
//check input extensions
78+
if(!"gff".equals(ExtensionFileFilter.getExtension(gffFile))){
79+
r += "(!)Is this a GFF file? Check extension: " + gffFile.getName() + "\n";
80+
}
81+
if(!"cdt".equals(ExtensionFileFilter.getExtension(cdtFile))){
82+
r += "(!)Is this a CDT file? Check extension: " + cdtFile.getName() + "\n";
83+
}
84+
// validate CDT as file, with consistent row size, and save row_size value
85+
try {
86+
CDTUtilities cdt_obj = new CDTUtilities();
87+
cdt_obj.parseCDT(cdtFile);
88+
if( cdt_obj.isValid() ){ CDT_SIZE = cdt_obj.getSize(); }
89+
else{ r += "(!)CDT file doesn't have consistent row sizes. " + cdt_obj.getInvalidMessage(); }
90+
}catch (FileNotFoundException e1){ e1.printStackTrace(); }
91+
92+
//set default output filename
93+
if(outputBasename==null){
94+
outputBasename = ExtensionFileFilter.stripExtension(gffFile) + "_SORT";
95+
//check output filename is valid
96+
}else{
97+
//no extension check
98+
//check directory
99+
File BASEFILE = new File(outputBasename);
100+
if(BASEFILE.getParent()==null){
101+
// System.err.println("default to current directory");
102+
} else if(!new File(BASEFILE.getParent()).exists()){
103+
r += "(!)Check output directory exists: " + BASEFILE.getParent() + "\n";
104+
}
105+
}
106+
107+
// Set Center if Index not given
108+
if( index[0]==-999 && index[1]==-999 ) { byCenter = true; }
109+
// Center Specified
110+
if( byCenter ){
111+
if( center==-999 ){ center = 100; }
112+
else if( center<0 ){
113+
r += "(!)Invalid --center input, must be a positive integer value.";
114+
}
115+
// Index Specified
116+
}else{
117+
if( index[0]<0 || index[1]>CDT_SIZE || index[0]>index[1] ){
118+
r += "(!)Invalid --index value input, check that start>0, stop<CDT row size, and start<stop.";
119+
}
120+
}
121+
48122
return(r);
49123
}
50-
}
124+
}

src/scripts/Coordinate_Manipulation/GFF_Manipulation/SortGFF.java

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package scripts.Coordinate_Manipulation.GFF_Manipulation;
22

33
import java.io.File;
4+
import java.io.FileNotFoundException;
45
import java.io.IOException;
56
import java.io.PrintStream;
67
import java.util.ArrayList;
@@ -37,10 +38,10 @@ public static void sortGFFbyCDT(String outname, File gff, File cdt, int START_IN
3738
//Output sorted CDT File
3839
String newCDT = outname + ".cdt";
3940
PrintStream OUT = new PrintStream(newCDT);
40-
OUT.println(CDTHeader);
41-
for(int x = 0; x < SORT.size(); x++) {
42-
OUT.println(CDTFile.get(SORT.get(x).getName()));
43-
}
41+
OUT.println(CDTHeader);
42+
for(int x = 0; x < SORT.size(); x++) {
43+
OUT.println(CDTFile.get(SORT.get(x).getName()));
44+
}
4445
OUT.close();
4546
CDTFile = null; //Free up memory by getting CDT file out of memory
4647
JTVOutput.outputJTV(outname, "green");
@@ -50,18 +51,19 @@ public static void sortGFFbyCDT(String outname, File gff, File cdt, int START_IN
5051
scan = new Scanner(gff);
5152
while (scan.hasNextLine()) {
5253
String line = scan.nextLine();
53-
String ID = line.split("\t")[8];
54+
String ID = line.split("\t")[8].split(";")[0];
5455
if(!ID.contains("YORF") && !ID.contains("NAME")) {
5556
GFFFile.put(ID, line);
5657
}
5758
}
5859
scan.close();
5960
//Output sorted GFF File
60-
String newGFF = outname +".gff";
61-
OUT = new PrintStream(newGFF);
62-
for(int x = 0; x < SORT.size(); x++) {
63-
OUT.println(GFFFile.get(SORT.get(x).getName()));
64-
}
65-
OUT.close();
61+
String newGFF = outname +".gff";
62+
OUT = new PrintStream(newGFF);
63+
for(int x = 0; x < SORT.size(); x++) {
64+
OUT.println(GFFFile.get(SORT.get(x).getName()));
65+
}
66+
OUT.close();
6667
}
67-
}
68+
69+
}

src/util/CDTUtilities.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,44 @@
1111
import java.util.Vector;
1212

1313
public class CDTUtilities {
14+
15+
private File FILENAME;
16+
private int SIZE;
17+
private boolean consistentSize;
18+
private String invalidMessage;
19+
20+
// This function is almost exactly copied from window/*/SortBEDWindow & scripts/*/SortBED & scripts/*/SortGFF...good practice to merge at some point.
21+
public void parseCDT(File CDT) throws FileNotFoundException {
22+
FILENAME = CDT;
23+
SIZE = -999;
24+
consistentSize = true;
25+
invalidMessage = "";
26+
27+
Scanner scan = new Scanner(CDT);
28+
int currentRow = 1;
29+
while (scan.hasNextLine()) {
30+
String[] temp = scan.nextLine().split("\t");
31+
if(!temp[0].contains("YORF") && !temp[0].contains("NAME")) {
32+
int tempsize = temp.length - 2;
33+
if(SIZE == -999) { SIZE = tempsize; }
34+
else if(SIZE != tempsize) {
35+
invalidMessage = "Invalid Row at Index: " + currentRow;
36+
consistentSize = false;
37+
break;
38+
}
39+
currentRow++;
40+
}
41+
}
42+
scan.close();
43+
}
44+
45+
public boolean isValid(){ return consistentSize; }
46+
47+
public int getSize(){ return SIZE; }
48+
49+
public String getInvalidMessage(){ return invalidMessage; }
50+
51+
1452
public static Vector<double[]> loadCDT(File input) throws FileNotFoundException {
1553
Vector<double[]> matrix = new Vector<double[]>();
1654
Scanner scan = new Scanner(input);

src/window_interface/Coordinate_Manipulation/GFF_Manipulation/SortGFFWindow.java

Lines changed: 49 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import javax.swing.SwingWorker;
3232
import javax.swing.border.EmptyBorder;
3333

34+
import util.CDTUtilities;
3435
import util.FileSelection;
3536
import scripts.Coordinate_Manipulation.GFF_Manipulation.SortGFF;
3637

@@ -74,42 +75,42 @@ public class SortGFFWindow extends JFrame implements ActionListener, PropertyCha
7475
private JLabel lblIndexStop;
7576

7677
class Task extends SwingWorker<Void, Void> {
77-
@Override
78-
public Void doInBackground() throws IOException {
79-
try {
80-
if(rdbtnSortbyCenter.isSelected() && Integer.parseInt(txtMid.getText()) > CDT_SIZE) {
81-
JOptionPane.showMessageDialog(null, "Sort Size is larger than CDT File!!!");
82-
} else if(rdbtnSortbyIndex.isSelected() && Integer.parseInt(txtStart.getText()) < 0) {
83-
JOptionPane.showMessageDialog(null, "Start Index is smaller than 0!!!");
84-
} else if(rdbtnSortbyIndex.isSelected() && Integer.parseInt(txtStop.getText()) > CDT_SIZE) {
85-
JOptionPane.showMessageDialog(null, "Start Index is smaller than 0!!!");
86-
} else {
87-
if(rdbtnSortbyCenter.isSelected()) {
88-
START_INDEX = (CDT_SIZE / 2) - (Integer.parseInt(txtMid.getText()) / 2);
89-
STOP_INDEX = (CDT_SIZE / 2) + (Integer.parseInt(txtMid.getText()) / 2);
90-
} else {
91-
START_INDEX = Integer.parseInt(txtStart.getText());
92-
STOP_INDEX = Integer.parseInt(txtStop.getText());
93-
}
94-
95-
String OUTPUT = txtOutput.getText();
96-
if(OUTPUT_PATH != null) { OUTPUT = OUTPUT_PATH.getCanonicalPath() + File.separator + txtOutput.getText(); }
97-
98-
setProgress(0);
99-
SortGFF.sortGFFbyCDT(OUTPUT, GFF_File, CDT_File, START_INDEX, STOP_INDEX);
78+
@Override
79+
public Void doInBackground() throws IOException {
80+
try {
81+
if(rdbtnSortbyCenter.isSelected() && Integer.parseInt(txtMid.getText()) > CDT_SIZE) {
82+
JOptionPane.showMessageDialog(null, "Sort Size is larger than CDT File!!!");
83+
} else if(rdbtnSortbyIndex.isSelected() && Integer.parseInt(txtStart.getText()) < 0) {
84+
JOptionPane.showMessageDialog(null, "Start Index is smaller than 0!!!");
85+
} else if(rdbtnSortbyIndex.isSelected() && Integer.parseInt(txtStop.getText()) > CDT_SIZE) {
86+
JOptionPane.showMessageDialog(null, "Stop Index is larger than CDT row size!!!");
87+
} else {
88+
if(rdbtnSortbyCenter.isSelected()) {
89+
START_INDEX = (CDT_SIZE / 2) - (Integer.parseInt(txtMid.getText()) / 2);
90+
STOP_INDEX = (CDT_SIZE / 2) + (Integer.parseInt(txtMid.getText()) / 2);
91+
} else {
92+
START_INDEX = Integer.parseInt(txtStart.getText());
93+
STOP_INDEX = Integer.parseInt(txtStop.getText());
94+
}
95+
96+
String OUTPUT = txtOutput.getText();
97+
if(OUTPUT_PATH != null) { OUTPUT = OUTPUT_PATH.getCanonicalPath() + File.separator + txtOutput.getText(); }
98+
99+
setProgress(0);
100+
SortGFF.sortGFFbyCDT(OUTPUT, GFF_File, CDT_File, START_INDEX, STOP_INDEX);
100101
setProgress(100);
101102
JOptionPane.showMessageDialog(null, "Sort Complete");
102-
}
103-
} catch(NumberFormatException nfe){
103+
}
104+
} catch(NumberFormatException nfe){
104105
JOptionPane.showMessageDialog(null, "Invalid Input in Fields!!!");
105106
}
106107
return null;
107-
}
108-
109-
public void done() {
110-
massXable(contentPane, true);
111-
setCursor(null); //turn off the wait cursor
112-
}
108+
}
109+
110+
public void done() {
111+
massXable(contentPane, true);
112+
setCursor(null); //turn off the wait cursor
113+
}
113114
}
114115

115116
public SortGFFWindow() {
@@ -191,7 +192,7 @@ public void actionPerformed(ActionEvent e) {
191192
contentPane.add(lblSizeOfExpansion);
192193

193194
txtOutput = new JTextField();
194-
sl_contentPane.putConstraint(SpringLayout.EAST, txtOutput, 0, SpringLayout.EAST, progressBar);
195+
sl_contentPane.putConstraint(SpringLayout.EAST, txtOutput, -15, SpringLayout.EAST, contentPane);
195196
txtOutput.setEnabled(false);
196197
contentPane.add(txtOutput);
197198
txtOutput.setColumns(10);
@@ -279,9 +280,9 @@ public void itemStateChanged(ItemEvent e) {
279280
sl_contentPane.putConstraint(SpringLayout.NORTH, btnLoadGFFFile, 10, SpringLayout.NORTH, contentPane);
280281
btnLoadGFFFile.addActionListener(new ActionListener() {
281282
public void actionPerformed(ActionEvent e) {
282-
File newBEDFile = FileSelection.getFile(fc,"gff");
283-
if(newBEDFile != null) {
284-
GFF_File = newBEDFile;
283+
File newGFFFile = FileSelection.getFile(fc,"gff");
284+
if(newGFFFile != null) {
285+
GFF_File = newGFFFile;
285286
lblGFFFile.setText(GFF_File.getName());
286287
txtOutput.setEnabled(true);
287288
String sortName = (GFF_File.getName()).substring(0, GFF_File.getName().length() - 4) + "_SORT";
@@ -300,11 +301,19 @@ public void actionPerformed(ActionEvent e) {
300301
public void actionPerformed(ActionEvent e) {
301302
File newCDTFile = FileSelection.getFile(fc,"cdt");
302303
if(newCDTFile != null) {
303-
CDT_File = newCDTFile;
304-
lblCDTFile.setText(CDT_File.getName());
305-
306-
try { CDT_VALID = parseCDTFile(CDT_File);
304+
try {
305+
CDT_File = newCDTFile;
306+
lblCDTFile.setText(CDT_File.getName());
307+
308+
CDTUtilities cdt_obj = new CDTUtilities();
309+
cdt_obj.parseCDT(CDT_File);
310+
CDT_SIZE = cdt_obj.getSize();
311+
CDT_VALID = cdt_obj.isValid();
312+
String message = cdt_obj.getInvalidMessage();
313+
System.err.println(CDT_File.getCanonicalPath() + ": " + message);
314+
if(!message.equals("")) { JOptionPane.showMessageDialog(null, message);}
307315
} catch (FileNotFoundException e1) { e1.printStackTrace(); }
316+
catch (IOException e2) { e2.printStackTrace(); }
308317

309318
if(CDT_VALID) {
310319
lblColumnCount.setText("Column Count: " + CDT_SIZE);
@@ -361,28 +370,4 @@ public void massXable(Container con, boolean status) {
361370
}
362371
}
363372
}
364-
365-
public boolean parseCDTFile(File CDT) throws FileNotFoundException {
366-
Scanner scan = new Scanner(CDT);
367-
int currentSize = -999;
368-
boolean consistentSize = true;
369-
int currentRow = 1;
370-
while (scan.hasNextLine()) {
371-
String[] temp = scan.nextLine().split("\t");
372-
if(!temp[0].contains("YORF") && !temp[0].contains("NAME")) {
373-
int tempsize = temp.length - 2;
374-
if(currentSize == -999) { currentSize = tempsize; }
375-
else if(currentSize != tempsize) {
376-
JOptionPane.showMessageDialog(null, "Invalid Row at Index: " + currentRow);
377-
consistentSize = false;
378-
scan.close();
379-
}
380-
currentRow++;
381-
}
382-
}
383-
scan.close();
384-
CDT_SIZE = currentSize;
385-
if(consistentSize) { return true; }
386-
else { return false; }
387-
}
388-
}
373+
}

0 commit comments

Comments
 (0)