-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNearDuplicates.java
More file actions
27 lines (24 loc) · 852 Bytes
/
NearDuplicates.java
File metadata and controls
27 lines (24 loc) · 852 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import java.io.IOException;
import java.util.ArrayList;
/**
 * Command-line driver that prints the near-duplicates of one document.
 *
 * <p>Expects exactly five arguments, in this order: folder, number of
 * permutations, number of bands, threshold, document name. The heavy lifting
 * is delegated to {@code MinHash} and {@code LSH}, which are declared
 * elsewhere in the project.
 */
public class NearDuplicates {

    /**
     * Parses the command line, builds the min-hash matrix and the LSH
     * structure from it, and prints each surviving candidate on its own line.
     *
     * <p>When the argument count is not five, a usage message is printed to
     * standard output and nothing else happens.
     *
     * @param args folder, permutations, bands, threshold, document (in that order)
     * @throws IOException declared for the project helpers invoked here
     * @throws NumberFormatException if permutations, bands or threshold is not numeric
     */
    public static void main(String[] args) throws IOException {
        // Guard clause: wrong argument count -> usage text, then stop.
        if (args.length != 5) {
            System.out.println("INVALID ARGUMENTS - Arguments Order: Folder, Permutations, Bands, Threshold, Document");
            return;
        }

        // Positional arguments, parsed in the order they are given.
        String corpusFolder = args[0];
        int numPermutations = Integer.parseInt(args[1]);
        int numBands = Integer.parseInt(args[2]);
        double threshold = Double.parseDouble(args[3]);
        String targetDocument = args[4];

        // Build the signature matrix and the matching document-name list.
        MinHash minHash = new MinHash(corpusFolder, numPermutations);
        int[][] signatureMatrix = minHash.minHashMatrix();
        String[] documentNames = minHash.allDocs();

        // Collect candidates, then pass them through the false-positive filter
        // (presumably a similarity check against the threshold; see LSH).
        LSH index = new LSH(signatureMatrix, documentNames, numBands);
        ArrayList<String> candidates = index.nearDuplicatesOf(targetDocument);
        ArrayList<String> confirmed = index.falsePositives(candidates, targetDocument, threshold);

        confirmed.forEach(System.out::println);
    }
}