-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
151 lines (130 loc) · 4.14 KB
/
main.cpp
File metadata and controls
151 lines (130 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <stdio.h>

#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Tallies how many characters of `source` compare equal to `target`.
// Returns 0 for an empty string.
int count_string_occurance(const string &source, const char &target) {
  int matches = 0;
  for (string::size_type pos = 0; pos < source.size(); ++pos) {
    // A comparison result is folded straight into the running total.
    matches += (source[pos] == target) ? 1 : 0;
  }
  return matches;
}
// Counts the positions in `source` where an uppercase 'C' is immediately
// followed by an uppercase 'G'. Strings shorter than two characters yield 0.
int count_cg(const string &source) {
  int pairs = 0;
  // Start at the second character so every step can look one position back.
  for (string::size_type pos = 1; pos < source.size(); ++pos) {
    if (source[pos - 1] == 'C' && source[pos] == 'G') {
      ++pairs;
    }
  }
  return pairs;
}
// Computes (cg / (c * g)) * size, i.e. the number of observed "CG" pairs
// relative to the number expected from the individual C and G counts in a
// window of `size` characters. Despite the name, the result is a plain ratio
// and is not scaled to 100.
//
// Parameters:
//   c    - number of 'C' characters in the window
//   g    - number of 'G' characters in the window
//   cg   - number of "CG" pairs in the window
//   size - length of the window
//
// Returns 0.0 when `c` or `g` is zero, because the ratio is undefined there
// (the plain formula would produce NaN or infinity).
double cpg_percentage(int &c, int &g, int &cg, int &size) {
  if (c == 0 || g == 0) {
    return 0.0;
  }
  // Multiply in double: an int product overflows once c * g exceeds INT_MAX.
  const double expected = static_cast<double>(c) * static_cast<double>(g);
  return (cg / expected) * size;
}
// Reads the whole file named `filename` and returns its contents as one
// string. No check is made that the file could be opened.
string slurp(const string &filename) {
  ifstream input(filename);
  ostringstream contents;
  // Streaming the file's buffer copies everything up to end-of-file at once.
  contents << input.rdbuf();
  return contents.str();
}
int main(int argc, char *argv[]) {
int c;
string filename;
bool fexists = false;
bool showtime = false;
if (argc > 1) {
for (c = 1; c < argc; c++) {
if (strcmp(argv[c], "-f") == 0) {
fexists = true;
filename = string(argv[c + 1]);
cout << "Running analysis with file input " + filename + "\n";
}
if (strcmp(argv[c], "-t") == 0) {
showtime = true;
}
}
}
if (fexists == false) {
cout << "Please provide a filename with the -f parameter";
}
clock_t begin_read = clock();
string file_to_analyze = slurp(filename);
clock_t end_read = clock();
double elapsed_secs_read = double(end_read - begin_read) / CLOCKS_PER_SEC;
// This approach will loop through the string 4 times.
// This sacrifices speed for readibility
// Compiler and other optimizations will get us close enough
clock_t begin_gcount = clock();
int gcount = count_string_occurance(file_to_analyze, 'G');
clock_t end_gcount = clock();
double elapsed_secs_gcount =
double(end_gcount - begin_gcount) / CLOCKS_PER_SEC;
clock_t begin_acount = clock();
int acount = count_string_occurance(file_to_analyze, 'A');
clock_t end_acount = clock();
double elapsed_secs_acount =
double(end_acount - begin_acount) / CLOCKS_PER_SEC;
clock_t begin_ccount = clock();
int ccount = count_string_occurance(file_to_analyze, 'C');
clock_t end_ccount = clock();
double elapsed_secs_ccount =
double(end_ccount - begin_ccount) / CLOCKS_PER_SEC;
clock_t begin_tcount = clock();
int tcount = count_string_occurance(file_to_analyze, 'T');
clock_t end_tcount = clock();
double elapsed_secs_tcount =
double(end_tcount - begin_tcount) / CLOCKS_PER_SEC;
double total = (double)file_to_analyze.size();
printf("Adenine (A) Count (Percentage): %i (%f%%) \n", acount,
(acount / total) * 100);
printf("Thymine (T) Count (Percentage): %i (%f%%) \n", tcount,
(tcount / total) * 100);
printf("Cytosine (C) Count (Percentage): %i (%f%%) \n", ccount,
(ccount / total) * 100);
printf("Guanine (G) Count (Percentage): %i (%f%%) \n", gcount,
(gcount / total) * 100);
if (showtime) {
printf("Read Time: %f \n"
"A Count Time: %f \n"
"T Count Time: %f \n"
"C Count Time: %f \n"
"G Count Time: %f \n",
elapsed_secs_read, elapsed_secs_acount, elapsed_secs_tcount,
elapsed_secs_ccount, elapsed_secs_gcount);
}
string string_to_analyze;
int window = 400;
int cg_count = 0;
int c_count = 0;
int g_count = 0;
double cg_percentage;
double occurance;
for (int i = 0; i < file_to_analyze.size(); i++) {
string_to_analyze = file_to_analyze.substr(i, window);
// printf("%s",string_to_analyze.c_str());
c_count = count_string_occurance(string_to_analyze, 'C');
g_count = count_string_occurance(string_to_analyze, 'G');
cg_percentage = (c_count + g_count) / (double)window;
if (cg_percentage > 0.5) {
// printf("String has %f%% CG - Greater than 50%%!\n", cg_percentage);
cg_count = count_cg(string_to_analyze);
occurance = cpg_percentage(c_count, g_count, cg_count, window);
if (occurance > 0.6) {
printf("Found possible CpG island from character %i to %i \n", i,
(i + window));
}
}
}
return 0;
}