-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenize.cpp
More file actions
64 lines (61 loc) · 1.94 KB
/
tokenize.cpp
File metadata and controls
64 lines (61 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <algorithm>
#include <iterator>
using namespace std::literals;
// Reads whitespace-separated tokens from words.txt, strips leading/trailing
// non-letter characters, lowercases, drops tokens that still contain embedded
// non-letters, de-duplicates, and writes the sorted result to words/all.txt
// plus one file per starting letter (words/a.txt .. words/z.txt).
// Prints the final word count to stdout.
// Returns 0 on success, 1 if words.txt cannot be opened.
int main(){
    std::ifstream ifs("words.txt");
    if(!ifs){ // fail early instead of silently printing 0 and writing empty files
        std::cerr << "error: cannot open words.txt" << std::endl;
        return 1;
    }
    std::vector<std::string> words;
    std::string word;
    while(ifs >> word){
        // Locate the first and last ASCII letters; lowercase in passing.
        std::size_t left = 0, right = 0;
        bool leftFound = false;
        for(std::size_t i = 0; i < word.size(); i++){
            const bool isLower = word[i] >= 'a' && word[i] <= 'z';
            const bool isUpper = word[i] >= 'A' && word[i] <= 'Z';
            if(isLower || isUpper){
                if(!leftFound){
                    leftFound = true;
                    left = i;
                }
                right = i;
                if(isUpper){
                    word[i] += ('a' - 'A'); // normalize to lowercase
                }
            }
        }
        if(!leftFound) continue; // token contained no letters at all
        word = word.substr(left, right - left + 1);
        // Reject tokens with embedded non-letters (digits, apostrophes, ...).
        const bool isValid = std::all_of(word.begin(), word.end(),
            [](char c){ return c >= 'a' && c <= 'z'; });
        if(isValid) words.push_back(word);
    }
    // Sort, then drop adjacent duplicates.
    std::sort(words.begin(), words.end());
    words.erase(std::unique(words.begin(), words.end()), words.end());
    std::cout << words.size() << std::endl;
    // Default ofstream mode truncates, so reruns overwrite stale output.
    // (The original opened with std::ios::ate, which preserves old file
    // contents and seeks to the end — leftovers from previous runs survived
    // and new output was appended after them.)
    std::ofstream allOfs("words/all.txt");
    std::copy(words.begin(), words.end(),
              std::ostream_iterator<std::string>(allOfs, "\n"));
    // Emit one file per initial letter; `words` is sorted, so each letter's
    // words form a contiguous run beginning at `runBegin`.
    auto runBegin = words.begin();
    for(char c = 'a'; c <= 'z'; c++){
        const std::string fileName = std::string("words/") + c + ".txt";
        std::ofstream letterOfs(fileName);
        auto runEnd = runBegin;
        while(runEnd != words.end() && (*runEnd)[0] == c){
            ++runEnd;
        }
        std::copy(runBegin, runEnd,
                  std::ostream_iterator<std::string>(letterOfs, "\n"));
        runBegin = runEnd;
    }
    return 0;
}