-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStringScanner.hpp
More file actions
88 lines (72 loc) · 2.7 KB
/
StringScanner.hpp
File metadata and controls
88 lines (72 loc) · 2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#pragma once
#include <algorithm>
#include <array>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
/// Byte-oriented substring scanner for raw memory buffers.
///
/// All functions are static and stateless; offsets are reported relative to
/// the start of the buffer that was passed in.
class StringScanner {
public:
    /// @brief Finds occurrences of @p pattern in a byte buffer using the
    ///        Boyer-Moore-Horspool algorithm.
    ///
    /// Matches are reported left to right and do not overlap: after a hit the
    /// scan resumes immediately past the matched bytes (searching "aa" in
    /// "aaaa" yields offsets 0 and 2).
    ///
    /// @param data          Start of the buffer to scan. A null pointer yields
    ///                      an empty result.
    /// @param dataSize      Number of readable bytes at @p data.
    /// @param pattern       Byte sequence to look for. An empty pattern, or one
    ///                      longer than the buffer, yields an empty result.
    /// @param caseSensitive When false, both the pattern and the buffer bytes
    ///                      are folded with std::tolower before comparison.
    /// @return Offsets (from @p data) of each match, in ascending order.
    static std::vector<uintptr_t> SearchBMH(const uint8_t* data, size_t dataSize,
        const std::string& pattern, bool caseSensitive = true) {
        std::vector<uintptr_t> results;
        const size_t patternLen = pattern.length();
        if (data == nullptr || patternLen == 0 || dataSize < patternLen) {
            return results;
        }

        // std::tolower is only defined for values representable as
        // unsigned char, so every byte is folded as uint8_t, never as char.
        const auto fold = [caseSensitive](uint8_t byte) -> uint8_t {
            return caseSensitive ? byte
                                 : static_cast<uint8_t>(std::tolower(byte));
        };

        // Pattern normalised exactly the same way the buffer bytes will be.
        std::vector<uint8_t> needle(patternLen);
        std::transform(pattern.begin(), pattern.end(), needle.begin(),
            [&fold](char c) { return fold(static_cast<uint8_t>(c)); });

        // Preprocessing: Horspool bad-character table. For each byte value it
        // holds the distance from its rightmost occurrence in
        // needle[0 .. patternLen-2] to the last pattern position; bytes that
        // do not occur there allow a full-length shift.
        std::array<size_t, 256> shiftTable;
        shiftTable.fill(patternLen);
        for (size_t k = 0; k + 1 < patternLen; ++k) {
            shiftTable[needle[k]] = patternLen - 1 - k;
        }

        const size_t lastStart = dataSize - patternLen;
        size_t pos = 0;
        while (pos <= lastStart) {
            // Compare right to left; `remaining` counts the unverified bytes.
            size_t remaining = patternLen;
            while (remaining > 0 &&
                   fold(data[pos + remaining - 1]) == needle[remaining - 1]) {
                --remaining;
            }

            if (remaining == 0) {
                results.push_back(static_cast<uintptr_t>(pos));
                pos += patternLen;  // non-overlapping matches
            }
            else {
                // Shift on the byte aligned with the pattern's last position.
                // Every table entry is >= 1, so the scan always advances and
                // the unsigned arithmetic can never wrap.
                pos += shiftTable[fold(data[pos + patternLen - 1])];
            }
        }
        return results;
    }

    /// @brief Runs SearchBMH once per pattern and collects every hit.
    ///
    /// @param data          Start of the buffer to scan.
    /// @param dataSize      Number of readable bytes at @p data.
    /// @param patterns      Patterns to search for, each scanned independently.
    /// @param caseSensitive Forwarded unchanged to SearchBMH.
    /// @return (pattern, offset) pairs grouped by pattern in the order the
    ///         patterns were supplied; offsets within a group are ascending.
    static std::vector<std::pair<std::string, uintptr_t>>
        SearchMultiple(const uint8_t* data, size_t dataSize,
            const std::vector<std::string>& patterns,
            bool caseSensitive = true) {
        std::vector<std::pair<std::string, uintptr_t>> results;
        for (const auto& pattern : patterns) {
            const auto offsets = SearchBMH(data, dataSize, pattern, caseSensitive);
            for (const auto offset : offsets) {
                results.emplace_back(pattern, offset);
            }
        }
        return results;
    }
};