-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathstring_search.cpp
More file actions
85 lines (70 loc) · 2.48 KB
/
string_search.cpp
File metadata and controls
85 lines (70 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include "string_search.h"
#include <iostream>
#include <chrono>
#ifdef __x86_64__
#include <immintrin.h>
#define USE_X86_SIMD 1
#else
#define USE_X86_SIMD 0
#endif
int simd_string_search(const std::string& text, const std::string& pattern) {
int count = 0;
size_t text_len = text.length();
size_t pattern_len = pattern.length();
if (pattern_len == 0 || pattern_len > text_len) {
return 0;
}
const char first_char = pattern[0];
size_t i = 0;
#if USE_X86_SIMD
// x86-64 optimized path using SSE2
__m128i first_char_vec = _mm_set1_epi8(first_char);
for (; i + 16 <= text_len - pattern_len + 1; i += 16) {
__m128i text_chunk = _mm_loadu_si128(reinterpret_cast<const __m128i*>(text.data() + i));
__m128i cmp = _mm_cmpeq_epi8(text_chunk, first_char_vec);
int mask = _mm_movemask_epi8(cmp);
// Check each potential match
for (int bit = 0; bit < 16 && i + bit <= text_len - pattern_len; bit++) {
if (mask & (1 << bit)) {
bool match = true;
for (size_t j = 1; j < pattern_len; j++) {
if (text[i + bit + j] != pattern[j]) {
match = false;
break;
}
}
if (match) count++;
}
}
}
#endif
// Handle remaining characters (or all on non-x86)
for (; i <= text_len - pattern_len; i++) {
bool match = true;
for (size_t j = 0; j < pattern_len; j++) {
if (text[i + j] != pattern[j]) {
match = false;
break;
}
}
if (match) count++;
}
return count;
}
void benchmark_string_ops() {
std::cout << "\n=== String Search Benchmark ===" << std::endl;
// Create a large text
std::string text;
for (int i = 0; i < 100000; i++) {
text += "The quick brown fox jumps over the lazy dog. ";
}
std::string pattern = "fox";
auto start = std::chrono::high_resolution_clock::now();
int count = simd_string_search(text, pattern);
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "Text size: " << text.length() << " characters" << std::endl;
std::cout << "Pattern: \"" << pattern << "\"" << std::endl;
std::cout << "Occurrences found: " << count << std::endl;
std::cout << "Time: " << duration.count() << " ms" << std::endl;
}