-
Notifications
You must be signed in to change notification settings - Fork 38
Expand file tree
/
Copy pathstring_parser.hpp
More file actions
242 lines (185 loc) · 7.4 KB
/
string_parser.hpp
File metadata and controls
242 lines (185 loc) · 7.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#pragma once
#ifndef STRING_PARSER_HPP
#define STRING_PARSER_HPP
#include <algorithm>
#include <cctype>
#include <iostream>
#include <limits>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "strings_util.hpp"
#include "string_filters.hpp"
namespace parser {
/**
* A simple set of string parser and tokenizer classes that can work with various
* string types and extract typed values from them without unnecessary reallocations.
*/
class token {
public:
explicit token(std::optional<strings::ci_string> value) noexcept : value_{std::move(value)} {}
// Non-explicit to allow implicit conversion from C-style strings
token(const char* value) noexcept : value_{value ? std::optional<strings::ci_string>{value} : std::nullopt} {}
// Non-explicit to allow implicit conversion from std::string
token(const strings::ci_string& value) noexcept : value_{value} {}
explicit token(strings::ci_string&& value) noexcept : value_{std::move(value)} {}
// Add constructor for std::string
token(const std::string& value) noexcept : value_{value.c_str()} {}
[[nodiscard]] std::optional<int> get_number() const noexcept;
[[nodiscard]] std::string get_string() const noexcept { return std::string{value_.value_or("").c_str()}; }
[[nodiscard]] std::optional<bool> get_bool() const noexcept;
[[nodiscard]] bool operator==(const strings::ci_string& s) const noexcept { return value_.value_or("") == s; }
[[nodiscard]] bool operator==(const std::string& s) const noexcept { return value_.value_or("") == s; }
[[nodiscard]] bool operator==(const char* s) const noexcept { return s && value_.value_or("") == s; }
[[nodiscard]] bool operator!=(const strings::ci_string& s) const noexcept { return value_.value_or("") != s; }
[[nodiscard]] bool operator!=(const std::string& s) const noexcept { return value_.value_or("") != s; }
[[nodiscard]] bool operator!=(const char* s) const noexcept { return !s || value_.value_or("") != s; }
[[nodiscard]] explicit operator bool() const noexcept { return value_.has_value(); }
private:
std::optional<strings::ci_string> value_;
};
// Forward declarations: the stream extraction operator is declared here, ahead
// of the class definition, and string_parser later names it as a friend.
class string_parser;
std::istream& operator>>(std::istream& is, string_parser& parser);
/**
 * Tokenizer that owns a copy of its input text and walks it with a cursor.
 *
 * Only space and tab are treated as whitespace. A ';' outside double quotes
 * makes get_token() discard everything from there to the end of the input.
 */
class string_parser {
public:
    // --- Construction: the cursor always starts at offset 0 ---
    constexpr string_parser() noexcept {}
    explicit string_parser(const std::string& input) noexcept : data_{input} {}
    explicit string_parser(std::string&& input) noexcept : data_{std::move(input)} {}
    /// A null pointer is treated as an empty input.
    explicit string_parser(const char* input) noexcept : data_{input ? input : ""} {}

    // --- Replacing the input (each of these rewinds the cursor) ---
    string_parser& operator=(const std::string& input) noexcept;
    string_parser& operator=(const char* input) noexcept;
    void set_data(const std::string& input) noexcept {
        data_ = input;
        pos_ = 0;
    }

    // --- Parsing ---
    /// Next token, or a token without a value at end of input or at a ';'.
    [[nodiscard]] token get_token();
    /// Skips whitespace, then consumes a single '@' if one is next; reports whether it did.
    [[nodiscard]] bool get_at();
    /// Calls get_token() repeatedly and returns every token that held a value.
    [[nodiscard]] std::vector<token> tokenize();

    // --- Cursor handling and inspection ---
    /// Advances the cursor past spaces and tabs.
    void strip_white() noexcept;
    /// Moves the cursor back to the start of the input.
    constexpr void reset() noexcept { pos_ = 0; }

    /// Copy of the text that has not been consumed yet.
    [[nodiscard]] std::string str() const noexcept { return data_.substr(pos_); }
    /// The complete input, regardless of cursor position.
    [[nodiscard]] const std::string& original() const noexcept { return data_; }
    /// Number of characters still ahead of the cursor.
    [[nodiscard]] constexpr size_t length() const noexcept {
        const size_t total = data_.length();
        return pos_ < total ? total - pos_ : 0;
    }
    /// True once the cursor has reached the end of the input.
    [[nodiscard]] constexpr bool empty() const noexcept { return length() == 0; }

    friend std::istream& operator>>(std::istream& is, string_parser& parser);

private:
    std::string data_{};  // full input text
    size_t pos_{0};       // offset of the next unread character in data_

    [[nodiscard]] static constexpr bool is_whitespace(char c) noexcept {
        return c == ' ' || c == '\t';
    }
};
//
// Implementation section
//
// string_parser class implementations
/// Skips leading whitespace on the stream, reads the rest of the line and hands
/// it to the parser via set_data(), which also rewinds the parser's cursor.
/// set_data() is called whatever state the stream ends up in.
inline std::istream& operator>>(std::istream& is, string_parser& parser) {
    is >> std::ws;
    std::string line;
    std::getline(is, line);
    parser.set_data(line);
    return is;
}
/// Extracts the next token and advances the cursor past it.
///
/// - Leading spaces/tabs are skipped first.
/// - At end of input, or when the next character is ';', a token without a
///   value is returned; a ';' also moves the cursor to the end of the input.
/// - A token starting with '"' runs to the next '"' (quotes excluded), or to the
///   end of the input if there is no closing quote.
/// - Any other token runs to the next space, tab or ';'. Hitting ';' returns the
///   token and discards the rest of the input.
/// - Trailing whitespace after the token is consumed as well.
inline token string_parser::get_token() {
    strip_white();

    const size_t total = data_.length();
    if (pos_ >= total) return token{std::nullopt};
    if (data_[pos_] == ';') {
        pos_ = total;
        return token{std::nullopt};
    }

    const bool quoted = data_[pos_] == '"';
    if (quoted) ++pos_;  // step over the opening quote

    const size_t stop = quoted ? data_.find('"', pos_)
                               : data_.find_first_of(" \t;", pos_);

    // No delimiter ahead: the token is everything that is left.
    if (stop == std::string::npos) {
        std::string word = data_.substr(pos_);
        pos_ = total;
        return token{word};
    }

    std::string word = data_.substr(pos_, stop - pos_);
    if (quoted) {
        pos_ = stop + 1;  // step over the closing quote
    } else if (data_[stop] == ';') {
        pos_ = total;     // the rest of the input is dropped
        return token{word};
    } else {
        pos_ = stop;
    }

    strip_white();
    return token{word};
}
/// Skips leading whitespace and reports whether the next character is '@'.
/// The '@' is consumed when present; otherwise only the whitespace is.
inline bool string_parser::get_at() {
    strip_white();
    const bool at_sign = pos_ < data_.length() && data_[pos_] == '@';
    if (at_sign) ++pos_;
    return at_sign;
}
/// Moves the cursor forward to the first character that is not a space or tab,
/// or to the end of the input if only whitespace remains.
inline void string_parser::strip_white() noexcept {
    const size_t limit = data_.length();
    for (; pos_ < limit; ++pos_) {
        if (!is_whitespace(data_[pos_])) break;
    }
}
inline std::vector<token> string_parser::tokenize() {
std::vector<token> tokens;
tokens.reserve(16);
while (token temp = get_token()) tokens.push_back(temp);
return tokens;
}
// token class implementations
/// Parses the token as a base-10 int.
///
/// Accepted form: an optional leading '-' followed by one or more decimal
/// digits and nothing else (no '+', no whitespace, no trailing characters).
///
/// @return the value, or std::nullopt when the token holds no value, is empty,
///         is malformed, or does not fit in an int. The full int range is
///         accepted, including std::numeric_limits<int>::min().
inline std::optional<int> token::get_number() const noexcept {
    if (!value_) return std::nullopt;
    const auto& text = *value_;
    const size_t len = text.length();

    size_t pos = 0;
    const bool negative = len > 0 && text[0] == '-';
    if (negative) ++pos;
    if (pos >= len) return std::nullopt;  // empty, or a lone '-'

    // Accumulate on the negative side: |INT_MIN| is one larger than INT_MAX,
    // so every representable int fits while the digits are being read.
    constexpr int min_value = std::numeric_limits<int>::min();
    constexpr int min_div_10 = min_value / 10;
    constexpr int min_mod_10 = -(min_value % 10);

    int value = 0;
    for (; pos < len; ++pos) {
        // std::isdigit requires a value representable as unsigned char; a plain
        // (possibly negative) char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(text[pos]);
        if (!std::isdigit(ch)) return std::nullopt;
        const int digit = ch - '0';
        // Reject before value * 10 - digit would drop below INT_MIN.
        if (value < min_div_10 || (value == min_div_10 && digit > min_mod_10)) return std::nullopt;
        value = value * 10 - digit;
    }

    if (negative) return value;
    if (value == min_value) return std::nullopt;  // magnitude is INT_MAX + 1
    return -value;
}
/// Interprets the token as a boolean word.
///
/// "true", "t", "yes", "y", "on" and "1" give true; "false", "f", "no", "n",
/// "off" and "0" give false. Matching uses strings::ci_string's own comparison.
/// Anything else, or a token without a value, gives std::nullopt.
inline std::optional<bool> token::get_bool() const noexcept {
    if (!value_.has_value()) return std::nullopt;
    const auto& word = *value_;

    static constexpr const char* truthy[] = {"true", "t", "yes", "y", "on", "1"};
    static constexpr const char* falsy[] = {"false", "f", "no", "n", "off", "0"};

    for (const char* candidate : truthy) {
        if (word == candidate) return true;
    }
    for (const char* candidate : falsy) {
        if (word == candidate) return false;
    }
    return std::nullopt;
}
// string_parser assignment operators
/// Replaces the input with a copy of `input` and rewinds the cursor to the start.
inline string_parser& string_parser::operator=(const std::string& input) noexcept {
    set_data(input);
    return *this;
}
/// Replaces the input with the given C string and rewinds the cursor to the
/// start. A null pointer is treated as an empty input.
inline string_parser& string_parser::operator=(const char* input) noexcept {
    pos_ = 0;
    if (input != nullptr) {
        data_ = input;
    } else {
        data_ = "";
    }
    return *this;
}
} // namespace parser
#endif // STRING_PARSER_HPP