From 00ff2af4f9a4922933e4af3d223e2403f880a0f1 Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 17 Jun 2026 04:37:09 +0200 Subject: [PATCH 1/3] feat(config): add tokenizer and config parser scaffolding - Tokenizer: full lexer (words, numbers, symbols, comments, line tracking) - ConfigParser: file reading + navigation helpers (current/advance/expect) + parseServer skeleton (recognizes server block structure) - tests: standalone harnesses for tokenizer and parser WIP: directive and location parsing still to come. Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 4 ++ include/config/ConfigParser.hpp | 30 ++++++++++++++ include/config/Token.hpp | 21 ++++++++++ include/config/Tokenizer.hpp | 18 +++++++++ src/config/ConfigParser.cpp | 62 +++++++++++++++++++++++++++++ src/config/Tokenizer.cpp | 70 +++++++++++++++++++++++++++++++++ tests/config/test_parser.cpp | 28 +++++++++++++ tests/config/test_tokenizer.cpp | 39 ++++++++++++++++++ 8 files changed, 272 insertions(+) create mode 100644 include/config/ConfigParser.hpp create mode 100644 include/config/Token.hpp create mode 100644 include/config/Tokenizer.hpp create mode 100644 src/config/ConfigParser.cpp create mode 100644 src/config/Tokenizer.cpp create mode 100644 tests/config/test_parser.cpp create mode 100644 tests/config/test_tokenizer.cpp diff --git a/.gitignore b/.gitignore index 8eae4dd..c8fdfe8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ *.o +# test binaries (built locally from tests/) +test_parser +test_tok + .DS_Store .vscode diff --git a/include/config/ConfigParser.hpp b/include/config/ConfigParser.hpp new file mode 100644 index 0000000..08ddb6f --- /dev/null +++ b/include/config/ConfigParser.hpp @@ -0,0 +1,30 @@ +#ifndef CONFIG_PARSER_HPP +#define CONFIG_PARSER_HPP + +#include +#include +#include +#include +#include +#include +#include "Token.hpp" +#include "Config.hpp" +#include "ServerConfig.hpp" +#include "LocationConfig.hpp" + +class ConfigParser { +private: + std::vector _tokens; + 
size_t _pos; + const Token &current(); + void advance(); + void expect(TokenType type); + ServerConfig parseServer(); + LocationConfig parseLocaton(); + + +public: + Config parse(const std::string& path); +}; + +#endif diff --git a/include/config/Token.hpp b/include/config/Token.hpp new file mode 100644 index 0000000..e9b2050 --- /dev/null +++ b/include/config/Token.hpp @@ -0,0 +1,21 @@ +#ifndef TOKEN_HPP +#define TOKEN_HPP + +#include + +enum TokenType { + WORD, + NUMBER, + LBRACE, + RBRACE, + SEMICOLON, + END_OF_FILE +}; + +struct Token { + TokenType type; + std::string value; + int line; +}; + +#endif diff --git a/include/config/Tokenizer.hpp b/include/config/Tokenizer.hpp new file mode 100644 index 0000000..266f99a --- /dev/null +++ b/include/config/Tokenizer.hpp @@ -0,0 +1,18 @@ +#ifndef TOKENIZER_HPP +#define TOKENIZER_HPP + +#include +#include +#include +#include "Token.hpp" + +class Tokenizer { +private: + Token makeSymbol(TokenType type, const std::string &value, int line); + Token readWordOrNumber(const std::string &content, int line, size_t &i); + +public: + std::vector tokenize(const std::string& content); +}; + +#endif diff --git a/src/config/ConfigParser.cpp b/src/config/ConfigParser.cpp new file mode 100644 index 0000000..4a0d9c0 --- /dev/null +++ b/src/config/ConfigParser.cpp @@ -0,0 +1,62 @@ +#include "../../include/config/ConfigParser.hpp" +#include "../../include/config/Tokenizer.hpp" + + +Config ConfigParser::parse(const std::string &path) +{ + Config config; + Tokenizer tokenizer; + + std::ifstream file(path.c_str()); + if (!file.is_open()) + throw std::runtime_error("Cannot open file: " + path); + std::stringstream buffer; + buffer << file.rdbuf(); + std::string content = buffer.str(); + + _tokens = tokenizer.tokenize(content); + _pos = 0; + // std::cout << "Tokens charges : " << _tokens.size() << std::endl; + + return config; +} + +const Token &ConfigParser::current() +{ + return (_tokens[_pos]); +} +void ConfigParser::advance() +{ + _pos++; +} + 
+void ConfigParser::expect(TokenType type) +{ + if (current().type != type) + { + std::ostringstream oss; + oss << "Unexpected token at line " << current().line; + throw std::runtime_error(oss.str()); + } + advance(); + } + +ServerConfig ConfigParser::parseServer() +{ + ServerConfig server; + + if (current().type != WORD || current().value != "server") + throw std::runtime_error("Expected 'server' keyword"); + advance(); + + expect(LBRACE); + + while (current().type != RBRACE && current().type != END_OF_FILE) + { + // ... ici : si "location" -> parseLocation, sinon -> directive ... + advance(); + } + expect(RBRACE); + return server; +} + diff --git a/src/config/Tokenizer.cpp b/src/config/Tokenizer.cpp new file mode 100644 index 0000000..3ca18ef --- /dev/null +++ b/src/config/Tokenizer.cpp @@ -0,0 +1,70 @@ +#include "../../include/config/Tokenizer.hpp" + +std::vector Tokenizer::tokenize(const std::string &content) +{ + std::vector tokenList; + int line = 1; + size_t i = 0; + + while (content[i]) + { + if (content[i] == '\r' || content[i] == ' ' || content[i] == '\t') + i++; + else if (content[i] == '\n') + { + line++; + i++; + } + else if (content[i] == '{') + { + tokenList.push_back(makeSymbol(LBRACE, "{", line)); + i++; + } + else if (content[i] == '}') + { + tokenList.push_back(makeSymbol(RBRACE, "}", line)); + i++; + } + else if (content[i] == ';') + { + tokenList.push_back(makeSymbol(SEMICOLON, ";", line)); + i++; + } + else if (content[i] == '#') + { + while (content[i] && content[i] != '\n') + i++; + } + else + tokenList.push_back(readWordOrNumber(content, line, i)); + } + tokenList.push_back(makeSymbol(END_OF_FILE, "", line)); + return tokenList; +} + +Token Tokenizer::makeSymbol(TokenType type, const std::string &value, int line) +{ + Token t; + t.type = type; + t.value = value; + t.line = line; + return t; +} + +Token Tokenizer::readWordOrNumber(const std::string &content, int line, size_t &i) +{ + Token t; + t.line = line; + if (isdigit(content[i])) + 
t.type = NUMBER; + else + t.type = WORD; + while (content[i] && content[i] != ' ' && content[i] != '\t' && content[i] != '\r' + && content[i] != '\n' && content[i] != '{' && content[i] != '}' + && content[i] != ';' && content[i] != '#') + { + t.value += content[i]; + i++; + } + return t; +} \ No newline at end of file diff --git a/tests/config/test_parser.cpp b/tests/config/test_parser.cpp new file mode 100644 index 0000000..2e9e9d0 --- /dev/null +++ b/tests/config/test_parser.cpp @@ -0,0 +1,28 @@ +#include "../../include/config/ConfigParser.hpp" +#include + +// Banc de test du parser. +// Compiler : +// c++ -Wall -Wextra -Werror -std=c++98 \ +// src/config/ConfigParser.cpp src/config/Tokenizer.cpp \ +// src/config/Config.cpp src/config/ServerConfig.cpp src/config/LocationConfig.cpp \ +// tests/config/test_parser.cpp -o test_parser +// Lancer : +// ./test_parser config/default.conf + +int main(int argc, char** argv) +{ + std::string path = (argc > 1) ? argv[1] : "config/default.conf"; + + try + { + ConfigParser parser; + parser.parse(path); + } + catch (const std::exception& e) + { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + return 0; +} diff --git a/tests/config/test_tokenizer.cpp b/tests/config/test_tokenizer.cpp new file mode 100644 index 0000000..8e01861 --- /dev/null +++ b/tests/config/test_tokenizer.cpp @@ -0,0 +1,39 @@ +#include "../../include/config/Tokenizer.hpp" +#include + +static const char* typeName(TokenType t) +{ + switch (t) + { + case WORD: return "WORD"; + case NUMBER: return "NUMBER"; + case LBRACE: return "LBRACE"; + case RBRACE: return "RBRACE"; + case SEMICOLON: return "SEMICOLON"; + case END_OF_FILE: return "EOF"; + } + return "?"; +} + +int main() +{ + std::string sample = + "server {\n" + " listen 8080;\n" + " # ceci est un commentaire a ignorer\n" + " server_name webserv.com;\n" + " location / {\n" + " methods GET POST;\n" + " }\n" + "}\n"; + + Tokenizer tk; + std::vector tokens = tk.tokenize(sample); + + std::cout 
<< "Nombre de tokens : " << tokens.size() << std::endl; + for (size_t i = 0; i < tokens.size(); ++i) + std::cout << "[L" << tokens[i].line << "] " + << typeName(tokens[i].type) << " '" << tokens[i].value << "'" + << std::endl; + return 0; +} From 50744e46fcf3031a67022369713db715d583315e Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 18 Jun 2026 01:57:35 +0200 Subject: [PATCH 2/3] feat(config): complete config parser - parseLocation + parseLocationDirective (root, index, autoindex, methods, upload_store, cgi_extensions) - parseSize: client_max_body_size with k/m/g suffix + overflow guard - advance() guard against reading past END_OF_FILE (no crash on truncated files) - verbose test harness printing the full parsed Config default.conf now parses fully; malformed/truncated configs throw cleanly without crashing. --- include/config/ConfigParser.hpp | 5 +- src/config/ConfigParser.cpp | 153 +++++++++++++++++++++++++++++++- tests/config/test_parser.cpp | 51 +++++++++-- 3 files changed, 195 insertions(+), 14 deletions(-) diff --git a/include/config/ConfigParser.hpp b/include/config/ConfigParser.hpp index 08ddb6f..a8f9a80 100644 --- a/include/config/ConfigParser.hpp +++ b/include/config/ConfigParser.hpp @@ -20,7 +20,10 @@ class ConfigParser { void advance(); void expect(TokenType type); ServerConfig parseServer(); - LocationConfig parseLocaton(); + LocationConfig parseLocation(); + void parseServerDirective(ServerConfig& server); + void parseLocationDirective(LocationConfig &location); + size_t parseSize(const std::string& value); public: diff --git a/src/config/ConfigParser.cpp b/src/config/ConfigParser.cpp index 4a0d9c0..c8ef9e9 100644 --- a/src/config/ConfigParser.cpp +++ b/src/config/ConfigParser.cpp @@ -1,5 +1,7 @@ #include "../../include/config/ConfigParser.hpp" #include "../../include/config/Tokenizer.hpp" +#include +#include Config ConfigParser::parse(const std::string &path) @@ -16,7 +18,12 @@ Config ConfigParser::parse(const std::string &path) _tokens = 
tokenizer.tokenize(content); _pos = 0; - // std::cout << "Tokens charges : " << _tokens.size() << std::endl; + + while (current().type != END_OF_FILE) + { + ServerConfig server = parseServer(); + config.addServer(server); + } return config; } @@ -27,6 +34,7 @@ const Token &ConfigParser::current() } void ConfigParser::advance() { + if (_pos + 1 < _tokens.size()) _pos++; } @@ -48,15 +56,152 @@ ServerConfig ConfigParser::parseServer() if (current().type != WORD || current().value != "server") throw std::runtime_error("Expected 'server' keyword"); advance(); + expect(LBRACE); + while (current().type != RBRACE && current().type != END_OF_FILE) + { + if (current().type == WORD && current().value == "location") + server.addLocation(parseLocation()); + else + parseServerDirective(server); + } + expect(RBRACE); + return server; +} + +void ConfigParser::parseServerDirective(ServerConfig& server) +{ + std::string name = current().value; + advance(); + + if (name == "listen") + { + if (current().type != NUMBER) + throw std::runtime_error("listen expects a number"); + server.setPort(std::atoi(current().value.c_str())); + advance(); + } + else if (name == "host") + { + server.setHost(current().value); + advance(); + } + else if (name == "server_name") + { + server.setServerName(current().value); + advance(); + } + else if (name == "client_max_body_size") + { + server.setMaxBodySize(parseSize(current().value)); + advance(); + } + else if (name == "error_page") + { + int code = std::atoi(current().value.c_str()); + advance(); + server.addErrorPage(code, current().value); + advance(); + } + else + throw std::runtime_error("Unknown server directive: " + name); + expect(SEMICOLON); +} + +LocationConfig ConfigParser::parseLocation() +{ + LocationConfig location; + advance(); + location.setPath(current().value); + advance(); expect(LBRACE); while (current().type != RBRACE && current().type != END_OF_FILE) + parseLocationDirective(location); + expect(RBRACE); + return location; +} + 
+void ConfigParser::parseLocationDirective(LocationConfig &location) +{ + std::string name = current().value; + advance(); + + if (name == "root") { - // ... ici : si "location" -> parseLocation, sinon -> directive ... + location.setRoot(current().value); advance(); } - expect(RBRACE); - return server; + else if (name == "index") + { + location.setIndex(current().value); + advance(); + } + else if (name == "autoindex") + { + if (current().value == "off") + location.setAutoindex(false); + else if (current().value == "on") + location.setAutoindex(true); + else + throw std::runtime_error("Unknown value: " + name + current().value); + advance(); + } + else if (name == "methods") + { + while (current().type != SEMICOLON && current().type != END_OF_FILE) + { + location.addMethod(current().value); + advance(); + } + } + else if (name == "upload_store") + { + location.setUploadPath(current().value); + advance(); + } + else if (name == "cgi_extensions") + { + location.setCgiExtension(current().value); + advance(); + location.setCgiPath(current().value); + advance(); + } + else + throw std::runtime_error("Unknown location directive: " + name); + expect(SEMICOLON); } +size_t ConfigParser::parseSize(const std::string& value) +{ + if (value.empty()) + throw std::runtime_error("client_max_body_size: empty value"); + + size_t i = 0; + while (i < value.size() && std::isdigit(static_cast(value[i]))) + ++i; + if (i == 0) + throw std::runtime_error("client_max_body_size: expected a number, got '" + value + "'"); + + size_t bytes = std::strtoul(value.substr(0, i).c_str(), NULL, 10); + + if (i == value.size()) + return bytes; + if (value.size() - i != 1) + throw std::runtime_error("client_max_body_size: invalid suffix in '" + value + "'"); + + size_t multiplier; + switch (value[i]) + { + case 'k': case 'K': multiplier = 1024UL; break; + case 'm': case 'M': multiplier = 1024UL * 1024UL; break; + case 'g': case 'G': multiplier = + 1024UL * 1024UL * 1024UL; break; + default: + throw 
std::runtime_error("client_max_body_size: unknown suffix in '" + value + "'"); + } + if (bytes != 0 && multiplier > (static_cast(-1) / bytes)) + throw std::runtime_error("client_max_body_size: value too large '" + value + "'"); + + return bytes * multiplier; +} diff --git a/tests/config/test_parser.cpp b/tests/config/test_parser.cpp index 2e9e9d0..75fc6c8 100644 --- a/tests/config/test_parser.cpp +++ b/tests/config/test_parser.cpp @@ -1,14 +1,7 @@ #include "../../include/config/ConfigParser.hpp" #include -// Banc de test du parser. -// Compiler : -// c++ -Wall -Wextra -Werror -std=c++98 \ -// src/config/ConfigParser.cpp src/config/Tokenizer.cpp \ -// src/config/Config.cpp src/config/ServerConfig.cpp src/config/LocationConfig.cpp \ -// tests/config/test_parser.cpp -o test_parser -// Lancer : -// ./test_parser config/default.conf +// Banc de test du parser : parse un .conf et affiche le Config obtenu. int main(int argc, char** argv) { @@ -17,7 +10,47 @@ int main(int argc, char** argv) try { ConfigParser parser; - parser.parse(path); + Config config = parser.parse(path); + + const std::vector& servers = config.getServers(); + std::cout << "Servers parsed: " << servers.size() << std::endl; + + for (size_t i = 0; i < servers.size(); ++i) + { + const ServerConfig& s = servers[i]; + std::cout << "\n=== server[" << i << "] ===" << std::endl; + std::cout << " host : " << s.getHost() << std::endl; + std::cout << " port : " << s.getPort() << std::endl; + std::cout << " server_name : " << s.getServerName() << std::endl; + std::cout << " max_body_size : " << s.getMaxBodySize() << std::endl; + + const std::map& pages = s.getErrorPages(); + for (std::map::const_iterator it = pages.begin(); it != pages.end(); ++it) + std::cout << " error_page : " << it->first << " -> " << it->second << std::endl; + + const std::vector& locs = s.getLocations(); + for (size_t j = 0; j < locs.size(); ++j) + { + const LocationConfig& l = locs[j]; + std::cout << " --- location " << l.getPath() << " 
---" << std::endl; + std::cout << " root : " << l.getRoot() << std::endl; + std::cout << " index : " << l.getIndex() << std::endl; + std::cout << " autoindex : " << (l.getAutoindex() ? "on" : "off") << std::endl; + + const std::vector& methods = l.getAllowedMethods(); + std::cout << " methods :"; + for (size_t k = 0; k < methods.size(); ++k) + std::cout << " " << methods[k]; + std::cout << std::endl; + + if (!l.getUploadPath().empty()) + std::cout << " upload : " << l.getUploadPath() << std::endl; + if (!l.getCgiExtension().empty()) + std::cout << " cgi : " << l.getCgiExtension() << " -> " << l.getCgiPath() << std::endl; + if (!l.getRedirectUrl().empty()) + std::cout << " redirect : " << l.getRedirectUrl() << std::endl; + } + } } catch (const std::exception& e) { From 3ab231d7f75d637bdca1521de9760dc84ff1e100 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 18 Jun 2026 02:24:52 +0200 Subject: [PATCH 3/3] feat(config): parse 'redirect' directive in location blocks Co-Authored-By: Claude Opus 4.8 (1M context) --- src/config/ConfigParser.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/config/ConfigParser.cpp b/src/config/ConfigParser.cpp index c8ef9e9..e79d814 100644 --- a/src/config/ConfigParser.cpp +++ b/src/config/ConfigParser.cpp @@ -167,6 +167,11 @@ void ConfigParser::parseLocationDirective(LocationConfig &location) location.setCgiPath(current().value); advance(); } + else if (name == "redirect") + { + location.setRedirectUrl(current().value); + advance(); + } else throw std::runtime_error("Unknown location directive: " + name); expect(SEMICOLON);