From c2a96e53071ee145fe1ec2480f98f78fc04c53dd Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 21 May 2026 12:54:01 -0400 Subject: [PATCH 1/3] Implement a `to_title_case` text utility function Signed-off-by: Juan Cruz Viotti --- .github/workflows/website-build.yml | 1 + .github/workflows/website-deploy.yml | 1 + CMakeLists.txt | 9 ++ config.cmake.in | 3 + src/lang/text/CMakeLists.txt | 6 + src/lang/text/include/sourcemeta/core/text.h | 39 ++++++ src/lang/text/text.cc | 21 ++++ test/packaging/find_package/CMakeLists.txt | 1 + test/packaging/find_package/hello.cc | 1 + test/text/CMakeLists.txt | 5 + test/text/text_to_title_case_test.cc | 125 +++++++++++++++++++ 11 files changed, 212 insertions(+) create mode 100644 src/lang/text/CMakeLists.txt create mode 100644 src/lang/text/include/sourcemeta/core/text.h create mode 100644 src/lang/text/text.cc create mode 100644 test/text/CMakeLists.txt create mode 100644 test/text/text_to_title_case_test.cc diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index d81baa2ab..978a6a01a 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -23,6 +23,7 @@ jobs: -DSOURCEMETA_CORE_LANG_NUMERIC:BOOL=OFF -DSOURCEMETA_CORE_LANG_ERROR:BOOL=OFF -DSOURCEMETA_CORE_LANG_OPTIONS:BOOL=OFF + -DSOURCEMETA_CORE_LANG_TEXT:BOOL=OFF -DSOURCEMETA_CORE_UNICODE:BOOL=OFF -DSOURCEMETA_CORE_PUNYCODE:BOOL=OFF -DSOURCEMETA_CORE_TIME:BOOL=OFF diff --git a/.github/workflows/website-deploy.yml b/.github/workflows/website-deploy.yml index 527fa2110..b814ad807 100644 --- a/.github/workflows/website-deploy.yml +++ b/.github/workflows/website-deploy.yml @@ -33,6 +33,7 @@ jobs: -DSOURCEMETA_CORE_LANG_NUMERIC:BOOL=OFF -DSOURCEMETA_CORE_LANG_ERROR:BOOL=OFF -DSOURCEMETA_CORE_LANG_OPTIONS:BOOL=OFF + -DSOURCEMETA_CORE_LANG_TEXT:BOOL=OFF -DSOURCEMETA_CORE_UNICODE:BOOL=OFF -DSOURCEMETA_CORE_PUNYCODE:BOOL=OFF -DSOURCEMETA_CORE_TIME:BOOL=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 8afa68c62..6c8c05bce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(SOURCEMETA_CORE_LANG_PARALLEL "Build the Sourcemeta Core language paralle option(SOURCEMETA_CORE_LANG_NUMERIC "Build the Sourcemeta Core language numeric library" ON) option(SOURCEMETA_CORE_LANG_ERROR "Build the Sourcemeta Core language error library" ON) option(SOURCEMETA_CORE_LANG_OPTIONS "Build the Sourcemeta Core Options library" ON) +option(SOURCEMETA_CORE_LANG_TEXT "Build the Sourcemeta Core language text library" ON) option(SOURCEMETA_CORE_UNICODE "Build the Sourcemeta Core Unicode library" ON) option(SOURCEMETA_CORE_PUNYCODE "Build the Sourcemeta Core Punycode library" ON) option(SOURCEMETA_CORE_TIME "Build the Sourcemeta Core time library" ON) @@ -93,6 +94,10 @@ if(SOURCEMETA_CORE_LANG_OPTIONS) add_subdirectory(src/lang/options) endif() +if(SOURCEMETA_CORE_LANG_TEXT) + add_subdirectory(src/lang/text) +endif() + if(SOURCEMETA_CORE_UNICODE) add_subdirectory(src/core/unicode) endif() @@ -231,6 +236,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/options) endif() + if(SOURCEMETA_CORE_LANG_TEXT) + add_subdirectory(test/text) + endif() + if(SOURCEMETA_CORE_UNICODE) add_subdirectory(test/unicode) endif() diff --git a/config.cmake.in b/config.cmake.in index 4f73aa20f..18a7effdb 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -30,6 +30,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS markdown) list(APPEND SOURCEMETA_CORE_COMPONENTS error) list(APPEND SOURCEMETA_CORE_COMPONENTS options) + list(APPEND SOURCEMETA_CORE_COMPONENTS text) endif() include(CMakeFindDependencyMacro) @@ -136,6 +137,8 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_error.cmake") elseif(component STREQUAL "options") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_options.cmake") + elseif(component STREQUAL "text") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") else() message(FATAL_ERROR "Unknown Sourcemeta Core component: ${component}") endif() diff --git a/src/lang/text/CMakeLists.txt b/src/lang/text/CMakeLists.txt new file mode 100644 index 000000000..3b8602d58 --- /dev/null +++ b/src/lang/text/CMakeLists.txt @@ -0,0 +1,6 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME text + SOURCES text.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME text) +endif() diff --git a/src/lang/text/include/sourcemeta/core/text.h b/src/lang/text/include/sourcemeta/core/text.h new file mode 100644 index 000000000..c9851754c --- /dev/null +++ b/src/lang/text/include/sourcemeta/core/text.h @@ -0,0 +1,39 @@ +#ifndef SOURCEMETA_CORE_TEXT_H_ +#define SOURCEMETA_CORE_TEXT_H_ + +#ifndef SOURCEMETA_CORE_TEXT_EXPORT +#include +#endif + +#include // std::string + +/// @defgroup text Text +/// @brief A collection of general-purpose text manipulation utilities +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup text +/// +/// Convert a string to Title Case in place. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::string value{"hello_world"}; +/// sourcemeta::core::to_title_case(value); +/// assert(value == "Hello World"); +/// ``` +SOURCEMETA_CORE_TEXT_EXPORT +auto to_title_case(std::string &value) -> void; + +} // namespace sourcemeta::core + +#endif diff --git a/src/lang/text/text.cc b/src/lang/text/text.cc new file mode 100644 index 000000000..a57d7b773 --- /dev/null +++ b/src/lang/text/text.cc @@ -0,0 +1,21 @@ +#include + +#include // std::toupper + +namespace sourcemeta::core { + +auto to_title_case(std::string &value) -> void { + bool capitalize_next{true}; + for (auto &character : value) { + if (character == '_' || character == '-') { + character = ' '; + capitalize_next = true; + } else if (capitalize_next) { + character = static_cast( + std::toupper(static_cast(character))); + capitalize_next = false; + } + } +} + +} // namespace sourcemeta::core diff --git a/test/packaging/find_package/CMakeLists.txt b/test/packaging/find_package/CMakeLists.txt index 758378692..b98fbdfc3 100644 --- a/test/packaging/find_package/CMakeLists.txt +++ b/test/packaging/find_package/CMakeLists.txt @@ -25,3 +25,4 @@ target_link_libraries(core_hello PRIVATE sourcemeta::core::markdown) target_link_libraries(core_hello PRIVATE sourcemeta::core::options) target_link_libraries(core_hello PRIVATE sourcemeta::core::preprocessor) target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonrpc) +target_link_libraries(core_hello PRIVATE sourcemeta::core::text) diff --git a/test/packaging/find_package/hello.cc b/test/packaging/find_package/hello.cc index 748264457..4c209fc1f 100644 --- a/test/packaging/find_package/hello.cc +++ b/test/packaging/find_package/hello.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/test/text/CMakeLists.txt b/test/text/CMakeLists.txt new file mode 100644 index 000000000..501465d0d --- /dev/null +++ b/test/text/CMakeLists.txt @@ -0,0 +1,5 @@ +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME text + SOURCES text_to_title_case_test.cc) + +target_link_libraries(sourcemeta_core_text_unit + PRIVATE sourcemeta::core::text) diff --git a/test/text/text_to_title_case_test.cc b/test/text/text_to_title_case_test.cc new file mode 100644 index 000000000..e91068020 --- /dev/null +++ b/test/text/text_to_title_case_test.cc @@ -0,0 +1,125 @@ +#include + +#include + +#include // std::string + +TEST(Text_to_title_case, empty_string) { + std::string value{""}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, ""); +} + +TEST(Text_to_title_case, single_lowercase_character) { + std::string value{"a"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "A"); +} + +TEST(Text_to_title_case, single_uppercase_character) { + std::string value{"A"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "A"); +} + +TEST(Text_to_title_case, single_underscore) { + std::string value{"_"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, " "); +} + +TEST(Text_to_title_case, single_dash) { + std::string value{"-"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, " "); +} + +TEST(Text_to_title_case, single_lowercase_word) { + std::string value{"hello"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, already_title_cased_word) { + std::string value{"Hello"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, all_uppercase_word) { + std::string value{"HELLO"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "HELLO"); +} + +TEST(Text_to_title_case, snake_case_two_words) { + std::string value{"hello_world"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World"); +} + +TEST(Text_to_title_case, kebab_case_two_words) { + std::string value{"hello-world"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World"); +} + +TEST(Text_to_title_case, mixed_snake_and_kebab_separators) { + std::string value{"hello_world-test"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World Test"); +} + +TEST(Text_to_title_case, snake_case_three_words) { + std::string value{"abc_def_ghi"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Abc Def Ghi"); +} + +TEST(Text_to_title_case, preserves_existing_uppercase_after_separator) { + std::string value{"Hello_World"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World"); +} + +TEST(Text_to_title_case, leading_underscore) { + std::string value{"_hello"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, " Hello"); +} + +TEST(Text_to_title_case, trailing_underscore) { + std::string value{"hello_"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello "); +} + +TEST(Text_to_title_case, consecutive_separators) { + std::string value{"hello__world"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World"); +} + +TEST(Text_to_title_case, single_letter_words) { + std::string value{"a_b_c"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "A B C"); +} + +TEST(Text_to_title_case, digits_pass_through) { + std::string value{"abc123def"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Abc123def"); +} + +TEST(Text_to_title_case, digit_after_separator) { + std::string value{"abc_123"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Abc 123"); +} + +TEST(Text_to_title_case, space_in_input_is_not_a_separator) { + std::string value{"hello world"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello world"); +} From 42513b90aca7f8efeda27baef050c49d25a27e12 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 21 May 2026 13:04:11 -0400 Subject: [PATCH 2/3] More Signed-off-by: Juan Cruz Viotti --- src/lang/text/text.cc | 30 ++++++++++---- test/text/text_to_title_case_test.cc | 62 +++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/src/lang/text/text.cc b/src/lang/text/text.cc index a57d7b773..be61af384 100644 --- a/src/lang/text/text.cc +++ b/src/lang/text/text.cc @@ -1,21 +1,35 @@ #include -#include // std::toupper +#include // std::toupper +#include // std::size_t namespace sourcemeta::core { auto to_title_case(std::string &value) -> void { + std::size_t write{0}; bool capitalize_next{true}; - for (auto &character : value) { + bool pending_separator{false}; + for (const char character : value) { if (character == '_' || character == '-') { - character = ' '; - capitalize_next = true; - } else if (capitalize_next) { - character = static_cast( - std::toupper(static_cast(character))); - capitalize_next = false; + if (write > 0) { + pending_separator = true; + } + } else { + if (pending_separator) { + value[write++] = ' '; + pending_separator = false; + capitalize_next = true; + } + if (capitalize_next) { + value[write++] = static_cast( + std::toupper(static_cast(character))); + capitalize_next = false; + } else { + value[write++] = character; + } } } + value.resize(write); } } // namespace sourcemeta::core diff --git a/test/text/text_to_title_case_test.cc b/test/text/text_to_title_case_test.cc index e91068020..b512cf78c 100644 --- a/test/text/text_to_title_case_test.cc +++ b/test/text/text_to_title_case_test.cc @@ -22,16 +22,22 @@ TEST(Text_to_title_case, single_uppercase_character) { EXPECT_EQ(value, "A"); } -TEST(Text_to_title_case, single_underscore) { +TEST(Text_to_title_case, single_underscore_is_empty) { std::string value{"_"}; sourcemeta::core::to_title_case(value); - EXPECT_EQ(value, " "); + EXPECT_EQ(value, ""); } -TEST(Text_to_title_case, single_dash) { +TEST(Text_to_title_case, single_dash_is_empty) { std::string value{"-"}; sourcemeta::core::to_title_case(value); - EXPECT_EQ(value, " "); + EXPECT_EQ(value, ""); +} + +TEST(Text_to_title_case, only_separators_is_empty) { + std::string value{"___"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, ""); } TEST(Text_to_title_case, single_lowercase_word) { @@ -82,22 +88,58 @@ TEST(Text_to_title_case, preserves_existing_uppercase_after_separator) { EXPECT_EQ(value, "Hello World"); } -TEST(Text_to_title_case, leading_underscore) { +TEST(Text_to_title_case, leading_underscore_is_stripped) { std::string value{"_hello"}; sourcemeta::core::to_title_case(value); - EXPECT_EQ(value, " Hello"); + EXPECT_EQ(value, "Hello"); } -TEST(Text_to_title_case, trailing_underscore) { +TEST(Text_to_title_case, leading_dash_is_stripped) { + std::string value{"-hello"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, trailing_underscore_is_stripped) { std::string value{"hello_"}; sourcemeta::core::to_title_case(value); - EXPECT_EQ(value, "Hello "); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, trailing_dash_is_stripped) { + std::string value{"hello-"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); } -TEST(Text_to_title_case, consecutive_separators) { +TEST(Text_to_title_case, separators_around_word_are_stripped) { + std::string value{"_hello_"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, multiple_leading_separators_are_stripped) { + std::string value{"__hello"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, multiple_trailing_separators_are_stripped) { + std::string value{"hello__"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello"); +} + +TEST(Text_to_title_case, consecutive_separators_collapse_to_single_space) { std::string value{"hello__world"}; sourcemeta::core::to_title_case(value); - EXPECT_EQ(value, "Hello World"); + EXPECT_EQ(value, "Hello World"); +} + +TEST(Text_to_title_case, mixed_consecutive_separators_collapse) { + std::string value{"hello_-world"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Hello World"); } TEST(Text_to_title_case, single_letter_words) { From c2e2baa5cdc5fc03235586acc1e433f87fac8075 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 21 May 2026 13:14:47 -0400 Subject: [PATCH 3/3] Fix Signed-off-by: Juan Cruz Viotti --- src/lang/text/text.cc | 6 ++++-- test/text/text_to_title_case_test.cc | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/lang/text/text.cc b/src/lang/text/text.cc index be61af384..3c3b4ae5e 100644 --- a/src/lang/text/text.cc +++ b/src/lang/text/text.cc @@ -1,6 +1,6 @@ #include -#include // std::toupper +#include // std::isalpha, std::toupper #include // std::size_t namespace sourcemeta::core { @@ -23,7 +23,9 @@ auto to_title_case(std::string &value) -> void { if (capitalize_next) { value[write++] = static_cast( std::toupper(static_cast(character))); - capitalize_next = false; + if (std::isalpha(static_cast(character))) { + capitalize_next = false; + } } else { value[write++] = character; } diff --git a/test/text/text_to_title_case_test.cc b/test/text/text_to_title_case_test.cc index b512cf78c..86eb57ee7 100644 --- a/test/text/text_to_title_case_test.cc +++ b/test/text/text_to_title_case_test.cc @@ -160,6 +160,18 @@ TEST(Text_to_title_case, digit_after_separator) { EXPECT_EQ(value, "Abc 123"); } +TEST(Text_to_title_case, letter_after_leading_digits_in_segment) { + std::string value{"abc_123def"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "Abc 123Def"); +} + +TEST(Text_to_title_case, leading_digits_then_letter_at_start) { + std::string value{"123abc"}; + sourcemeta::core::to_title_case(value); + EXPECT_EQ(value, "123Abc"); +} + TEST(Text_to_title_case, space_in_input_is_not_a_separator) { std::string value{"hello world"}; sourcemeta::core::to_title_case(value);