From 5340ed8e426c71384351fb2479e11cdbb7ab4d93 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sat, 23 May 2026 20:59:59 +0100 Subject: [PATCH 1/6] refactor: modernise CompileParseRules.cc for type safety and modern C++ - Replace C-style file I/O (fopen, fprintf, fclose) with std::ofstream for RAII-based file handling - Use fixed-width integer types (uint32_t, uint8_t) instead of unsigned int and char for portability and clarity - Refactor uint_to_binary function to use std::string instead of static buffer for thread safety - Add comprehensive Doxygen documentation for the file, functions, and arrays - Remove @section license License from file header to prevent Doxygen warnings about multiple use of section label, as @section is intended for major structured documentation sections, not repetitive boilerplate - Remove obsolete COMPILE_PARSE_RULES macro and ink_string.h dependency - Improve output formatting using std::setw, std::setfill, and std::hex for consistent alignment - Replace int loop variables with uint16_t for better type safety - Add static_cast for explicit type conversions --- src/tscore/CompileParseRules.cc | 369 +++++++++++++++++++++----------- 1 file changed, 249 insertions(+), 120 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 6921072df83..52a7be7ec78 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,179 +1,308 @@ -/** @file - - A brief file description - - @section license License +/** + * @file CompileParseRules.cc + * + * @brief Build-time utility for generating ParseRules character classification tables. + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This standalone C++ program generates static lookup tables used by the + * Traffic Server runtime for O(1) character classification and case conversion. + * It is executed during the build process and is **not part of the runtime library**. + * + * The generated tables are written to the following files: + * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * + * These files are typically included as static data in @c ParseRules.cc. + * + * @note This tool uses modern C++ features, including: + * - @c std::string for type-safe string handling. + * - @c std::ofstream for RAII-based file I/O. + * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. + * + * @see ParseRules.h for character classification function declarations. + * @see ParseRules.cc for runtime usage of generated tables. + */ - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at +#include +#include +#include +#include +#include +#include - http://www.apache.org/licenses/LICENSE-2.0 +#include "tscore/ParseRules.h" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +/** + * @brief Placeholder for the character type bitmask table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCType table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCType file. */ +const uint32_t parseRulesCType[256] = {0}; -#define COMPILE_PARSE_RULES +/** + * @brief Placeholder for the uppercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToUpper table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToUpper file. + */ +const uint8_t parseRulesCTypeToUpper[256] = {0}; -#include "tscore/ParseRules.h" +/** + * @brief Placeholder for the lowercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToLower table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToLower file. + */ +const uint8_t parseRulesCTypeToLower[256] = {0}; -const unsigned int parseRulesCType[256] = {0}; -const char parseRulesCTypeToUpper[256] = {0}; -const char parseRulesCTypeToLower[256] = {0}; +/** + * @brief Working array for character type bitmasks. + * + * Temporary storage for computed character classification bitmasks. 
+ * Populated by @c main() and written to the @c ParseRulesCType output file. + * + * @note Each entry is a 32-bit bitmask where each bit represents a character + * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). + */ +uint32_t tparseRulesCType[256]; -unsigned int tparseRulesCType[256]; -char tparseRulesCTypeToUpper[256]; -char tparseRulesCTypeToLower[256]; +/** + * @brief Working array for uppercase character conversion. + * + * Temporary storage for uppercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToUpper output file. + */ +uint8_t tparseRulesCTypeToUpper[256]; -#include -#include -#include "tscore/ink_string.h" +/** + * @brief Working array for lowercase character conversion. + * + * Temporary storage for lowercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToLower output file. + */ +uint8_t tparseRulesCTypeToLower[256]; -static char * -uint_to_binary(unsigned int u) +/** + * @brief Convert a 32-bit unsigned integer to its binary string representation. + * + * Creates a 32-character string representing the binary value of the input, + * with leading zeros. The string is in **big-endian format** (MSB first). + * + * @param u The 32-bit unsigned integer to convert. + * @return @c std::string containing the 32-character binary representation. + * Example: @c uint_to_binary(5) returns @c "00000000000000000000000000000101". + * + * @note This function is **thread-safe** because it returns a new @c std::string + * by value (no shared static buffer). + */ +std::string +uint_to_binary(uint32_t u) { - int i; - static char buf[33]; - for (i = 0; i < 32; i++) { - buf[i] = ((u & (1 << (31 - i))) ? '1' : '0'); + std::string buf(32, '0'); + for (uint8_t i = 0; i < 32; i++) { + if (u & (1 << (31 - i))) { + buf[i] = '1'; + } } - buf[32] = '\0'; - return (buf); + return buf; } +/** + * @brief Generates character classification lookup tables. 
+ * + * This function performs the following steps: + * + * 1. For each ASCII character (0-255): + * - Initializes the working arrays (@c tparseRulesCType, @c tparseRulesCTypeToUpper, + * @c tparseRulesCTypeToLower). + * - Tests the character against all @c ParseRules classification functions. + * - Sets the corresponding bit in @c tparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c tparseRulesCTypeToUpper + * and @c tparseRulesCTypeToLower. + * + * 2. Writes three output files using @c std::ofstream (RAII): + * - @c ParseRulesCType: Contains hexadecimal bitmask values and their binary + * representations. Format: C-style array initialization. + * - @c ParseRulesCTypeToUpper: Contains uppercase conversion values for each + * character. Format: @c (uint8_t)X, + * - @c ParseRulesCTypeToLower: Contains lowercase conversion values for each + * character. Format: @c (uint8_t)X, + * + * @return 0 on successful completion. + * + * @note The classification functions from @c ParseRules.h include: + * - **Character types**: @c is_char, @c is_alpha, @c is_digit, @c is_alnum, + * @c is_ctl, @c is_ws, @c is_hex, @c is_pchar, @c is_token, @c is_uri, + * @c is_sep, @c is_empty. + * - **Case types**: @c is_upalpha, @c is_loalpha. + * - **Safety/encoding**: @c is_safe, @c is_unsafe, @c is_reserved, + * @c is_unreserved, @c is_national. + * - **Special categories**: @c is_punct, @c is_tspecials, @c is_end_of_url. + * - **Whitespace variants**: @c is_spcr, @c is_splf, @c is_wslfcr, @c is_eow. + * - **HTTP/MIME**: @c is_http_field_name, @c is_http_field_value, + * @c is_mime_sep, @c is_control. + * + * @see ParseRules.h for detailed descriptions of each classification function. + * @see ParseRules.cc for runtime usage of the generated tables. 
+ */ int main() { - int c; - for (c = 0; c < 256; c++) { - tparseRulesCType[c] = 0; - tparseRulesCTypeToLower[c] = ParseRules::ink_tolower(c); - tparseRulesCTypeToUpper[c] = ParseRules::ink_toupper(c); + for (uint16_t i = 0; i < 256; i++) { + tparseRulesCType[i] = 0; + tparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); + tparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); - if (ParseRules::is_char(c)) { - tparseRulesCType[c] |= is_char_BIT; + if (ParseRules::is_char(i)) { + tparseRulesCType[i] |= is_char_BIT; } - if (ParseRules::is_upalpha(c)) { - tparseRulesCType[c] |= is_upalpha_BIT; + if (ParseRules::is_upalpha(i)) { + tparseRulesCType[i] |= is_upalpha_BIT; } - if (ParseRules::is_loalpha(c)) { - tparseRulesCType[c] |= is_loalpha_BIT; + if (ParseRules::is_loalpha(i)) { + tparseRulesCType[i] |= is_loalpha_BIT; } - if (ParseRules::is_alpha(c)) { - tparseRulesCType[c] |= is_alpha_BIT; + if (ParseRules::is_alpha(i)) { + tparseRulesCType[i] |= is_alpha_BIT; } - if (ParseRules::is_digit(c)) { - tparseRulesCType[c] |= is_digit_BIT; + if (ParseRules::is_digit(i)) { + tparseRulesCType[i] |= is_digit_BIT; } - if (ParseRules::is_ctl(c)) { - tparseRulesCType[c] |= is_ctl_BIT; + if (ParseRules::is_ctl(i)) { + tparseRulesCType[i] |= is_ctl_BIT; } - if (ParseRules::is_ws(c)) { - tparseRulesCType[c] |= is_ws_BIT; + if (ParseRules::is_ws(i)) { + tparseRulesCType[i] |= is_ws_BIT; } - if (ParseRules::is_hex(c)) { - tparseRulesCType[c] |= is_hex_BIT; + if (ParseRules::is_hex(i)) { + tparseRulesCType[i] |= is_hex_BIT; } - char cc = c; + + char cc = static_cast(i); + if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[c] |= is_pchar_BIT; + tparseRulesCType[i] |= is_pchar_BIT; } - if (ParseRules::is_extra(c)) { - tparseRulesCType[c] |= is_extra_BIT; + if (ParseRules::is_extra(i)) { + tparseRulesCType[i] |= is_extra_BIT; } - if (ParseRules::is_safe(c)) { - tparseRulesCType[c] |= is_safe_BIT; + if (ParseRules::is_safe(i)) { + tparseRulesCType[i] |= 
is_safe_BIT; } - if (ParseRules::is_unsafe(c)) { - tparseRulesCType[c] |= is_unsafe_BIT; + if (ParseRules::is_unsafe(i)) { + tparseRulesCType[i] |= is_unsafe_BIT; } - if (ParseRules::is_national(c)) { - tparseRulesCType[c] |= is_national_BIT; + if (ParseRules::is_national(i)) { + tparseRulesCType[i] |= is_national_BIT; } - if (ParseRules::is_reserved(c)) { - tparseRulesCType[c] |= is_reserved_BIT; + if (ParseRules::is_reserved(i)) { + tparseRulesCType[i] |= is_reserved_BIT; } - if (ParseRules::is_unreserved(c)) { - tparseRulesCType[c] |= is_unreserved_BIT; + if (ParseRules::is_unreserved(i)) { + tparseRulesCType[i] |= is_unreserved_BIT; } - if (ParseRules::is_punct(c)) { - tparseRulesCType[c] |= is_punct_BIT; + if (ParseRules::is_punct(i)) { + tparseRulesCType[i] |= is_punct_BIT; } - if (ParseRules::is_end_of_url(c)) { - tparseRulesCType[c] |= is_end_of_url_BIT; + if (ParseRules::is_end_of_url(i)) { + tparseRulesCType[i] |= is_end_of_url_BIT; } - if (ParseRules::is_tspecials(c)) { - tparseRulesCType[c] |= is_tspecials_BIT; + if (ParseRules::is_tspecials(i)) { + tparseRulesCType[i] |= is_tspecials_BIT; } - if (ParseRules::is_spcr(c)) { - tparseRulesCType[c] |= is_spcr_BIT; + if (ParseRules::is_spcr(i)) { + tparseRulesCType[i] |= is_spcr_BIT; } - if (ParseRules::is_splf(c)) { - tparseRulesCType[c] |= is_splf_BIT; + if (ParseRules::is_splf(i)) { + tparseRulesCType[i] |= is_splf_BIT; } - if (ParseRules::is_wslfcr(c)) { - tparseRulesCType[c] |= is_wslfcr_BIT; + if (ParseRules::is_wslfcr(i)) { + tparseRulesCType[i] |= is_wslfcr_BIT; } - if (ParseRules::is_eow(c)) { - tparseRulesCType[c] |= is_eow_BIT; + if (ParseRules::is_eow(i)) { + tparseRulesCType[i] |= is_eow_BIT; } - if (ParseRules::is_token(c)) { - tparseRulesCType[c] |= is_token_BIT; + if (ParseRules::is_token(i)) { + tparseRulesCType[i] |= is_token_BIT; } - if (ParseRules::is_uri(c)) { - tparseRulesCType[c] |= is_uri_BIT; + if (ParseRules::is_uri(i)) { + tparseRulesCType[i] |= is_uri_BIT; } - if 
(ParseRules::is_sep(c)) { - tparseRulesCType[c] |= is_sep_BIT; + if (ParseRules::is_sep(i)) { + tparseRulesCType[i] |= is_sep_BIT; } - if (ParseRules::is_empty(c)) { - tparseRulesCType[c] |= is_empty_BIT; + if (ParseRules::is_empty(i)) { + tparseRulesCType[i] |= is_empty_BIT; } - if (ParseRules::is_alnum(c)) { - tparseRulesCType[c] |= is_alnum_BIT; + if (ParseRules::is_alnum(i)) { + tparseRulesCType[i] |= is_alnum_BIT; } - if (ParseRules::is_space(c)) { - tparseRulesCType[c] |= is_space_BIT; + if (ParseRules::is_space(i)) { + tparseRulesCType[i] |= is_space_BIT; } - if (ParseRules::is_control(c)) { - tparseRulesCType[c] |= is_control_BIT; + if (ParseRules::is_control(i)) { + tparseRulesCType[i] |= is_control_BIT; } - if (ParseRules::is_mime_sep(c)) { - tparseRulesCType[c] |= is_mime_sep_BIT; + if (ParseRules::is_mime_sep(i)) { + tparseRulesCType[i] |= is_mime_sep_BIT; } - if (ParseRules::is_http_field_name(c)) { - tparseRulesCType[c] |= is_http_field_name_BIT; + if (ParseRules::is_http_field_name(i)) { + tparseRulesCType[i] |= is_http_field_name_BIT; } - if (ParseRules::is_http_field_value(c)) { - tparseRulesCType[c] |= is_http_field_value_BIT; + if (ParseRules::is_http_field_value(i)) { + tparseRulesCType[i] |= is_http_field_value_BIT; } } - FILE *fp = fopen("ParseRulesCType", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "/* %3d (%c) */\t", c, (isprint(c) ? c : '?')); - fprintf(fp, "0x%08X%c\t\t", tparseRulesCType[c], (c != 255 ? ',' : ' ')); - fprintf(fp, "/* [%s] */\n", uint_to_binary((tparseRulesCType[c]))); + // Write ParseRulesCType (bitmask table with binary representation) + std::ofstream fp("ParseRulesCType"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + fp << "0x" << std::hex << std::setw(8) << std::setfill('0') << tparseRulesCType[i] << (i != 255 ? 
",\t\t" : "\t\t"); + fp << "/* [" << uint_to_binary(tparseRulesCType[i]) << "] */\n"; } - fclose(fp); - fp = fopen("ParseRulesCTypeToUpper", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToUpper[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToUpper (uppercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToUpper"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); - fp = fopen("ParseRulesCTypeToLower", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToLower[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToLower (lowercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToLower"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); return (0); } From 7db17e9a0b0d234059deccdad91a5b6862eaba0c Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 13:08:24 +0100 Subject: [PATCH 2/6] fix: use unsigned literal in bit shift to avoid undefined behaviour - Replace `1` with `1U` in bit shift operation to prevent undefined behaviour when shifting into sign bit of signed integer - Shifting a signed integer (e.g., `1 << 31`) into its sign bit invokes undefined behaviour per the C++ standard - Using an unsigned literal (`1U`) ensures well-defined behaviour for all shift amounts --- include/tscore/ParseRules.h | 5 +- lib/swoc/include/swoc/TextView.h | 3972 +++++++++++++++--------------- lib/swoc/src/TextView.cc | 383 +-- src/tscore/CMakeLists.txt | 74 +- src/tscore/CompileParseRules.cc | 2 +- src/tscore/ParseRules.cc | 42 +- 6 files changed, 2327 insertions(+), 2151 deletions(-) diff --git a/include/tscore/ParseRules.h b/include/tscore/ParseRules.h index 9299b411c94..713948e673e 100644 --- a/include/tscore/ParseRules.h +++ b/include/tscore/ParseRules.h @@ -70,8 
+70,9 @@ using CTypeResult = unsigned int; #define is_http_field_value_BIT (((CTypeResult)1) << 31) extern const CTypeResult parseRulesCType[]; -extern const char parseRulesCTypeToUpper[]; -extern const char parseRulesCTypeToLower[]; +// (GS) Mod +extern const uint8_t parseRulesCTypeToUpper[]; +extern const uint8_t parseRulesCTypeToLower[]; class ParseRules { diff --git a/lib/swoc/include/swoc/TextView.h b/lib/swoc/include/swoc/TextView.h index ed017da673b..74c78daa3b5 100644 --- a/lib/swoc/include/swoc/TextView.h +++ b/lib/swoc/include/swoc/TextView.h @@ -12,17 +12,23 @@ */ #pragma once +#include #include +#include #include +#include #include +#include #include +#include #include #include -#include +#include +#include #include -#include "swoc/swoc_version.h" #include "swoc/string_view_util.h" +#include "swoc/swoc_version.h" // For no apparent reason, g++ 11 complains about array bound violations with either suffix_at or // assign, the error message is too vague for me to be sure - it doesn't even provide the location of @@ -32,2082 +38,2201 @@ #pragma GCC diagnostic ignored "-Warray-bounds" #endif -namespace swoc { inline namespace SWOC_VERSION_NS { +namespace swoc +{ +inline namespace SWOC_VERSION_NS +{ + + class TextView; + + /** A set of characters. + * + */ + class CharSet + { + using self_type = CharSet; + + public: + /** Construct from character sequence. + * + * @param chars Character sequence. + * + * The charset becomes @c true for every character in the sequence. + */ + constexpr CharSet(TextView const &chars); + + /** Check if character is in the charset. + * + * @param c Character to check. + * @return @c true if @a c is in the charset, @c false if not. + */ + bool operator()(unsigned char c) const; + + /** Check if character is in the charset. + * + * @param c Character to check. + * @return @c true if @a c is in the charset, @c false if not. 
+ */ + bool operator()(char c) const; + + protected: + std::bitset::max() + 1> _chars; + }; -class TextView; + /** A read only view of a contiguous piece of memory. + + A @c TextView does not own the memory to which it refers, it is simply a view of part of some + (presumably) larger memory object. The purpose is to allow working in a read only way a specific + part of the memory. A classic example for ATS is working with HTTP header fields and values + which need to be accessed independently but preferably without copying. A @c TextView supports + this style. + + @note To simplify the interface there is no constructor taking only a character pointer. + Constructors require either a literal string or an explicit length. This avoids ambiguities which + are much more annoying than explicitly calling @c strlen on a character pointer. + + @internal For construction, assignment operator, and @c assign method, there are a lot of overloads + because users would like to be able to use the same sort of arguments for all of these. This includes + - self / parent type + - @c std::string + - literal string + - C-string pointer + - pointer and count + - begin/end style pointers. + - character containers that have the STL standard @c size and @c data methods. + */ + class TextView : public std::string_view + { + using self_type = TextView; ///< Self reference type. + using super_type = std::string_view; ///< Parent type. + + public: + /// Default constructor (empty buffer). + constexpr TextView() noexcept = default; + + /// Construct from a @c std::string_view or @c TextView + /// @note This provides a user defined conversion from @c std::string_view to @c TextView. The + /// reverse conversion is implicit in @c TextView being a subclass of @c std::string_view. + constexpr TextView(super_type const &that) noexcept; + + /** Construct from pointer and size. + * + * @param ptr Pointer to first character. + * @param n Number of characters.
+ * + * If @a n is @c npos then @c ptr is presumed to be a C string and checked for length. If @c ptr + * is @c nullptr the length is 0. Otherwise @c strlen is used to calculate the length. + */ + constexpr TextView(char const *ptr, size_t n) noexcept; + + /** Construct from pointer and size. + * + * @param ptr Pointer to first character. + * @param n Number of characters. + * + * If @a n is negative then @c ptr is presumed to be a C string and checked for length. If @c ptr + * is @c nullptr the length is 0. Otherwise @c strlen is used to calculate the length. + */ + constexpr TextView(char const *ptr, int n) noexcept; + + /** Construct from a half open range [first, last). + * + * @param first Start of half open range. + * @param last End of half open range. + * + * The character at @a first will be in the view, but the character at @a last will not. + * + * @note @c explicit to avoid interpreting a string initializer list as a view. + * + * @internal For the love of Turing, WHY DID YOU DO THIS? + * + * Well, esteemed reader, because the C++ standard doesn't have a better way to support overloads + * that handle character pointers and literal strings differently. If the parameters were simply + * (char const *, char const *) then a construct like { "really", "broken" } can + * be interpreted as a @c TextView because the elements implicitly convert to char const + * *. This makes no sense and creates some @b very annoying ambiguities for lists of strings + * if there are exactly two in the list. See @c Lexicon for an example. + * + * The template itself does the check to make sure it's a character @b pointer and not an array. Arrays + * are handled by a different constructor so this only disables constructing from two char arrays + * which IMHO makes no sense and should be forbidden.
+ */ + template + explicit TextView( + T first, + std::enable_if_t && std::is_pointer_v && std::is_convertible_v, T> last) noexcept + : super_type(first, last - first) + { + } -/** A set of characters. - * - */ -class CharSet { - using self_type = CharSet; + /** Construct from any character container following STL standards. + * + * @tparam C Container type. + * @param c container + * + * The container type must have the methods @c data and @c size which must return values convertible + * to @c char @c const @c * and @c size_t respectively. + */ + template ().data()), char const *> && + std::is_convertible_v().size()), size_t>, + void>> + constexpr TextView(C const &c); + + /** Construct from literal string or array. + + All elements of the array are included in the view unless the last element is nul, in which case it is elided. + If this is inappropriate then a constructor with an explicit size should be used. + + @code + TextView a("A literal string"); + @endcode + The last character in @a a will be 'g'. + */ + template constexpr TextView(const char (&s)[N]) noexcept; + + /** Construct from a C-string. + * + * @param src A pointer to a C-string. + * + * The view does not include the terminating nul. + * + * @internal @a src a reference because it is otherwise ambiguous with the literal constructor. + */ + TextView(char *&src) : super_type(src, src ? strlen(src) : 0) {} + + /** Construct from a const C-string. + * + * @param src Pointer to a const C-string. + * + * The view does not include the terminating nul. + * + * @internal @a src a reference because it is otherwise ambiguous with the literal constructor. + */ + TextView(char const *&src) : super_type(src, src ? strlen(src) : 0) {} + + /** Construct from nullptr. + This implicitly makes the length 0. + */ + constexpr TextView(std::nullptr_t) noexcept; + + /// Construct from @c std::string, referencing the entire string contents. 
+ /// @internal This can't be @c constexpr because this uses methods in @c std::string that may + /// not be @c constexpr. + TextView(std::string const &str) noexcept; + + /// Assign a super class instance, @c std::string_view to @a this. + self_type &operator=(super_type const &that); + + /// Assign a constant array to @a this. + /// @note If the last character of @a s is a nul byte, it is not included in the view. + template self_type &operator=(const char (&s)[N]); + + /// Assign from C-string @a s. + self_type &operator=(char *&s); + /// Assign from C-string @a s. + self_type &operator=(char const *&s); + + /// Assign from a @c std::string. + self_type &operator=(const std::string &s); + + /** Assign a view of the @a c_str + * + * @param c_str Pointer to C string. + * @return @a this + * + * @note @c c_str must be a null terminated string. The null byte is not included in the view. + */ + self_type &assign(char *&c_str); + + /** Assign a view of the @a c_str + * + * @param c_str Pointer to C string. + * @return @a this + * + * @note @c c_str must be a null terminated string. The null byte is not included in the view. + */ + self_type &assign(char const *&c_str); + + /** Assign from a pointer and size. + * + * @param ptr Pointer to first character of the view. + * @param n Length of the view. + * @return @a this + * + * If @a n is @a npos then @c strlen is used to determine the size of the view. + */ + self_type &assign(char const *ptr, size_t n); + + /** Assign the half open view [ @a b , @a e ) to @a this + * + * @param b First character in the view. + * @param e One character after the last character in the view. + * @return @a this + */ + self_type &assign(char const *b, char const *e); + + /// Explicitly set the view from a @c std::string + self_type &assign(std::string const &s); + + /** Assign literal string or array. + + * All elements of the array are included in the view unless the last element is nul, in which case it is elided.
+ * If this is inappropriate then a constructor with an explicit size should be used. + * + * @code + * tv.assign("A literal string"); + * @endcode + * The last character in @a tv will be 'g'. + */ + template self_type &assign(const char (&s)[N]) noexcept; + + /** Assign from any character container following STL standards. + * + * @tparam C Container type. + * @param c container + * + * The container type must have the methods @c data and @c size which must return values convertible + * to @c char @c const @c * and @c size_t respectively. + */ + template ().data()), char const *> && + std::is_convertible_v().size()), size_t>, + void>> + constexpr self_type & + assign(C const &c) + { + return this->assign(c.data(), c.size()); + } -public: - /** Construct from character sequence. - * - * @param chars Character sequence. - * - * The charset becomes @c true for every character in the sequence. - */ - constexpr CharSet(TextView const &chars); + /** Dereference operator. + + @note This allows the view to be used as if it were a character iterator to a null terminated + string which is handy for several other STL interfaces. + + @return The first byte in the view, or a nul character if the view is empty. + */ + /// @return The first byte in the view. + constexpr char operator*() const; + + /** Discard the first byte of the view. + * + * @return @a this. + */ + self_type &operator++(); + + /** Discard the first byte of the view. + * + * @return The view before discarding the byte. + */ + self_type operator++(int); + + /** Discard the first @a n bytes of the view. + * + * Equivalent to @c remove_prefix(n). + * @return @a this + */ + self_type &operator+=(size_t n); + + /// Check for empty view. + /// @return @c true if the view has a nullptr @b or zero size. + constexpr bool operator!() const noexcept; + + /// Check for non-empty view. + /// @return @c true if the view refers to a non-empty range of bytes. 
+ explicit constexpr operator bool() const noexcept; + + /// Clear the view (become an empty view). + self_type &clear(); + + /// Get the offset of the first character for which @a pred is @c true. + template size_t find_if(F const &pred) const; + /// Get the offset of the last character for which @a pred is @c true. + template size_t rfind_if(F const &pred) const; + + /** Remove bytes that match @a c from the start of the view. + * + * @return @a this + */ + self_type &ltrim(char c); + + /** Remove bytes from the start of the view that are in @a delimiters. + * + * @return @a this + */ + self_type &ltrim(CharSet const &delimiters); + + /** Remove bytes from the start of the view that are in @a delimiters. + * + * @return @a this + */ + self_type &ltrim(std::string_view const &delimiters); + + /** Remove bytes from the start of the view that are in @a delimiters. + * + * @internal This is needed to avoid collisions with the templated predicate style. + * + * @return @c *this + */ + self_type &ltrim(const char *delimiters); + + /** Remove bytes from the start of the view for which @a pred is @c true. + @a pred must be a functor taking a @c char argument and returning @c bool. + @return @c *this + */ + template self_type &ltrim_if(F const &pred); + + /** Remove bytes that match @a c from the end of the view. + * + * @return @a this + */ + self_type &rtrim(char c); + + /** Remove bytes from the end of the view that are in @a delimiters. + * + * @return @a this + */ + self_type &rtrim(CharSet const &delimiters); + + /** Remove bytes from the end of the view that are in @a delimiters. + * @return @a this + */ + self_type &rtrim(std::string_view const &delimiters); + + /** Remove bytes from the end of the view for which @a pred is @c true. + * + * @a pred must be a functor taking a @c char argument and returning @c bool. + * + * @return @c *this + */ + template self_type &rtrim_if(F const &pred); + + /** Remove bytes that match @a c from the start and end of this view.
+ * + * @return @a this + */ + self_type &trim(char c); + + /** Remove bytes from the start and end of the view that are in @a delimiters. + * @return @a this + */ + self_type &trim(CharSet const &delimiters); + + /** Remove bytes from the start and end of the view that are in @a delimiters. + * @return @a this + */ + self_type &trim(std::string_view const &delimiters); + + /** Remove bytes from the start and end of the view that are in @a delimiters. + @internal This is needed to avoid collisions with the templated predicate style. + @return @c *this + */ + self_type &trim(const char *delimiters); + + /** Remove bytes from the start and end of the view for which @a pred is @c true. + @a pred must be a functor taking a @c char argument and returning @c bool. + @return @c *this + */ + template self_type &trim_if(F const &pred); + + /** Get a view of the first @a n bytes. + * + * @param n Number of chars in the prefix. + * @return A view of the first @a n characters in @a this, bounded by the size of @a this. + */ + constexpr self_type prefix(size_t n) const noexcept; + + /** Get a view of a prefix bounded by @a c. + * + * @param c Delimiter character. + * @return A view of the prefix bounded by @a c, or all of @a this if @a c is not found. + * @note The character @a c is not included in the returned view. + */ + self_type prefix_at(char c) const; + + /** Get a view of a prefix bounded by a character in @a delimiters. + * + * @param delimiters A set of characters. + * + * @return A view of the prefix bounded by any character in @a delimiters, or empty if none are + * found. + * + * @note The delimiter character is not included in the returned view. + */ + self_type prefix_at(std::string_view const &delimiters) const; + + /** Get a view of a prefix bounded by a character predicate @a pred. + * + * @a pred must be a functor which takes a @c char argument and returns @c bool. 
Each character in + * @a this is tested by @a pred and the prefix is delimited by the first character for which @a + * pred is @c true. + * + * @param pred A character predicate. + * + * @return A view of the prefix bounded by @a pred or empty if @a pred is not @c true for any + * character. + * + * @note The delimiting character is not included in the returned view. + */ + template self_type prefix_if(F const &pred) const; + + /** Remove bytes from the start of the view. + * + * @param n Number of bytes to remove. + * @return @a this. + */ + self_type &remove_prefix(size_t n); + + /** Remove bytes from the end of the view. + * + * @param n Number of bytes to remove. + * @return @a this. + */ + self_type &remove_suffix(size_t n); + + /** Remove the leading characters of @a this up to and including @a c. + * + * @param c Delimiter character. + * @return @a this. + * @note The first occurrence of character @a c is removed along with all preceding characters, or + * the view is cleared if @a c is not found. + */ + self_type &remove_prefix_at(char c); + + /** Remove the leading characters of @a this up to and including the first character matching @a delimiters. + * + * @param delimiters Characters to match. + * @return @a this. + * @note The first occurrence of any character in @a delimiters is removed along with all preceding + * characters, or the view is cleared if none are found. + */ + self_type &remove_prefix_at(std::string_view const &delimiters); + + /** Remove the leading characters up to and including the character selected by @a pred. + * + * @tparam F Predicate function type. + * @param pred The predicate instance. + * @return @a this. + * + * Characters are removed until @a pred returns @c true. The matching character is also removed. + */ + template self_type &remove_prefix_if(F const &pred); + + /** Remove and return a prefix of size @a n. + * + * @param n Size of the prefix.
+ * @return The first @a n bytes of @a this if @a n is in @a this, otherwise an empty view. + * + * The prefix is removed and returned if the requested prefix is no larger than @a this, + * otherwise @a this is not modified. + * + * @note The character at offset @a n is discarded if @a this is modified. + * + * @see @c take_prefix + */ + self_type split_prefix(size_t n); + + /** Remove and return a prefix bounded by the first occurrence of @a c. + * + * @param c The character to match. + * @return The prefix bounded by @a c if @a c is found, an empty view if not. + * + * The prefix is removed and returned if @a c is found, otherwise @a this is not modified. + * + * @note The delimiter character is discarded if @a this is modified. + * + * @see @c take_prefix + */ + self_type split_prefix_at(char c); + + /** Remove and return a prefix bounded by the first occurrence of any of @a delimiters. + * + * @param delimiters The characters to match. + * @return The prefix bounded by a delimiter if one is found, otherwise an empty view. + * + * The prefix is removed and returned if a @a delimiter is found, otherwise @a this is not modified. + * + * @note The matching character is discarded if @a this is modified. + * + * @see @c take_prefix_at + */ + self_type split_prefix_at(std::string_view const &delimiters); + + /** Remove and return a prefix bounded by the first character that satisfies @a pred. + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The prefix bounded by the first character satisfying @a pred. + * + * The prefix is removed and returned if a character satisfying @a pred is found, otherwise + * @a this is not modified. + * + * @note The matching character is discarded if @a this is modified. + * + * @see @c take_prefix_if + */ + template self_type split_prefix_if(F const &pred); + + /** Remove and return the first @a n characters. + * + * @param n Size of the return prefix. 
+ * @return The first @a n bytes of @a this if @a n is in @a this, otherwise all of @a this. + * + * The prefix is removed and returned if the requested prefix is no larger than @a this, + * otherwise all of @a this is removed and returned. + * + * @note The character at offset @a n is discarded if @a n is within the bounds of @a this. + * + * @see @c split_prefix + */ + self_type take_prefix(size_t n); + + /** Remove and return a prefix bounded by the first occurrence of @a c. + * + * @param c The character to match. + * @return The prefix bounded by @a c if @a c is found, all of @a this if not. + * + * The prefix is removed and returned if @a c is found, otherwise all of @a this is removed and + * returned. + * + * @note The character at offset @a n is discarded if found. + * + * @see @c split_prefix_at + */ + self_type take_prefix_at(char c); + + /** Remove and return a prefix bounded by the first occurrence of any of @a delimiters. + * + * @param delimiters The characters to match. + * @return The prefix bounded by a delimiter if one is found, otherwise all of @a this. + * + * The prefix is removed and returned if a @a delimiter is found, otherwise all of @a this is + * removed and returned. + * + * @note The matching character is discarded if found. + * + * @see @c split_prefix_at + */ + self_type take_prefix_at(std::string_view const &delimiters); + + /** Remove and return a prefix bounded by the first character that satisfies @a pred. + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The prefix bounded by the first character satisfying @a pred, or all of @a this if none + * is found. + * + * The prefix is removed and returned if a character satisfying @a pred is found, otherwise + * all of @a this is removed and returned. + * + * @note The matching character is discarded if found. 
+ * + * @see @c split_prefix_if + */ + template self_type take_prefix_if(F const &pred); + + /** Remove and return a prefix of characters satisfying @a pred + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The prefix of characters that satisfy @a pred. + * + * The returned prefix is removed from @a this. That prefix may be empty if the first character + * does not satisfy @a pred. + * + * @note This is very similar to @c ltrim_if but returns the removed text instead of the modified + * view. + */ + template self_type clip_prefix_of(F const &pred); + + /** Get a view of the last @a n bytes. + * + * @param n Number of chars in the suffix. + * @return A view of the last @a n characters in @a this, bounded by the size of @a this. + */ + constexpr self_type suffix(size_t n) const noexcept; + + /** Get a view of a suffix bounded by @a c. + * + * @param c Delimiter character. + * @return A view of the suffix bounded by @a c, or all of @a this if @a c is not found. + * @note The character @a c is not included in the returned view. + */ + self_type suffix_at(char c) const; + + /** Get a view of a suffix bounded by a character in @a delimiters. + * + * @param delimiters A set of characters. + * + * @return A view of the suffix bounded by any character in @a delimiters, or empty if none are + * found. + * + * @note The delimiter character is not included in the returned view. + */ + self_type suffix_at(std::string_view const &delimiters) const; + + /** Get a view of a suffix bounded by a character predicate @a pred. + * + * @a pred must be a functor which takes a @c char argument and returns @c bool. Each character in + * @a this is tested by @a pred and the suffix is delimited by the last character for which @a + * pred is @c true. + * + * @param pred A character predicate. + * + * @return A view of the suffix bounded by @a pred or empty if @a pred is not @c true for any + * character.
+ * + * @note The delimiting character is not included in the returned view. + */ + template self_type suffix_if(F const &pred) const; + + /** Remove the trailing characters of @a this up to and including @a c. + * + * @param c Delimiter character. + * @return @a this. + * + * @note The last occurrence of character @a c is removed along with all succeeding characters, or + * the view is cleared if @a c is not found. + */ + self_type &remove_suffix_at(char c); + + /** Remove the trailing characters of @a this up to and including the last character matching @a delimiters. + * + * @param delimiters Characters to match. + * @return @a this. + * @note The last occurrence of any character in @a delimiters is removed along with all succeeding + * characters, or the view is cleared if none are found. + */ + self_type &remove_suffix_at(std::string_view const &delimiters); + + /** Remove the trailing characters up to and including the character selected by @a pred. + * + * @tparam F Predicate function type. + * @param pred The predicate instance. + * @return @a this. + * + * If predicate is never true the view is cleared. + */ + template self_type &remove_suffix_if(F const &pred); + + /** Remove and return a suffix of size @a n. + * + * @param n Size of the suffix. + * @return The last @a n bytes of @a this if @a n is in @a this, otherwise an empty view. + * + * The suffix is removed and returned if the requested suffix is no larger than @a this, + * otherwise @a this is not modified. + * + * @note The character at offset @a n is discarded if @a this is modified. + * + * @see @c take_suffix + */ + self_type split_suffix(size_t n); + + /** Remove and return a suffix bounded by the last occurrence of @a c. + * + * @param c The character to match. + * @return The suffix bounded by @a c if @a c is found, an empty view if not. + * + * The suffix is removed and returned if @a c is found, otherwise @a this is not modified.
+ * + * @note The character at offset @a n is discarded if @a this is modified. + * + * @see @c take_suffix_at + */ + self_type split_suffix_at(char c); + + /** Remove and return a suffix bounded by the last occurrence of any of @a delimiters. + * + * @param delimiters The characters to match. + * @return The suffix bounded by a delimiter if found, an empty view if none found. + * + * The suffix is removed and returned if delimiter is found, otherwise @a this is not modified. + * + * @note The delimiter character is discarded if @a this is modified. + * + * @see @c take_suffix_at + */ + self_type split_suffix_at(std::string_view const &delimiters); + + /** Remove and return a suffix bounded by the last character that satisfies @a pred. + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The suffix bounded by the first character satisfying @a pred if found, otherwise @a this + * is not modified. + * + * The prefix is removed and returned if a character satisfying @a pred if found, otherwise + * @a this is not modified. + * + * @note The matching character is discarded if @a this is modified. + * + * @see @c take_suffix_if + */ + template self_type split_suffix_if(F const &pred); + + /** Remove and return a suffix of size @a n. + * + * @param n Size of the suffix. + * @return The first @a n bytes of @a this if @a n is in @a this, otherwise all of @a this. + * + * The returned suffix is removed from @a this, along with the character at offset @a n if present. + * + * @see @c split_suffix + */ + self_type take_suffix(size_t n); + + /** Remove and return a suffix bounded by the last occurrence of @a c. + * + * @param c The character to match. + * @return The suffix bounded by @a c if @a c is found, all of @a this if not. + * + * The returned suffix is removed from @a this, along with the delimiter character if found. 
+ * + * @see @c split_suffix_at + */ + self_type take_suffix_at(char c); + + /** Remove and return a suffix bounded by the last occurrence of any of @a delimiters. + * + * @param delimiters The characters to match. + * @return The suffix bounded by a delimiter if @a c is found, all of @a this if not. + * + * The returned suffix is removed from @a this, along with the delimiter character if found. + * + * @see @c split_suffix_at + */ + self_type take_suffix_at(std::string_view const &delimiters); + + /** Remove and return a suffix bounded by the last character that satisfies @a pred. + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The suffix bounded by the first character satisfying @a pred if found, otherwise all of @a this. + * + * @note The matching character is discarded if found. + * + * @see @c split_suffix_if + */ + template self_type take_suffix_if(F const &pred); + + /** Remove and return a suffix of characters satisfying @a pred + * + * @tparam F Predicate functor type. + * @param pred A function taking @c char and returning @c bool. + * @return The suffix of characters that satisfy @a pred. + * + * The returned suffix is removed from @a this. That suffix may be empty if the last character + * does not satisfy @a pred. + * + * @note This is very similar to @c rtrim_if but returns the removed text instead of the modified + * view. + */ + template self_type clip_suffix_of(F const &pred); + + /** Get a view of part of this view. + * + * @param pos Offset of first byte in the new view. + * @param count Number of bytes in the view. + * @return The view starting at @a pos for @a count bytes. + * + * The returned view is clipped by @a this - that is, it will not extend beyond the original view. + * @a count is reduced such that it covers only data in @a this. + * + * @note This is provided primarily for co-variance, i.e. the returned view is a @c TextView + * instead of a @c std::string_view. 
+ */ + constexpr self_type substr(size_type pos = 0, size_type count = npos) const noexcept; + + /** Check if the view begins with a specific @a prefix. + * + * @param prefix String to check against @a this. + * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. + * @internal C++20 preview. + */ + bool starts_with(std::string_view const &prefix) const noexcept; + + /** Check if the view begins with a specific @a prefix. + * + * @param prefix String to check against @a this. + * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. + * @internal C++20 preview. + */ + bool starts_with(char const *prefix) const; + + /** Check if the view begins with the character @c c. + * + * @param c Character to check. + * @return @c true if the string is non-empty and the first character is @c c. + * @internal C++20 preview. + */ + bool starts_with(char c) const noexcept; + + /** Check if the view begins with a specific @a prefix, ignoring case. + * + * @param prefix String to check against @a this. + * @return @c true if this->prefix(prefix.size()) == prefix without regard to case, @c false otherwise. + * @internal C++20 preview. + */ + bool starts_with_nocase(std::string_view const &prefix) const noexcept; + + /** Check if the view begins with a specific @a prefix. + * + * @param prefix String to check against @a this. + * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. + * @internal C++20 preview. + */ + bool starts_with_nocase(char const *prefix) const; + + /** Check if the view begins with the character @c c, ignoring case. + * + * @param c Character to check. + * @return @c true if the string is non-empty and the first character is @c c. + * @internal C++20 preview. + */ + bool starts_with_nocase(char c) const noexcept; + + /** Check if the view ends with a specific @a suffix. + * + * @param suffix String to check against @a this. 
+ * @return @c true if this->suffix(suffix.size()) == suffix, @c false otherwise. + * @internal C++20 preview. + */ + bool ends_with(std::string_view const &suffix) const noexcept; + + /** Check if the view ends with a specific @a suffix. + * + * @param suffix String to check against @a this. + * @return @c true if this->suffix(suffix.size()) == suffix, @c false otherwise. + * @internal C++20 preview. + */ + bool ends_with(char const *suffix) const; + + /** Check the view ends with the character @c c. + * + * @param c Character to check. + * @return @c true if the string is non-empty and the last character is @c c. + * @internal C++20 preview. + */ + bool ends_with(char c) const noexcept; + + /** Check if the view ends with a specific @a suffix, ignoring case. + * + * @param suffix String to check against @a this. + * @return @c true if this->suffix(suffix.size()) == suffix without regard to case, @c false otherwise. + * @internal C++20 preview. + */ + bool ends_with_nocase(std::string_view const &suffix) const noexcept; + + /** Check if the view ends with a specific @a suffix, ignoring case. + * + * @param suffix String to check against @a this. + * @return @c true if this->suffix(suffix.size()) == suffix without regard to case, @c false otherwise. + * @internal C++20 preview. + */ + bool ends_with_nocase(char const *suffix) const; + + /** Check the view ends with the character @c c, ignoring case. + * + * @param c Character to check. + * @return @c true if the string is non-empty and the last character is @c c. + * @internal C++20 preview. + */ + bool ends_with_nocase(char c) const noexcept; + + // Functors for using this class in STL containers. + /// Ordering functor, lexicographic comparison. + struct LessThan { + /// @return Case sensitive ordering. + bool + operator()(self_type const &lhs, self_type const &rhs) const noexcept + { + return -1 == strcmp(lhs, rhs); + } + }; + + /// Ordering functor, case ignoring lexicographic comparison.
+ struct LessThanNoCase { + /// @return Case insensitive ordering. + bool + operator()(self_type const &lhs, self_type const &rhs) const noexcept + { + return -1 == strcasecmp(lhs, rhs); + } + }; + + /// Support for containers that need case insensitive comparisons between views. + struct CaselessEqual { + /// @return @c true if the view contents are equal when compared without regard to case. + bool + operator()(self_type const &lhs, self_type const &rhs) const noexcept + { + return lhs.size() == rhs.size() && 0 == strcasecmp(lhs, rhs); + } + }; + + /** A pointer to the first byte. + * + * @return Address of the first byte of the view. + * + * @internal This fixes an error in @c std::string_view where this method is declared to return + * a template parameter instead of the correct @c value_type. The effect is @c string_view::data + * is not considered by the compiler to return char const * which makes meta-programming + * painful. + */ + constexpr value_type const *data() const noexcept; + + /** A pointer to past the last byte. + * + * @return Address of the first byte past the end of the view. + * + * This is effectively @c std::string_view::end() except it explicitly returns a pointer and not + * (potentially) an iterator class, to match up with @c data(). + */ + constexpr value_type const *data_end() const noexcept; + + /// Specialized stream operator implementation. + /// @note Use the standard stream operator unless there is a specific need for this, which is unlikely. + /// @return The stream @a os. + /// @internal Needed because @c std::ostream::write must be used and + /// so alignment / fill have to be explicitly handled. + template Stream &stream_write(Stream &os, const TextView &b) const; + + /// @cond OVERLOAD + // These methods are all overloads of other methods, defined in order to make the API more + // convenient to use. Mostly these overload @c int for @c size_t so naked numbers work as expected.
+ constexpr self_type prefix(int n) const noexcept; + self_type take_suffix(int n); + self_type split_prefix(int n); + constexpr self_type suffix(int n) const noexcept; + self_type split_suffix(int n); + /// @endcond + + protected: + /// Initialize a bit mask to mark which characters are in this view. + static void init_delimiter_set(std::string_view const &delimiters, std::bitset<256> &set); + }; - /** Check if character is in the charset. - * - * @param c Character to check. - * @return @c true if @a c is in the charset, @c false if not. - */ - bool operator()(unsigned char c) const; + /// Internal table of digit values for characters. + /// This is -1 for characters that are not valid digits. + extern const int8_t svtoi_convert[256]; - /** Check if character is in the charset. - * - * @param c Character to check. - * @return @c true if @a c is in the charset, @c false if not. - */ - bool operator()(char c) const; + /** Convert the text in @c TextView @a src to a signed numeric value. -protected: - std::bitset::max() + 1> _chars; -}; + If @a parsed is non-null then the part of the string actually parsed is placed there. + @a base sets the conversion base. If not set base 10 is used with two special cases: -/** A read only view of a contiguous piece of memory. - - A @c TextView does not own the memory to which it refers, it is simply a view of part of some - (presumably) larger memory object. The purpose is to allow working in a read only way a specific - part of the memory. A classic example for ATS is working with HTTP header fields and values - which need to be accessed independently but preferably without copying. A @c TextView supports - this style. - - @note To simplify the interface there is no constructor taking only a character pointer. - Constructors require either a literal string or an explicit length. This avoid ambiguities which - are much more annoying that explicitly calling @c strlen on a character pointer. 
- - @internal For construction, assignment operator, and @c assign method, there are a lot of overloads - because users would like to be able to use the same sort of arguments for all of these. This includes - - self / parent type - - @c std::string - - literal string - - C-string pointer - - pointer and count - - begin/end style pointers. - - character containers that have the STL standard @c size and @c data methods. - */ -class TextView : public std::string_view { - using self_type = TextView; ///< Self reference type. - using super_type = std::string_view; ///< Parent type. + - If the number starts with a literal '0' then it is treated as base 8. + - If the number starts with the literal characters '0x' or '0X' then it is treated as base 16. -public: - /// Default constructor (empty buffer). - constexpr TextView() noexcept = default; + If @a base is explicitly set then any leading radix indicator is not supported. + */ + intmax_t svtoi(TextView src, TextView *parsed = nullptr, int base = 0); - /// Construct from a @c std::string_view or @c TextView - /// @note This provides an user defined conversion from @c std::string_view to @c TextView. The - /// reverse conversion is implicit in @c TextView being a subclass of @c std::string_view. - constexpr TextView(super_type const &that) noexcept; + /** Convert the text in @c TextView @a src to an unsigned numeric value. - /** Construct from pointer and size. - * - * @param ptr Pointer to first character. - * @param n Number of characters. - * - * If @a n is @c npos then @c ptr is presumed to be a C string and checked for length. If @c ptr - * is @c nullptr the length is 0. Otherwise @c strlen is used to calculate the length. - */ - constexpr TextView(char const *ptr, size_t n) noexcept; + If @a parsed is non-null then the part of the string actually parsed is placed there. + @a base sets the conversion base. If not set base 10 is used with two special cases: - /** Construct from pointer and size. 
- * - * @param ptr Pointer to first character. - * @param n Number of characters. - */ - constexpr TextView(char const *ptr, unsigned n) noexcept; + - If the number starts with a literal '0' then it is treated as base 8. + - If the number starts with the literal characters '0x' or '0X' then it is treated as base 16. - /** Construct from pointer and size. - * - * @param ptr Pointer to first character. - * @param n Number of characters. - * - * If @a n is negative then @c ptr is presumed to be a C string and checked for length. If @c ptr - * is @c nullptr the length is 0. Otherwise @c strlen is used to calculate the length. - */ - constexpr TextView(char const *ptr, ssize_t n) noexcept; + If @a base is explicitly set then any leading radix indicator is not supported. + */ + uintmax_t svtou(TextView src, TextView *parsed = nullptr, int base = 0); + + /** Convert the text in @c src to an unsigned numeric value. + * + * @tparam N The radix (must be 1..36) + * @param src The source text. Updated during parsing. + * @return The converted numeric value. + * + * This is a specialized function useful only where conversion performance is critical. It is used + * inside @c svtoi and @a svtou for the common cases of 8, 10, and 16, therefore normally this isn't much more + * performant in those cases than just @c svtoi. Because of this only positive values are parsed. + * If determining the radix from the text or signed value parsing is needed, used @c svtoi. + * + * @a src is updated in place to indicate what characters were parsed by removing them from the view + * Parsing stops on the first invalid digit, so any leading non-digit characters (e.g. whitespace) + * must already be removed. For overflow, all valid digits are consumed and the maximum value returned. 
+ */ + template + uintmax_t + svto_radix(TextView &src) + { + static_assert(1 <= RADIX && RADIX <= 36, "Radix must be in the range 2..36"); + static constexpr auto MAX = std::numeric_limits::max(); + static constexpr auto OVERFLOW_LIMIT = MAX / RADIX; + uintmax_t zret = 0; + uintmax_t v; + while (src.size() && ((v = swoc::svtoi_convert[uint8_t(*src)]) < RADIX)) { + // Tweaked for performance - need to check range after @a RADIX multiply. + ++src; // Update view iff the character is parsed. + if (zret <= OVERFLOW_LIMIT && v <= (MAX - (zret *= RADIX))) { + zret += v; + } else { + zret = MAX; // clamp to max - once set will always hit this case for subsequent input. + } + } + return zret; + } - /** Construct from pointer and size. - * - * @param ptr Pointer to first character. - * @param n Number of characters. - * - * If @a n is negative then @c ptr is presumed to be a C string and checked for length. If @c ptr - * is @c nullptr the length is 0. Otherwise @c strlen is used to calculate the length. - */ - constexpr TextView(char const *ptr, int n) noexcept; + /// Convenience overload. + /// @see svto_radix(swoc::TextView &src) + template + uintmax_t + svto_radix(TextView &&src) + { + return svto_radix(src); + } - /** Construct from a half open range [first, last). + /** Parse @a text as a floating point number. * - * @param first Start of half open range. - * @param last End of half open range. + * @param text The input text. + * @param parsed Parsed text [out] + * @return The floating point value, or 0.0 if invalid input. * - * The character at @a first will be in the view, but the character at @a last will not. + * If @a parsed is not @a nullptr then the span of characters parsed is put there. This can be + * used to check if the parse was successful - on a failed parse, it will be empty. * - * @note @c explicit to avoid interpreting a string initializer list as a view. - * - * @internal For the love of Turing, WHY DID YOU DO THIS?
- * - * Well, estemed reader, because the C++ standard doesn't have a better way to support overloads - * that handle character pointers and literal strings differently. If the parameters were simply - * (char const *, char const *) then a construct like { "really", "broken" } can - * be interpreted as a @c TextView because the elements implicitly convert to char const - * *. This makes no sense and creates some @b very annoying ambiguities for lists of strings - * if there are exactly two in the list. See @c Lexicon for an example. - * - * The template itself does the check to make sure it's a character @b pointer and not an array. Arrays - * are handled by a different constructor so this only disables constructing from two char arrays - * which IMHO makes no sense and should be forbidden. + * @note This should be within 1 epsilon of correct, although it doesn't guarantee picking + * the closest epsilon. It's more than sufficient for use in configurations, but possibly + * not for high precision work. */ - template - explicit TextView( - T first, - std::enable_if_t && std::is_pointer_v && std::is_convertible_v, T> last) noexcept - : super_type(first, last - first) {} + double svtod(TextView text, TextView *parsed = nullptr); + // ---------------------------------------------------------- + // Inline implementations. + // Note: Why, you may ask, do I use @c TextView::self_type for return type instead of the + // simpler plain @c TextView ? Because otherwise Doxygen can't match up the declaration and + // definition and the reference documentation is messed up. Sigh. - /** Construct from any character container following STL standards. - * - * @tparam C Container type. - * @param c container - * - * The container type must have the methods @c data and @c size which must return values convertible - * to @c char @c const @c * and @c size_t respectively. 
- */ - template ().data()), char const *> && - std::is_convertible_v().size()), size_t>, - void>> - constexpr TextView(C const &c); + inline constexpr CharSet::CharSet(TextView const &chars) + { + for (auto c : chars) { + _chars[uint8_t(c)] = true; + } + } - /** Construct from literal string or array. + inline bool + CharSet::operator()(unsigned char c) const + { + return _chars[c]; + } - All elements of the array are included in the view unless the last element is nul, in which case it is elided. - If this is inappropriate then a constructor with an explicit size should be used. + inline bool + CharSet::operator()(char c) const + { + return _chars[uint8_t(c)]; + } - @code - TextView a("A literal string"); - @endcode - The last character in @a a will be 'g'. - */ - template constexpr TextView(const char (&s)[N]) noexcept; + // === TextView Implementation === + /// @cond TextView_INTERNAL + // Doxygen doesn't match these up well due to various type and template issues. + // @internal If there is more than one overload for numeric types, it's easy to get ambiguity. The only + // fix, unfortunately, is lots of overloads to cover the ambiguous cases. + inline constexpr TextView::TextView(const char *ptr, size_t n) noexcept + : super_type(ptr, n == npos ? (ptr ? ::strlen(ptr) : 0) : n) + { + } + inline constexpr TextView::TextView(const char *ptr, int n) noexcept + : super_type(ptr, n < 0 ? (ptr ? ::strlen(ptr) : 0) : size_t(n)) + { + } + inline constexpr TextView::TextView(std::nullptr_t) noexcept : super_type(nullptr, 0) {} + inline TextView::TextView(std::string const &str) noexcept : super_type(str) {} + inline constexpr TextView::TextView(super_type const &that) noexcept : super_type(that) {} + template constexpr TextView::TextView(const char (&s)[N]) noexcept : super_type(s, s[N - 1] ? 
N : N - 1) {} + template constexpr TextView::TextView(C const &c) : super_type(c.data(), c.size()) {} + + inline void + TextView::init_delimiter_set(std::string_view const &delimiters, std::bitset<256> &set) + { + set.reset(); + for (char c : delimiters) + set[static_cast(c)] = true; + } - /** Construct from a C-string. - * - * @param src A pointer to a C-string. - * - * The view does not include the terminating nul. - * - * @internal @a src a reference because it is otherwise ambiguous with the literal constructor. - */ - TextView(char *&src) : super_type(src, src ? strlen(src) : 0) {} + inline auto + TextView::clear() -> self_type & + { + new (this) self_type(); + return *this; + } - /** Construct from a const C-string. - * - * @param src Pointer to a const C-string. - * - * The view does not include the terminating nul. - * - * @internal @a src a reference because it is otherwise ambiguous with the literal constructor. - */ - TextView(char const *&src) : super_type(src, src ? strlen(src) : 0) {} + inline constexpr char + TextView::operator*() const + { + return this->empty() ? char(0) : *(this->data()); + } - /** Construct from nullptr. - This implicitly makes the length 0. - */ - constexpr TextView(std::nullptr_t) noexcept; + inline constexpr bool + TextView::operator!() const noexcept + { + return this->empty(); + } - /// Construct from @c std::string, referencing the entire string contents. - /// @internal This can't be @c constexpr because this uses methods in @c std::string that may - /// not be @c constexpr. - TextView(std::string const &str) noexcept; + inline constexpr TextView::operator bool() const noexcept + { + return !this->empty(); + } - /// Assign a super class instance, @c std::string_view to @a this. - self_type &operator=(super_type const &that); + inline auto + TextView::operator++() -> self_type & + { + this->remove_prefix(1); + return *this; + } - /// Assign a constant array to @a this. 
- /// @note If the last character of @a s is a nul byte, it is not included in the view. - template self_type &operator=(const char (&s)[N]); + inline auto + TextView::operator++(int) -> self_type + { + self_type zret{*this}; + this->remove_prefix(1); + return zret; + } - /// Assign from C-string @a s. - self_type &operator=(char *&s); - /// Assign from C-string @a s. - self_type &operator=(char const *&s); + inline auto + TextView::operator+=(size_t n) -> self_type & + { + this->remove_prefix(n); + return *this; + } - /// Assign from a @c std::string. - self_type &operator=(const std::string &s); + template + inline auto + TextView::operator=(const char (&s)[N]) -> self_type & + { + return *this = self_type{s, s[N - 1] ? N : N - 1}; + } - /** Assign a view of the @a c_str - * - * @param c_str Pointer to C string. - * @return @a this - * - * @note @c c_str must be a null terminated string. The null byte is not included in the view. - */ - self_type &assign(char *&c_str); + inline auto + TextView::operator=(super_type const &that) -> self_type & + { + this->super_type::operator=(that); + return *this; + } - /** Assign a view of the @a c_str - * - * @param c_str Pointer to C string. - * @return @a this - * - * @note @c c_str must be a null terminated string. The null byte is not included in the view. - */ - self_type &assign(char const *&c_str); + inline auto + TextView::operator=(char *&s) -> self_type & + { + this->super_type::operator=(s); + return *this; + } - /** Assign from a pointer and size. - * - * @param ptr Pointer to first character of the view. - * @param n Length of the view. - * @return @a this - * - * if @a n is @a npos then @c strlen is used determine the size of the view. - */ - self_type &assign(char const *ptr, size_t n); + inline auto + TextView::operator=(char const *&s) -> self_type & + { + this->super_type::operator=(s); + return *this; + } - /** Assign the half open view [ @a b , @a e ) to @a this - * - * @param b First character in the view. 
- * @param e One character after the last character in the view. - * @return @a this - */ - self_type &assign(char const *b, char const *e); + inline auto + TextView::operator=(const std::string &s) -> self_type & + { + this->super_type::operator=(s); + return *this; + } - /// Explicitly set the view from a @c std::string - self_type &assign(std::string const &s); + inline auto + TextView::assign(char *&c_str) -> self_type & + { + return this->assign(c_str, strlen(c_str)); + } - /** Assign literal string or array. + inline auto + TextView::assign(char const *&c_str) -> self_type & + { + return this->assign(c_str, strlen(c_str)); + } - * All elements of the array are included in the view unless the last element is nul, in which case it is elided. - * If this is inappropriate then a constructor with an explicit size should be used. - * - * @code - * tv.assign("A literal string"); - * @endcode - * The last character in @a tv will be 'g'. - */ - template self_type &assign(const char (&s)[N]) noexcept; + inline auto + TextView::assign(const std::string &s) -> self_type & + { + *this = super_type(s); + return *this; + } - /** Assign from any character container following STL standards. - * - * @tparam C Container type. - * @param c container - * - * The container type must have the methods @c data and @c size which must return values convertible - * to @c char @c const @c * and @c size_t respectively. - */ - template ().data()), char const *> && - std::is_convertible_v().size()), size_t>, - void>> - constexpr self_type & - assign(C const &c) { - return this->assign(c.data(), c.size()); + inline TextView & + TextView::assign(char const *ptr, size_t n) + { + *this = super_type(ptr, n == npos ? (ptr ? ::strlen(ptr) : 0) : n); + return *this; } - /** Dereference operator. 
+ inline TextView & + TextView::assign(char const *b, char const *e) + { + *this = super_type(b, e - b); + return *this; + } - @note This allows the view to be used as if it were a character iterator to a null terminated - string which is handy for several other STL interfaces. + template + inline auto + TextView::assign(char const (&s)[N]) noexcept -> self_type & + { + return *this = self_type{s, s[N - 1] ? N : N - 1}; + } - @return The first byte in the view, or a nul character if the view is empty. - */ - /// @return The first byte in the view. - constexpr char operator*() const; + inline constexpr auto + TextView::prefix(size_t n) const noexcept -> self_type + { + return {this->data(), std::min(n, this->size())}; + } - /** Discard the first byte of the view. - * - * @return @a this. - */ - self_type &operator++(); + inline constexpr TextView + TextView::prefix(int n) const noexcept + { + return {this->data(), std::min(n, this->size())}; + } - /** Discard the first byte of the view. - * - * @return The view before discarding the byte. - */ - self_type operator++(int); + inline auto + TextView::prefix_at(char c) const -> self_type + { + self_type zret; // default to empty return. + if (auto n = this->find(c); n != npos) { + zret.assign(this->data(), n); + } + return zret; + } - /** Discard the first @a n bytes of the view. - * - * Equivalent to @c remove_prefix(n). - * @return @a this - */ - self_type &operator+=(size_t n); + inline TextView + TextView::prefix_at(std::string_view const &delimiters) const + { + self_type zret; // default to empty return. + if (auto n = this->find_first_of(delimiters); n != npos) { + zret.assign(this->data(), n); + } + return zret; + } - /// Check for empty view. - /// @return @c true if the view has a nullptr @b or zero size. - constexpr bool operator!() const noexcept; + template + auto + TextView::prefix_if(F const &pred) const -> self_type + { + self_type zret; // default to empty return. 
+ if (auto n = this->find_if(pred); n != npos) { + zret.assign(this->data(), n); + } + return zret; + } - /// Check for non-empty view. - /// @return @c true if the view refers to a non-empty range of bytes. - explicit constexpr operator bool() const noexcept; + inline auto + TextView::remove_prefix(size_t n) -> self_type & + { + this->super_type::remove_prefix(std::min(n, this->size())); + return *this; + } - /// Clear the view (become an empty view). - self_type &clear(); + inline TextView & + TextView::remove_prefix_at(char c) + { + if (auto n = this->find(c); n != npos) { + this->super_type::remove_prefix(n + 1); + } + return *this; + } - /// Get the offset of the first character for which @a pred is @c true. - template size_t find_if(F const &pred) const; - /// Get the offset of the last character for which @a pred is @c true. - template size_t rfind_if(F const &pred) const; + inline auto + TextView::remove_prefix_at(std::string_view const &delimiters) -> self_type & + { + if (auto n = this->find_first_of(delimiters); n != npos) { + this->super_type::remove_prefix(n + 1); + } + return *this; + } - /** Remove bytes that match @a c from the start of the view. - * - * @return @a this - */ - self_type <rim(char c); + template + auto + TextView::remove_prefix_if(F const &pred) -> self_type & + { + if (auto n = this->find_if(pred); n != npos) { + this->super_type::remove_prefix(n + 1); + } + return *this; + } - /** Remove bytes from the start of the view that are in @a delimiters. - * - * @return @a this - */ - self_type <rim(CharSet const &delimiters); + inline TextView + TextView::split_prefix(size_t n) + { + self_type zret; // default to empty return. + if (n < this->size()) { + zret = this->prefix(n); + this->remove_prefix(std::min(n + 1, this->size())); + } + return zret; + } - /** Remove bytes from the start of the view that are in @a delimiters. 
- * - * @return @a this - */ - self_type <rim(std::string_view const &delimiters); + inline TextView + TextView::split_prefix(int n) + { + return this->split_prefix(size_t(n)); + } - /** Remove bytes from the start of the view that are in @a delimiters. - * - * @internal This is needed to avoid collisions with the templated predicate style. - * - * @return @c *this - */ - self_type <rim(const char *delimiters); + inline TextView + TextView::split_prefix_at(char c) + { + return this->split_prefix(this->find(c)); + } - /** Remove bytes from the start of the view for which @a pred is @c true. - @a pred must be a functor taking a @c char argument and returning @c bool. - @return @c *this - */ - template self_type <rim_if(F const &pred); + inline TextView + TextView::split_prefix_at(std::string_view const &delimiters) + { + return this->split_prefix(this->find_first_of(delimiters)); + } - /** Remove bytes that match @a c from the end of the view. - * - * @return @a this - */ - self_type &rtrim(char c); + template + TextView::self_type + TextView::split_prefix_if(F const &pred) + { + return this->split_prefix(this->find_if(pred)); + } - /** Remove bytes from the end of the view that are in @a delimiters. - * - * @return @a this - */ - self_type &rtrim(CharSet const &delimiters); + inline TextView + TextView::take_prefix(size_t n) + { + n = std::min(n, this->size()); + self_type zret = this->prefix(n); + this->remove_prefix(std::min(n + 1, this->size())); + return zret; + } - /** Remove bytes from the end of the view that are in @a delimiters. - * @return @a this - */ - self_type &rtrim(std::string_view const &delimiters); + inline TextView + TextView::take_prefix_at(char c) + { + return this->take_prefix(this->find(c)); + } - /** Remove bytes from the end of the view for which @a pred is @c true. - * - * @a pred must be a functor taking a @c char argument and returning @c bool. 
- * - * @return @c *this - */ - template self_type &rtrim_if(F const &pred); + inline TextView + TextView::take_prefix_at(std::string_view const &delimiters) + { + return this->take_prefix(this->find_first_of(delimiters)); + } - /** Remove bytes that match @a c from the start and end of this view. - * - * @return @a this - */ - self_type &trim(char c); + template + auto + TextView::take_prefix_if(F const &pred) -> self_type + { + return this->take_prefix(this->find_if(pred)); + } - /** Remove bytes from the start and end of the view that are in @a delimiters. - * @return @a this - */ - self_type &trim(CharSet const &delimiters); + inline constexpr TextView + TextView::suffix(size_t n) const noexcept + { + n = std::min(n, this->size()); + return {this->data_end() - n, n}; + } - /** Remove bytes from the start and end of the view that are in @a delimiters. - * @return @a this - */ - self_type &trim(std::string_view const &delimiters); + inline constexpr TextView + TextView::suffix(int n) const noexcept + { + return this->suffix(size_t(n)); + } - /** Remove bytes from the start and end of the view that are in @a delimiters. - @internal This is needed to avoid collisions with the templated predicate style. - @return @c *this - */ - self_type &trim(const char *delimiters); + inline TextView + TextView::suffix_at(char c) const + { + self_type zret; + if (auto n = this->rfind(c); n != npos && n + 1 < this->size()) { + ++n; + zret.assign(this->data() + n, this->size() - n); + } + return zret; + } - /** Remove bytes from the start and end of the view for which @a pred is @c true. - @a pred must be a functor taking a @c char argument and returning @c bool. 
- @return @c *this - */ - template self_type &trim_if(F const &pred); + inline TextView + TextView::suffix_at(std::string_view const &delimiters) const + { + self_type zret; + if (auto n = this->find_last_of(delimiters); n != npos) { + ++n; + zret.assign(this->data() + n, this->size() - n); + } + return zret; + } - /** Get a view of the first @a n bytes. - * - * @param n Number of chars in the prefix. - * @return A view of the first @a n characters in @a this, bounded by the size of @a this. - */ - constexpr self_type prefix(size_t n) const noexcept; + template + auto + TextView::suffix_if(F const &pred) const -> self_type + { + self_type zret; + if (auto n = this->rfind_if(pred); n != npos) { + ++n; + zret.assign(this->data() + n, this->size() - n); + } + return zret; + } - /** Get a view of a prefix bounded by @a c. - * - * @param c Delimiter character. - * @return A view of the prefix bounded by @a c, or all of @a this if @a c is not found. - * @note The character @a c is not included in the returned view. - */ - self_type prefix_at(char c) const; + inline auto + TextView::remove_suffix(size_t n) -> self_type & + { + this->super_type::remove_suffix(std::min(n, this->size())); + return *this; + } - /** Get a view of a prefix bounded by a character in @a delimiters. - * - * @param delimiters A set of characters. - * - * @return A view of the prefix bounded by any character in @a delimiters, or empty if none are - * found. - * - * @note The delimiter character is not included in the returned view. - */ - self_type prefix_at(std::string_view const &delimiters) const; + inline TextView & + TextView::remove_suffix_at(char c) + { + if (auto n = this->rfind(c); n != npos) { + return this->remove_suffix(this->size() - n); + } + return this->clear(); + } - /** Get a view of a prefix bounded by a character predicate @a pred. - * - * @a pred must be a functor which takes a @c char argument and returns @c bool. 
Each character in - * @a this is tested by @a pred and the prefix is delimited by the first character for which @a - * pred is @c true. - * - * @param pred A character predicate. - * - * @return A view of the prefix bounded by @a pred or empty if @a pred is not @c true for any - * characer. - * - * @note The deliminting character is not included in the returned view. - */ - template self_type prefix_if(F const &pred) const; + inline TextView & + TextView::remove_suffix_at(std::string_view const &delimiters) + { + if (auto n = this->find_last_of(delimiters); n != npos) { + return this->remove_suffix(this->size() - n); + } + return this->clear(); + } - /** Remove bytes from the start of the view. - * - * @param n Number of bytes to remove. - * @return @a this. - */ - self_type &remove_prefix(size_t n); + template + TextView::self_type & + TextView::remove_suffix_if(F const &pred) + { + if (auto n = this->rfind_if(pred); n != npos) { + return this->remove_suffix(this->size() - n); + } + return this->clear(); + } - /** Remove bytes from the end of the view. - * - * @param n Number of bytes to remove. - * @return @a this. - */ - self_type &remove_suffix(size_t n); + inline TextView + TextView::split_suffix(size_t n) + { + self_type zret; + n = std::min(n, this->size()); + zret = this->suffix(n); + this->remove_suffix(n + 1); + return zret; + } - /** Remove the leading characters of @a this up to and including @a c. - * - * @param c Delimiter character. - * @return @a this. - * @note The first occurrence of character @a c is removed along with all preceding characters, or - * the view is cleared if @a c is not found. - */ - self_type &remove_prefix_at(char c); + inline auto + TextView::split_suffix(int n) -> self_type + { + return this->split_suffix(size_t(n)); + } - /** Remove the leading characters of @a this up to and including the first character matching @a delimiters. - * - * @param delimiters Characters to match. - * @return @a this. 
- * @note The first occurrence of any character in @a delimiters is removed along with all preceding - * characters, or the view is cleared if none are found. - */ - self_type &remove_prefix_at(std::string_view const &delimiters); + inline TextView + TextView::split_suffix_at(char c) + { + auto idx = this->rfind(c); + return npos == idx ? self_type{} : this->split_suffix(this->size() - (idx + 1)); + } - /** Remove the leading characters up to and including the character selected by @a pred. - * - * @tparam F Predicate function type. - * @param pred The predicate instance. - * @return @a this. - * - * Characters are removed until @a pred returns @c true. The matching character is also removed. - */ - template self_type &remove_prefix_if(F const &pred); + inline auto + TextView::split_suffix_at(std::string_view const &delimiters) -> self_type + { + auto idx = this->find_last_of(delimiters); + return npos == idx ? self_type{} : this->split_suffix(this->size() - (idx + 1)); + } - /** Remove and return a prefix of size @a n. - * - * @param n Size of the prefix. - * @return The first @a n bytes of @a this if @a n is in @a this, otherwise an empty view. - * - * The prefix is removed and returned if the requested prefix is no larger than @a this, - * otherwise @a this is not modified. - * - * @note The character at offset @a n is discarded if @a this is modified. - * - * @see @c take_prefix - */ - self_type split_prefix(size_t n); + template + TextView::self_type + TextView::split_suffix_if(F const &pred) + { + return this->split_suffix(this->rfind_if(pred)); + } - /** Remove and return a prefix bounded by the first occurrence of @a c. - * - * @param c The character to match. - * @return The prefix bounded by @a c if @a c is found, an empty view if not. - * - * The prefix is removed and returned if @a c is found, otherwise @a this is not modified. - * - * @note The delimiter character is discarded if @a this is modified. 
- * - * @see @c take_prefix - */ - self_type split_prefix_at(char c); + inline TextView + TextView::take_suffix(size_t n) + { + self_type zret{*this}; + *this = zret.split_prefix(n); + return zret; + } - /** Remove and return a prefix bounded by the first occurrence of any of @a delimiters. - * - * @param delimiters The characters to match. - * @return The prefix bounded by a delimiter if one is found, otherwise an empty view. - * - * The prefix is removed and returned if a @a delimiter is found, otherwise @a this is not modified. - * - * @note The matching character is discarded if @a this is modified. - * - * @see @c take_prefix_at - */ - self_type split_prefix_at(std::string_view const &delimiters); + inline TextView + TextView::take_suffix(int n) + { + return this->take_suffix(size_t(n)); + } - /** Remove and return a prefix bounded by the first character that satisfies @a pred. - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The prefix bounded by the first character satisfying @a pred. - * - * The prefix is removed and returned if a character satisfying @a pred is found, otherwise - * @a this is not modified. - * - * @note The matching character is discarded if @a this is modified. - * - * @see @c take_prefix_if - */ - template self_type split_prefix_if(F const &pred); + inline TextView + TextView::take_suffix_at(char c) + { + return this->take_suffix(this->rfind(c)); + } - /** Remove and return the first @a n characters. - * - * @param n Size of the return prefix. - * @return The first @a n bytes of @a this if @a n is in @a this, otherwise all of @a this. - * - * The prefix is removed and returned if the requested prefix is no larger than @a this, - * otherwise all of @a this is removed and returned. - * - * @note The character at offset @a n is discarded if @a n is within the bounds of @a this. 
- * - * @see @c split_prefix - */ - self_type take_prefix(size_t n); + inline TextView + TextView::take_suffix_at(std::string_view const &delimiters) + { + return this->take_suffix(this->find_last_of(delimiters)); + } - /** Remove and return a prefix bounded by the first occurrence of @a c. - * - * @param c The character to match. - * @return The prefix bounded by @a c if @a c is found, all of @a this if not. - * - * The prefix is removed and returned if @a c is found, otherwise all of @a this is removed and - * returned. - * - * @note The character at offset @a n is discarded if found. - * - * @see @c split_prefix_at - */ - self_type take_prefix_at(char c); + template + TextView::self_type + TextView::take_suffix_if(F const &pred) + { + return this->take_suffix_at(this->rfind_if(pred)); + } - /** Remove and return a prefix bounded by the first occurrence of any of @a delimiters. - * - * @param delimiters The characters to match. - * @return The prefix bounded by a delimiter if one is found, otherwise all of @a this. - * - * The prefix is removed and returned if a @a delimiter is found, otherwise all of @a this is - * removed and returned. - * - * @note The matching character is discarded if found. - * - * @see @c split_prefix_at - */ - self_type take_prefix_at(std::string_view const &delimiters); + template + inline size_t + TextView::find_if(F const &pred) const + { + for (const char *spot = this->data(), *limit = this->data_end(); spot < limit; ++spot) + if (pred(*spot)) + return spot - this->data(); + return npos; + } - /** Remove and return a prefix bounded by the first character that satisfies @a pred. - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The prefix bounded by the first character satisfying @a pred, or all of @a this if none - * is found. - * - * The prefix is removed and returned if a character satisfying @a pred is found, otherwise - * all of @a this is removed and returned. 
- * - * @note The matching character is discarded if found. - * - * @see @c split_prefix_if - */ - template self_type take_prefix_if(F const &pred); + template + inline size_t + TextView::rfind_if(F const &pred) const + { + for (const char *spot = this->data_end(), *limit = this->data(); spot > limit;) + if (pred(*--spot)) + return spot - this->data(); + return npos; + } - /** Remove and return a prefix of characters satisfying @a pred - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The prefix of characters that satisfy @a pred. - * - * The returned prefix is removed from @a this. That prefix may be empty if the first character - * does not satisfy @a pred. - * - * @note This is very similar to @c ltrim_if but returns the removed text instead of the modified - * view. - */ - template self_type clip_prefix_of(F const &pred); + inline TextView & + TextView::ltrim(char c) + { + this->remove_prefix(this->find_first_not_of(c)); + return *this; + } - /** Get a view of the last @a n bytes. - * - * @param n Number of chars in the suffix. - * @return A view of the last @a n characters in @a this, bounded by the size of @a this. - */ - constexpr self_type suffix(size_t n) const noexcept; + inline TextView & + TextView::rtrim(char c) + { + auto n = this->find_last_not_of(c); + this->remove_suffix(this->size() - (n == npos ? 0 : n + 1)); + return *this; + } - /** Get a view of a suffix bounded by @a c. - * - * @param c Delimiter character. - * @return A view of the suffix bounded by @a c, or all of @a this if @a c is not found. - * @note The character @a c is not included in the returned view. - */ - self_type suffix_at(char c) const; + inline TextView & + TextView::trim(char c) + { + return this->ltrim(c).rtrim(c); + } - /** Get a view of a suffix bounded by a character in @a delimiters. - * - * @param delimiters A set of characters. 
- * - * @return A view of the suffix bounded by any character in @a delimiters, or mepty if none are - * found. - * - * @note The delimiter character is not included in the returned view. - */ - self_type suffix_at(std::string_view const &delimiters) const; + inline TextView & + TextView::ltrim(CharSet const &delimiters) + { + const char *spot = this->data(); + const char *limit = this->data_end(); - /** Get a view of a suffix bounded by a character predicate @a pred. - * - * @a pred must be a functor which takes a @c char argument and returns @c bool. Each character in - * @a this is tested by @a pred and the suffix is delimited by the last character for which @a - * pred is @c true. - * - * @param pred A character predicate. - * - * @return A view of the suffix bounded by @a pred or empty if @a pred is not @c true for any - * character. - * - * @note The delimiting character is not included in the returned view. - */ - template self_type suffix_if(F const &pred) const; + while (spot < limit && delimiters(*spot)) { + ++spot; + } + this->remove_prefix(spot - this->data()); - /** Remove the trailing characters of @a this up to and including @a c. - * - * @param c Delimiter character. - * @return @a this. - * - * @note The last occurrence of character @a c is removed along with all succeeding characters, or - * the view is cleared if @a c is not found. - */ - self_type &remove_suffix_at(char c); + return *this; + } - /** Remove the trailing characters of @a this up to and including the last character matching @a delimiters. - * - * @param delimiters Characters to match. - * @return @a this. - * @note The first occurrence of any character in @a delimiters is removed along with all preceding - * characters, or the view is cleared if none are found. 
- */ - self_type &remove_suffix_at(std::string_view const &delimiters); + inline TextView & + TextView::ltrim(std::string_view const &delimiters) + { + return this->ltrim(CharSet(delimiters)); + } - /** Remove the trailing characters up to and including the character selected by @a pred. - * - * @tparam F Predicate function type. - * @param pred The predicate instance. - * @return @a this. - * - * If predicate is never true the view is cleared. - */ - template self_type &remove_suffix_if(F const &pred); + inline TextView & + TextView::ltrim(const char *delimiters) + { + return this->ltrim(CharSet(delimiters)); + } - /** Remove and return a suffix of size @a n. - * - * @param n Size of the suffix. - * @return The first @a n bytes of @a this if @a n is in @a this, otherwise an empty view. - * - * The prefix is removed and returned if the requested suffix is no larger than @a this, - * otherwise @a this is not modified. - * - * @note The character at offset @a n is discarded if @a this is modified. - * - * @see @c take_suffix - */ - self_type split_suffix(size_t n); + inline TextView & + TextView::rtrim(CharSet const &delimiters) + { + const char *spot = this->data_end(); + const char *limit = this->data(); + while (limit < spot-- && delimiters(*spot)) {} - /** Remove and return a suffix bounded by the last occurrence of @a c. - * - * @param c The character to match. - * @return The suffix bounded by @a c if @a c is found, an empty view if not. - * - * The suffix is removed and returned if @a c is found, otherwise @a this is not modified. - * - * @note The character at offset @a n is discarded if @a this is modified. - * - * @see @c take_suffix_at - */ - self_type split_suffix_at(char c); + this->remove_suffix(this->data_end() - (spot + 1)); + return *this; + } - /** Remove and return a suffix bounded by the last occurrence of any of @a delimiters. - * - * @param delimiters The characters to match. 
- * @return The suffix bounded by a delimiter if found, an empty view if none found. - * - * The suffix is removed and returned if delimiter is found, otherwise @a this is not modified. - * - * @note The delimiter character is discarded if @a this is modified. - * - * @see @c take_suffix_at - */ - self_type split_suffix_at(std::string_view const &delimiters); + inline TextView & + TextView::rtrim(std::string_view const &delimiters) + { + return this->rtrim(CharSet(delimiters)); + } - /** Remove and return a suffix bounded by the last character that satisfies @a pred. - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The suffix bounded by the first character satisfying @a pred if found, otherwise @a this - * is not modified. - * - * The prefix is removed and returned if a character satisfying @a pred if found, otherwise - * @a this is not modified. - * - * @note The matching character is discarded if @a this is modified. - * - * @see @c take_suffix_if - */ - template self_type split_suffix_if(F const &pred); + inline TextView & + TextView::trim(CharSet const &delimiters) + { + const char *spot; + const char *limit; - /** Remove and return a suffix of size @a n. - * - * @param n Size of the suffix. - * @return The first @a n bytes of @a this if @a n is in @a this, otherwise all of @a this. - * - * The returned suffix is removed from @a this, along with the character at offset @a n if present. - * - * @see @c split_suffix - */ - self_type take_suffix(size_t n); + // Do this explicitly, so we don't have to initialize the character set twice. + for (spot = this->data(), limit = this->data_end(); spot < limit && delimiters(*spot); ++spot) + ; + this->remove_prefix(spot - this->data()); - /** Remove and return a suffix bounded by the last occurrence of @a c. - * - * @param c The character to match. - * @return The suffix bounded by @a c if @a c is found, all of @a this if not. 
- * - * The returned suffix is removed from @a this, along with the delimiter character if found. - * - * @see @c split_suffix_at - */ - self_type take_suffix_at(char c); + spot = this->data_end(); + limit = this->data(); + while (limit < spot-- && delimiters(*spot)) {} + this->remove_suffix(this->data_end() - (spot + 1)); - /** Remove and return a suffix bounded by the last occurrence of any of @a delimiters. - * - * @param delimiters The characters to match. - * @return The suffix bounded by a delimiter if @a c is found, all of @a this if not. - * - * The returned suffix is removed from @a this, along with the delimiter character if found. - * - * @see @c split_suffix_at - */ - self_type take_suffix_at(std::string_view const &delimiters); + return *this; + } - /** Remove and return a suffix bounded by the last character that satisfies @a pred. - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The suffix bounded by the first character satisfying @a pred if found, otherwise all of @a this. - * - * @note The matching character is discarded if found. - * - * @see @c split_suffix_if - */ - template self_type take_suffix_if(F const &pred); + inline TextView & + TextView::trim(std::string_view const &delimiters) + { + return this->trim(CharSet(delimiters)); + } - /** Remove and return a suffix of characters satisfying @a pred - * - * @tparam F Predicate functor type. - * @param pred A function taking @c char and returning @c bool. - * @return The suffix of characters that satisfy @a pred. - * - * The returned suffix is removed from @a this. That suffix may be empty if the last character - * does not satisfy @a pred. - * - * @note This is very similar to @c rtrim_if but returns the removed text instead of the modified - * view. 
- */ - template self_type clip_suffix_of(F const &pred); + inline TextView & + TextView::trim(const char *delimiters) + { + return this->trim(CharSet(delimiters)); + } - /** Get a view of part of this view. - * - * @param pos Offset of first byte in the new view. - * @param count Number of bytes in the view. - * @return The view starting at @a pos for @a count bytes. - * - * The returned view is clipped by @a this - that is, it will not extend beyond the original view. - * @a count is reduced such that it covers only data in @a this. - * - * @note This is provided primarily for co-variance, i.e. the returned view is a @c TextView - * instead of a @c std::string_view. - */ - constexpr self_type substr(size_type pos = 0, size_type count = npos) const noexcept; + template + TextView::self_type & + TextView::ltrim_if(F const &pred) + { + const char *spot; + const char *limit; + for (spot = this->data(), limit = this->data_end(); spot < limit && pred(*spot); ++spot) + ; + this->remove_prefix(spot - this->data()); + return *this; + } - /** Check if the view begins with a specific @a prefix. - * - * @param prefix String to check against @a this. - * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. - * @internal C++20 preview. - */ - bool starts_with(std::string_view const &prefix) const noexcept; + template + TextView::self_type & + TextView::rtrim_if(F const &pred) + { + const char *spot = this->data_end(); + const char *limit = this->data(); + while (limit < spot-- && pred(*spot)) + ; + this->remove_suffix(this->data_end() - (spot + 1)); + return *this; + } - /** Check if the view begins with a specific @a prefix. - * - * @param prefix String to check against @a this. - * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. - * @internal C++20 preview. 
- */ - bool starts_with(char const *prefix) const; + template + TextView::self_type & + TextView::trim_if(F const &pred) + { + return this->ltrim_if(pred).rtrim_if(pred); + } - /** Check if the view begins with the character @c c. - * - * @param c Character to check. - * @return @c true if the string is non-empty and the first character is @c c. - * @internal C++20 preview. - */ - bool starts_with(char c) const noexcept; + constexpr inline auto + TextView::data() const noexcept -> value_type const * + { + return super_type::data(); + } - /** Check if the view begins with a specific @a prefix, ignoring case. - * - * @param prefix String to check against @a this. - * @return @c true if this->prefix(prefix.size()) == prefix without regard to case, @c false otherwise. - * @internal C++20 preview. - */ - bool starts_with_nocase(std::string_view const &prefix) const noexcept; + constexpr inline auto + TextView::data_end() const noexcept -> value_type const * + { + return this->data() + this->size(); + } - /** Check if the view begins with a specific @a prefix. - * - * @param prefix String to check against @a this. - * @return @c true if this->prefix(prefix.size()) == prefix, @c false otherwise. - * @internal C++20 preview. - */ - bool starts_with_nocase(char const *prefix) const; + inline constexpr TextView + TextView::substr(size_type pos, size_type count) const noexcept + { + if (pos >= this->size()) { + return {}; + } + count = std::min(this->size() - pos, count); + return {this->data() + pos, count}; + } - /** Check if the view begins with the character @c c, ignoring case. - * - * @param c Character to check. - * @return @c true if the string is non-empty and the first character is @c c. - * @internal C++20 preview. 
- */ - bool starts_with_nocase(char c) const noexcept; + inline bool + TextView::starts_with(std::string_view const &prefix) const noexcept + { + return this->size() >= prefix.size() && 0 == ::memcmp(this->data(), prefix.data(), prefix.size()); + } - /** Check if the view ends with a specific @a suffix. - * - * @param suffix String to check against @a this. - * @return @c true if this->suffix(suffix.size()) == suffix, @c false otherwise. - * @internal C++20 preview. - */ - bool ends_with(std::string_view const &suffix) const noexcept; + inline bool + TextView::starts_with(char const *prefix) const + { + return this->starts_with(super_type(prefix)); + } + inline bool + TextView::starts_with_nocase(char const *prefix) const + { + return this->starts_with_nocase(super_type{prefix}); + } - /** Check if the view ends with a specific @a suffix. - * - * @param suffix String to check against @a this. - * @return @c true if this->suffix(suffix.size()) == suffix, @c false otherwise. - * @internal C++20 preview. - */ - bool ends_with(char const *suffix) const; + inline bool + TextView::starts_with(char c) const noexcept + { + return !this->empty() && c == this->front(); + } + inline bool + TextView::starts_with_nocase(char c) const noexcept + { + return !this->empty() && tolower(c) == tolower(this->front()); + } - /** Check the view ends with the character @c c. - * - * @param c Character to check. - * @return @c true if the string is non-empty and the last character is @c c. - * @internal C++20 preview. - */ - bool ends_with(char c) const noexcept; + inline bool + TextView::starts_with_nocase(std::string_view const &prefix) const noexcept + { + return this->size() >= prefix.size() && 0 == ::strncasecmp(this->data(), prefix.data(), prefix.size()); + } - /** Check if the view starts with a specific @a suffix, ignoring case. - * - * @param suffix String to check against @a this. 
- * @return @c true if this->suffix(suffix.size()) == suffix without regard to case, @c false otherwise. - * @internal C++20 preview. - */ - bool ends_with_nocase(std::string_view const &suffix) const noexcept; + inline bool + TextView::ends_with(std::string_view const &suffix) const noexcept + { + return this->size() >= suffix.size() && 0 == ::memcmp(this->data_end() - suffix.size(), suffix.data(), suffix.size()); + } - /** Check if the view starts with a specific @a suffix, ignoring case. - * - * @param suffix String to check against @a this. - * @return @c true if this->suffix(suffix.size()) == suffix without regard to case, @c false otherwise. - * @internal C++20 preview. - */ - bool ends_with_nocase(char const *suffix) const; + inline bool + TextView::ends_with_nocase(std::string_view const &suffix) const noexcept + { + return this->size() >= suffix.size() && 0 == ::strncasecmp(this->data_end() - suffix.size(), suffix.data(), suffix.size()); + } - /** Check the view ends with the character @c c, ignoring case. - * - * @param c Character to check. - * @return @c true if the string is non-empty and the last character is @c c. - * @internal C++20 preview. - */ - bool ends_with_nocase(char c) const noexcept; - - // Functors for using this class in STL containers. - /// Ordering functor, lexicographic comparison. - struct LessThan { - /// @return Case sensitive ordering. - bool - operator()(self_type const &lhs, self_type const &rhs) const noexcept { - return -1 == strcmp(lhs, rhs); - } - }; + inline bool + TextView::ends_with(char const *suffix) const + { + return this->ends_with(super_type(suffix)); + } - /// Ordering functor, case ignoring lexicographic comparison. - struct LessThanNoCase { - /// @return Case insensitive ordering. 
- bool - operator()(self_type const &lhs, self_type const &rhs) const noexcept { - return -1 == strcasecmp(lhs, rhs); - } - }; + inline bool + TextView::ends_with_nocase(char const *suffix) const + { + return this->ends_with_nocase(super_type(suffix)); + } - /// Support for containers that need case insensitive comparisons between views. - struct CaselessEqual { - /// @return @c true if the view contants are equal when compared without regard to case. - bool - operator()(self_type const &lhs, self_type const &rhs) const noexcept { - return lhs.size() == rhs.size() && 0 == strcasecmp(lhs, rhs); + inline bool + TextView::ends_with(char c) const noexcept + { + return !this->empty() && c == this->back(); + } + inline bool + TextView::ends_with_nocase(char c) const noexcept + { + return !this->empty() && tolower(c) == tolower(this->back()); + } + + template + Stream & + TextView::stream_write(Stream &os, const TextView &b) const + { + // Local function, avoids extra template work. + static const auto stream_fill = [](Stream &ostream, size_t n) -> Stream & { + static constexpr size_t pad_size = 8; + typename Stream::char_type padding[pad_size]; + + std::fill_n(padding, pad_size, ostream.fill()); + for (; n >= pad_size && ostream.good(); n -= pad_size) + ostream.write(padding, pad_size); + if (n > 0 && ostream.good()) + ostream.write(padding, n); + return ostream; + }; + + const std::size_t w = os.width(); + if (w <= b.size()) { + os.write(b.data(), b.size()); + } else { + const std::size_t pad_size = w - b.size(); + const bool align_left = (os.flags() & Stream::adjustfield) == Stream::left; + if (!align_left && os.good()) + stream_fill(os, pad_size); + if (os.good()) + os.write(b.data(), b.size()); + if (align_left && os.good()) + stream_fill(os, pad_size); } - }; + return os; + } + + template + TextView::self_type + TextView::clip_prefix_of(F const &pred) + { + size_t idx = 0; + for (auto spot = this->data(), limit = spot + this->size(); spot < limit && pred(*spot); 
++spot, ++idx) + ; // empty + TextView token = this->prefix(idx); + this->remove_prefix(idx); + return token; + } + + template + TextView::self_type + TextView::clip_suffix_of(F const &pred) + { + size_t idx = this->size() - 1; + for (auto spot = this->data() + idx, limit = this->data(); spot >= limit && pred(*spot); --spot, --idx) + ; // empty + TextView token = this->suffix(idx); + this->remove_suffix(idx); + return token; + } + /// @endcond TextView_INTERNAL - /** A pointer to the first byte. + // Provide an instantiation for @c std::ostream as it's likely this is the only one ever used. + extern template std::ostream &TextView::stream_write(std::ostream &, const TextView &) const; + + /** A transform view. * - * @return Address of the first byte of the view. + * @tparam X Transform functor type. + * @tparam V Source view type. * - * @internal This fixes an error in @c std::string_view where this method is declared to return - * a template parameter instead of the correct @c value_type. The effect is @c string_view::data - * is not considered by the compiler to return char const * which makes meta-programming - * painful. - */ - constexpr value_type const *data() const noexcept; - - /** A pointer to past the last byte. + * A transform view acts like a view on the original source view @a V with each element transformed by + * @a X. * - * @return Address of the first byte past the end of the view. + * This is used most commonly with @c std::string_view. For example, if the goal is to handle a + * piece of text as if it were lower case without changing the actual text, the following would + * make that possible. + * @code + * std:::string_view source; // original text. + * TransformView xv(&tolower, source); + * @endcode * - * This is effectively @c std::string_view::end() except it explicit returns a pointer and not - * (potentially) an iterator class, to match up with @c data(). 
- */ - constexpr value_type const *data_end() const noexcept; - - /// Specialized stream operator implementation. - /// @note Use the standard stream operator unless there is a specific need for this, which is unlikely. - /// @return The stream @a os. - /// @internal Needed because @c std::ostream::write must be used and - /// so alignment / fill have to be explicitly handled. - template Stream &stream_write(Stream &os, const TextView &b) const; - - /// @cond OVERLOAD - // These methods are all overloads of other methods, defined in order to make the API more - // convenient to use. Mostly these overload @c int for @c size_t so naked numbers work as expected. - constexpr self_type prefix(int n) const noexcept; - self_type take_suffix(int n); - self_type split_prefix(int n); - constexpr self_type suffix(int n) const noexcept; - self_type split_suffix(int n); - /// @endcond + * To avoid having to figure out the exact signature of the transform, the convenience function + * @c transform_view_of is provide. + * @code + * std::string_view source; // original text. + * auto xv = transform_view_of(&tolower, source); + * @endcode + * + * This class supports iterators but those should be avoided as use of them makes extra copies of the instance which + * may be expensive if the functor is expensive. In cases where the functor is a function pointer or a lambda this isn't + * an issue. + */ + template class TransformView + { + using self_type = TransformView; ///< Self reference type. + using source_iterator = decltype(V{}.begin()); + + public: + using transform_type = X; ///< Export transform functor type. + using source_view_type = V; ///< Export source view type. + using source_value_type = std::remove_reference_t; + /// Result type of calling the transform on an element of the source view. + using value_type = std::invoke_result_t; + /// This class serves as its own iterator. 
+ using iterator = self_type; + + /** Construct a transform view using transform @a xf on source view @a v. + * + * @param xf Transform instance. + * @param v Source view. + */ + TransformView(transform_type &&xf, source_view_type const &v); + + /** Construct a transform view using transform @a xf on source view @a v. + * + * @param xf Transform instance. + * @param v Source view. + */ + TransformView(transform_type const &xf, source_view_type const &v); + + /// Copy constructor. + TransformView(self_type const &that) = default; + /// Move constructor. + TransformView(self_type &&that) = default; + + /// Copy assignment. + self_type &operator=(self_type const &that) = default; + /// Move assignment. + self_type &operator=(self_type &&that) = default; + + /// Equality. + bool operator==(self_type const &that) const; + /// Inequality. + bool operator!=(self_type const &that) const; + + /// Get the current element. + value_type operator*() const; + /// Move to next element. + self_type &operator++(); + /// Move to next element. + self_type operator++(int); + + /// Check if view is empty. + bool empty() const; + /// Check if bool is not empty. + explicit operator bool() const; + + /// Iterator to first transformed character. + iterator + begin() const + { + return *this; + } + /// Iterator past last transformed character. + iterator + end() const + { + return self_type{_xf, _limit}; + } -protected: - /// Initialize a bit mask to mark which characters are in this view. - static void init_delimiter_set(std::string_view const &delimiters, std::bitset<256> &set); -}; + protected: + transform_type _xf; ///< Per character transform functor. + source_iterator _spot; ///< Current location in the source view. + source_iterator _limit; ///< End of source view. -/// Internal table of digit values for characters. -/// This is -1 for characters that are not valid digits. 
-extern const int8_t svtoi_convert[256]; + /// Special constructor for making an empty instance to serve as the @c end iterator. + TransformView(transform_type &&xf, source_iterator &&limit) : _xf(xf), _spot(limit), _limit(limit) {} + }; -/** Convert the text in @c TextView @a src to a signed numeric value. + template + TransformView::TransformView(transform_type &&xf, source_view_type const &v) : _xf(xf), _spot(v.begin()), _limit(v.end()) + { + } - If @a parsed is non-null then the part of the string actually parsed is placed there. - @a base sets the conversion base. If not set base 10 is used with two special cases: + template + TransformView::TransformView(transform_type const &xf, source_view_type const &v) + : _xf(xf), _spot(v.begin()), _limit(v.end()) + { + } - - If the number starts with a literal '0' then it is treated as base 8. - - If the number starts with the literal characters '0x' or '0X' then it is treated as base 16. + template + auto + TransformView::operator*() const -> value_type + { + return _xf(*_spot); + } - If @a base is explicitly set then any leading radix indicator is not supported. -*/ -intmax_t svtoi(TextView src, TextView *parsed = nullptr, int base = 0); + template + auto + TransformView::operator++() -> self_type & + { + ++_spot; + return *this; + } -/** Convert the text in @c TextView @a src to an unsigned numeric value. + template + auto + TransformView::operator++(int) -> self_type + { + self_type zret{*this}; + ++_spot; + return zret; + } - If @a parsed is non-null then the part of the string actually parsed is placed there. - @a base sets the conversion base. If not set base 10 is used with two special cases: + template + bool + TransformView::empty() const + { + return _spot == _limit; + } - - If the number starts with a literal '0' then it is treated as base 8. - - If the number starts with the literal characters '0x' or '0X' then it is treated as base 16. 
+ template TransformView::operator bool() const + { + return _spot != _limit; + } - If @a base is explicitly set then any leading radix indicator is not supported. -*/ -uintmax_t svtou(TextView src, TextView *parsed = nullptr, int base = 0); - -/** Convert the text in @c src to an unsigned numeric value. - * - * @tparam N The radix (must be 1..36) - * @param src The source text. Updated during parsing. - * @return The converted numeric value. - * - * This is a specialized function useful only where conversion performance is critical. It is used - * inside @c svtoi and @a svtou for the common cases of 8, 10, and 16, therefore normally this isn't much more - * performant in those cases than just @c svtoi. Because of this only positive values are parsed. - * If determining the radix from the text or signed value parsing is needed, used @c svtoi. - * - * @a src is updated in place to indicate what characters were parsed by removing them from the view - * Parsing stops on the first invalid digit, so any leading non-digit characters (e.g. whitespace) - * must already be removed. For overflow, all valid digits are consumed and the maximum value returned. - */ -template -uintmax_t -svto_radix(TextView &src) { - static_assert(1 <= RADIX && RADIX <= 36, "Radix must be in the range 2..36"); - static constexpr auto MAX = std::numeric_limits::max(); - static constexpr auto OVERFLOW_LIMIT = MAX / RADIX; - uintmax_t zret = 0; - uintmax_t v; - while (src.size() && ((v = swoc::svtoi_convert[uint8_t(*src)]) < RADIX)) { - // Tweaked for performance - need to check range after @a RADIX multiply. - ++src; // Update view iff the character is parsed. - if (zret <= OVERFLOW_LIMIT && v <= (MAX - (zret *= RADIX)) ) { - zret += v; - } else { - zret = MAX; // clamp to max - once set will always hit this case for subsequent input. 
- } + template + bool + TransformView::operator==(self_type const &that) const + { + return _spot == that._spot && _limit == that._limit; } - return zret; -} - -/// Convenience overload. -/// @see svto_radix(swoc::TextView &src) -template -uintmax_t -svto_radix(TextView &&src) { - return svto_radix(src); -} - -/** Parse @a text as a floating point number. - * - * @param text The input text. - * @param parsed Parsed text [out] - * @return The floating point value, or 0.0 if invalid input. - * - * If @a parsed is not @a nullptr then the span of characters parsed is put there. This can be - * used to check if the parse was scuccesful - on a failed parse, it will be empty. - * - * @note This should be within 1 epsilon of correct, although it doesn't guarantee picking - * the closest epsilon. It's more than sufficient for use in configurations, but possibly - * not for high precision work. - */ -double svtod(TextView text, TextView *parsed = nullptr); -// ---------------------------------------------------------- -// Inline implementations. -// Note: Why, you may ask, do I use @c TextView::self_type for return type instead of the -// simpler plain @c TextView ? Because otherwise Doxygen can't match up the declaration and -// definition and the reference documentation is messed up. Sigh. - -inline constexpr CharSet::CharSet(TextView const &chars) { - for (auto c : chars) { - _chars[uint8_t(c)] = true; - } -} - -inline bool -CharSet::operator()(unsigned char c) const { - return _chars[c]; -} - -inline bool -CharSet::operator()(char c) const { - return _chars[uint8_t(c)]; -} - -// === TextView Implementation === -/// @cond TextView_INTERNAL -// Doxygen doesn't match these up well due to various type and template issues. -// @internal If there is more than one overload for numeric types, it's easy to get ambiguity. The only -// fix, unfortunately, is lots of overloads to cover the ambiguous cases. 
-inline constexpr TextView::TextView(const char *ptr, size_t n) noexcept - : super_type(ptr, n == npos ? (ptr ? ::strlen(ptr) : 0) : n) {} -inline constexpr TextView::TextView(const char *ptr, unsigned n) noexcept : super_type(ptr, size_t(n)) {} -inline constexpr TextView::TextView(const char *ptr, ssize_t n) noexcept - : super_type(ptr, n < 0 ? (ptr ? ::strlen(ptr) : 0) : size_t(n)) {} -inline constexpr TextView::TextView(const char *ptr, int n) noexcept - : super_type(ptr, n < 0 ? (ptr ? ::strlen(ptr) : 0) : size_t(n)) {} -inline constexpr TextView::TextView(std::nullptr_t) noexcept : super_type(nullptr, 0) {} -inline TextView::TextView(std::string const &str) noexcept : super_type(str) {} -inline constexpr TextView::TextView(super_type const &that) noexcept : super_type(that) {} -template constexpr TextView::TextView(const char (&s)[N]) noexcept : super_type(s, s[N - 1] ? N : N - 1) {} -template constexpr TextView::TextView(C const &c) : super_type(c.data(), c.size()) {} - -inline void -TextView::init_delimiter_set(std::string_view const &delimiters, std::bitset<256> &set) { - set.reset(); - for (char c : delimiters) - set[static_cast(c)] = true; -} - -inline auto -TextView::clear() -> self_type & { - new (this) self_type(); - return *this; -} - -inline constexpr char -TextView::operator*() const { - return this->empty() ? 
char(0) : *(this->data()); -} - -inline constexpr bool -TextView::operator!() const noexcept { - return this->empty(); -} - -inline constexpr TextView::operator bool() const noexcept { - return !this->empty(); -} - -inline auto -TextView::operator++() -> self_type & { - this->remove_prefix(1); - return *this; -} - -inline auto -TextView::operator++(int) -> self_type { - self_type zret{*this}; - this->remove_prefix(1); - return zret; -} - -inline auto -TextView::operator+=(size_t n) -> self_type & { - this->remove_prefix(n); - return *this; -} - -template -inline auto -TextView::operator=(const char (&s)[N]) -> self_type & { - return *this = self_type{s, s[N - 1] ? N : N - 1}; -} - -inline auto -TextView::operator=(super_type const &that) -> self_type & { - this->super_type::operator=(that); - return *this; -} - -inline auto -TextView::operator=(char *&s) -> self_type & { - this->super_type::operator=(s); - return *this; -} - -inline auto -TextView::operator=(char const *&s) -> self_type & { - this->super_type::operator=(s); - return *this; -} - -inline auto -TextView::operator=(const std::string &s) -> self_type & { - this->super_type::operator=(s); - return *this; -} - -inline auto -TextView::assign(char *&c_str) -> self_type & { - return this->assign(c_str, strlen(c_str)); -} - -inline auto -TextView::assign(char const *&c_str) -> self_type & { - return this->assign(c_str, strlen(c_str)); -} - -inline auto -TextView::assign(const std::string &s) -> self_type & { - *this = super_type(s); - return *this; -} - -inline TextView & -TextView::assign(char const *ptr, size_t n) { - *this = super_type(ptr, n == npos ? (ptr ? ::strlen(ptr) : 0) : n); - return *this; -} - -inline TextView & -TextView::assign(char const *b, char const *e) { - *this = super_type(b, e - b); - return *this; -} - -template -inline auto -TextView::assign(char const (&s)[N]) noexcept -> self_type & { - return *this = self_type{s, s[N - 1] ? 
N : N - 1}; -} - -inline constexpr auto -TextView::prefix(size_t n) const noexcept -> self_type { - return {this->data(), std::min(n, this->size())}; -} - -inline constexpr TextView -TextView::prefix(int n) const noexcept { - return {this->data(), std::min(n, this->size())}; -} - -inline auto -TextView::prefix_at(char c) const -> self_type { - self_type zret; // default to empty return. - if (auto n = this->find(c); n != npos) { - zret.assign(this->data(), n); - } - return zret; -} - -inline TextView -TextView::prefix_at(std::string_view const &delimiters) const { - self_type zret; // default to empty return. - if (auto n = this->find_first_of(delimiters); n != npos) { - zret.assign(this->data(), n); - } - return zret; -} - -template -auto -TextView::prefix_if(F const &pred) const -> self_type { - self_type zret; // default to empty return. - if (auto n = this->find_if(pred); n != npos) { - zret.assign(this->data(), n); - } - return zret; -} - -inline auto -TextView::remove_prefix(size_t n) -> self_type & { - this->super_type::remove_prefix(std::min(n, this->size())); - return *this; -} - -inline TextView & -TextView::remove_prefix_at(char c) { - if (auto n = this->find(c); n != npos) { - this->super_type::remove_prefix(n + 1); - } - return *this; -} - -inline auto -TextView::remove_prefix_at(std::string_view const &delimiters) -> self_type & { - if (auto n = this->find_first_of(delimiters); n != npos) { - this->super_type::remove_prefix(n + 1); - } - return *this; -} - -template -auto -TextView::remove_prefix_if(F const &pred) -> self_type & { - if (auto n = this->find_if(pred); n != npos) { - this->super_type::remove_prefix(n + 1); - } - return *this; -} - -inline TextView -TextView::split_prefix(size_t n) { - self_type zret; // default to empty return. 
- if (n < this->size()) { - zret = this->prefix(n); - this->remove_prefix(std::min(n + 1, this->size())); + + template + bool + TransformView::operator!=(self_type const &that) const + { + return _spot != that._spot || _limit != that._limit; } - return zret; -} - -inline TextView -TextView::split_prefix(int n) { - return this->split_prefix(size_t(n)); -} - -inline TextView -TextView::split_prefix_at(char c) { - return this->split_prefix(this->find(c)); -} - -inline TextView -TextView::split_prefix_at(std::string_view const &delimiters) { - return this->split_prefix(this->find_first_of(delimiters)); -} - -template -TextView::self_type -TextView::split_prefix_if(F const &pred) { - return this->split_prefix(this->find_if(pred)); -} - -inline TextView -TextView::take_prefix(size_t n) { - n = std::min(n, this->size()); - self_type zret = this->prefix(n); - this->remove_prefix(std::min(n + 1, this->size())); - return zret; -} - -inline TextView -TextView::take_prefix_at(char c) { - return this->take_prefix(this->find(c)); -} - -inline TextView -TextView::take_prefix_at(std::string_view const &delimiters) { - return this->take_prefix(this->find_first_of(delimiters)); -} - -template -auto -TextView::take_prefix_if(F const &pred) -> self_type { - return this->take_prefix(this->find_if(pred)); -} - -inline constexpr TextView -TextView::suffix(size_t n) const noexcept { - n = std::min(n, this->size()); - return {this->data_end() - n, n}; -} - -inline constexpr TextView -TextView::suffix(int n) const noexcept { - return this->suffix(size_t(n)); -} - -inline TextView -TextView::suffix_at(char c) const { - self_type zret; - if (auto n = this->rfind(c); n != npos && n + 1 < this->size()) { - ++n; - zret.assign(this->data() + n, this->size() - n); - } - return zret; -} - -inline TextView -TextView::suffix_at(std::string_view const &delimiters) const { - self_type zret; - if (auto n = this->find_last_of(delimiters); n != npos) { - ++n; - zret.assign(this->data() + n, this->size() - 
n); - } - return zret; -} - -template -auto -TextView::suffix_if(F const &pred) const -> self_type { - self_type zret; - if (auto n = this->rfind_if(pred); n != npos) { - ++n; - zret.assign(this->data() + n, this->size() - n); - } - return zret; -} - -inline auto -TextView::remove_suffix(size_t n) -> self_type & { - this->super_type::remove_suffix(std::min(n, this->size())); - return *this; -} - -inline TextView & -TextView::remove_suffix_at(char c) { - if (auto n = this->rfind(c); n != npos) { - return this->remove_suffix(this->size() - n); - } - return this->clear(); -} - -inline TextView & -TextView::remove_suffix_at(std::string_view const &delimiters) { - if (auto n = this->find_last_of(delimiters); n != npos) { - return this->remove_suffix(this->size() - n); - } - return this->clear(); -} - -template -TextView::self_type & -TextView::remove_suffix_if(F const &pred) { - if (auto n = this->rfind_if(pred); n != npos) { - return this->remove_suffix(this->size() - n); - } - return this->clear(); -} - -inline TextView -TextView::split_suffix(size_t n) { - self_type zret; - n = std::min(n, this->size()); - zret = this->suffix(n); - this->remove_suffix(n + 1); - return zret; -} - -inline auto -TextView::split_suffix(int n) -> self_type { - return this->split_suffix(size_t(n)); -} - -inline TextView -TextView::split_suffix_at(char c) { - auto idx = this->rfind(c); - return npos == idx ? self_type{} : this->split_suffix(this->size() - (idx + 1)); -} - -inline auto -TextView::split_suffix_at(std::string_view const &delimiters) -> self_type { - auto idx = this->find_last_of(delimiters); - return npos == idx ? 
self_type{} : this->split_suffix(this->size() - (idx + 1)); -} - -template -TextView::self_type -TextView::split_suffix_if(F const &pred) { - return this->split_suffix(this->rfind_if(pred)); -} - -inline TextView -TextView::take_suffix(size_t n) { - self_type zret{*this}; - *this = zret.split_prefix(n); - return zret; -} - -inline TextView -TextView::take_suffix(int n) { - return this->take_suffix(size_t(n)); -} - -inline TextView -TextView::take_suffix_at(char c) { - return this->take_suffix(this->rfind(c)); -} - -inline TextView -TextView::take_suffix_at(std::string_view const &delimiters) { - return this->take_suffix(this->find_last_of(delimiters)); -} - -template -TextView::self_type -TextView::take_suffix_if(F const &pred) { - return this->take_suffix_at(this->rfind_if(pred)); -} - -template -inline size_t -TextView::find_if(F const &pred) const { - for (const char *spot = this->data(), *limit = this->data_end(); spot < limit; ++spot) - if (pred(*spot)) - return spot - this->data(); - return npos; -} - -template -inline size_t -TextView::rfind_if(F const &pred) const { - for (const char *spot = this->data_end(), *limit = this->data(); spot > limit;) - if (pred(*--spot)) - return spot - this->data(); - return npos; -} - -inline TextView & -TextView::ltrim(char c) { - this->remove_prefix(this->find_first_not_of(c)); - return *this; -} - -inline TextView & -TextView::rtrim(char c) { - auto n = this->find_last_not_of(c); - this->remove_suffix(this->size() - (n == npos ? 
0 : n + 1)); - return *this; -} - -inline TextView & -TextView::trim(char c) { - return this->ltrim(c).rtrim(c); -} - -inline TextView & -TextView::ltrim(CharSet const &delimiters) { - const char *spot = this->data(); - const char *limit = this->data_end(); - - while (spot < limit && delimiters(*spot)) { - ++spot; - } - this->remove_prefix(spot - this->data()); - - return *this; -} - -inline TextView & -TextView::ltrim(std::string_view const &delimiters) { - return this->ltrim(CharSet(delimiters)); -} - -inline TextView & -TextView::ltrim(const char *delimiters) { - return this->ltrim(CharSet(delimiters)); -} - -inline TextView & -TextView::rtrim(CharSet const &delimiters) { - const char *spot = this->data_end(); - const char *limit = this->data(); - while (limit < spot-- && delimiters(*spot)) { - } - - this->remove_suffix(this->data_end() - (spot + 1)); - return *this; -} - -inline TextView & -TextView::rtrim(std::string_view const &delimiters) { - return this->rtrim(CharSet(delimiters)); -} - -inline TextView & -TextView::trim(CharSet const &delimiters) { - const char *spot; - const char *limit; - - // Do this explicitly, so we don't have to initialize the character set twice. 
- for (spot = this->data(), limit = this->data_end(); spot < limit && delimiters(*spot); ++spot) - ; - this->remove_prefix(spot - this->data()); - - spot = this->data_end(); - limit = this->data(); - while (limit < spot-- && delimiters(*spot)) { - } - this->remove_suffix(this->data_end() - (spot + 1)); - - return *this; -} - -inline TextView & -TextView::trim(std::string_view const &delimiters) { - return this->trim(CharSet(delimiters)); -} - -inline TextView & -TextView::trim(const char *delimiters) { - return this->trim(CharSet(delimiters)); -} - -template -TextView::self_type & -TextView::ltrim_if(F const &pred) { - const char *spot; - const char *limit; - for (spot = this->data(), limit = this->data_end(); spot < limit && pred(*spot); ++spot) - ; - this->remove_prefix(spot - this->data()); - return *this; -} - -template -TextView::self_type & -TextView::rtrim_if(F const &pred) { - const char *spot = this->data_end(); - const char *limit = this->data(); - while (limit < spot-- && pred(*spot)) - ; - this->remove_suffix(this->data_end() - (spot + 1)); - return *this; -} - -template -TextView::self_type & -TextView::trim_if(F const &pred) { - return this->ltrim_if(pred).rtrim_if(pred); -} - -constexpr inline auto -TextView::data() const noexcept -> value_type const * { - return super_type::data(); -} - -constexpr inline auto -TextView::data_end() const noexcept -> value_type const * { - return this->data() + this->size(); -} - -inline constexpr TextView -TextView::substr(size_type pos, size_type count) const noexcept { - if (pos >= this->size()) { - return {}; - } - count = std::min(this->size() - pos, count); - return {this->data() + pos, count}; -} - -inline bool -TextView::starts_with(std::string_view const &prefix) const noexcept { - return this->size() >= prefix.size() && 0 == ::memcmp(this->data(), prefix.data(), prefix.size()); -} - -inline bool -TextView::starts_with(char const *prefix) const { - return this->starts_with(super_type(prefix)); -} -inline bool 
-TextView::starts_with_nocase(char const *prefix) const { - return this->starts_with_nocase(super_type{prefix}); -} - -inline bool -TextView::starts_with(char c) const noexcept { - return !this->empty() && c == this->front(); -} -inline bool -TextView::starts_with_nocase(char c) const noexcept { - return !this->empty() && tolower(c) == tolower(this->front()); -} - -inline bool -TextView::starts_with_nocase(std::string_view const &prefix) const noexcept { - return this->size() >= prefix.size() && 0 == ::strncasecmp(this->data(), prefix.data(), prefix.size()); -} - -inline bool -TextView::ends_with(std::string_view const &suffix) const noexcept { - return this->size() >= suffix.size() && 0 == ::memcmp(this->data_end() - suffix.size(), suffix.data(), suffix.size()); -} - -inline bool -TextView::ends_with_nocase(std::string_view const &suffix) const noexcept { - return this->size() >= suffix.size() && 0 == ::strncasecmp(this->data_end() - suffix.size(), suffix.data(), suffix.size()); -} - -inline bool -TextView::ends_with(char const *suffix) const { - return this->ends_with(super_type(suffix)); -} - -inline bool -TextView::ends_with_nocase(char const *suffix) const { - return this->ends_with_nocase(super_type(suffix)); -} - -inline bool -TextView::ends_with(char c) const noexcept { - return !this->empty() && c == this->back(); -} -inline bool -TextView::ends_with_nocase(char c) const noexcept { - return !this->empty() && tolower(c) == tolower(this->back()); -} - -template -Stream & -TextView::stream_write(Stream &os, const TextView &b) const { - // Local function, avoids extra template work. 
- static const auto stream_fill = [](Stream &ostream, size_t n) -> Stream & { - static constexpr size_t pad_size = 8; - typename Stream::char_type padding[pad_size]; - - std::fill_n(padding, pad_size, ostream.fill()); - for (; n >= pad_size && ostream.good(); n -= pad_size) - ostream.write(padding, pad_size); - if (n > 0 && ostream.good()) - ostream.write(padding, n); - return ostream; - }; - const std::size_t w = os.width(); - if (w <= b.size()) { - os.write(b.data(), b.size()); - } else { - const std::size_t pad_size = w - b.size(); - const bool align_left = (os.flags() & Stream::adjustfield) == Stream::left; - if (!align_left && os.good()) - stream_fill(os, pad_size); - if (os.good()) - os.write(b.data(), b.size()); - if (align_left && os.good()) - stream_fill(os, pad_size); - } - return os; -} - -template -TextView::self_type -TextView::clip_prefix_of(F const &pred) { - size_t idx = 0; - for (auto spot = this->data(), limit = spot + this->size(); spot < limit && pred(*spot); ++spot, ++idx) - ; // empty - TextView token = this->prefix(idx); - this->remove_prefix(idx); - return token; -} - -template -TextView::self_type -TextView::clip_suffix_of(F const &pred) { - size_t idx = this->size() - 1; - for (auto spot = this->data() + idx, limit = this->data(); spot >= limit && pred(*spot); --spot, --idx) - ; // empty - TextView token = this->suffix(idx); - this->remove_suffix(idx); - return token; -} -/// @endcond TextView_INTERNAL - -// Provide an instantiation for @c std::ostream as it's likely this is the only one ever used. -extern template std::ostream &TextView::stream_write(std::ostream &, const TextView &) const; - -/** A transform view. - * - * @tparam X Transform functor type. - * @tparam V Source view type. - * - * A transform view acts like a view on the original source view @a V with each element transformed by - * @a X. - * - * This is used most commonly with @c std::string_view. 
For example, if the goal is to handle a - * piece of text as if it were lower case without changing the actual text, the following would - * make that possible. - * @code - * std:::string_view source; // original text. - * TransformView xv(&tolower, source); - * @endcode - * - * To avoid having to figure out the exact signature of the transform, the convenience function - * @c transform_view_of is provide. - * @code - * std::string_view source; // original text. - * auto xv = transform_view_of(&tolower, source); - * @endcode - * - * This class supports iterators but those should be avoided as use of them makes extra copies of the instance which - * may be expensive if the functor is expensive. In cases where the functor is a function pointer or a lambda this isn't - * an issue. - */ -template class TransformView { - using self_type = TransformView; ///< Self reference type. - using source_iterator = decltype(V{}.begin()); - -public: - using transform_type = X; ///< Export transform functor type. - using source_view_type = V; ///< Export source view type. - using source_value_type = std::remove_reference_t; - /// Result type of calling the transform on an element of the source view. - using value_type = std::invoke_result_t; - /// This class serves as its own iterator. - using iterator = self_type; - - /** Construct a transform view using transform @a xf on source view @a v. + /** Create a transformed view of a source. * - * @param xf Transform instance. - * @param v Source view. + * @tparam X The transform functor type. + * @tparam V The source type. + * @param xf The transform. + * @param src The view source. + * @return A @c TransformView that applies @a xf to @a src. */ - TransformView(transform_type &&xf, source_view_type const &v); + template + TransformView + transform_view_of(X const &xf, V const &src) + { + return TransformView(xf, src); + } - /** Construct a transform view using transform @a xf on source view @a v. + /** Identity transform view. 
+ * + * @tparam V The source type. * - * @param xf Transform instance. - * @param v Source view. + * This is a transform that returns the input unmodified. This is convenient when a transform is + * required in general but not in all cases. */ - TransformView(transform_type const &xf, source_view_type const &v); - - /// Copy constructor. - TransformView(self_type const &that) = default; - /// Move constructor. - TransformView(self_type &&that) = default; - - /// Copy assignment. - self_type &operator=(self_type const &that) = default; - /// Move assignment. - self_type &operator=(self_type &&that) = default; - - /// Equality. - bool operator==(self_type const &that) const; - /// Inequality. - bool operator!=(self_type const &that) const; - - /// Get the current element. - value_type operator*() const; - /// Move to next element. - self_type &operator++(); - /// Move to next element. - self_type operator++(int); - - /// Check if view is empty. - bool empty() const; - /// Check if bool is not empty. - explicit operator bool() const; - - /// Iterator to first transformed character. - iterator - begin() const { - return *this; - } - /// Iterator past last transformed character. - iterator - end() const { - return self_type{_xf, _limit}; - } + template class TransformView + { + using self_type = TransformView; ///< Self reference type. + /// Iterator over source, for internal use. + using source_iterator = decltype(V{}.begin()); -protected: - transform_type _xf; ///< Per character transform functor. - source_iterator _spot; ///< Current location in the source view. - source_iterator _limit; ///< End of source view. + public: + using source_view_type = V; ///< Export source view type. + using source_value_type = std::remove_reference_t; + /// Result type of calling the transform on an element of the source view. + using value_type = source_value_type; + /// This class serves as its own iterator. 
+ using iterator = self_type; - /// Special constructor for making an empty instance to serve as the @c end iterator. - TransformView(transform_type &&xf, source_iterator &&limit) : _xf(xf), _spot(limit), _limit(limit) {} -}; + /** Construct identity transform view from @a v. + * + * @param v Source view. + */ + TransformView(source_view_type const &v) : _spot(v.begin()), _limit(v.end()) {} -template -TransformView::TransformView(transform_type &&xf, source_view_type const &v) : _xf(xf), _spot(v.begin()), _limit(v.end()) {} - -template -TransformView::TransformView(transform_type const &xf, source_view_type const &v) - : _xf(xf), _spot(v.begin()), _limit(v.end()) {} - -template -auto -TransformView::operator*() const -> value_type { - return _xf(*_spot); -} - -template -auto -TransformView::operator++() -> self_type & { - ++_spot; - return *this; -} - -template -auto -TransformView::operator++(int) -> self_type { - self_type zret{*this}; - ++_spot; - return zret; -} - -template -bool -TransformView::empty() const { - return _spot == _limit; -} - -template TransformView::operator bool() const { - return _spot != _limit; -} - -template -bool -TransformView::operator==(self_type const &that) const { - return _spot == that._spot && _limit == that._limit; -} - -template -bool -TransformView::operator!=(self_type const &that) const { - return _spot != that._spot || _limit != that._limit; -} - -/** Create a transformed view of a source. - * - * @tparam X The transform functor type. - * @tparam V The source type. - * @param xf The transform. - * @param src The view source. - * @return A @c TransformView that applies @a xf to @a src. - */ -template -TransformView -transform_view_of(X const &xf, V const &src) { - return TransformView(xf, src); -} - -/** Identity transform view. - * - * @tparam V The source type. - * - * This is a transform that returns the input unmodified. This is convenient when a transform is - * required in general but not in all cases. 
- */ -template class TransformView { - using self_type = TransformView; ///< Self reference type. - /// Iterator over source, for internal use. - using source_iterator = decltype(V{}.begin()); - -public: - using source_view_type = V; ///< Export source view type. - using source_value_type = std::remove_reference_t; - /// Result type of calling the transform on an element of the source view. - using value_type = source_value_type; - /// This class serves as its own iterator. - using iterator = self_type; - - /** Construct identity transform view from @a v. - * - * @param v Source view. - */ - TransformView(source_view_type const &v) : _spot(v.begin()), _limit(v.end()) {} + /// Copy constructor. + TransformView(self_type const &that) = default; + /// Move constructor. + TransformView(self_type &&that) = default; - /// Copy constructor. - TransformView(self_type const &that) = default; - /// Move constructor. - TransformView(self_type &&that) = default; + /// Copy assignment. + self_type &operator=(self_type const &that) = default; + /// Move assignment. + self_type &operator=(self_type &&that) = default; - /// Copy assignment. - self_type &operator=(self_type const &that) = default; - /// Move assignment. - self_type &operator=(self_type &&that) = default; + /// Equality. + bool operator==(self_type const &that) const; + /// Inequality. + bool operator!=(self_type const &that) const; - /// Equality. - bool operator==(self_type const &that) const; - /// Inequality. - bool operator!=(self_type const &that) const; + /// Get the current element. + value_type operator*() const; - /// Get the current element. - value_type operator*() const; + /// Move to next element. + self_type &operator++(); - /// Move to next element. - self_type &operator++(); + /// Move to next element. + self_type operator++(int); - /// Move to next element. - self_type operator++(int); + /// Check if view is empty. + bool empty() const; - /// Check if view is empty. 
- bool empty() const; + /// Check if bool is not empty. + explicit operator bool() const; - /// Check if bool is not empty. - explicit operator bool() const; + /// Iterator to first transformed character. + iterator begin() const; + /// Iterator past last transformed character. + iterator end() const; - /// Iterator to first transformed character. - iterator begin() const; - /// Iterator past last transformed character. - iterator end() const; + protected: + source_iterator _spot; ///< Current location. + source_iterator _limit; ///< End marker. -protected: - source_iterator _spot; ///< Current location. - source_iterator _limit; ///< End marker. + /// Special constructor for making an empty instance to serve as the @c end iterator. + explicit TransformView(source_iterator &&limit) : _spot(limit), _limit(limit) {} + }; - /// Special constructor for making an empty instance to serve as the @c end iterator. - explicit TransformView(source_iterator &&limit) : _spot(limit), _limit(limit) {} -}; + template + auto + TransformView::operator*() const -> value_type + { + return *_spot; + } -template -auto -TransformView::operator*() const -> value_type { - return *_spot; -} - -template -auto -TransformView::operator++() -> self_type & { - ++_spot; - return *this; -} - -template -auto -TransformView::operator++(int) -> self_type { - auto zret{*this}; - ++*this; - return zret; -} - -template -bool -TransformView::operator==(const self_type &that) const { - return _spot == that._spot && _limit == that._limit; -} - -template -bool -TransformView::operator!=(const self_type &that) const { - return _spot != that._spot || _limit != that._limit; -} - -template -bool -TransformView::empty() const { - return _spot == _limit; -} - -template TransformView::operator bool() const { - return _spot != _limit; -} - -template -auto -TransformView::begin() const -> self_type { - return *this; -} - -template -auto -TransformView::end() const -> self_type { - return self_type{_limit}; -} + 
template + auto + TransformView::operator++() -> self_type & + { + ++_spot; + return *this; + } -/// @cond INTERNAL_DETAIL -// Capture @c void transforms and make them identity transforms. -template -TransformView -transform_view_of(V const &v) { - return TransformView(v); -} + template + auto + TransformView::operator++(int) -> self_type + { + auto zret{*this}; + ++*this; + return zret; + } -/// @endcond + template + bool + TransformView::operator==(const self_type &that) const + { + return _spot == that._spot && _limit == that._limit; + } -/** User literals for TextView. - * - * - _tv : TextView - * - _sv : std::string_view - */ -namespace literals { -/** Literal constructor for @c std::string_view. - * - * @param s The source string. - * @param n Size of the source string. - * @return A @c string_view - * - * @internal This is provided because the STL one does not support @c constexpr which seems - * rather bizarre to me, but there it is. Update: this depends on the version of the compiler, - * so hopefully someday this can be removed. - */ -constexpr std::string_view -operator""_sv(const char *s, size_t n) { - return {s, n}; -} - -/** Literal constructor for @c swoc::TextView. - * - * @param s The source string. - * @param n Size of the source string. - * @return A @c string_view - * - * @internal This is provided because the STL one does not support @c constexpr which seems - * rather bizarre to me, but there it is. Update: this depends on the version of the compiler, - * so hopefully someday this can be removed. 
- */ -constexpr swoc::TextView -operator""_tv(const char *s, size_t n) { - return {s, n}; -} -} // namespace literals + template + bool + TransformView::operator!=(const self_type &that) const + { + return _spot != that._spot || _limit != that._limit; + } + + template + bool + TransformView::empty() const + { + return _spot == _limit; + } + + template TransformView::operator bool() const + { + return _spot != _limit; + } + + template + auto + TransformView::begin() const -> self_type + { + return *this; + } + + template + auto + TransformView::end() const -> self_type + { + return self_type{_limit}; + } + + /// @cond INTERNAL_DETAIL + // Capture @c void transforms and make them identity transforms. + template + TransformView + transform_view_of(V const &v) + { + return TransformView(v); + } + + /// @endcond + + /** User literals for TextView. + * + * - _tv : TextView + * - _sv : std::string_view + */ + namespace literals + { + /** Literal constructor for @c std::string_view. + * + * @param s The source string. + * @param n Size of the source string. + * @return A @c string_view + * + * @internal This is provided because the STL one does not support @c constexpr which seems + * rather bizarre to me, but there it is. Update: this depends on the version of the compiler, + * so hopefully someday this can be removed. + */ + constexpr std::string_view + operator""_sv(const char *s, size_t n) + { + return {s, n}; + } + + /** Literal constructor for @c swoc::TextView. + * + * @param s The source string. + * @param n Size of the source string. + * @return A @c string_view + * + * @internal This is provided because the STL one does not support @c constexpr which seems + * rather bizarre to me, but there it is. Update: this depends on the version of the compiler, + * so hopefully someday this can be removed. 
+ */ + constexpr swoc::TextView + operator""_tv(const char *s, size_t n) + { + return {s, n}; + } + } // namespace literals -}} // namespace swoc::SWOC_VERSION_NS +} // namespace SWOC_VERSION_NS +} // namespace swoc -namespace std { +namespace std +{ /// Write the contents of @a view to the stream @a os. ostream &operator<<(ostream &os, const swoc::TextView &view); @@ -2136,7 +2261,8 @@ template struct iterator_traits struct hash { static constexpr hash super_hash{}; size_t - operator()(swoc::TextView const &s) const { + operator()(swoc::TextView const &s) const + { return super_hash(s); } }; diff --git a/lib/swoc/src/TextView.cc b/lib/swoc/src/TextView.cc index 5e6cd749117..0b79267fc77 100644 --- a/lib/swoc/src/TextView.cc +++ b/lib/swoc/src/TextView.cc @@ -10,233 +10,240 @@ */ #include "swoc/TextView.h" -#include -#include using namespace swoc::literals; -namespace swoc { inline namespace SWOC_VERSION_NS { - -/// @cond INTERNAL_DETAIL -const int8_t svtoi_convert[256] = { - /* [can't do this nicely because clang format won't allow exdented comments] - 0 1 2 3 4 5 6 7 8 9 A B C D E F - */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30 - -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40 - 25, 26, 27, 28, 20, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 50 - -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60 - 25, 26, 27, 28, 20, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 70 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, // C0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0 -}; -/// @endcond - -intmax_t -svtoi(TextView src, TextView *out, int base) { - static constexpr uintmax_t ABS_MAX = std::numeric_limits::max(); - static constexpr uintmax_t ABS_MIN = uintmax_t(std::numeric_limits::min()); - intmax_t zret = 0; - - if (src.ltrim_if(&isspace)) { - TextView parsed; - const char *start = src.data(); - bool neg = false; - if ('-' == *src) { - ++src; - neg = true; - } else if ('+' == *src) { - ++src; - } - auto n = svtou(src, &parsed, base); - if (!parsed.empty()) { - if (out) { - out->assign(start, parsed.data_end()); +namespace swoc +{ +inline namespace SWOC_VERSION_NS +{ + + /// @cond INTERNAL_DETAIL + const int8_t svtoi_convert[256] = { + /* [can't do this nicely because clang format won't allow exdented comments] + 0 1 2 3 4 5 6 7 8 9 A B C D E F + */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30 + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40 + 25, 26, 27, 28, 20, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 50 + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60 + 25, 26, 27, 28, 20, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 70 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0 + -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0 + }; + /// @endcond + + intmax_t + svtoi(TextView src, TextView *out, int base) + { + static constexpr uintmax_t ABS_MAX = std::numeric_limits::max(); + static constexpr uintmax_t ABS_MIN = uintmax_t(std::numeric_limits::min()); + intmax_t zret = 0; + + if (src.ltrim_if(&isspace)) { + TextView parsed; + const char *start = src.data(); + bool neg = false; + if ('-' == *src) { + ++src; + neg = true; + } else if ('+' == *src) { + ++src; } - if (neg) { - uintmax_t temp = std::min(n, ABS_MIN); - if (temp == ABS_MIN) { - zret = std::numeric_limits::min(); + auto n = svtou(src, &parsed, base); + if (!parsed.empty()) { + if (out) { + out->assign(start, parsed.data_end()); + } + if (neg) { + uintmax_t temp = std::min(n, ABS_MIN); + if (temp == ABS_MIN) { + zret = std::numeric_limits::min(); + } else { + zret = -intmax_t(temp); + } } else { - zret = -intmax_t(temp); + zret = std::min(n, ABS_MAX); } - } else { - zret = std::min(n, ABS_MAX); } } + return zret; } - return zret; -} -uintmax_t -svtou(TextView src, TextView *out, int base) { - uintmax_t zret = 0; + uintmax_t + svtou(TextView src, TextView *out, int base) + { + uintmax_t zret = 0; - if (out) { - out->clear(); - } + if (out) { + out->clear(); + } - if (src.ltrim_if(&isspace).size()) { - auto origin = src.data(); // cache to handle prefix skipping. - // If base is 0, it wasn't specified - check for standard base prefixes - if (0 == base) { - base = 10; - if ('0' == *src) { - ++src; - base = 8; - if (src) { - switch (*src) { - case 'x': - case 'X': - ++src; - base = 16; - break; - case 'b': - case 'B': - ++src; - base = 2; - break; + if (src.ltrim_if(&isspace).size()) { + auto origin = src.data(); // cache to handle prefix skipping. 
+ // If base is 0, it wasn't specified - check for standard base prefixes + if (0 == base) { + base = 10; + if ('0' == *src) { + ++src; + base = 8; + if (src) { + switch (*src) { + case 'x': + case 'X': + ++src; + base = 16; + break; + case 'b': + case 'B': + ++src; + base = 2; + break; + } } } } - } - if (!(1 <= base && base <= 36)) { - return 0; - } + if (!(1 <= base && base <= 36)) { + return 0; + } - // For performance in common cases, use the templated conversion. - switch (base) { - case 2: - zret = svto_radix<2>(src); - break; - case 8: - zret = svto_radix<8>(src); - break; - case 10: - zret = svto_radix<10>(src); - break; - case 16: - zret = svto_radix<16>(src); - break; - default: { - static constexpr auto MAX = std::numeric_limits::max(); - const auto OVERFLOW_LIMIT = MAX / base; - intmax_t v = 0; - while (src.size() && (0 <= (v = svtoi_convert[static_cast(*src)])) && v < base) { - ++src; - if (zret <= OVERFLOW_LIMIT && uintmax_t(v) <= (MAX - (zret *= base))) { - zret += v; - } else { - zret = MAX; + // For performance in common cases, use the templated conversion. 
+ switch (base) { + case 2: + zret = svto_radix<2>(src); + break; + case 8: + zret = svto_radix<8>(src); + break; + case 10: + zret = svto_radix<10>(src); + break; + case 16: + zret = svto_radix<16>(src); + break; + default: { + static constexpr auto MAX = std::numeric_limits::max(); + const auto OVERFLOW_LIMIT = MAX / base; + intmax_t v = 0; + while (src.size() && (0 <= (v = svtoi_convert[static_cast(*src)])) && v < base) { + ++src; + if (zret <= OVERFLOW_LIMIT && uintmax_t(v) <= (MAX - (zret *= base))) { + zret += v; + } else { + zret = MAX; + } } + break; } - break; } - } - if (out) { - out->assign(origin, src.data()); + if (out) { + out->assign(origin, src.data()); + } } + return zret; } - return zret; -} -double -svtod(swoc::TextView text, swoc::TextView *parsed) { - // @return 10^e - auto pow10 = [](int e) -> double { - double zret = 1.0; - double scale = 10.0; - if (e < 0) { // flip the scale and make @a e positive. - e = -e; - scale = 0.1; - } + double + svtod(swoc::TextView text, swoc::TextView *parsed) + { + // @return 10^e + auto pow10 = [](int e) -> double { + double zret = 1.0; + double scale = 10.0; + if (e < 0) { // flip the scale and make @a e positive. + e = -e; + scale = 0.1; + } - // Walk the bits in the exponent @a e and multiply the scale for set bits. - while (e) { - if (e & 1) { - zret *= scale; + // Walk the bits in the exponent @a e and multiply the scale for set bits. + while (e) { + if (e & 1) { + zret *= scale; + } + scale *= scale; + e >>= 1; } - scale *= scale; - e >>= 1; - } - return zret; - }; + return zret; + }; - if (text.empty()) { - return 0; - } + if (text.empty()) { + return 0; + } - auto org_text = text; // save this to update @a parsed. - // Check just once and dump to a local copy if needed. - TextView local_parsed; - if (!parsed) { - parsed = &local_parsed; - } + auto org_text = text; // save this to update @a parsed. + // Check just once and dump to a local copy if needed. 
+ TextView local_parsed; + if (!parsed) { + parsed = &local_parsed; + } - // Handle leading sign. - int sign = 1; - if (*text == '-') { - ++text; - sign = -1; - } else if (*text == '+') { - ++text; - } - // Parse the leading whole part as an integer. - intmax_t whole = svto_radix<10>(text); - parsed->assign(org_text.data(), text.data()); + // Handle leading sign. + int sign = 1; + if (*text == '-') { + ++text; + sign = -1; + } else if (*text == '+') { + ++text; + } + // Parse the leading whole part as an integer. + intmax_t whole = svto_radix<10>(text); + parsed->assign(org_text.data(), text.data()); - if (text.empty()) { - return whole; - } + if (text.empty()) { + return whole; + } - double frac = 0.0; - if (*text == '.') { // fractional part. - ++text; - double scale = 0.1; - while (text && isdigit(*text)) { - frac += scale * (*text++ - '0'); - scale /= 10.0; + double frac = 0.0; + if (*text == '.') { // fractional part. + ++text; + double scale = 0.1; + while (text && isdigit(*text)) { + frac += scale * (*text++ - '0'); + scale /= 10.0; + } } - } - double exp = 1.0; - if (text.starts_with_nocase("e")) { - int exp_sign = 1; - ++text; - if (text) { - if (*text == '+') { - ++text; - } else if (*text == '-') { - ++text; - exp_sign = -1; + double exp = 1.0; + if (text.starts_with_nocase("e")) { + int exp_sign = 1; + ++text; + if (text) { + if (*text == '+') { + ++text; + } else if (*text == '-') { + ++text; + exp_sign = -1; + } } + auto exp_part = svto_radix<10>(text); + exp = pow10(exp_part * exp_sign); } - auto exp_part = svto_radix<10>(text); - exp = pow10(exp_part * exp_sign); - } - parsed->assign(org_text.data(), text.data()); - return sign * (whole + frac) * exp; -} + parsed->assign(org_text.data(), text.data()); + return sign * (whole + frac) * exp; + } -// Do the template instantiations. -template std::ostream &TextView::stream_write(std::ostream &, const TextView &) const; + // Do the template instantiations. 
+ template std::ostream &TextView::stream_write(std::ostream &, const TextView &) const; -}} // namespace swoc::SWOC_VERSION_NS +} // namespace SWOC_VERSION_NS +} // namespace swoc -namespace std { +namespace std +{ ostream & -operator<<(ostream &os, const swoc::TextView &b) { +operator<<(ostream &os, const swoc::TextView &b) +{ if (os.good()) { b.stream_write(os, b); os.width(0); diff --git a/src/tscore/CMakeLists.txt b/src/tscore/CMakeLists.txt index 7790adc87dd..dd26806d00e 100644 --- a/src/tscore/CMakeLists.txt +++ b/src/tscore/CMakeLists.txt @@ -15,13 +15,79 @@ # ####################### -# This executable generates the parse rules that are included in ParseRules.cc -# Add custom commands and dependencies to make sure this gets build and run before compiling libtscore -add_executable(CompileParseRules CompileParseRules.cc) +# HOLD : BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/host_tools/CompileParseRules${CMAKE_EXECUTABLE_SUFFIX}" +set(CPR_EXE "") +set(CPR_DEP "") + +find_program(HOST_C_COMPILER NAMES clang) +find_program(HOST_CXX_COMPILER NAMES clang++) + +if(NOT HOST_C_COMPILER OR NOT HOST_CXX_COMPILER) + message(FATAL_ERROR "Could not find host native compiler (cc/c++) needed for cross-compilation tools.") +endif() + +message(STATUS "Debug Environment Varibles:") +message(STATUS " CMake Cross Compiling: ${CMAKE_CROSSCOMPILING}") +message(STATUS " Host C Compiler: ${HOST_C_COMPILER}") +message(STATUS " Host CXX Compiler: ${HOST_CXX_COMPILER}") +message(STATUS " Include Path: ${DCMAKE_INCLUDE_PATH}") +message(STATUS " Library Path: ${DCMAKE_LIBRARY_PATH}") +message(STATUS " Prefix Path: ${DCMAKE_PREFIX_PATH}") + +if(CMAKE_CROSSCOMPILING) + message(STATUS "Cross-compiling: Enabling host-native build for CompileParseRules.") + + include(ExternalProject) + + externalproject_add( + CompileParseRulesHost + SOURCE_DIR "${CMAKE_SOURCE_DIR}" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/host_tools" + INSTALL_COMMAND "" + # cmake-format: off + CONFIGURE_COMMAND 
${CMAKE_COMMAND} -E env + PKG_CONFIG_PATH= + # Required for luajit and libpcre2-8 + PKG_CONFIG_LIBDIR=/usr/lib/x86_64-linux-gnu/pkgconfig:/usr/share/pkgconfig + ${CMAKE_COMMAND} + -DCMAKE_CROSSCOMPILING=FALSE + -DCMAKE_C_COMPILER=${HOST_C_COMPILER} + -DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER} + -DCMAKE_C_FLAGS= + -DCMAKE_CXX_FLAGS= + -DCMAKE_FIND_ROOT_PATH= + -DCMAKE_INCLUDE_PATH= + -DCMAKE_LIBRARY_PATH= + -DCMAKE_PREFIX_PATH= + -DCMAKE_TOOLCHAIN_FILE= + -DCMAKE_SYSTEM_NAME=${CMAKE_HOST_SYSTEM_NAME} + -DENABLE_CLANG_TIDY=OFF + -G ${CMAKE_GENERATOR} + -S ${CMAKE_SOURCE_DIR} + -B ${CMAKE_CURRENT_BINARY_DIR}/host_tools + # cmake-format: on + BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/host_tools/src/tscore/CompileParseRules${CMAKE_EXECUTABLE_SUFFIX}" + BUILD_COMMAND ${CMAKE_COMMAND} --build . --target CompileParseRules CONFIGURE_HANDLED_BY_BUILD TRUE + ) + + set(CPR_EXE "${CMAKE_CURRENT_BINARY_DIR}/host_tools/src/tscore/CompileParseRules${CMAKE_EXECUTABLE_SUFFIX}") + set(CPR_DEP CompileParseRulesHost) + +else() + message(STATUS "Enabling native build for CompileParseRules.") + + # This executable generates the parse rules that are included in ParseRules.cc + # Add custom commands and dependencies to make sure this gets build and run before compiling libtscore + add_executable(CompileParseRules CompileParseRules.cc) + + set(CPR_EXE "$") + set(CPR_DEP CompileParseRules) +endif() add_custom_command( OUTPUT ParseRulesCType ParseRulesCTypeToUpper ParseRulesCTypeToLower - COMMAND ${CMAKE_COMMAND} -E env ASAN_OPTIONS=detect_leaks=0 $ + COMMAND ${CMAKE_COMMAND} -E env ASAN_OPTIONS=detect_leaks=0 ${CPR_EXE} + DEPENDS ${CPR_DEP} ${CMAKE_CURRENT_SOURCE_DIR}/CompileParseRules.cc COMMENT "Generating compile parse rules" ) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 52a7be7ec78..531170a15b8 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -126,7 +126,7 @@ uint_to_binary(uint32_t u) { std::string buf(32, 
'0'); for (uint8_t i = 0; i < 32; i++) { - if (u & (1 << (31 - i))) { + if (u & (1U << (31 - i))) { buf[i] = '1'; } } diff --git a/src/tscore/ParseRules.cc b/src/tscore/ParseRules.cc index 65233c34540..ceef504e3ec 100644 --- a/src/tscore/ParseRules.cc +++ b/src/tscore/ParseRules.cc @@ -1,43 +1,19 @@ -/** @file - - A brief file description - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ - -/*************************** -*- Mod: C++ -*- ****************************** - - ParseRules.h -- - - - ****************************************************************************/ - -#include "tscore/ink_platform.h" #include "tscore/ParseRules.h" +/* +// External lookup tables (generated by CompileParseRules.cc) +extern const uint32_t parseRulesCType[256]; +extern const uint8_t parseRulesCTypeToUpper[256]; +extern const uint8_t parseRulesCTypeToLower[256]; +*/ + const unsigned int parseRulesCType[256] = { #include "ParseRulesCType" }; -const char parseRulesCTypeToUpper[256] = { +const uint8_t parseRulesCTypeToUpper[256] = { #include "ParseRulesCTypeToUpper" }; -const char parseRulesCTypeToLower[256] = { +const uint8_t parseRulesCTypeToLower[256] = { #include "ParseRulesCTypeToLower" }; From de9b5b3376257d3b55de7a322f251afc98ed8fc7 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 21:35:48 +0100 Subject: [PATCH 3/6] fix: use unsigned char for byte values to avoid implementation-defined behaviour - Change `char cc` to `unsigned char cc` to ensure consistent handling of byte values (0-255) - `char` can be signed or unsigned depending on the platform, causing implementation-defined behaviour for values > 127 - Using `unsigned char` guarantees correct interpretation of all byte values --- src/tscore/CompileParseRules.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 531170a15b8..e883b68ca79 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -204,7 +204,7 @@ main() tparseRulesCType[i] |= is_hex_BIT; } - char cc = static_cast(i); + unsigned char cc = static_cast(i); if (ParseRules::is_pchar(&cc)) { tparseRulesCType[i] |= is_pchar_BIT; From 06e530faaad787f20ff8e36e193c123a905706ee Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 20:25:54 +0100 Subject: [PATCH 4/6] refactor: adopt Mozilla style guide and 
add file error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename functions to PascalCase (uint_to_binary → UintToBinary) - Add `g` prefix to global variables (example: parseRulesCType → gParseRulesCType) - Rename ParseRules classification functions and constants to PascalCase (is_* → Is*, *_BIT → IS_*) - Add checks that each output file opened successfully before writing, with error messages to stderr - Add `#include <iostream>` for std::cerr - Rename `fp` to `outputFile` for improved clarity - Change `char` to `unsigned char` for byte value handling - Update all Doxygen comments to reflect new naming conventions --- src/tscore/CompileParseRules.cc | 239 +++++++++++++++++--------------- 1 file changed, 126 insertions(+), 113 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index e883b68ca79..a8d38591bf3 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -24,9 +24,9 @@ * It is executed during the build process and is **not part of the runtime library**. * * The generated tables are written to the following files: - * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). - * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). - * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * - @c gParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c gParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c gParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). * * These files are typically included as static data in @c ParseRules.cc. * @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "tscore/ParseRules.h" @@ -52,61 +53,61 @@ * @brief Placeholder for the character type bitmask table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCType table. It is **not used at runtime**.
+ * @c gParseRulesCType table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCType file. + * the @c gParseRulesCType file. */ -const uint32_t parseRulesCType[256] = {0}; +const uint32_t gParseRulesCType[256] = {0}; /** * @brief Placeholder for the uppercase conversion table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCTypeToUpper table. It is **not used at runtime**. + * @c gParseRulesCTypeToUpper table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCTypeToUpper file. + * the @c gParseRulesCTypeToUpper file. */ -const uint8_t parseRulesCTypeToUpper[256] = {0}; +const uint8_t gParseRulesCTypeToUpper[256] = {0}; /** * @brief Placeholder for the lowercase conversion table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCTypeToLower table. It is **not used at runtime**. + * @c gParseRulesCTypeToLower table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCTypeToLower file. + * the @c gParseRulesCTypeToLower file. */ -const uint8_t parseRulesCTypeToLower[256] = {0}; +const uint8_t gParseRulesCTypeToLower[256] = {0}; /** * @brief Working array for character type bitmasks. * * Temporary storage for computed character classification bitmasks. - * Populated by @c main() and written to the @c ParseRulesCType output file. + * Populated by @c main() and written to the @c gParseRulesCType output file. * * @note Each entry is a 32-bit bitmask where each bit represents a character * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). */ -uint32_t tparseRulesCType[256]; +uint32_t gTparseRulesCType[256]; /** * @brief Working array for uppercase character conversion. 
* * Temporary storage for uppercase character mappings (0-255). - * Populated by @c main() and written to the @c ParseRulesCTypeToUpper output file. + * Populated by @c main() and written to the @c gParseRulesCTypeToUpper output file. */ -uint8_t tparseRulesCTypeToUpper[256]; +uint8_t gTparseRulesCTypeToUpper[256]; /** * @brief Working array for lowercase character conversion. * * Temporary storage for lowercase character mappings (0-255). - * Populated by @c main() and written to the @c ParseRulesCTypeToLower output file. + * Populated by @c main() and written to the @c gParseRulesCTypeToLower output file. */ -uint8_t tparseRulesCTypeToLower[256]; +uint8_t gTparseRulesCTypeToLower[256]; /** * @brief Convert a 32-bit unsigned integer to its binary string representation. @@ -116,13 +117,13 @@ uint8_t tparseRulesCTypeToLower[256]; * * @param u The 32-bit unsigned integer to convert. * @return @c std::string containing the 32-character binary representation. - * Example: @c uint_to_binary(5) returns @c "00000000000000000000000000000101". + * Example: @c UintToBinary(5) returns @c "00000000000000000000000000000101". * * @note This function is **thread-safe** because it returns a new @c std::string * by value (no shared static buffer). */ std::string -uint_to_binary(uint32_t u) +UintToBinary(uint32_t u) { std::string buf(32, '0'); for (uint8_t i = 0; i < 32; i++) { @@ -139,34 +140,34 @@ uint_to_binary(uint32_t u) * This function performs the following steps: * * 1. For each ASCII character (0-255): - * - Initializes the working arrays (@c tparseRulesCType, @c tparseRulesCTypeToUpper, - * @c tparseRulesCTypeToLower). + * - Initializes the working arrays (@c gTparseRulesCType, @c gTparseRulesCTypeToUpper, + * @c gTparseRulesCTypeToLower). * - Tests the character against all @c ParseRules classification functions. - * - Sets the corresponding bit in @c tparseRulesCType[i] for each matching classification. 
- * - Stores the uppercase/lowercase conversion values in @c tparseRulesCTypeToUpper - * and @c tparseRulesCTypeToLower. + * - Sets the corresponding bit in @c gTparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c gTparseRulesCTypeToUpper + * and @c gTparseRulesCTypeToLower. * * 2. Writes three output files using @c std::ofstream (RAII): - * - @c ParseRulesCType: Contains hexadecimal bitmask values and their binary + * - @c gParseRulesCType: Contains hexadecimal bitmask values and their binary * representations. Format: C-style array initialization. - * - @c ParseRulesCTypeToUpper: Contains uppercase conversion values for each + * - @c gParseRulesCTypeToUpper: Contains uppercase conversion values for each * character. Format: @c (uint8_t)X, - * - @c ParseRulesCTypeToLower: Contains lowercase conversion values for each + * - @c gParseRulesCTypeToLower: Contains lowercase conversion values for each * character. Format: @c (uint8_t)X, * * @return 0 on successful completion. * * @note The classification functions from @c ParseRules.h include: - * - **Character types**: @c is_char, @c is_alpha, @c is_digit, @c is_alnum, - * @c is_ctl, @c is_ws, @c is_hex, @c is_pchar, @c is_token, @c is_uri, - * @c is_sep, @c is_empty. - * - **Case types**: @c is_upalpha, @c is_loalpha. - * - **Safety/encoding**: @c is_safe, @c is_unsafe, @c is_reserved, - * @c is_unreserved, @c is_national. - * - **Special categories**: @c is_punct, @c is_tspecials, @c is_end_of_url. - * - **Whitespace variants**: @c is_spcr, @c is_splf, @c is_wslfcr, @c is_eow. - * - **HTTP/MIME**: @c is_http_field_name, @c is_http_field_value, - * @c is_mime_sep, @c is_control. + * - **Character types**: @c IsChar, @c IsAlpha, @c IsDigit, @c IsAlnum, + * @c IsCtl, @c IsWs, @c IsHex, @c IsPchar, @c IsToken, @c IsUri, + * @c IsSep, @c IsEmpty. + * - **Case types**: @c IsUpalpha, @c IsLoalpha. 
+ * - **Safety/encoding**: @c IsSafe, @c IsUnsafe, @c IsReserved, + * @c IsUnreserved, @c IsNational. + * - **Special categories**: @c IsPunct, @c IsTspecials, @c IsEndOfUrl. + * - **Whitespace variants**: @c IsSpcr, @c IsSplf, @c IsWslfcr, @c IsEow. + * - **HTTP/MIME**: @c IsHttpFieldName, @c IsHttpFieldValue, + * @c IsMimeSep, @c IsControl. * * @see ParseRules.h for detailed descriptions of each classification function. * @see ParseRules.cc for runtime usage of the generated tables. @@ -175,132 +176,144 @@ int main() { for (uint16_t i = 0; i < 256; i++) { - tparseRulesCType[i] = 0; - tparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); - tparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); + gTparseRulesCType[i] = 0; + gTparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); + gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); - if (ParseRules::is_char(i)) { - tparseRulesCType[i] |= is_char_BIT; + if (ParseRules::IsChar(index)) { + gTparseRulesCType[index] |= IS_CHAR_BIT; } - if (ParseRules::is_upalpha(i)) { - tparseRulesCType[i] |= is_upalpha_BIT; + if (ParseRules::IsUpalpha(index)) { + gTparseRulesCType[index] |= IS_UPALPHA_BIT; } - if (ParseRules::is_loalpha(i)) { - tparseRulesCType[i] |= is_loalpha_BIT; + if (ParseRules::IsLoalpha(index)) { + gTparseRulesCType[index] |= IS_LOALPHA_BIT; } - if (ParseRules::is_alpha(i)) { - tparseRulesCType[i] |= is_alpha_BIT; + if (ParseRules::IsAlpha(index)) { + gTparseRulesCType[index] |= IS_ALPHA_BIT; } - if (ParseRules::is_digit(i)) { - tparseRulesCType[i] |= is_digit_BIT; + if (ParseRules::IsDigit(index)) { + gTparseRulesCType[index] |= IS_DIGIT_BIT; } - if (ParseRules::is_ctl(i)) { - tparseRulesCType[i] |= is_ctl_BIT; + if (ParseRules::IsCtl(index)) { + gTparseRulesCType[index] |= IS_CTL_BIT; } - if (ParseRules::is_ws(i)) { - tparseRulesCType[i] |= is_ws_BIT; + if (ParseRules::IsWs(index)) { + gTparseRulesCType[index] |= IS_WS_BIT; } - if 
(ParseRules::is_hex(i)) { - tparseRulesCType[i] |= is_hex_BIT; + if (ParseRules::IsHex(index)) { + gTparseRulesCType[index] |= IS_HEX_BIT; } unsigned char cc = static_cast(i); - if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[i] |= is_pchar_BIT; + if (ParseRules::IsPchar(¤tChar)) { + gTparseRulesCType[index] |= IS_PCHAR_BIT; } - if (ParseRules::is_extra(i)) { - tparseRulesCType[i] |= is_extra_BIT; + if (ParseRules::IsExtra(index)) { + gTparseRulesCType[index] |= IS_EXTRA_BIT; } - if (ParseRules::is_safe(i)) { - tparseRulesCType[i] |= is_safe_BIT; + if (ParseRules::IsSafe(index)) { + gTparseRulesCType[index] |= IS_SAFE_BIT; } - if (ParseRules::is_unsafe(i)) { - tparseRulesCType[i] |= is_unsafe_BIT; + if (ParseRules::IsUnsafe(index)) { + gTparseRulesCType[index] |= IS_UNSAFE_BIT; } - if (ParseRules::is_national(i)) { - tparseRulesCType[i] |= is_national_BIT; + if (ParseRules::IsNational(index)) { + gTparseRulesCType[index] |= IS_NATIONAL_BIT; } - if (ParseRules::is_reserved(i)) { - tparseRulesCType[i] |= is_reserved_BIT; + if (ParseRules::IsReserved(index)) { + gTparseRulesCType[index] |= IS_RESERVED_BIT; } - if (ParseRules::is_unreserved(i)) { - tparseRulesCType[i] |= is_unreserved_BIT; + if (ParseRules::IsUnreserved(index)) { + gTparseRulesCType[index] |= IS_UNRESERVED_BIT; } - if (ParseRules::is_punct(i)) { - tparseRulesCType[i] |= is_punct_BIT; + if (ParseRules::IsPunct(index)) { + gTparseRulesCType[index] |= IS_PUNCT_BIT; } - if (ParseRules::is_end_of_url(i)) { - tparseRulesCType[i] |= is_end_of_url_BIT; + if (ParseRules::IsEndOfUrl(index)) { + gTparseRulesCType[index] |= IS_END_OF_URL_BIT; } - if (ParseRules::is_tspecials(i)) { - tparseRulesCType[i] |= is_tspecials_BIT; + if (ParseRules::IsTspecials(index)) { + gTparseRulesCType[index] |= IS_TSPECIALS_BIT; } - if (ParseRules::is_spcr(i)) { - tparseRulesCType[i] |= is_spcr_BIT; + if (ParseRules::IsSpcr(index)) { + gTparseRulesCType[index] |= IS_SPCR_BIT; } - if (ParseRules::is_splf(i)) { - 
tparseRulesCType[i] |= is_splf_BIT; + if (ParseRules::IsSplf(index)) { + gTparseRulesCType[index] |= IS_SPLF_BIT; } - if (ParseRules::is_wslfcr(i)) { - tparseRulesCType[i] |= is_wslfcr_BIT; + if (ParseRules::IsWslfcr(index)) { + gTparseRulesCType[index] |= IS_WSLFCR_BIT; } - if (ParseRules::is_eow(i)) { - tparseRulesCType[i] |= is_eow_BIT; + if (ParseRules::IsEow(index)) { + gTparseRulesCType[index] |= IS_EOW_BIT; } - if (ParseRules::is_token(i)) { - tparseRulesCType[i] |= is_token_BIT; + if (ParseRules::IsToken(index)) { + gTparseRulesCType[index] |= IS_TOKEN_BIT; } - if (ParseRules::is_uri(i)) { - tparseRulesCType[i] |= is_uri_BIT; + if (ParseRules::IsUri(index)) { + gTparseRulesCType[index] |= IS_URI_BIT; } - if (ParseRules::is_sep(i)) { - tparseRulesCType[i] |= is_sep_BIT; + if (ParseRules::IsSep(index)) { + gTparseRulesCType[index] |= IS_SEP_BIT; } - if (ParseRules::is_empty(i)) { - tparseRulesCType[i] |= is_empty_BIT; + if (ParseRules::IsEmpty(index)) { + gTparseRulesCType[index] |= IS_EMPTY_BIT; } - if (ParseRules::is_alnum(i)) { - tparseRulesCType[i] |= is_alnum_BIT; + if (ParseRules::IsAlnum(index)) { + gTparseRulesCType[index] |= IS_ALNUM_BIT; } - if (ParseRules::is_space(i)) { - tparseRulesCType[i] |= is_space_BIT; + if (ParseRules::IsSpace(index)) { + gTparseRulesCType[index] |= IS_SPACE_BIT; } - if (ParseRules::is_control(i)) { - tparseRulesCType[i] |= is_control_BIT; + if (ParseRules::IsControl(index)) { + gTparseRulesCType[index] |= IS_CONTROL_BIT; } - if (ParseRules::is_mime_sep(i)) { - tparseRulesCType[i] |= is_mime_sep_BIT; + if (ParseRules::IsMimeSep(index)) { + gTparseRulesCType[index] |= IS_MIME_SEP_BIT; } - if (ParseRules::is_http_field_name(i)) { - tparseRulesCType[i] |= is_http_field_name_BIT; + if (ParseRules::IsHttpFieldName(index)) { + gTparseRulesCType[index] |= IS_HTTP_FIELD_NAME_BIT; } - if (ParseRules::is_http_field_value(i)) { - tparseRulesCType[i] |= is_http_field_value_BIT; + if (ParseRules::IsHttpFieldValue(index)) { + 
gTparseRulesCType[index] |= IS_HTTP_FIELD_VALUE_BIT; } } // Write ParseRulesCType (bitmask table with binary representation) - std::ofstream fp("ParseRulesCType"); + std::ofstream outputFile("ParseRulesCType"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCType for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; - fp << "0x" << std::hex << std::setw(8) << std::setfill('0') << tparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); - fp << "/* [" << uint_to_binary(tparseRulesCType[i]) << "] */\n"; + outputFile << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + outputFile << "0x" << std::hex << std::setw(8) << std::setfill('0') << gTparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); + outputFile << "/* [" << uint_to_binary(gTparseRulesCType[i]) << "] */\n"; } // Write ParseRulesCTypeToUpper (uppercase conversion table) { - std::ofstream fp("ParseRulesCTypeToUpper"); + std::ofstream outputFile("ParseRulesCTypeToUpper"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToUpper for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(tparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; } } // Write ParseRulesCTypeToLower (lowercase conversion table) { - std::ofstream fp("ParseRulesCTypeToLower"); + std::ofstream outputFile("ParseRulesCTypeToLower"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToLower for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(tparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + fp << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? 
',' : ' ') << '\n'; } } From dce8c6c65e685148e766ba1142d6fe09480476b1 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 22:37:02 +0100 Subject: [PATCH 5/6] fix: correct inconsistent variable names and use PascalCase functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace `index` with `i` to match loop variable - Replace `currentChar` with `cc` to match variable declaration - Use PascalCase for ParseRules functions (ink_tolower → InkTolower, ink_toupper → InkToupper) - Use PascalCase for UintToBinary function - Replace `fp` with `outputFile` in last file writing block for consistency --- src/tscore/CompileParseRules.cc | 136 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index a8d38591bf3..a0a6f9d13aa 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -177,107 +177,107 @@ main() { for (uint16_t i = 0; i < 256; i++) { gTparseRulesCType[i] = 0; - gTparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); - gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); + gTparseRulesCTypeToLower[i] = static_cast(ParseRules::InkTolower(i)); + gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::InkToupper(i)); - if (ParseRules::IsChar(index)) { - gTparseRulesCType[index] |= IS_CHAR_BIT; + if (ParseRules::IsChar(i)) { + gTparseRulesCType[i] |= IS_CHAR_BIT; } - if (ParseRules::IsUpalpha(index)) { - gTparseRulesCType[index] |= IS_UPALPHA_BIT; + if (ParseRules::IsUpalpha(i)) { + gTparseRulesCType[i] |= IS_UPALPHA_BIT; } - if (ParseRules::IsLoalpha(index)) { - gTparseRulesCType[index] |= IS_LOALPHA_BIT; + if (ParseRules::IsLoalpha(i)) { + gTparseRulesCType[i] |= IS_LOALPHA_BIT; } - if (ParseRules::IsAlpha(index)) { - gTparseRulesCType[index] |= IS_ALPHA_BIT; + if (ParseRules::IsAlpha(i)) { + gTparseRulesCType[i] |= IS_ALPHA_BIT; } - if 
(ParseRules::IsDigit(index)) { - gTparseRulesCType[index] |= IS_DIGIT_BIT; + if (ParseRules::IsDigit(i)) { + gTparseRulesCType[i] |= IS_DIGIT_BIT; } - if (ParseRules::IsCtl(index)) { - gTparseRulesCType[index] |= IS_CTL_BIT; + if (ParseRules::IsCtl(i)) { + gTparseRulesCType[i] |= IS_CTL_BIT; } - if (ParseRules::IsWs(index)) { - gTparseRulesCType[index] |= IS_WS_BIT; + if (ParseRules::IsWs(i)) { + gTparseRulesCType[i] |= IS_WS_BIT; } - if (ParseRules::IsHex(index)) { - gTparseRulesCType[index] |= IS_HEX_BIT; + if (ParseRules::IsHex(i)) { + gTparseRulesCType[i] |= IS_HEX_BIT; } unsigned char cc = static_cast(i); - if (ParseRules::IsPchar(¤tChar)) { - gTparseRulesCType[index] |= IS_PCHAR_BIT; + if (ParseRules::IsPchar(&cc)) { + gTparseRulesCType[i] |= IS_PCHAR_BIT; } - if (ParseRules::IsExtra(index)) { - gTparseRulesCType[index] |= IS_EXTRA_BIT; + if (ParseRules::IsExtra(i)) { + gTparseRulesCType[i] |= IS_EXTRA_BIT; } - if (ParseRules::IsSafe(index)) { - gTparseRulesCType[index] |= IS_SAFE_BIT; + if (ParseRules::IsSafe(i)) { + gTparseRulesCType[i] |= IS_SAFE_BIT; } - if (ParseRules::IsUnsafe(index)) { - gTparseRulesCType[index] |= IS_UNSAFE_BIT; + if (ParseRules::IsUnsafe(i)) { + gTparseRulesCType[i] |= IS_UNSAFE_BIT; } - if (ParseRules::IsNational(index)) { - gTparseRulesCType[index] |= IS_NATIONAL_BIT; + if (ParseRules::IsNational(i)) { + gTparseRulesCType[i] |= IS_NATIONAL_BIT; } - if (ParseRules::IsReserved(index)) { - gTparseRulesCType[index] |= IS_RESERVED_BIT; + if (ParseRules::IsReserved(i)) { + gTparseRulesCType[i] |= IS_RESERVED_BIT; } - if (ParseRules::IsUnreserved(index)) { - gTparseRulesCType[index] |= IS_UNRESERVED_BIT; + if (ParseRules::IsUnreserved(i)) { + gTparseRulesCType[i] |= IS_UNRESERVED_BIT; } - if (ParseRules::IsPunct(index)) { - gTparseRulesCType[index] |= IS_PUNCT_BIT; + if (ParseRules::IsPunct(i)) { + gTparseRulesCType[i] |= IS_PUNCT_BIT; } - if (ParseRules::IsEndOfUrl(index)) { - gTparseRulesCType[index] |= IS_END_OF_URL_BIT; + if 
(ParseRules::IsEndOfUrl(i)) { + gTparseRulesCType[i] |= IS_END_OF_URL_BIT; } - if (ParseRules::IsTspecials(index)) { - gTparseRulesCType[index] |= IS_TSPECIALS_BIT; + if (ParseRules::IsTspecials(i)) { + gTparseRulesCType[i] |= IS_TSPECIALS_BIT; } - if (ParseRules::IsSpcr(index)) { - gTparseRulesCType[index] |= IS_SPCR_BIT; + if (ParseRules::IsSpcr(i)) { + gTparseRulesCType[i] |= IS_SPCR_BIT; } - if (ParseRules::IsSplf(index)) { - gTparseRulesCType[index] |= IS_SPLF_BIT; + if (ParseRules::IsSplf(i)) { + gTparseRulesCType[i] |= IS_SPLF_BIT; } - if (ParseRules::IsWslfcr(index)) { - gTparseRulesCType[index] |= IS_WSLFCR_BIT; + if (ParseRules::IsWslfcr(i)) { + gTparseRulesCType[i] |= IS_WSLFCR_BIT; } - if (ParseRules::IsEow(index)) { - gTparseRulesCType[index] |= IS_EOW_BIT; + if (ParseRules::IsEow(i)) { + gTparseRulesCType[i] |= IS_EOW_BIT; } - if (ParseRules::IsToken(index)) { - gTparseRulesCType[index] |= IS_TOKEN_BIT; + if (ParseRules::IsToken(i)) { + gTparseRulesCType[i] |= IS_TOKEN_BIT; } - if (ParseRules::IsUri(index)) { - gTparseRulesCType[index] |= IS_URI_BIT; + if (ParseRules::IsUri(i)) { + gTparseRulesCType[i] |= IS_URI_BIT; } - if (ParseRules::IsSep(index)) { - gTparseRulesCType[index] |= IS_SEP_BIT; + if (ParseRules::IsSep(i)) { + gTparseRulesCType[i] |= IS_SEP_BIT; } - if (ParseRules::IsEmpty(index)) { - gTparseRulesCType[index] |= IS_EMPTY_BIT; + if (ParseRules::IsEmpty(i)) { + gTparseRulesCType[i] |= IS_EMPTY_BIT; } - if (ParseRules::IsAlnum(index)) { - gTparseRulesCType[index] |= IS_ALNUM_BIT; + if (ParseRules::IsAlnum(i)) { + gTparseRulesCType[i] |= IS_ALNUM_BIT; } - if (ParseRules::IsSpace(index)) { - gTparseRulesCType[index] |= IS_SPACE_BIT; + if (ParseRules::IsSpace(i)) { + gTparseRulesCType[i] |= IS_SPACE_BIT; } - if (ParseRules::IsControl(index)) { - gTparseRulesCType[index] |= IS_CONTROL_BIT; + if (ParseRules::IsControl(i)) { + gTparseRulesCType[i] |= IS_CONTROL_BIT; } - if (ParseRules::IsMimeSep(index)) { - gTparseRulesCType[index] |= 
IS_MIME_SEP_BIT; + if (ParseRules::IsMimeSep(i)) { + gTparseRulesCType[i] |= IS_MIME_SEP_BIT; } - if (ParseRules::IsHttpFieldName(index)) { - gTparseRulesCType[index] |= IS_HTTP_FIELD_NAME_BIT; + if (ParseRules::IsHttpFieldName(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_NAME_BIT; } - if (ParseRules::IsHttpFieldValue(index)) { - gTparseRulesCType[index] |= IS_HTTP_FIELD_VALUE_BIT; + if (ParseRules::IsHttpFieldValue(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_VALUE_BIT; } } @@ -290,7 +290,7 @@ main() for (uint16_t i = 0; i < 256; ++i) { outputFile << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; outputFile << "0x" << std::hex << std::setw(8) << std::setfill('0') << gTparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); - outputFile << "/* [" << uint_to_binary(gTparseRulesCType[i]) << "] */\n"; + outputFile << "/* [" << UintToBinary(gTparseRulesCType[i]) << "] */\n"; } // Write ParseRulesCTypeToUpper (uppercase conversion table) @@ -313,7 +313,7 @@ main() return 1; } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? 
',' : ' ') << '\n'; } } From fbd054e2a48f894c48c3f8759a46ed67eb0b673d Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Mon, 25 May 2026 01:33:17 +0100 Subject: [PATCH 6/6] style: separate license from Doxygen comment block for Rat compliance style: separate license from Doxygen comment block for Rat compliance - Split file header into separate C-style comment block for license and Doxygen block for documentation - Required to pass Apache Release Audit Tool (Rat) - Fix formatting: indent Apache license URL for consistency - Fix formatting: remove extra space before "Fixed-width integer types" --- src/tscore/CompileParseRules.cc | 60 ++++++++++++--------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index a0a6f9d13aa..c16fa429055 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,42 +1,24 @@ -/** - * @file CompileParseRules.cc - * - * @brief Build-time utility for generating ParseRules character classification tables. - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * This standalone C++ program generates static lookup tables used by the - * Traffic Server runtime for O(1) character classification and case conversion. - * It is executed during the build process and is **not part of the runtime library**. - * - * The generated tables are written to the following files: - * - @c gParseRulesCType: Bitmask of character type flags (32-bit values). - * - @c gParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). - * - @c gParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). - * - * These files are typically included as static data in @c ParseRules.cc. - * - * @note This tool uses modern C++ features, including: - * - @c std::string for type-safe string handling. - * - @c std::ofstream for RAII-based file I/O. - * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. - * - * @see ParseRules.h for character classification function declarations. - * @see ParseRules.cc for runtime usage of generated tables. +/** @file CompileParseRules.cc + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #include