diff --git a/examples/readme_examples.cpp b/examples/readme_examples.cpp index 4d4b299..1686865 100644 --- a/examples/readme_examples.cpp +++ b/examples/readme_examples.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -230,6 +231,24 @@ std::basic_string transcode_to(std::basic_string const& inpu return input | to_utf | std::ranges::to>(); } +#if __cpp_lib_ranges_chunk >= 202202L +std::u8string parse_message_subset( + std::span message, std::size_t offset, std::size_t length) { + return std::span{message.begin() + offset, message.begin() + offset + length} + | std::views::chunk(2) + | std::views::transform( + [](const auto chunk) { + std::array a{}; + std::ranges::copy(chunk, a.begin()); + return std::bit_cast(a); + }) + | from_big_endian + | as_char16_t + | to_utf8 + | std::ranges::to(); +} +#endif + bool readme_examples() { using namespace std::string_view_literals; #ifndef _MSC_VER @@ -274,6 +293,16 @@ bool readme_examples() { if (transcode_to(u8"foo") != U"foo") { return false; } +#ifndef _MSC_VER // TODO: figure out why this test fails on MSVC +#if __cpp_lib_ranges_chunk >= 202202L + std::array message{ + std::byte{0x12}, std::byte{0xD8}, std::byte{0x3D}, std::byte{0xDE}, std::byte{0x42}, + std::byte{0x34}}; + if (!std::ranges::equal(u8"\xf0\x9f\x99\x82"sv, parse_message_subset(message, 1, 4))) { + return false; + } +#endif +#endif return true; } diff --git a/papers/P2728.md b/papers/P2728.md index 2a793fa..33cf948 100644 --- a/papers/P2728.md +++ b/papers/P2728.md @@ -397,6 +397,31 @@ void change_playing_card_suits() { } ``` +## Handling Byte Offsets and Endianness + +Say we want to handle a set of bytes in a message starting at offset N with length K that +is UTF16BE text: + +```cpp +std::u8string parse_message_subset( + std::span message, std::size_t offset, std::size_t length) { + return std::span{message.begin() + offset, message.begin() + offset + length} + | std::views::chunk(2) + | std::views::transform( + [](const auto chunk) { + std::array a{}; + std::ranges::copy(chunk, a.begin()); + return std::bit_cast(a); + }) + | std::views::from_big_endian + | std::views::as_char16_t + | std::views::to_utf8 + | std::ranges::to(); +} +``` + +Note that this depends on P4030R0 "Endian Views" for `std::views::from_big_endian`. + # Dependencies The code unit views depend on [@P3117R1] "Extending Conditionally Borrowed". @@ -1045,6 +1070,7 @@ gives back the original underlying view if it detects that it's reversing anothe ## Changes since R10 - Fix the wording around rejecting arrays for the code unit adaptors +- Add example for handling byte offsets and endianness ## Changes since R9