diff --git a/IO/PLY/CMakeLists.txt b/IO/PLY/CMakeLists.txt index dbe19bfc..b00ef82c 100644 --- a/IO/PLY/CMakeLists.txt +++ b/IO/PLY/CMakeLists.txt @@ -3,6 +3,15 @@ set(classes vtkPLYReader vtkPLYWriter) +# Vendored third-party fast PLY parser (MIT, Vilya Harvey) backing the fvtk +# binary-LE fast path in vtkPLYReader. Plain source, not a wrapped VTK class. +set(sources + miniply.cxx) +set(private_headers + miniply.h) + vtk_module_add_module(VTK::IOPLY - CLASSES ${classes}) + CLASSES ${classes} + SOURCES ${sources} + PRIVATE_HEADERS ${private_headers}) vtk_add_test_mangling(VTK::IOPLY) diff --git a/IO/PLY/Copyright.txt b/IO/PLY/Copyright.txt index 5d46e4a1..d8ac0db7 100644 --- a/IO/PLY/Copyright.txt +++ b/IO/PLY/Copyright.txt @@ -11,3 +11,12 @@ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. =========================================================================*/ + +/*========================================================================= +The vendored fast PLY parser (miniply.h / miniply.cxx) is: + +MIT License, Copyright (c) 2019 Vilya Harvey + +It backs the fvtk binary little-endian fast path in vtkPLYReader. The full MIT +license text is reproduced at the top of miniply.h and miniply.cxx. 
+=========================================================================*/ diff --git a/IO/PLY/miniply.cxx b/IO/PLY/miniply.cxx new file mode 100644 index 00000000..f2f9b39a --- /dev/null +++ b/IO/PLY/miniply.cxx @@ -0,0 +1,2057 @@ +/* +MIT License + +Copyright (c) 2019 Vilya Harvey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "miniply.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#endif + + +namespace miniply { + + // + // Public constants + // + + // Standard PLY element names + const char* kPLYVertexElement = "vertex"; + const char* kPLYFaceElement = "face"; + + + // + // PLY constants + // + + static constexpr uint32_t kPLYReadBufferSize = 128 * 1024; + static constexpr uint32_t kPLYTempBufferSize = kPLYReadBufferSize; + + static const char* kPLYFileTypes[] = { "ascii", "binary_little_endian", "binary_big_endian", nullptr }; + static const uint32_t kPLYPropertySize[]= { 1, 1, 2, 2, 4, 4, 4, 8 }; + + struct PLYTypeAlias { + const char* name; + PLYPropertyType type; + }; + + static const PLYTypeAlias kTypeAliases[] = { + { "char", PLYPropertyType::Char }, + { "uchar", PLYPropertyType::UChar }, + { "short", PLYPropertyType::Short }, + { "ushort", PLYPropertyType::UShort }, + { "int", PLYPropertyType::Int }, + { "uint", PLYPropertyType::UInt }, + { "float", PLYPropertyType::Float }, + { "float32",PLYPropertyType::Float }, + { "float64",PLYPropertyType::Double }, + { "double", PLYPropertyType::Double }, + + { "uint8", PLYPropertyType::UChar }, + { "uint16", PLYPropertyType::UShort }, + { "uint32", PLYPropertyType::UInt }, + + { "int8", PLYPropertyType::Char }, + { "int16", PLYPropertyType::Short }, + { "int32", PLYPropertyType::Int }, + + { nullptr, PLYPropertyType::None } + }; + + + // + // Constants + // + + static constexpr double kDoubleDigits[10] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; + + static constexpr float kPi = 3.14159265358979323846f; + + + // + // Vec2 type + // + + struct Vec2 { + float x, y; + }; + + static inline Vec2 operator - (Vec2 lhs, Vec2 rhs) { return Vec2{ lhs.x - rhs.x, lhs.y - rhs.y }; } + + static inline float dot(Vec2 lhs, Vec2 rhs) { return lhs.x * rhs.x + lhs.y * rhs.y; } + static inline float length(Vec2 v) { return std::sqrt(dot(v, v)); } + static inline Vec2 
normalize(Vec2 v) { float len = length(v); return Vec2{ v.x / len, v.y / len }; } + + + // + // Vec3 type + // + + struct Vec3 { + float x, y, z; + }; + + static inline Vec3 operator - (Vec3 lhs, Vec3 rhs) { return Vec3{ lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z }; } + + static inline float dot(Vec3 lhs, Vec3 rhs) { return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; } + static inline float length(Vec3 v) { return std::sqrt(dot(v, v)); } + static inline Vec3 normalize(Vec3 v) { float len = length(v); return Vec3{ v.x / len, v.y / len, v.z / len }; } + static inline Vec3 cross(Vec3 lhs, Vec3 rhs) { return Vec3{ lhs.y * rhs.z - lhs.z * rhs.y, lhs.z * rhs.x - lhs.x * rhs.z, lhs.x * rhs.y - lhs.y * rhs.x }; } + + + // + // Internal-only functions + // + + static inline bool is_whitespace(char ch) + { + return ch == ' ' || ch == '\t' || ch == '\r'; + } + + + static inline bool is_digit(char ch) + { + return ch >= '0' && ch <= '9'; + } + + + static inline bool is_letter(char ch) + { + ch |= 32; // upper and lower case letters differ only at this bit. + return ch >= 'a' && ch <= 'z'; + } + + + static inline bool is_alnum(char ch) + { + return is_digit(ch) || is_letter(ch); + } + + + static inline bool is_keyword_start(char ch) + { + return is_letter(ch) || ch == '_'; + } + + + static inline bool is_keyword_part(char ch) + { + return is_alnum(ch) || ch == '_'; + } + + + static inline bool is_safe_buffer_end(char ch) + { + return (ch > 0 && ch <= 32) || (ch >= 127); + } + + + static int file_open(FILE** f, const char* filename, const char* mode) + { + #ifdef _WIN32 + return fopen_s(f, filename, mode); + #else + *f = fopen(filename, mode); + return (*f != nullptr) ? 
0 : errno; + #endif + } + + + static inline int file_seek(FILE* file, int64_t offset, int origin) + { + #ifdef _WIN32 + return _fseeki64(file, offset, origin); + #else + static_assert(sizeof(off_t) == sizeof(int64_t), "off_t is not 64 bits."); + return fseeko(file, offset, origin); + #endif + } + + + static bool int_literal(const char* start, char const** end, int* val) + { + const char* pos = start; + + bool negative = false; + if (*pos == '-') { + negative = true; + ++pos; + } + else if (*pos == '+') { + ++pos; + } + + bool hasLeadingZeroes = *pos == '0'; + if (hasLeadingZeroes) { + do { + ++pos; + } while (*pos == '0'); + } + + int numDigits = 0; + int localVal = 0; + while (is_digit(*pos)) { + // FIXME: this will overflow if we get too many digits. + localVal = localVal * 10 + static_cast(*pos - '0'); + ++numDigits; + ++pos; + } + + if (numDigits == 0 && hasLeadingZeroes) { + numDigits = 1; + } + + if (numDigits == 0 || is_letter(*pos) || *pos == '_') { + return false; + } + else if (numDigits > 10) { + // Overflow, literal value is larger than an int can hold. + // FIXME: this won't catch *all* cases of overflow, make it exact. + return false; + } + + if (val != nullptr) { + *val = negative ? 
-localVal : localVal; + } + if (end != nullptr) { + *end = pos; + } + return true; + } + + + static bool double_literal(const char* start, char const** end, double* val) + { + const char* pos = start; + + bool negative = false; + if (*pos == '-') { + negative = true; + ++pos; + } + else if (*pos == '+') { + ++pos; + } + + double localVal = 0.0; + + bool hasIntDigits = is_digit(*pos); + if (hasIntDigits) { + do { + localVal = localVal * 10.0 + kDoubleDigits[*pos - '0']; + ++pos; + } while (is_digit(*pos)); + } + else if (*pos != '.') { +// set_error("Not a floating point number"); + return false; + } + + bool hasFracDigits = false; + if (*pos == '.') { + ++pos; + hasFracDigits = is_digit(*pos); + if (hasFracDigits) { + double scale = 0.1; + do { + localVal += scale * kDoubleDigits[*pos - '0']; + scale *= 0.1; + ++pos; + } while (is_digit(*pos)); + } + else if (!hasIntDigits) { +// set_error("Floating point number has no digits before or after the decimal point"); + return false; + } + } + + bool hasExponent = *pos == 'e' || *pos == 'E'; + if (hasExponent) { + ++pos; + bool negativeExponent = false; + if (*pos == '-') { + negativeExponent = true; + ++pos; + } + else if (*pos == '+') { + ++pos; + } + + if (!is_digit(*pos)) { +// set_error("Floating point exponent has no digits"); + return false; // error: exponent part has no digits. + } + + double exponent = 0.0; + do { + exponent = exponent * 10.0 + kDoubleDigits[*pos - '0']; + ++pos; + } while (is_digit(*pos)); + + if (val != nullptr) { + if (negativeExponent) { + exponent = -exponent; + } + localVal *= std::pow(10.0, exponent); + } + } + + if (*pos == '.' 
|| *pos == '_' || is_alnum(*pos)) { +// set_error("Floating point number has trailing chars"); + return false; + } + + if (negative) { + localVal = -localVal; + } + + if (val != nullptr) { + *val = localVal; + } + if (end != nullptr) { + *end = pos; + } + return true; + } + + + static bool float_literal(const char* start, char const** end, float* val) + { + double tmp = 0.0; + bool ok = double_literal(start, end, &tmp); + if (ok && val != nullptr) { + *val = static_cast(tmp); + } + return ok; + } + + + static inline void endian_swap_2(uint8_t* data) + { + uint16_t tmp = *reinterpret_cast(data); + tmp = static_cast((tmp >> 8) | (tmp << 8)); + *reinterpret_cast(data) = tmp; + } + + + static inline void endian_swap_4(uint8_t* data) + { + uint32_t tmp = *reinterpret_cast(data); + tmp = (tmp >> 16) | (tmp << 16); + tmp = ((tmp & 0xFF00FF00) >> 8) | ((tmp & 0x00FF00FF) << 8); + *reinterpret_cast(data) = tmp; + } + + + static inline void endian_swap_8(uint8_t* data) + { + uint64_t tmp = *reinterpret_cast(data); + tmp = (tmp >> 32) | (tmp << 32); + tmp = ((tmp & 0xFFFF0000FFFF0000) >> 16) | ((tmp & 0x0000FFFF0000FFFF) << 16); + tmp = ((tmp & 0xFF00FF00FF00FF00) >> 8) | ((tmp & 0x00FF00FF00FF00FF) << 8); + *reinterpret_cast(data) = tmp; + } + + + static inline void endian_swap(uint8_t* data, PLYPropertyType type) + { + switch (kPLYPropertySize[uint32_t(type)]) { + case 2: endian_swap_2(data); break; + case 4: endian_swap_4(data); break; + case 8: endian_swap_8(data); break; + default: break; + } + } + + + static inline void endian_swap_array(uint8_t* data, PLYPropertyType type, int n) + { + switch (kPLYPropertySize[uint32_t(type)]) { + case 2: + for (const uint8_t* end = data + 2 * n; data < end; data += 2) { + endian_swap_2(data); + } + break; + case 4: + for (const uint8_t* end = data + 4 * n; data < end; data += 4) { + endian_swap_4(data); + } + break; + case 8: + for (const uint8_t* end = data + 8 * n; data < end; data += 8) { + endian_swap_8(data); + } + break; + 
default: + break; + } + } + + + template + static void copy_and_convert_to(T* dest, const uint8_t* src, PLYPropertyType srcType) + { + switch (srcType) { + case PLYPropertyType::Char: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::UChar: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::Short: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::UShort: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::Int: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::UInt: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::Float: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::Double: *dest = static_cast(*reinterpret_cast(src)); break; + case PLYPropertyType::None: break; + } + } + + + static void copy_and_convert(uint8_t* dest, PLYPropertyType destType, const uint8_t* src, PLYPropertyType srcType) + { + switch (destType) { + case PLYPropertyType::Char: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::UChar: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::Short: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::UShort: copy_and_convert_to(reinterpret_cast(dest), src, srcType); break; + case PLYPropertyType::Int: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::UInt: copy_and_convert_to(reinterpret_cast(dest), src, srcType); break; + case PLYPropertyType::Float: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::Double: copy_and_convert_to(reinterpret_cast (dest), src, srcType); break; + case PLYPropertyType::None: break; + } + } + + + static inline bool compatible_types(PLYPropertyType srcType, PLYPropertyType destType) + { + return (srcType == destType) || + (srcType < PLYPropertyType::Float 
&& (uint32_t(srcType) ^ 0x1) == uint32_t(destType)); + } + + + // + // PLYElement methods + // + + void PLYElement::calculate_offsets() + { + fixedSize = true; + for (PLYProperty& prop : properties) { + if (prop.countType != PLYPropertyType::None) { + fixedSize = false; + break; + } + } + + // Note that each list property gets its own separate storage. Only fixed + // size properties go into the common data block. The `rowStride` is the + // size of a row in the common data block. + rowStride = 0; + for (PLYProperty& prop : properties) { + if (prop.countType != PLYPropertyType::None) { + continue; + } + prop.offset = rowStride; + rowStride += kPLYPropertySize[uint32_t(prop.type)]; + } + } + + + uint32_t PLYElement::find_property(const char *propName) const + { + for (uint32_t i = 0, endI = uint32_t(properties.size()); i < endI; i++) { + if (strcmp(propName, properties.at(i).name.c_str()) == 0) { + return i; + } + } + return kInvalidIndex; + } + + + bool PLYElement::find_properties(uint32_t propIdxs[], uint32_t numIdxs, ...) const + { + va_list args; + va_start(args, numIdxs); + bool foundAll = find_properties_va(propIdxs, numIdxs, args); + va_end(args); + return foundAll; + } + + + bool PLYElement::find_properties_va(uint32_t propIdxs[], uint32_t numIdxs, va_list names) const + { + for (uint32_t i = 0; i < numIdxs; i++) { + propIdxs[i] = find_property(va_arg(names, const char*)); + if (propIdxs[i] == kInvalidIndex) { + return false; + } + } + return true; + } + + + bool PLYElement::convert_list_to_fixed_size(uint32_t listPropIdx, uint32_t listSize, uint32_t newPropIdxs[]) + { + if (fixedSize || listPropIdx >= properties.size() || properties[listPropIdx].countType == PLYPropertyType::None) { + return false; + } + + PLYProperty oldListProp = properties[listPropIdx]; + + // If the generated names are less than 256 chars, we will use an array on + // the stack as temporary storage. 
In the rare case that they're longer, + // we'll allocate an array of sufficient size on the heap and use that + // instead. This means we'll avoid allocating in all but the most extreme + // cases. + char inlineBuf[256]; + size_t nameBufSize = oldListProp.name.size() + 12; // the +12 allows space for an '_', a number up to 10 digits long and the terminating null. + char* nameBuf = inlineBuf; + if (nameBufSize > sizeof(inlineBuf)) { + nameBuf = new char[nameBufSize]; + } + + // Set up a property for the list count column. + PLYProperty& countProp = properties[listPropIdx]; + snprintf(nameBuf, nameBufSize, "%s_count", oldListProp.name.c_str()); + countProp.name = nameBuf; + countProp.type = oldListProp.countType; + countProp.countType = PLYPropertyType::None; + countProp.stride = kPLYPropertySize[uint32_t(oldListProp.countType)]; + + if (listSize > 0) { + // Set up additional properties for the list entries, 1 per entry. + if (listPropIdx + 1 == properties.size()) { + properties.resize(properties.size() + listSize); + } + else { + properties.insert(properties.begin() + listPropIdx + 1, listSize, PLYProperty()); + } + + for (uint32_t i = 0; i < listSize; i++) { + uint32_t propIdx = listPropIdx + 1 + i; + + PLYProperty& itemProp = properties[propIdx]; + snprintf(nameBuf, nameBufSize, "%s_%u", oldListProp.name.c_str(), i); + itemProp.name = nameBuf; + itemProp.type = oldListProp.type; + itemProp.countType = PLYPropertyType::None; + itemProp.stride = kPLYPropertySize[uint32_t(oldListProp.type)]; + + newPropIdxs[i] = propIdx; + } + } + + if (nameBuf != inlineBuf) { + delete[] nameBuf; + } + + calculate_offsets(); + return true; + } + + + // + // PLYReader methods + // + + PLYReader::PLYReader(const char* filename) + { + m_buf = new char[kPLYReadBufferSize + 1]; + m_buf[kPLYReadBufferSize] = '\0'; + + m_tmpBuf = new char[kPLYTempBufferSize + 1]; + m_tmpBuf[kPLYTempBufferSize] = '\0'; + + m_bufEnd = m_buf + kPLYReadBufferSize; + m_pos = m_bufEnd; + m_end = m_bufEnd; + + if 
(file_open(&m_f, filename, "rb") != 0) { + m_f = nullptr; + m_valid = false; + return; + } + m_valid = true; + + refill_buffer(); + + m_valid = keyword("ply") && next_line() && + keyword("format") && advance() && + typed_which(kPLYFileTypes, &m_fileType) && advance() && + int_literal(&m_majorVersion) && advance() && + match(".") && advance() && + int_literal(&m_minorVersion) && next_line() && + parse_elements() && + keyword("end_header") && advance() && match("\n") && accept(); + if (!m_valid) { + return; + } + m_inDataSection = true; + if (m_fileType == PLYFileType::ASCII) { + advance(); + } + + for (PLYElement& elem : m_elements) { + elem.calculate_offsets(); + } + } + + + PLYReader::~PLYReader() + { + if (m_f != nullptr) { + fclose(m_f); + } + delete[] m_buf; + delete[] m_tmpBuf; + } + + + bool PLYReader::valid() const + { + return m_valid; + } + + + bool PLYReader::has_element() const + { + return m_valid && m_currentElement < m_elements.size(); + } + + + const PLYElement* PLYReader::element() const + { + assert(has_element()); + return &m_elements[m_currentElement]; + } + + + bool PLYReader::load_element() + { + assert(has_element()); + if (m_elementLoaded) { + return true; + } + + PLYElement& elem = m_elements[m_currentElement]; + return elem.fixedSize ? load_fixed_size_element(elem) : load_variable_size_element(elem); + } + + + void PLYReader::next_element() + { + if (!has_element()) { + return; + } + + // If the element was loaded, the read buffer should already be positioned at + // the start of the next element. + PLYElement& elem = m_elements[m_currentElement]; + m_currentElement++; + + if (m_elementLoaded) { + // Clear any temporary storage used for list properties in the current element. 
+ for (PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + continue; + } + prop.listData.clear(); + prop.listData.shrink_to_fit(); + prop.rowCount.clear(); + prop.rowCount.shrink_to_fit(); + } + + // Clear temporary storage for the non-list properties in the current element. + m_elementData.clear(); + m_elementLoaded = false; + return; + } + + // If the element wasn't loaded, we have to move the file pointer past its + // contents. How we do that depends on whether this is an ASCII or binary + // file and, if it's a binary, whether the element is fixed or variable + // size. + if (m_fileType == PLYFileType::ASCII) { + for (uint32_t row = 0; row < elem.count; row++) { + next_line(); + } + } + else if (elem.fixedSize) { + int64_t elementStart = static_cast(m_pos - m_buf); + int64_t elementSize = elem.rowStride * elem.count; + int64_t elementEnd = elementStart + elementSize; + if (elementEnd >= kPLYReadBufferSize) { + m_bufOffset += elementEnd; + file_seek(m_f, m_bufOffset, SEEK_SET); + m_bufEnd = m_buf + kPLYReadBufferSize; + m_pos = m_bufEnd; + m_end = m_bufEnd; + refill_buffer(); + } + else { + m_pos = m_buf + elementEnd; + m_end = m_pos; + } + } + else if (m_fileType == PLYFileType::Binary) { + for (uint32_t row = 0; row < elem.count; row++) { + for (const PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + uint32_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + m_pos += numBytes; + m_end = m_pos; + continue; + } + + uint32_t numBytes = kPLYPropertySize[uint32_t(prop.countType)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + + int count = 0; + copy_and_convert_to(&count, reinterpret_cast(m_pos), prop.countType); + + if (count < 0) { + m_valid = false; + return; + } + + 
numBytes += uint32_t(count) * kPLYPropertySize[uint32_t(prop.type)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + m_pos += numBytes; + m_end = m_pos; + } + } + } + else { // PLYFileType::BinaryBigEndian + for (uint32_t row = 0; row < elem.count; row++) { + for (const PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + uint32_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + m_pos += numBytes; + m_end = m_pos; + continue; + } + + uint32_t numBytes = kPLYPropertySize[uint32_t(prop.countType)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + + int count = 0; + uint8_t tmp[8]; + memcpy(tmp, m_pos, numBytes); + endian_swap(tmp, prop.countType); + copy_and_convert_to(&count, tmp, prop.countType); + + if (count < 0) { + m_valid = false; + return; + } + + numBytes += uint32_t(count) * kPLYPropertySize[uint32_t(prop.type)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return; + } + } + + m_pos += numBytes; + m_end = m_pos; + } + } + } + } + + + PLYFileType PLYReader::file_type() const + { + return m_fileType; + } + + + int PLYReader::version_major() const + { + return m_majorVersion; + } + + + int PLYReader::version_minor() const + { + return m_minorVersion; + } + + + uint32_t PLYReader::num_elements() const + { + return m_valid ? 
static_cast(m_elements.size()) : 0; + } + + + uint32_t PLYReader::find_element(const char* name) const + { + for (uint32_t i = 0, endI = num_elements(); i < endI; i++) { + const PLYElement& elem = m_elements[i]; + if (strcmp(elem.name.c_str(), name) == 0) { + return i; + } + } + return kInvalidIndex; + } + + + PLYElement* PLYReader::get_element(uint32_t idx) + { + return (idx < num_elements()) ? &m_elements[idx] : nullptr; + } + + + bool PLYReader::element_is(const char* name) const + { + return has_element() && strcmp(element()->name.c_str(), name) == 0; + } + + + uint32_t PLYReader::num_rows() const + { + return has_element() ? element()->count : 0; + } + + + uint32_t PLYReader::find_property(const char* name) const + { + return has_element() ? element()->find_property(name) : kInvalidIndex; + } + + + bool PLYReader::find_properties(uint32_t propIdxs[], uint32_t numIdxs, ...) const + { + if (!has_element()) { + return false; + } + va_list args; + va_start(args, numIdxs); + bool foundAll = element()->find_properties_va(propIdxs, numIdxs, args); + va_end(args); + return foundAll; + } + + + bool PLYReader::extract_properties(const uint32_t propIdxs[], uint32_t numProps, PLYPropertyType destType, void *dest) const + { + if (numProps == 0) { + return false; + } + + const PLYElement* elem = element(); + + // Make sure all property indexes are valid and that none of the properties + // are lists (this function only extracts non-list data). + for (uint32_t i = 0; i < numProps; i++) { + if (propIdxs[i] >= elem->properties.size()) { + return false; + } + } + + // Find out whether we have contiguous columns. If so, we may be able to + // use a more efficient data extraction technique. 
+ bool contiguousCols = true; + uint32_t expectedOffset = elem->properties[propIdxs[0]].offset; + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + if (prop.offset != expectedOffset) { + contiguousCols = false; + break; + } + expectedOffset = prop.offset + kPLYPropertySize[uint32_t(prop.type)]; + } + + // If the row we're extracting is contiguous in memory (i.e. there are no + // gaps anywhere in a row - start, end or middle), we can use an even MORE + // efficient data extraction technique. + bool contiguousRows = contiguousCols && + (elem->properties[propIdxs[0]].offset == 0) && + (expectedOffset == elem->rowStride); + + // If no data conversion is required, we can memcpy chunks of data + // directly over to `dest`. How big those chunks will be depends on whether + // the columns and/or rows are contiguous, as determined above. + bool conversionRequired = false; + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + if (!compatible_types(prop.type, destType)) { + conversionRequired = true; + break; + } + } + + uint8_t* to = reinterpret_cast(dest); + if (!conversionRequired) { + // If no data conversion is required, we can just use memcpy to get + // values into dest. + if (contiguousRows) { + // Most efficient case is when the rows are contiguous. It means we're + // simply copying the entire data block for this element, which we can + // do with a single memcpy. + std::memcpy(to, m_elementData.data(), m_elementData.size()); + } + else if (contiguousCols) { + // If the rows aren't contiguous, but the columns we're extracting + // within each row are, then we can do a single memcpy per row. 
+ const uint8_t* from = m_elementData.data() + elem->properties[propIdxs[0]].offset; + const uint8_t* end = m_elementData.data() + m_elementData.size(); + const size_t numBytes = expectedOffset - elem->properties[propIdxs[0]].offset; + while (from < end) { + std::memcpy(to, from, numBytes); + from += elem->rowStride; + to += numBytes; + } + } + else { + // If the columns aren't contiguous, we must memcpy each one separately. + const uint8_t* row = m_elementData.data(); + const uint8_t* end = m_elementData.data() + m_elementData.size(); + uint8_t* to = reinterpret_cast(dest); + size_t colBytes = kPLYPropertySize[uint32_t(destType)]; // size of an output column in bytes. + while (row < end) { + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + std::memcpy(to, row + prop.offset, colBytes); + to += colBytes; + } + row += elem->rowStride; + } + } + } + else { + // We will have to do data type conversions on the column values here. We + // cannot simply use memcpy in this case, every column has to be + // processed separately. + const uint8_t* row = m_elementData.data(); + const uint8_t* end = m_elementData.data() + m_elementData.size(); + uint8_t* to = reinterpret_cast(dest); + size_t colBytes = kPLYPropertySize[uint32_t(destType)]; // size of an output column in bytes. + while (row < end) { + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + copy_and_convert(to, destType, row + prop.offset, prop.type); + to += colBytes; + } + row += elem->rowStride; + } + } + + return true; + } + + + bool PLYReader::extract_properties_with_stride(const uint32_t propIdxs[], uint32_t numProps, PLYPropertyType destType, void *dest, uint32_t destStride) const + { + if (numProps == 0) { + return false; + } + + // The destination stride must be greater than or equal to the combined + // size of all properties we're extracting. 
Zero is treated as a special + // value meaning packed with no spacing. + const uint32_t minDestStride = numProps * kPLYPropertySize[uint32_t(destType)]; + if (destStride == 0 || destStride == minDestStride) { + return extract_properties(propIdxs, numProps, destType, dest); + } + else if (destStride < minDestStride) { + return false; + } + + const PLYElement* elem = element(); + + // Make sure all property indexes are valid and that none of the properties + // are lists (this function only extracts non-list data). + for (uint32_t i = 0; i < numProps; i++) { + if (propIdxs[i] >= elem->properties.size()) { + return false; + } + } + + // Find out whether we have contiguous columns. If so, we may be able to + // use a more efficient data extraction technique. + bool contiguousCols = true; + uint32_t expectedOffset = elem->properties[propIdxs[0]].offset; + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + if (prop.offset != expectedOffset) { + contiguousCols = false; + break; + } + expectedOffset = prop.offset + kPLYPropertySize[uint32_t(prop.type)]; + } + + // If no data conversion is required, we can memcpy chunks of data + // directly over to `dest`. How big those chunks will be depends on whether + // the columns and/or rows are contiguous, as determined above. + bool conversionRequired = false; + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + if (!compatible_types(prop.type, destType)) { + conversionRequired = true; + break; + } + } + + uint8_t* to = reinterpret_cast(dest); + if (!conversionRequired) { + // If no data conversion is required, we can just use memcpy to get + // values into dest. When the destination requires some padding between + // rows, the best we can do is a memcpy per row. 
+ if (contiguousCols) { + // If the rows aren't contiguous, but the columns we're extracting + // within each row are, then we can do a single memcpy per row. + const uint8_t* from = m_elementData.data() + elem->properties[propIdxs[0]].offset; + const uint8_t* end = m_elementData.data() + m_elementData.size(); + const size_t numBytes = expectedOffset - elem->properties[propIdxs[0]].offset; + while (from < end) { + std::memcpy(to, from, numBytes); + from += elem->rowStride; + to += destStride; + } + } + else { + // If the columns aren't contiguous, we must memcpy each one separately. + const uint8_t* row = m_elementData.data(); + const uint8_t* end = m_elementData.data() + m_elementData.size(); + uint8_t* to = reinterpret_cast(dest); + const size_t colBytes = kPLYPropertySize[uint32_t(destType)]; // size of an output column in bytes. + const size_t colPadding = destStride - minDestStride; + while (row < end) { + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + std::memcpy(to, row + prop.offset, colBytes); + to += colBytes; + } + row += elem->rowStride; + to += colPadding; + } + } + } + else { + // We will have to do data type conversions on the column values here. We + // cannot simply use memcpy in this case, every column has to be + // processed separately. + const uint8_t* row = m_elementData.data(); + const uint8_t* end = m_elementData.data() + m_elementData.size(); + uint8_t* to = reinterpret_cast(dest); + size_t colBytes = kPLYPropertySize[uint32_t(destType)]; // size of an output column in bytes. 
+ size_t colPadding = destStride - minDestStride; + while (row < end) { + for (uint32_t i = 0; i < numProps; i++) { + uint32_t propIdx = propIdxs[i]; + const PLYProperty& prop = elem->properties[propIdx]; + copy_and_convert(to, destType, row + prop.offset, prop.type); + to += colBytes; + } + row += elem->rowStride; + to += colPadding; + } + } + + return true; + } + + + const uint32_t* PLYReader::get_list_counts(uint32_t propIdx) const + { + if (!has_element() || propIdx >= element()->properties.size() || element()->properties[propIdx].countType == PLYPropertyType::None) { + return nullptr; + } + return element()->properties[propIdx].rowCount.data(); + } + + + uint32_t PLYReader::sum_of_list_counts(uint32_t propIdx) const + { + if (!has_element() || propIdx >= element()->properties.size() || element()->properties[propIdx].countType == PLYPropertyType::None) { + return 0; + } + const PLYProperty& prop = element()->properties[propIdx]; + return prop.listData.size() / kPLYPropertySize[uint32_t(prop.type)]; + } + + + const uint8_t* PLYReader::get_list_data(uint32_t propIdx) const + { + if (!has_element() || propIdx >= element()->properties.size() || element()->properties[propIdx].countType == PLYPropertyType::None) { + return nullptr; + } + return element()->properties[propIdx].listData.data(); + } + + + bool PLYReader::extract_list_property(uint32_t propIdx, PLYPropertyType destType, void *dest) const + { + if (!has_element() || propIdx >= element()->properties.size() || element()->properties[propIdx].countType == PLYPropertyType::None) { + return false; + } + + const PLYProperty& prop = element()->properties[propIdx]; + if (compatible_types(prop.type, destType)) { + // If no type conversion is required, we can just copy the list data + // directly over with a single memcpy. + std::memcpy(dest, prop.listData.data(), prop.listData.size()); + } + else { + // If type conversion is required we'll have to process each list value separately. 
+ const uint8_t* from = prop.listData.data(); + const uint8_t* end = prop.listData.data() + prop.listData.size(); + uint8_t* to = reinterpret_cast(dest); + const size_t toBytes = kPLYPropertySize[uint32_t(destType)]; + const size_t fromBytes = kPLYPropertySize[uint32_t(prop.type)]; + while (from < end) { + copy_and_convert(to, destType, from, prop.type); + to += toBytes; + from += fromBytes; + } + } + + return true; + } + + + uint32_t PLYReader::num_triangles(uint32_t propIdx) const + { + const uint32_t* counts = get_list_counts(propIdx); + if (counts == nullptr) { + return 0; + } + + const uint32_t numRows = element()->count; + uint32_t num = 0; + for (uint32_t i = 0; i < numRows; i++) { + if (counts[i] >= 3) { + num += counts[i] - 2; + } + } + return num; + } + + + bool PLYReader::requires_triangulation(uint32_t propIdx) const + { + const uint32_t* counts = get_list_counts(propIdx); + if (counts == nullptr) { + return false; + } + + const uint32_t numRows = element()->count; + for (uint32_t i = 0; i < numRows; i++) { + if (counts[i] != 3) { + return true; + } + } + return false; + } + + + bool PLYReader::extract_triangles(uint32_t propIdx, const float pos[], uint32_t numVerts, PLYPropertyType destType, void *dest) const + { + if (!requires_triangulation(propIdx)) { + return extract_list_property(propIdx, destType, dest); + } + + const PLYElement* elem = element(); + const PLYProperty& prop = elem->properties[propIdx]; + + const uint32_t* counts = prop.rowCount.data(); + const uint8_t* data = prop.listData.data(); + + uint8_t* to = reinterpret_cast(dest); + + bool convertSrc = !compatible_types(elem->properties[propIdx].type, PLYPropertyType::Int); + bool convertDst = !compatible_types(PLYPropertyType::Int, destType); + + size_t srcValBytes = kPLYPropertySize[uint32_t(prop.type)]; + size_t destValBytes = kPLYPropertySize[uint32_t(destType)]; + + if (convertSrc && convertDst) { + std::vector faceIndices, triIndices; + faceIndices.reserve(32); + 
triIndices.reserve(64); + const uint8_t* face = data; + for (uint32_t faceIdx = 0; faceIdx < elem->count; faceIdx++) { + const uint8_t* faceEnd = face + srcValBytes * counts[faceIdx]; + faceIndices.clear(); + faceIndices.reserve(counts[faceIdx]); + for (; face < faceEnd; face += srcValBytes) { + int idx = -1; + copy_and_convert_to(&idx, face, prop.type); + faceIndices.push_back(idx); + } + + triIndices.resize((counts[faceIdx] - 2) * 3); + triangulate_polygon(counts[faceIdx], pos, numVerts, faceIndices.data(), triIndices.data()); + for (int idx : triIndices) { + copy_and_convert(to, destType, reinterpret_cast(&idx), PLYPropertyType::Int); + to += destValBytes; + } + } + } + else if (convertSrc) { + std::vector faceIndices; + faceIndices.reserve(32); + const uint8_t* face = data; + for (uint32_t faceIdx = 0; faceIdx < elem->count; faceIdx++) { + const uint8_t* faceEnd = face + srcValBytes * counts[faceIdx]; + faceIndices.clear(); + faceIndices.reserve(counts[faceIdx]); + for (; face < faceEnd; face += srcValBytes) { + int idx = -1; + copy_and_convert_to(&idx, face, prop.type); + faceIndices.push_back(idx); + } + + uint32_t numTris = triangulate_polygon(counts[faceIdx], pos, numVerts, faceIndices.data(), reinterpret_cast(to)); + to += numTris * 3 * destValBytes; + } + } + else if (convertDst) { + std::vector triIndices; + triIndices.reserve(64); + const uint8_t* face = data; + for (uint32_t faceIdx = 0; faceIdx < elem->count; faceIdx++) { + triIndices.resize((counts[faceIdx] - 2) * 3); + triangulate_polygon(counts[faceIdx], pos, numVerts, reinterpret_cast(face), triIndices.data()); + for (int idx : triIndices) { + copy_and_convert(to, destType, reinterpret_cast(&idx), PLYPropertyType::Int); + to += destValBytes; + } + face += srcValBytes * counts[faceIdx]; + } + } + else { + const uint8_t* face = data; + for (uint32_t faceIdx = 0; faceIdx < elem->count; faceIdx++) { + uint32_t numTris = triangulate_polygon(counts[faceIdx], pos, numVerts, reinterpret_cast(face), 
reinterpret_cast(to)); + face += counts[faceIdx] * srcValBytes; + to += numTris * 3 * destValBytes; + } + } + + return true; + } + + + bool PLYReader::find_pos(uint32_t propIdxs[3]) const + { + return find_properties(propIdxs, 3, "x", "y", "z"); + } + + + bool PLYReader::find_normal(uint32_t propIdxs[3]) const + { + return find_properties(propIdxs, 3, "nx", "ny", "nz"); + } + + + bool PLYReader::find_texcoord(uint32_t propIdxs[2]) const + { + return find_properties(propIdxs, 2, "u", "v") || + find_properties(propIdxs, 2, "s", "t") || + find_properties(propIdxs, 2, "texture_u", "texture_v") || + find_properties(propIdxs, 2, "texture_s", "texture_t"); + } + + + bool PLYReader::find_color(uint32_t propIdxs[3]) const + { + return find_properties(propIdxs, 3, "r", "g", "b") || + find_properties(propIdxs, 3, "red", "green", "blue"); + } + + + bool PLYReader::find_indices(uint32_t propIdxs[1]) const + { + return find_properties(propIdxs, 1, "vertex_indices") || + find_properties(propIdxs, 1, "vertex_index"); + } + + + // + // PLYReader private methods + // + + bool PLYReader::refill_buffer() + { + if (m_f == nullptr || m_atEOF) { + // Nothing left to read. + return false; + } + + if (m_pos == m_buf && m_end == m_bufEnd) { + // Can't make any more room in the buffer! + return false; + } + + // Move everything from the start of the current token onwards, to the + // start of the read buffer. + int64_t bufSize = static_cast(m_bufEnd - m_buf); + if (bufSize < kPLYReadBufferSize) { + m_buf[bufSize] = m_buf[kPLYReadBufferSize]; + m_buf[kPLYReadBufferSize] = '\0'; + m_bufEnd = m_buf + kPLYReadBufferSize; + } + size_t keep = static_cast(m_bufEnd - m_pos); + if (keep > 0 && m_pos > m_buf) { + std::memmove(m_buf, m_pos, sizeof(char) * keep); + m_bufOffset += static_cast(m_pos - m_buf); + } + m_end = m_buf + (m_end - m_pos); + m_pos = m_buf; + + // Fill the remaining space in the buffer with data from the file. 
+ size_t fetched = fread(m_buf + keep, sizeof(char), kPLYReadBufferSize - keep, m_f) + keep; + m_atEOF = fetched < kPLYReadBufferSize; + m_bufEnd = m_buf + fetched; + + if (!m_inDataSection || m_fileType == PLYFileType::ASCII) { + return rewind_to_safe_char(); + } + return true; + } + + + bool PLYReader::rewind_to_safe_char() + { + // If it looks like a token might run past the end of this buffer, move + // the buffer end pointer back before it & rewind the file. This way the + // next refill will pick up the whole of the token. + if (!m_atEOF && (m_bufEnd[-1] == '\n' || !is_safe_buffer_end(m_bufEnd[-1]))) { + const char* safe = m_bufEnd - 2; + // If '\n' is the last char in the buffer, then a call to `next_line()` + // will move `m_pos` to point at the null terminator but won't refresh + // the buffer. It would be clearer to fix this in `next_line()` but I + // believe it'll be more performant to simply treat `\n` as an unsafe + // character here. + while (safe >= m_end && (*safe == '\n' || !is_safe_buffer_end(*safe))) { + --safe; + } + if (safe < m_end) { + // No safe places to rewind to in the whole buffer! + return false; + } + ++safe; + m_buf[kPLYReadBufferSize] = *safe; + m_bufEnd = safe; + } + m_buf[m_bufEnd - m_buf] = '\0'; + + return true; + } + + + bool PLYReader::accept() + { + m_pos = m_end; + return true; + } + + + // Advances to end of line or to next non-whitespace char. 
+ bool PLYReader::advance() + { + m_pos = m_end; + while (true) { + while (is_whitespace(*m_pos)) { + ++m_pos; + } + if (m_pos == m_bufEnd) { + m_end = m_pos; + if (refill_buffer()) { + continue; + } + return false; + } + break; + } + m_end = m_pos; + return true; + } + + + bool PLYReader::next_line() + { + m_pos = m_end; + do { + while (*m_pos != '\n') { + if (m_pos == m_bufEnd) { + m_end = m_pos; + if (refill_buffer()) { + continue; + } + return false; + } + ++m_pos; + } + ++m_pos; // move past the newline char + m_end = m_pos; + } while (match("comment") || match("obj_info")); + + return true; + } + + + bool PLYReader::match(const char* str) + { + m_end = m_pos; + while (m_end < m_bufEnd && *str != '\0' && *m_end == *str) { + ++m_end; + ++str; + } + if (*str != '\0') { + return false; + } + return true; + } + + + bool PLYReader::which(const char* values[], uint32_t* index) + { + for (uint32_t i = 0; values[i] != nullptr; i++) { + if (keyword(values[i])) { + *index = i; + return true; + } + } + return false; + } + + + bool PLYReader::which_property_type(PLYPropertyType* type) + { + for (uint32_t i = 0; kTypeAliases[i].name != nullptr; i++) { + if (keyword(kTypeAliases[i].name)) { + *type = kTypeAliases[i].type; + return true; + } + } + return false; + } + + + bool PLYReader::keyword(const char* kw) + { + return match(kw) && !is_keyword_part(*m_end); + } + + + bool PLYReader::identifier(char* dest, size_t destLen) + { + m_end = m_pos; + if (!is_keyword_start(*m_end) || destLen == 0) { + return false; + } + do { + ++m_end; + } while (is_keyword_part(*m_end)); + + size_t len = static_cast(m_end - m_pos); + if (len >= destLen) { + return false; // identifier too large for dest! 
+ } + std::memcpy(dest, m_pos, sizeof(char) * len); + dest[len] = '\0'; + return true; + } + + + bool PLYReader::int_literal(int* value) + { + return miniply::int_literal(m_pos, &m_end, value); + } + + + bool PLYReader::float_literal(float* value) + { + return miniply::float_literal(m_pos, &m_end, value); + } + + + bool PLYReader::double_literal(double* value) + { + return miniply::double_literal(m_pos, &m_end, value); + } + + + bool PLYReader::parse_elements() + { + m_elements.reserve(4); + while (m_valid && keyword("element")) { + parse_element(); + } + return true; + } + + + bool PLYReader::parse_element() + { + int count = 0; + + m_valid = keyword("element") && advance() && + identifier(m_tmpBuf, kPLYTempBufferSize) && advance() && + int_literal(&count) && next_line(); + if (!m_valid || count < 0) { + return false; + } + + m_elements.push_back(PLYElement()); + PLYElement& elem = m_elements.back(); + elem.name = m_tmpBuf; + elem.count = static_cast(count); + elem.properties.reserve(10); + + while (m_valid && keyword("property")) { + parse_property(elem.properties); + } + + return true; + } + + + bool PLYReader::parse_property(std::vector& properties) + { + PLYPropertyType type = PLYPropertyType::None; + PLYPropertyType countType = PLYPropertyType::None; + + m_valid = keyword("property") && advance(); + if (!m_valid) { + return false; + } + + if (keyword("list")) { + // This is a list property + m_valid = advance() && which_property_type(&countType) && advance(); + if (!m_valid) { + return false; + } + } + + m_valid = which_property_type(&type) && advance() && + identifier(m_tmpBuf, kPLYTempBufferSize) && next_line(); + if (!m_valid) { + return false; + } + + properties.push_back(PLYProperty()); + PLYProperty& prop = properties.back(); + prop.name = m_tmpBuf; + prop.type = type; + prop.countType = countType; + + return true; + } + + + bool PLYReader::load_fixed_size_element(PLYElement& elem) + { + size_t numBytes = static_cast(elem.count) * elem.rowStride; + + 
m_elementData.resize(numBytes); + + if (m_fileType == PLYFileType::ASCII) { + size_t back = 0; + + for (uint32_t row = 0; row < elem.count; row++) { + for (PLYProperty& prop : elem.properties) { + if (!load_ascii_scalar_property(prop, back)) { + m_valid = false; + return false; + } + } + next_line(); + } + } + else { + uint8_t* dst = m_elementData.data(); + uint8_t* dstEnd = dst + numBytes; + while (dst < dstEnd) { + size_t bytesAvailable = static_cast(m_bufEnd - m_pos); + if (dst + bytesAvailable > dstEnd) { + bytesAvailable = static_cast(dstEnd - dst); + } + std::memcpy(dst, m_pos, bytesAvailable); + m_pos += bytesAvailable; + m_end = m_pos; + dst += bytesAvailable; + if (!refill_buffer()) { + break; + } + } + if (dst < dstEnd) { + m_valid = false; + return false; + } + + // We assume the CPU is little endian, so if the file is big-endian we + // need to do an endianness swap on every data item in the block. + if (m_fileType == PLYFileType::BinaryBigEndian) { + uint8_t* data = m_elementData.data(); + for (uint32_t row = 0; row < elem.count; row++) { + for (PLYProperty& prop : elem.properties) { + size_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + switch (numBytes) { + case 2: + endian_swap_2(data); + break; + case 4: + endian_swap_4(data); + break; + case 8: + endian_swap_8(data); + break; + default: + break; + } + data += numBytes; + } + } + } + } + + m_elementLoaded = true; + return true; + } + + + bool PLYReader::load_variable_size_element(PLYElement& elem) + { + m_elementData.resize(static_cast(elem.count) * elem.rowStride); + + // Preallocate enough space for each row in the property to contain three + // items. This is based on the assumptions that (a) the most common use for + // list properties is vertex indices; and (b) most faces are triangles. + // This gives a performance boost because we won't have to grow the + // listData vector as many times during loading. 
+ for (PLYProperty& prop : elem.properties) { + if (prop.countType != PLYPropertyType::None) { + prop.listData.reserve(elem.count * kPLYPropertySize[uint32_t(prop.type)] * 3); + } + } + + if (m_fileType == PLYFileType::Binary) { + size_t back = 0; + for (uint32_t row = 0; row < elem.count; row++) { + for (PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + m_valid = load_binary_scalar_property(prop, back); + } + else { + load_binary_list_property(prop); + } + } + } + } + else if (m_fileType == PLYFileType::ASCII) { + size_t back = 0; + for (uint32_t row = 0; row < elem.count; row++) { + for (PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + m_valid = load_ascii_scalar_property(prop, back); + } + else { + load_ascii_list_property(prop); + } + } + next_line(); + } + } + else { // m_fileType == PLYFileType::BinaryBigEndian + size_t back = 0; + for (uint32_t row = 0; row < elem.count; row++) { + for (PLYProperty& prop : elem.properties) { + if (prop.countType == PLYPropertyType::None) { + m_valid = load_binary_scalar_property_big_endian(prop, back); + } + else { + load_binary_list_property_big_endian(prop); + } + } + } + } + + m_elementLoaded = true; + return true; + } + + + bool PLYReader::load_ascii_scalar_property(PLYProperty& prop, size_t& destIndex) + { + uint8_t value[8]; + if (!ascii_value(prop.type, value)) { + return false; + } + + size_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + std::memcpy(m_elementData.data() + destIndex, value, numBytes); + destIndex += numBytes; + return true; + } + + + bool PLYReader::load_ascii_list_property(PLYProperty& prop) + { + int count = 0; + m_valid = (prop.countType < PLYPropertyType::Float) && int_literal(&count) && advance() && (count >= 0); + if (!m_valid) { + return false; + } + + const size_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + + size_t back = prop.listData.size(); + prop.rowCount.push_back(static_cast(count)); + 
prop.listData.resize(back + numBytes * size_t(count)); + + for (uint32_t i = 0; i < uint32_t(count); i++) { + if (!ascii_value(prop.type, prop.listData.data() + back)) { + m_valid = false; + return false; + } + back += numBytes; + } + + return true; + } + + + bool PLYReader::load_binary_scalar_property(PLYProperty& prop, size_t& destIndex) + { + size_t numBytes = kPLYPropertySize[uint32_t(prop.type)]; + if (m_pos + numBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + numBytes > m_bufEnd) { + m_valid = false; + return false; + } + } + std::memcpy(m_elementData.data() + destIndex, m_pos, numBytes); + m_pos += numBytes; + m_end = m_pos; + destIndex += numBytes; + return true; + } + + + bool PLYReader::load_binary_list_property(PLYProperty& prop) + { + size_t countBytes = kPLYPropertySize[uint32_t(prop.countType)]; + if (m_pos + countBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + countBytes > m_bufEnd) { + m_valid = false; + return false; + } + } + + int count = 0; + copy_and_convert_to(&count, reinterpret_cast(m_pos), prop.countType); + + if (count < 0) { + m_valid = false; + return false; + } + + m_pos += countBytes; + m_end = m_pos; + + const size_t listBytes = kPLYPropertySize[uint32_t(prop.type)] * uint32_t(count); + if (m_pos + listBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + listBytes > m_bufEnd) { + m_valid = false; + return false; + } + } + size_t back = prop.listData.size(); + prop.rowCount.push_back(static_cast(count)); + prop.listData.resize(back + listBytes); + std::memcpy(prop.listData.data() + back, m_pos, listBytes); + + m_pos += listBytes; + m_end = m_pos; + return true; + } + + + bool PLYReader::load_binary_scalar_property_big_endian(PLYProperty &prop, size_t &destIndex) + { + size_t startIndex = destIndex; + if (load_binary_scalar_property(prop, destIndex)) { + endian_swap(m_elementData.data() + startIndex, prop.type); + return true; + } + else { + return false; + } + } + + + bool 
PLYReader::load_binary_list_property_big_endian(PLYProperty &prop) + { + size_t countBytes = kPLYPropertySize[uint32_t(prop.countType)]; + if (m_pos + countBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + countBytes > m_bufEnd) { + m_valid = false; + return false; + } + } + + int count = 0; + uint8_t tmp[8]; + std::memcpy(tmp, m_pos, countBytes); + endian_swap(tmp, prop.countType); + copy_and_convert_to(&count, tmp, prop.countType); + + if (count < 0) { + m_valid = false; + return false; + } + + m_pos += countBytes; + m_end = m_pos; + + const size_t typeBytes = kPLYPropertySize[uint32_t(prop.type)]; + const size_t listBytes = typeBytes * uint32_t(count); + if (m_pos + listBytes > m_bufEnd) { + if (!refill_buffer() || m_pos + listBytes > m_bufEnd) { + m_valid = false; + return false; + } + } + size_t back = prop.listData.size(); + prop.rowCount.push_back(static_cast(count)); + prop.listData.resize(back + listBytes); + + uint8_t* list = prop.listData.data() + back; + std::memcpy(list, m_pos, listBytes); + endian_swap_array(list, prop.type, count); + + m_pos += listBytes; + m_end = m_pos; + return true; + } + + + bool PLYReader::ascii_value(PLYPropertyType propType, uint8_t value[8]) + { + int tmpInt = 0; + + switch (propType) { + case PLYPropertyType::Char: + case PLYPropertyType::UChar: + case PLYPropertyType::Short: + case PLYPropertyType::UShort: + m_valid = int_literal(&tmpInt); + break; + case PLYPropertyType::Int: + case PLYPropertyType::UInt: + m_valid = int_literal(reinterpret_cast(value)); + break; + case PLYPropertyType::Float: + m_valid = float_literal(reinterpret_cast(value)); + break; + case PLYPropertyType::Double: + default: + m_valid = double_literal(reinterpret_cast(value)); + break; + } + + if (!m_valid) { + return false; + } + advance(); + + switch (propType) { + case PLYPropertyType::Char: + reinterpret_cast(value)[0] = static_cast(tmpInt); + break; + case PLYPropertyType::UChar: + value[0] = static_cast(tmpInt); + break; + case 
PLYPropertyType::Short: + reinterpret_cast(value)[0] = static_cast(tmpInt); + break; + case PLYPropertyType::UShort: + reinterpret_cast(value)[0] = static_cast(tmpInt); + break; + default: + break; + } + return true; + } + + + // + // Polygon triangulation + // + + static float angle_at_vert(uint32_t idx, + const std::vector& points2D, + const std::vector& prev, + const std::vector& next) + { + Vec2 xaxis = normalize(points2D[next[idx]] - points2D[idx]); + Vec2 yaxis = Vec2{-xaxis.y, xaxis.x}; + Vec2 p2p0 = points2D[prev[idx]] - points2D[idx]; + float angle = std::atan2(dot(p2p0, yaxis), dot(p2p0, xaxis)); + if (angle <= 0.0f || angle >= kPi) { + angle = 10000.0f; + } + return angle; + } + + + uint32_t triangulate_polygon(uint32_t n, const float pos[], uint32_t numVerts, const int indices[], int dst[]) + { + if (n < 3) { + return 0; + } + else if (n == 3) { + dst[0] = indices[0]; + dst[1] = indices[1]; + dst[2] = indices[2]; + return 1; + } + else if (n == 4) { + dst[0] = indices[0]; + dst[1] = indices[1]; + dst[2] = indices[3]; + + dst[3] = indices[2]; + dst[4] = indices[3]; + dst[5] = indices[1]; + return 2; + } + + // Check that all indices for this face are in the valid range before we + // try to dereference them. + for (uint32_t i = 0; i < n; i++) { + if (indices[i] < 0 || uint32_t(indices[i]) >= numVerts) { + return 0; + } + } + + const Vec3* vpos = reinterpret_cast(pos); + + // Calculate the geometric normal of the face + Vec3 origin = vpos[indices[0]]; + Vec3 faceU = normalize(vpos[indices[1]] - origin); + Vec3 faceNormal = normalize(cross(faceU, normalize(vpos[indices[n - 1]] - origin))); + Vec3 faceV = normalize(cross(faceNormal, faceU)); + + // Project the faces points onto the plane perpendicular to the normal. 
+ std::vector points2D(n, Vec2{0.0f, 0.0f}); + for (uint32_t i = 1; i < n; i++) { + Vec3 p = vpos[indices[i]] - origin; + points2D[i] = Vec2{dot(p, faceU), dot(p, faceV)}; + } + + std::vector next(n, 0u); + std::vector prev(n, 0u); + uint32_t first = 0; + for (uint32_t i = 0, j = n - 1; i < n; i++) { + next[j] = i; + prev[i] = j; + j = i; + } + + // Do ear clipping. + while (n > 3) { + // Find the (remaining) vertex with the sharpest angle. + uint32_t bestI = first; + float bestAngle = angle_at_vert(first, points2D, prev, next); + for (uint32_t i = next[first]; i != first; i = next[i]) { + float angle = angle_at_vert(i, points2D, prev, next); + if (angle < bestAngle) { + bestI = i; + bestAngle = angle; + } + } + + // Clip the triangle at bestI. + uint32_t nextI = next[bestI]; + uint32_t prevI = prev[bestI]; + + dst[0] = indices[bestI]; + dst[1] = indices[nextI]; + dst[2] = indices[prevI]; + dst += 3; + + if (bestI == first) { + first = nextI; + } + next[prevI] = nextI; + prev[nextI] = prevI; + --n; + } + + // Add the final triangle. + dst[0] = indices[first]; + dst[1] = indices[next[first]]; + dst[2] = indices[prev[first]]; + + return n - 2; + } + +} // namespace miniply diff --git a/IO/PLY/miniply.h b/IO/PLY/miniply.h new file mode 100644 index 00000000..abdec426 --- /dev/null +++ b/IO/PLY/miniply.h @@ -0,0 +1,317 @@ +/* +MIT License + +Copyright (c) 2019 Vilya Harvey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef MINIPLY_H +#define MINIPLY_H + +#include +#include +#include +#include +#include + + +/// miniply - A simple and fast parser for PLY files +/// ================================================ +/// +/// For details about the PLY format see: +/// * http://paulbourke.net/dataformats/ply/ +/// * https://en.wikipedia.org/wiki/PLY_(file_format) + +namespace miniply { + + // + // Constants + // + + static constexpr uint32_t kInvalidIndex = 0xFFFFFFFFu; + + // Standard PLY element names + extern const char* kPLYVertexElement; // "vertex" + extern const char* kPLYFaceElement; // "face" + + + // + // PLY Parsing types + // + + enum class PLYFileType { + ASCII, + Binary, + BinaryBigEndian, + }; + + + enum class PLYPropertyType { + Char, + UChar, + Short, + UShort, + Int, + UInt, + Float, + Double, + + None, //!< Special value used in Element::listCountType to indicate a non-list property. + }; + + + struct PLYProperty { + std::string name; + PLYPropertyType type = PLYPropertyType::None; //!< Type of the data. Must be set to a value other than None. + PLYPropertyType countType = PLYPropertyType::None; //!< None indicates this is not a list type, otherwise it's the type for the list count. + uint32_t offset = 0; //!< Byte offset from the start of the row. + uint32_t stride = 0; + + std::vector listData; + std::vector rowCount; // Entry `i` is the number of items (*not* the number of bytes) in row `i`. + }; + + + struct PLYElement { + std::string name; //!< Name of this element. 
+ std::vector properties; + uint32_t count = 0; //!< The number of items in this element (e.g. the number of vertices if this is the vertex element). + bool fixedSize = true; //!< `true` if there are only fixed-size properties in this element, i.e. no list properties. + uint32_t rowStride = 0; //!< The number of bytes from the start of one row to the start of the next, for this element. + + void calculate_offsets(); + + /// Returns the index for the named property in this element, or `kInvalidIndex` + /// if it can't be found. + uint32_t find_property(const char* propName) const; + + /// Return the indices for several properties in one go. Use it like this: + /// ``` + /// uint32_t indexes[3]; + /// if (elem.find_properties(indexes, 3, "foo", "bar", "baz")) { ... } + /// ``` + /// `propIdxs` is where the property indexes will be stored. `numIdxs` is + /// the number of properties we will look up. There must be exactly + /// `numIdxs` parameters after `numIdxs`; each of the is a c-style string + /// giving the name of a property. + /// + /// The return value will be true if all properties were found. If it was + /// not true, you should not use any values from propIdxs. + bool find_properties(uint32_t propIdxs[], uint32_t numIdxs, ...) const; + + /// Same as `find_properties`, for when you already have a `va_list`. This + /// is called internally by both `PLYElement::find_properties` and + /// `PLYReader::find_properties`. + bool find_properties_va(uint32_t propIdxs[], uint32_t numIdxs, va_list names) const; + + /// Call this on the element at some point before you load its data, when + /// you know that every row's list will have the same length. It will + /// replace the single variable-size property with a set of new fixed-size + /// properties: one for the list count, followed by one for each of the + /// list values. This will allow miniply to load and extract the property + /// data a lot more efficiently, giving a big performance increase. 
+ /// + /// After you've called this, you must use PLYReader's `extract_columns` + /// method to get the data, rather than `extract_list_column`. + /// + /// The `newPropIdxs` parameter must be an array with at least `listSize` + /// entries. If the function returns true, this will have been populated + /// with the indices of the new properties that represent the list values + /// (i.e. not including the list count property, which will have the same + /// index as the old list property). + /// + /// The function returns false if the property index is invalid, or the + /// property it refers to is not a list property. In these cases it will + /// not modify anything. Otherwise it will return true. + bool convert_list_to_fixed_size(uint32_t listPropIdx, uint32_t listSize, uint32_t newPropIdxs[]); + }; + + + class PLYReader { + public: + PLYReader(const char* filename); + ~PLYReader(); + + bool valid() const; + bool has_element() const; + const PLYElement* element() const; + bool load_element(); + void next_element(); + + PLYFileType file_type() const; + int version_major() const; + int version_minor() const; + uint32_t num_elements() const; + uint32_t find_element(const char* name) const; + PLYElement* get_element(uint32_t idx); + + /// Check whether the current element has the given name. + bool element_is(const char* name) const; + + /// Number of rows in the current element. + uint32_t num_rows() const; + + /// Returns the index for the named property in the current element, or + /// `kInvalidIndex` if it can't be found. + uint32_t find_property(const char* name) const; + + /// Equivalent to calling `find_properties` on the current element. + bool find_properties(uint32_t propIdxs[], uint32_t numIdxs, ...) const; + + /// Copy the data for the specified properties into `dest`, which must be + /// an array with at least enough space to hold all of the extracted column + /// data. 
`propIdxs` is an array containing the indexes of the properties + /// to copy; it has `numProps` elements. + /// + /// `destType` specifies the data type for values stored in `dest`. All + /// property values will be converted to this type if necessary. + /// + /// This function does some checks up front to pick the most efficient code + /// path for extracting the data. It considers: + /// (a) whether any data conversion is required. + /// (b) whether all property values to be extracted are in contiguous + /// memory locations for any given item. + /// (c) whether the data for all rows is contiguous in memory. + /// In the best case it reduces to a single memcpy call. In the worst case + /// we must iterate over all values to be copied, applying type conversions + /// as we go. + /// + /// Note that this function does not handle list-valued properties. Use + /// `extract_list_column()` for those instead. + bool extract_properties(const uint32_t propIdxs[], uint32_t numProps, PLYPropertyType destType, void* dest) const; + + /// The same as `extract_properties`, but does not require rows in the + /// destination to be contiguous: `destStride` is the number of bytes + /// between the start of one row and the start of the next row in the + /// destination memory. + /// + /// This is useful for when your destination is an array of structs where + /// you cannot extract all of the properties with a single + /// `extract_properties` call, e.g. when not all of the struct members + /// have the same type, or when the data you're extracting is only a + /// subset of the columns in each destination row. + /// + /// This is a tiny bit slower than `extract_properties`. Wherever possible + /// you should use `extract_properties` in preference to this method. + bool extract_properties_with_stride(const uint32_t propIdxs[], uint32_t numProps, PLYPropertyType destType, void* dest, uint32_t destStride) const; + + /// Get the array of item counts for a list property. 
Entry `i` in this + /// array is the number of items in the `i`th list. + const uint32_t* get_list_counts(uint32_t propIdx) const; + + /// Get the sum of all item counts for a list property. This can be useful + /// to determine how big a destination array you'll need for a call to + /// `extract_list_property`. It's equivalent to summing up all the values + /// in the array returned by `get_list_counts`, but faster. + uint32_t sum_of_list_counts(uint32_t propIdx) const; + + const uint8_t* get_list_data(uint32_t propIdx) const; + bool extract_list_property(uint32_t propIdx, PLYPropertyType destType, void* dest) const; + + uint32_t num_triangles(uint32_t propIdx) const; + bool requires_triangulation(uint32_t propIdx) const; + bool extract_triangles(uint32_t propIdx, const float pos[], uint32_t numVerts, PLYPropertyType destType, void* dest) const; + + bool find_pos(uint32_t propIdxs[3]) const; + bool find_normal(uint32_t propIdxs[3]) const; + bool find_texcoord(uint32_t propIdxs[2]) const; + bool find_color(uint32_t propIdxs[3]) const; + bool find_indices(uint32_t propIdxs[1]) const; + + private: + bool refill_buffer(); + bool rewind_to_safe_char(); + bool accept(); + bool advance(); + bool next_line(); + bool match(const char* str); + bool which(const char* values[], uint32_t* index); + bool which_property_type(PLYPropertyType* type); + bool keyword(const char* kw); + bool identifier(char* dest, size_t destLen); + + template // T must be a type compatible with uint32_t. 
+ bool typed_which(const char* values[], T* index) { + return which(values, reinterpret_cast(index)); + } + + bool int_literal(int* value); + bool float_literal(float* value); + bool double_literal(double* value); + + bool parse_elements(); + bool parse_element(); + bool parse_property(std::vector& properties); + + bool load_fixed_size_element(PLYElement& elem); + bool load_variable_size_element(PLYElement& elem); + + bool load_ascii_scalar_property(PLYProperty& prop, size_t& destIndex); + bool load_ascii_list_property(PLYProperty& prop); + bool load_binary_scalar_property(PLYProperty& prop, size_t& destIndex); + bool load_binary_list_property(PLYProperty& prop); + bool load_binary_scalar_property_big_endian(PLYProperty& prop, size_t& destIndex); + bool load_binary_list_property_big_endian(PLYProperty& prop); + + bool ascii_value(PLYPropertyType propType, uint8_t value[8]); + + private: + FILE* m_f = nullptr; + char* m_buf = nullptr; + const char* m_bufEnd = nullptr; + const char* m_pos = nullptr; + const char* m_end = nullptr; + bool m_inDataSection = false; + bool m_atEOF = false; + int64_t m_bufOffset = 0; + + bool m_valid = false; + + PLYFileType m_fileType = PLYFileType::ASCII; //!< Whether the file was ascii, binary little-endian, or binary big-endian. + int m_majorVersion = 0; + int m_minorVersion = 0; + std::vector m_elements; //!< Element descriptors for this file. + + size_t m_currentElement = 0; + bool m_elementLoaded = false; + std::vector m_elementData; + + char* m_tmpBuf = nullptr; + }; + + + /// Given a polygon with `n` vertices, where `n` > 3, triangulate it and + /// store the indices for the resulting triangles in `dst`. The `pos` + /// parameter is the array of all vertex positions for the mesh; `indices` is + /// the list of `n` indices for the polygon we're triangulating; and `dst` is + /// where we write the new indices to. 
+ /// + /// The triangulation will always produce `n - 2` triangles, so `dst` must + /// have enough space for `3 * (n - 2)` indices. + /// + /// If `n == 3`, we simply copy the input indices to `dst`. If `n < 3`, + /// nothing gets written to dst. + /// + /// The return value is the number of triangles. + uint32_t triangulate_polygon(uint32_t n, const float pos[], uint32_t numVerts, const int indices[], int dst[]); + +} // namespace miniply + +#endif // MINIPLY_H diff --git a/IO/PLY/vtk.module b/IO/PLY/vtk.module index 38153e3b..93bdcd18 100644 --- a/IO/PLY/vtk.module +++ b/IO/PLY/vtk.module @@ -13,6 +13,7 @@ SPDX_LICENSE_IDENTIFIER SPDX_COPYRIGHT_TEXT Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen Copyright (c) 1994 The Board of Trustees of The Leland Stanford + Copyright (c) 2019 Vilya Harvey DEPENDS VTK::CommonCore VTK::IOCore diff --git a/IO/PLY/vtkPLYReader.cxx b/IO/PLY/vtkPLYReader.cxx index fdf48485..80b922c6 100644 --- a/IO/PLY/vtkPLYReader.cxx +++ b/IO/PLY/vtkPLYReader.cxx @@ -19,13 +19,22 @@ #include "vtkPolygon.h" #include "vtkSmartPointer.h" #include "vtkStringArray.h" +#include "vtkTypeInt32Array.h" +#include "vtkTypeInt64Array.h" #include "vtkUnsignedCharArray.h" +#include "miniply.h" + #include #include #include #include +#include +#include +#include +#include +#include #include VTK_ABI_NAMESPACE_BEGIN @@ -132,6 +141,19 @@ int vtkPLYReader::RequestData(vtkInformation* vtkNotUsed(request), // get the output vtkPolyData* output = vtkPolyData::SafeDownCast(outInfo->Get(vtkDataObject::DATA_OBJECT())); + // fvtk fast path: bulk-column binary-LE read via the vendored miniply parser. + // Only when reading from a real file (not a stream/string). Declines (-1) for + // anything outside its narrow byte-exact envelope, falling through to legacy. 
+ if (!this->ReadFromInputStream && !this->ReadFromInputString && this->FileName && + this->FileName[0] != '\0') + { + int fastResult = this->ReadPLYFast(output); + if (fastResult >= 0) + { + return fastResult; + } + } + PlyProperty vertProps[] = { { "x", PLY_FLOAT, PLY_FLOAT, static_cast(offsetof(plyVertex, x)), 0, 0, 0, 0 }, { "y", PLY_FLOAT, PLY_FLOAT, static_cast(offsetof(plyVertex, x) + sizeof(float)), 0, 0, 0, @@ -650,6 +672,502 @@ int vtkPLYReader::RequestData(vtkInformation* vtkNotUsed(request), return 1; } +//------------------------------------------------------------------------------ +// fvtk fast binary-PLY bulk reader. +// +// Reads a binary little-endian PLY with the vendored miniply parser (MIT, +// Copyright (c) 2019 Vilya Harvey -- see miniply.h) doing columnar bulk +// extraction instead of vtkPLY's per-row property dispatch. It engages only +// when every consumed property's stored type already equals its VTK destination +// type, so each value is a verbatim little-endian copy: the output is then +// byte-identical to the legacy reader, with point and face order preserved and +// polygons left untriangulated (PLY faces index into the point array, so order +// is load-bearing). Any file outside this narrow envelope returns -1 and the +// caller falls back to the legacy reader. +int vtkPLYReader::ReadPLYFast(vtkPolyData* output) +{ + using miniply::PLYElement; + using miniply::PLYProperty; + using miniply::PLYPropertyType; + using miniply::PLYReader; + constexpr uint32_t kInvalid = miniply::kInvalidIndex; + + PLYReader reader(this->FileName); + if (!reader.valid()) + { + return -1; // not openable as PLY; let the legacy path emit the warning + } + // Binary little-endian only. ASCII float parsing and big-endian byte swaps + // risk a last-ULP divergence from vtkPLY, and PLY point positions are sacred + // (faces reference them by index), so we will not relax them. 
+ if (reader.file_type() != miniply::PLYFileType::Binary) + { + return -1; + } + + auto isFloat = [&](const PLYElement* e, uint32_t i) { + return i != kInvalid && e->properties[i].type == PLYPropertyType::Float; + }; + auto isUChar = [&](const PLYElement* e, uint32_t i) { + return i != kInvalid && e->properties[i].type == PLYPropertyType::UChar; + }; + + // ---- required: vertex element with float x, y, z ---- + uint32_t vIdx = reader.find_element(miniply::kPLYVertexElement); + if (vIdx == kInvalid) + { + return -1; // legacy errors "Cannot read geometry" + } + PLYElement* vElem = reader.get_element(vIdx); + if (!vElem->fixedSize) + { + return -1; // a list property among vertices -> outside the columnar envelope + } + uint32_t xi = vElem->find_property("x"); + uint32_t yi = vElem->find_property("y"); + uint32_t zi = vElem->find_property("z"); + if (!isFloat(vElem, xi) || !isFloat(vElem, yi) || !isFloat(vElem, zi)) + { + return -1; + } + + // ---- optional vertex texture coords (u,v or texture_u,texture_v), float ---- + bool texCoordsPointsAvailable = false; + uint32_t ui = kInvalid, vi = kInvalid; + { + uint32_t a = vElem->find_property("u"); + uint32_t b = vElem->find_property("v"); + if (a == kInvalid || b == kInvalid) + { + a = vElem->find_property("texture_u"); + b = vElem->find_property("texture_v"); + } + if (a != kInvalid && b != kInvalid) + { + if (!isFloat(vElem, a) || !isFloat(vElem, b)) + { + return -1; + } + ui = a; + vi = b; + texCoordsPointsAvailable = true; + } + } + + // ---- optional vertex normals (nx,ny,nz), float ---- + bool normalPointsAvailable = false; + uint32_t nxi = kInvalid, nyi = kInvalid, nzi = kInvalid; + { + uint32_t a = vElem->find_property("nx"); + uint32_t b = vElem->find_property("ny"); + uint32_t c = vElem->find_property("nz"); + if (a != kInvalid && b != kInvalid && c != kInvalid) + { + if (!isFloat(vElem, a) || !isFloat(vElem, b) || !isFloat(vElem, c)) + { + return -1; + } + nxi = a; + nyi = b; + nzi = c; + 
normalPointsAvailable = true; + } + } + + // ---- optional vertex colors (red/green/blue[/alpha] or diffuse_*), uchar ---- + bool rgbPointsAvailable = false; + bool rgbPointsHaveAlpha = false; + uint32_t pri = kInvalid, pgi = kInvalid, pbi = kInvalid, pai = kInvalid; + { + uint32_t r = vElem->find_property("red"); + uint32_t g = vElem->find_property("green"); + uint32_t b = vElem->find_property("blue"); + if (r == kInvalid || g == kInvalid || b == kInvalid) + { + r = vElem->find_property("diffuse_red"); + g = vElem->find_property("diffuse_green"); + b = vElem->find_property("diffuse_blue"); + } + if (r != kInvalid && g != kInvalid && b != kInvalid) + { + if (!isUChar(vElem, r) || !isUChar(vElem, g) || !isUChar(vElem, b)) + { + return -1; + } + pri = r; + pgi = g; + pbi = b; + rgbPointsAvailable = true; + uint32_t a = vElem->find_property("alpha"); + if (a != kInvalid) + { + if (!isUChar(vElem, a)) + { + return -1; + } + pai = a; + rgbPointsHaveAlpha = true; + } + } + } + + // ---- optional face element with an integer vertex_indices list ---- + bool faceElemPresent = false; + PLYElement* fElem = nullptr; + uint32_t viIdx = kInvalid; + uint32_t fIdx = reader.find_element(miniply::kPLYFaceElement); + if (fIdx != kInvalid) + { + if (fIdx < vIdx) + { + return -1; // face declared before vertex: outside our ordering assumption + } + faceElemPresent = true; + fElem = reader.get_element(fIdx); + viIdx = fElem->find_property("vertex_indices"); + if (viIdx == kInvalid) + { + return -1; + } + const PLYProperty& vp = fElem->properties[viIdx]; + if (vp.countType == PLYPropertyType::None || vp.type == PLYPropertyType::Float || + vp.type == PLYPropertyType::Double) + { + return -1; // must be a list of integers (verbatim integer copy) + } + // The per-face texcoord path duplicates points in the legacy reader; decline + // so the legacy reader handles that fidelity-sensitive case. 
+ if (fElem->find_property("texcoord") != kInvalid && !texCoordsPointsAvailable) + { + return -1; + } + } + + // ---- optional face intensity (uchar) and face colors (uchar) ---- + bool intensityAvailable = false; + uint32_t intensityIdx = kInvalid; + bool rgbCellsAvailable = false; + bool rgbCellsHaveAlpha = false; + uint32_t fri = kInvalid, fgi = kInvalid, fbi = kInvalid, fai = kInvalid; + if (faceElemPresent) + { + uint32_t a = fElem->find_property("intensity"); + if (a != kInvalid) + { + if (!isUChar(fElem, a)) + { + return -1; + } + intensityIdx = a; + intensityAvailable = true; + } + uint32_t r = fElem->find_property("red"); + uint32_t g = fElem->find_property("green"); + uint32_t b = fElem->find_property("blue"); + if (r != kInvalid && g != kInvalid && b != kInvalid) + { + if (!isUChar(fElem, r) || !isUChar(fElem, g) || !isUChar(fElem, b)) + { + return -1; + } + fri = r; + fgi = g; + fbi = b; + rgbCellsAvailable = true; + uint32_t fa = fElem->find_property("alpha"); + if (fa != kInvalid) + { + if (!isUChar(fElem, fa)) + { + return -1; + } + fai = fa; + rgbCellsHaveAlpha = true; + } + } + } + + // ============================================================================ + // Detection passed: build everything into locals, then attach to `output` only + // on success so a partial/corrupt read leaves `output` untouched for fallback. + // ============================================================================ + const uint32_t numPts = vElem->count; + const uint32_t numPolys = faceElemPresent ? 
fElem->count : 0; + + vtkSmartPointer pts = vtkSmartPointer::New(); + pts->SetDataTypeToFloat(); + pts->SetNumberOfPoints(numPts); + + vtkSmartPointer tcoords; + if (texCoordsPointsAvailable) + { + tcoords = vtkSmartPointer::New(); + tcoords->SetName("TCoords"); + tcoords->SetNumberOfComponents(2); + tcoords->SetNumberOfTuples(numPts); + } + vtkSmartPointer normals; + if (normalPointsAvailable) + { + normals = vtkSmartPointer::New(); + normals->SetName("Normals"); + normals->SetNumberOfComponents(3); + normals->SetNumberOfTuples(numPts); + } + vtkSmartPointer rgbPoints; + if (rgbPointsAvailable) + { + rgbPoints = vtkSmartPointer::New(); + rgbPoints->SetName(rgbPointsHaveAlpha ? "RGBA" : "RGB"); + rgbPoints->SetNumberOfComponents(rgbPointsHaveAlpha ? 4 : 3); + rgbPoints->SetNumberOfTuples(numPts); + } + vtkSmartPointer intensity; + if (intensityAvailable) + { + intensity = vtkSmartPointer::New(); + intensity->SetName("intensity"); + intensity->SetNumberOfComponents(1); + intensity->SetNumberOfTuples(numPolys); + } + vtkSmartPointer rgbCells; + if (rgbCellsAvailable) + { + rgbCells = vtkSmartPointer::New(); + rgbCells->SetName(rgbCellsHaveAlpha ? "RGBA" : "RGB"); + rgbCells->SetNumberOfComponents(rgbCellsHaveAlpha ? 
4 : 3); + rgbCells->SetNumberOfTuples(numPolys); + } + vtkSmartPointer polys; + + bool gotVerts = false; + bool gotFaces = false; + while (reader.has_element()) + { + if (reader.element_is(miniply::kPLYVertexElement) && !gotVerts) + { + if (!reader.load_element()) + { + return -1; + } + uint32_t idx[4]; + idx[0] = xi; + idx[1] = yi; + idx[2] = zi; + if (!reader.extract_properties(idx, 3, PLYPropertyType::Float, pts->GetVoidPointer(0))) + { + return -1; + } + if (texCoordsPointsAvailable) + { + idx[0] = ui; + idx[1] = vi; + if (!reader.extract_properties(idx, 2, PLYPropertyType::Float, tcoords->GetPointer(0))) + { + return -1; + } + } + if (normalPointsAvailable) + { + idx[0] = nxi; + idx[1] = nyi; + idx[2] = nzi; + if (!reader.extract_properties(idx, 3, PLYPropertyType::Float, normals->GetPointer(0))) + { + return -1; + } + } + if (rgbPointsAvailable) + { + idx[0] = pri; + idx[1] = pgi; + idx[2] = pbi; + idx[3] = pai; + uint32_t n = rgbPointsHaveAlpha ? 4 : 3; + if (!reader.extract_properties(idx, n, PLYPropertyType::UChar, rgbPoints->GetPointer(0))) + { + return -1; + } + } + gotVerts = true; + } + else if (faceElemPresent && reader.element_is(miniply::kPLYFaceElement) && !gotFaces) + { + if (!reader.load_element()) + { + return -1; + } + const uint32_t* counts = reader.get_list_counts(viIdx); + const uint32_t total = reader.sum_of_list_counts(viIdx); + std::vector conn(total); + if (total > 0 && !reader.extract_list_property(viIdx, PLYPropertyType::Int, conn.data())) + { + return -1; + } + // fvtk-wide rule (width-relaxed): default the cell array to 32-bit + // offsets/connectivity, widening to 64-bit only when a value cannot fit in + // int32 (numPolys/total or an index >= 2^31). Integer VALUES are identical + // to stock VTK; only the storage container narrows (stock defaults to + // 64-bit). This halves the cell-array footprint for the overwhelmingly + // common case. See [[fvtk-int32-default-width-relaxed]]. 
+ // Cheap, robust width check: 32-bit storage is safe iff every value that + // will be stored fits in int32. Offset values range over [0, total]; + // connectivity values are point indices in [0, numPts). Bounding those two + // counts bounds every stored value (numPolys <= total, so it needs no + // separate check). Two unsigned compares -- no per-element scan. + constexpr uint32_t kI32Max = 0x7FFFFFFFu; + const bool fits32 = (numPts <= kI32Max) && (total <= kI32Max); + polys = vtkSmartPointer::New(); + if (fits32) + { + vtkNew offArr; + offArr->SetNumberOfValues(static_cast(numPolys) + 1); + vtkNew connArr; + connArr->SetNumberOfValues(static_cast(total)); + vtkTypeInt32* op = offArr->GetPointer(0); + vtkTypeInt32* cp = connArr->GetPointer(0); + vtkTypeInt32 acc = 0; + op[0] = 0; + for (uint32_t j = 0; j < numPolys; ++j) + { + acc += static_cast(counts[j]); + op[j + 1] = acc; + } + for (uint32_t i = 0; i < total; ++i) + { + cp[i] = static_cast(conn[i]); + } + polys->SetData(offArr, connArr); + } + else + { + vtkNew offArr; + offArr->SetNumberOfValues(static_cast(numPolys) + 1); + vtkNew connArr; + connArr->SetNumberOfValues(static_cast(total)); + vtkTypeInt64* op = offArr->GetPointer(0); + vtkTypeInt64* cp = connArr->GetPointer(0); + vtkTypeInt64 acc = 0; + op[0] = 0; + for (uint32_t j = 0; j < numPolys; ++j) + { + acc += static_cast(counts[j]); + op[j + 1] = acc; + } + for (uint32_t i = 0; i < total; ++i) + { + cp[i] = static_cast(conn[i]); + } + polys->SetData(offArr, connArr); + } + + if (intensityAvailable) + { + uint32_t idx[1] = { intensityIdx }; + if (!reader.extract_properties(idx, 1, PLYPropertyType::UChar, intensity->GetPointer(0))) + { + return -1; + } + } + if (rgbCellsAvailable) + { + uint32_t idx[4] = { fri, fgi, fbi, fai }; + uint32_t n = rgbCellsHaveAlpha ? 
4 : 3; + if (!reader.extract_properties(idx, n, PLYPropertyType::UChar, rgbCells->GetPointer(0))) + { + return -1; + } + } + gotFaces = true; + } + reader.next_element(); + } + + if (!gotVerts || (faceElemPresent && !gotFaces)) + { + return -1; // file didn't yield the elements its header advertised + } + + // Comments: miniply discards them, so replicate vtkPLY's header handling to + // keep GetComments() faithful (a side accessor, not part of the serialized + // mesh). Re-scan the short ASCII header for "comment" lines. + this->Comments->Reset(); + if (FILE* hf = vtksys::SystemTools::Fopen(this->FileName, "rb")) + { + char line[1024]; + while (std::fgets(line, sizeof(line), hf)) + { + for (char* p = line; *p; ++p) + { + if (*p == '\t') + { + *p = ' '; + } + else if (*p == '\r' || *p == '\n') + { + *p = '\0'; + break; + } + } + if (std::strncmp(line, "end_header", 10) == 0) + { + break; + } + if (std::strncmp(line, "comment", 7) == 0 && (line[7] == ' ' || line[7] == '\0')) + { + int i = 7; + while (line[i] == ' ') + { + ++i; + } + this->Comments->InsertNextValue(line + i); + } + } + std::fclose(hf); + } + + // Attach to output in the legacy reader's array order so the result is + // structurally identical (same array indices and active attributes). 
+ if (intensityAvailable) + { + output->GetCellData()->AddArray(intensity); + output->GetCellData()->SetActiveScalars("intensity"); + } + if (rgbCellsAvailable) + { + output->GetCellData()->AddArray(rgbCells); + output->GetCellData()->SetActiveScalars(rgbCells->GetName()); + } + output->SetPoints(pts); + if (rgbPointsAvailable) + { + output->GetPointData()->SetScalars(rgbPoints); + } + if (normalPointsAvailable) + { + output->GetPointData()->SetNormals(normals); + } + if (texCoordsPointsAvailable) + { + output->GetPointData()->SetTCoords(tcoords); + } + if (faceElemPresent) + { + output->SetPolys(polys); + } + + vtkDebugMacro(<< "Read (fast): " << numPts << " points, " << numPolys << " polygons"); + // Optional diagnostic breadcrumb: when FVTK_PLY_FASTPATH_TRACE is set, report + // (to stderr) that the fast path handled this file. Off by default -> no cost + // and no behavior change; used by the byte-exact validation to confirm which + // files engage the fast path vs fall back to the legacy reader. + if (std::getenv("FVTK_PLY_FASTPATH_TRACE")) + { + std::fprintf(stderr, "FVTK_PLY_FAST %s\n", this->FileName ? this->FileName : ""); + } + return 1; +} + int vtkPLYReader::CanReadFile(const char* filename) { FILE* fd = vtksys::SystemTools::Fopen(filename, "rb"); diff --git a/IO/PLY/vtkPLYReader.h b/IO/PLY/vtkPLYReader.h index a1c6e2b5..92506219 100644 --- a/IO/PLY/vtkPLYReader.h +++ b/IO/PLY/vtkPLYReader.h @@ -37,6 +37,7 @@ #include "vtkResourceStream.h" // For vtkResourceStream VTK_ABI_NAMESPACE_BEGIN +class vtkPolyData; class vtkStringArray; class VTKIOPLY_EXPORT vtkPLYReader : public vtkAbstractPolyDataReader @@ -117,6 +118,18 @@ class VTKIOPLY_EXPORT vtkPLYReader : public vtkAbstractPolyDataReader vtkPLYReader(const vtkPLYReader&) = delete; void operator=(const vtkPLYReader&) = delete; + /** + * fvtk fast path: bulk-column read of a binary little-endian PLY using the + * vendored miniply parser. Reads only from `FileName` (not stream/string). 
+ * Returns 1 if it fully populated `output`, or -1 if the file is outside the + * fast path's narrow envelope or fails to load (caller must fall back to the + * legacy reader); 0 is reserved for a hard error and is not returned today. + * Float and uchar properties are accepted only in their stored type and copied + * verbatim; face indices may be any integer type. Values match the legacy + * reader (point/face order preserved, polygons not triangulated). + */ + int ReadPLYFast(vtkPolyData* output); + float FaceTextureTolerance; bool DuplicatePointsForFaceTexture; }; diff --git a/fvtk-config/_source_unity_exclude.cmake b/fvtk-config/_source_unity_exclude.cmake index 4a7a4350..dc1aa422 100644 --- a/fvtk-config/_source_unity_exclude.cmake +++ b/fvtk-config/_source_unity_exclude.cmake @@ -94,4 +94,8 @@ set(FVTK_SOURCE_UNITY_SKIP_FILES vtkMINCImageWriter.cxx vtkNIFTIImageWriter.cxx vtkPLYWriter.cxx + # Vendored third-party miniply parser (its own `namespace miniply`, plus + # file-scope helpers); compile standalone so it never shares a batch with the + # IOPLY reader/writer TUs. + miniply.cxx CACHE INTERNAL "fvtk: source .cxx basenames pulled out of UNITY_BUILD batching")