From b7f34dca127acf38bd40050c8284424ed6630561 Mon Sep 17 00:00:00 2001 From: Chris Kaczmarek Date: Fri, 12 Jun 2026 13:18:39 +0200 Subject: [PATCH 1/2] feat: add HDT read/write serialization support Read uses the hdt crate's dictionary iteration mapped into oxrdf quads, with literals parsed via oxrdf Literal::from_str. Co-Authored-By: Claude Fable 5 --- Cargo.lock | 703 ++++++++++++++++++++------- Cargo.toml | 1 + lib/maplib/src/model.rs | 10 + lib/triplestore/Cargo.toml | 1 + lib/triplestore/src/errors.rs | 2 + lib/triplestore/src/triples_read.rs | 227 ++++++--- lib/triplestore/src/triples_write.rs | 45 +- py_maplib/maplib/__init__.pyi | 15 +- py_maplib/src/lib.rs | 39 +- py_maplib/tests/test_hdt.py | 171 +++++++ 10 files changed, 956 insertions(+), 258 deletions(-) create mode 100644 py_maplib/tests/test_hdt.py diff --git a/Cargo.lock b/Cargo.lock index 454a6371..c9f54007 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "anyhow" version = "1.0.102" @@ -100,9 +150,9 @@ dependencies = [ [[package]] name = "ar_archive_writer" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +checksum = "4087686b4b0a3427190bae57a1d9a478dbb2d40c5dc1bd6e2b6d797913bdd348" dependencies = [ "object", ] @@ -191,7 +241,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown 0.17.0", + "hashbrown 0.17.1", "num-complex 0.4.6", "num-integer", "num-traits", @@ -390,7 +440,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -401,7 +451,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -431,9 +481,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "base64" @@ -441,6 +491,15 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bincode" version = "2.0.1" @@ -461,15 +520,27 @@ dependencies = [ "virtue", ] +[[package]] +name = "binout" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb4925a15bea6a68075021910e03d6aa2d04951d71ff1d956190a551d738f" + [[package]] name = "bitflags" -version = "2.11.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" dependencies = [ "serde_core", ] +[[package]] +name = "bitset-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f421f1bcb30aa9d851a03c2920ab5d96ca920d5786645a597b5fc37922f8b89e" + [[package]] name = "bitvec" version = "1.0.1" @@ -507,9 +578,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" dependencies = [ "hybrid-array", ] @@ -534,7 +605,7 @@ dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -546,9 +617,9 @@ checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" [[package]] name = "brotli" -version = "8.0.2" +version = "8.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +checksum = "8119e4516436f5708bbc474a9d395bf12f1b5395e93a92a56e647ac3388c8610" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -557,9 +628,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "5.0.0" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +checksum = "5962523e1b92ce1b5e793d9169b9943eece10d39f62550bc04bb605d75b94924" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -567,9 +638,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "bytecheck" @@ -589,7 +660,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 1.0.109", ] @@ -609,7 +680,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -622,6 +693,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bytesize" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd91ee7b2422bcb158d90ef4d14f75ef67f340943fc4149891dcce8f8b972a3" + [[package]] name = "castaway" version = "0.2.4" @@ -633,9 +710,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.61" +version = "1.2.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" +checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" dependencies = [ "find-msvc-tools", "jobserver", @@ -668,9 +745,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", "js-sys", @@ -724,6 +801,58 @@ dependencies = [ "representation", ] +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "co_sort" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc18e115ded94ba1e1b820c7631d25b7364e27c25f066ecbce37aaf88abdcf4" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + [[package]] name = "comfy-table" version = "7.2.2" @@ -737,9 +866,9 @@ dependencies = [ [[package]] name = "compact_str" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +checksum = "9dfdd1c2274d9aa354115b09dc9a901d6c5576818cdf70d14cae2bdb47df00ab" dependencies = [ "castaway", "cfg-if", @@ -759,6 +888,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "const-oid" version = "0.10.2" @@ -825,6 +964,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + [[package]] name = "crc32fast" version = "1.5.0" @@ -918,9 +1072,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" dependencies = [ "hybrid-array", ] @@ -946,6 +1100,20 @@ dependencies = [ "memchr", ] +[[package]] +name = "dashmap" +version = "6.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "datalog" version = "0.1.0" @@ -970,7 +1138,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -986,13 +1154,13 @@ dependencies = [ [[package]] name = "digest" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "block-buffer 0.12.0", + "block-buffer 0.12.1", "const-oid", - "crypto-common 0.2.1", + "crypto-common 0.2.2", ] [[package]] @@ -1006,12 +1174,12 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -1030,11 +1198,38 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "dyn_size_of" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a742b95783b1f45b900129082cbc47717b6a77ee8d17eea70a8ea62462f5de3" + [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "env_filter" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" +dependencies = [ + "log", +] + +[[package]] +name = "env_logger" +version = "0.11.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "log", +] [[package]] name = "equivalent" @@ -1178,6 +1373,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fsum" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5f673e5179fc055a5cb48fb40fc3f317160598d60c93b0ef8173504117765b0" + [[package]] name = "fts" version = "0.1.0" @@ -1251,7 +1452,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -1294,6 +1495,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "generic-tests" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ff6d6584f4f6fa911d5e07856abf1a48dc5599b3734f2eaea130f2c3baa989" +dependencies = [ + "proc-macro2", + "quote 1.0.45", + "syn 2.0.117", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1343,9 +1555,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" dependencies = [ "atomic-waker", "bytes", @@ -1393,6 +1605,16 @@ dependencies = [ "ahash 0.7.8", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1420,9 +1642,36 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "hdt" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2b1ea7cbd25071f796251a1081ded2a164dd24e034d5f75342da35c157cf4e" +dependencies = [ + "bitset-core", + "bytesize", + "console_error_panic_hook", + "crc", + "env_logger", + "getrandom 0.2.17", + "getrandom 0.3.4", + "lasso", + "log", + "mem_dbg", + "mownstr", + "ntriple", + "oxttl", + "qwt", + "rayon", + "serde", + "serde_json", + "thiserror", + "wasm-bindgen", +] [[package]] name = "heck" @@ -1453,9 +1702,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425" dependencies = [ "bytes", "itoa", @@ -1498,18 +1747,18 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" dependencies = [ "typenum", ] [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -1715,7 +1964,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.17.0", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -1727,14 +1976,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] -name = "iri-string" -version = "0.7.12" +name = "is_terminal_polyfill" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" -dependencies = [ - "memchr", - "serde", -] +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -1763,13 +2008,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.95" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" dependencies = [ "cfg-if", "futures-util", - "once_cell", "wasm-bindgen", ] @@ -1779,6 +2023,16 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "574b0cd5e90ee2ba03a66d0611fc9a09c9a0c28b2ecc2dc8a181dd31a53ca5d7" +[[package]] +name = "lasso" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e14eda50a3494b3bf7b9ce51c52434a761e383d7238ce1dd5dcec2fbc13e9fb" +dependencies = [ + "dashmap", + "hashbrown 0.14.5", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1862,9 +2116,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" -version = "0.1.47" +version = "0.1.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1eacfa31c33ec25e873c136ba5669f00f9866d0688bea7be4d3f7e43067df6" +checksum = "6a45a52f43e1c16f667ccfe4dd8c85b7f7c204fd5e3bf46c5b0db9a5c3c0b8e9" dependencies = [ "cc", ] @@ -1898,9 +2152,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lru-slab" @@ -1980,14 +2234,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ "cfg-if", - "digest 0.11.2", + "digest 0.11.3", +] + +[[package]] +name = "mem_dbg" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728cc9dc97593cd22f7bc81fbef70a2d391d7a9a855e7d658b653318124a6cf0" +dependencies = [ + "bitflags", + "mem_dbg-derive", +] + +[[package]] +name = "mem_dbg-derive" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d84f40c93b0508d5565db79a814d02d5b2545967205ce44be211592aafa34d6c" +dependencies = [ + "proc-macro2", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" [[package]] name = "memmap2" @@ -2000,13 +2275,25 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.50" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3627c4272df786b9260cabaa46aec1d59c93ede723d4c3ef646c503816b0640" +checksum = "2d4139bb28d14ad1facf21d5eb8825051b326e172d216b39f6d31df53cc97862" dependencies = [ "libmimalloc-sys", ] +[[package]] +name = "minimum_redundancy" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ed9f799347e3fc3b0cc999332dbba53499551bbcf1070fcc12737645ac05b" +dependencies = [ + "binout", + "co_sort", + "dyn_size_of", + "fsum", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2019,15 +2306,21 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", "windows-sys 0.61.2", ] +[[package]] +name = "mownstr" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b33dce847b8623c1f2e473ed3a05e43d0c395e3b93fab62378b6ae94b0a1c42c" + [[package]] name = "ndarray" version = "0.17.2" @@ -2067,6 +2360,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "ntriple" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020fb7cf74ddf131e4ba84e13221d2493ae4d17cad3982a9158771442d6b0730" +dependencies = [ + "peg 0.5.7", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2112,7 +2414,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -2194,7 +2496,7 @@ dependencies = [ "itertools", "parking_lot", "percent-encoding", - "quick-xml 0.39.2", + "quick-xml 0.39.4", "rand 0.10.1", "reqwest", "ring", @@ -2216,6 +2518,12 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "openssl-probe" version = "0.2.1" @@ -2233,9 +2541,9 @@ dependencies = [ [[package]] name = "oxilangtag" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23f3f87617a86af77fa3691e6350483e7154c2ead9f1261b75130e21ca0f8acb" +checksum = "5d3b4eb570abd4a1dcb062c31fd37b832264d9dc7292c3e69acfe926c87b063f" dependencies = [ "serde", ] @@ -2349,11 +2657,26 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "peg" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40df12dde1d836ed2a4c3bfc2799797e3abaf807d97520d28d6e3f3bf41a5f85" +dependencies = [ + "quote 0.3.15", +] + [[package]] name = "peg" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9928cfca101b36ec5163e70049ee5368a8a1c3c6efc9ca9c5f9cc2f816152477" +checksum = "0aad070be5b63aa72103f2fcdd70a83adbd5e90112ce5b574171ff1c65501773" dependencies = [ "peg-macros", "peg-runtime", @@ -2361,20 +2684,20 @@ dependencies = [ [[package]] name = "peg-macros" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6298ab04c202fa5b5d52ba03269fb7b74550b150323038878fe6c372d8280f71" +checksum = "ddd8ef6825cae95355031ae26a99b616a2a21f22ba2de0197c43dfb05acbe7ee" dependencies = [ "peg-runtime", "proc-macro2", - "quote", + "quote 1.0.45", ] [[package]] name = "peg-runtime" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca" +checksum = "7011d97b484a5ebdc4b1fdb3b12d5e4bbbea56e9d22b688f2e79e04b65a7d8a6" [[package]] name = "percent-encoding" @@ -3031,7 +3354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "590b0a94aa8f97992d52f1198600ecc1c1f7cfa03c1b31cae057143455804ac0" dependencies = [ "argminmax", - "bincode", + "bincode 2.0.1", "bytemuck", "bytes", "compact_str", @@ -3151,7 +3474,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 1.0.109", ] @@ -3225,7 +3548,7 @@ checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -3238,7 +3561,7 @@ dependencies = [ "heck", "proc-macro2", "pyo3-build-config", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -3271,9 +3594,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.39.2" +version = "0.39.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" dependencies = [ "memchr", "serde", @@ -3344,6 +3667,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "quote" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" + [[package]] name = "quote" version = "1.0.45" @@ -3353,6 +3682,24 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "qwt" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1518389953b62e3b4bac17d8edc67e8291ec040523ab682479d8c706efc0b7b" +dependencies = [ + "bincode 1.3.3", + "clap", + "generic-tests", + "mem_dbg", + "minimum_redundancy", + "num-traits", + "paste", + "rand 0.8.6", + "serde", + "serde-big-array", +] + [[package]] name = "r-efi" version = "5.3.0" @@ -3508,7 +3855,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -3537,15 +3884,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] [[package]] name = "regex" -version = "1.12.3" +version = "1.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" dependencies = [ "aho-corasick", "memchr", @@ -3566,9 +3913,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" [[package]] name = "rend" @@ -3684,7 +4031,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 1.0.109", ] @@ -3709,9 +4056,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.41.0" +version = "1.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ce901f9a19d251159075a4c37af514c3b8ef99c22e02dd8c19161cf397ee94a" +checksum = "be2a24f50780bc85f09cc6ac299bdf1424302742d77221106859c9d8b102126a" dependencies = [ "arrayvec", "borsh", @@ -3754,9 +4101,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.39" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c2c118cb077cca2822033836dfb1b975355dfb784b5e8da48f7b6c5db74e60e" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "once_cell", "ring", @@ -3768,9 +4115,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -3886,6 +4233,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-big-array" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f" +dependencies = [ + "serde", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -3902,15 +4258,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -3950,7 +4306,7 @@ checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "digest 0.11.2", + "digest 0.11.3", ] [[package]] @@ -3993,9 +4349,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook" @@ -4047,9 +4403,9 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" [[package]] name = "slab" @@ -4068,9 +4424,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.15.1" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" [[package]] name = "snap" @@ -4080,9 +4436,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", @@ -4108,7 +4464,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", - "peg", + "peg 0.8.6", "rand 0.10.1", "thiserror", ] @@ -4146,7 +4502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "028e551d5e270b31b9f3ea271778d9d827148d4287a5d96167b6bb9787f5cc38" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4196,6 +4552,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "strum_macros" version = "0.27.2" @@ -4204,7 +4566,7 @@ checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ "heck", "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4221,7 +4583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "unicode-ident", ] @@ -4232,7 +4594,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "unicode-ident", ] @@ -4252,7 +4614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4289,7 +4651,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", - "peg", + "peg 0.8.6", "pyo3", "representation", "spargebra", @@ -4314,7 +4676,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4383,9 +4745,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.52.1" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "bytes", "libc", @@ -4403,7 +4765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4441,9 +4803,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.25.11+spec-1.1.0" +version = "0.25.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" dependencies = [ "indexmap", "toml_datetime", @@ -4477,20 +4839,20 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "bitflags", "bytes", "futures-util", "http", "http-body", - "iri-string", "pin-project-lite", "tower", "tower-layer", "tower-service", + "url", ] [[package]] @@ -4524,7 +4886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -4575,6 +4937,7 @@ dependencies = [ "cimxml_import", "file_io", "fts", + "hdt", "itoa", "memmap2", "ordered-float", @@ -4608,9 +4971,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "unicode-ident" @@ -4638,9 +5001,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.13.2" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" [[package]] name = "unicode-width" @@ -4690,6 +5053,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "utils" version = "0.1.0" @@ -4702,9 +5071,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -4814,9 +5183,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.118" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" dependencies = [ "cfg-if", "once_cell", @@ -4828,9 +5197,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.68" +version = "0.4.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf" dependencies = [ "js-sys", "wasm-bindgen", @@ -4838,32 +5207,32 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.118" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" dependencies = [ - "quote", + "quote 1.0.45", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.118" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" dependencies = [ "bumpalo", "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.118" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" dependencies = [ "unicode-ident", ] @@ -4917,9 +5286,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.95" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69" dependencies = [ "js-sys", "wasm-bindgen", @@ -5032,7 +5401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -5043,7 +5412,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] @@ -5281,9 +5650,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" dependencies = [ "memchr", ] @@ -5339,7 +5708,7 @@ dependencies = [ "anyhow", "prettyplease", "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", @@ -5405,9 +5774,9 @@ checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -5421,36 +5790,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] @@ -5462,7 +5831,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", "synstructure", ] @@ -5502,7 +5871,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", - "quote", + "quote 1.0.45", "syn 2.0.117", ] diff --git a/Cargo.toml b/Cargo.toml index 2090c4d7..a2808b1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,7 @@ peg = "0.8" rand = "0.10.1" oxilangtag = "0.1.5" fundu = "2.0.1" +hdt = { version = "0.6.0", default-features = false, features = ["nt"] } memmap2 = "0.9.5" sprs = "0.11.3" walkdir = "2.5.0" diff --git a/lib/maplib/src/model.rs b/lib/maplib/src/model.rs index 2a38adef..c8760683 100644 --- a/lib/maplib/src/model.rs +++ b/lib/maplib/src/model.rs @@ -473,6 +473,16 @@ impl Model { Ok(()) } + pub fn write_hdt( + &mut self, + buffer: &mut W, + graph: &NamedGraph, + ) -> Result<(), MaplibError> { + self.triplestore + .write_hdt(buffer, graph) + .map_err(MaplibError::TriplestoreError) + } + pub fn write_cim_xml( &mut self, buffer: &mut W, diff --git a/lib/triplestore/Cargo.toml b/lib/triplestore/Cargo.toml index 3c0c0410..e1b68669 100644 --- a/lib/triplestore/Cargo.toml +++ b/lib/triplestore/Cargo.toml @@ -29,6 +29,7 @@ uuid.workspace = true thiserror.workspace = true oxrdfio.workspace = true oxttl.workspace = true +hdt.workspace = true memmap2.workspace = true sparesults.workspace = true tracing.workspace = true diff --git a/lib/triplestore/src/errors.rs b/lib/triplestore/src/errors.rs index 5c92775f..4dc0ff7b 100644 --- a/lib/triplestore/src/errors.rs +++ b/lib/triplestore/src/errors.rs @@ -60,6 +60,8 @@ pub enum TriplestoreError { DecodeError(PolarsError), #[error("Error collecting lazy triples {0}")] LazyLoadError(PolarsError), + #[error("HDT error: {0}")] + HDTError(String), } impl From> for TriplestoreError { diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs index 5503b317..80edd4ad 100644 --- a/lib/triplestore/src/triples_read.rs +++ b/lib/triplestore/src/triples_read.rs @@ -4,8 +4,9 @@ use crate::TriplesToAdd; use std::cmp; use cimxml_import::{fix_cim_quad, Remapper}; +use hdt::Hdt; use memmap2::MmapOptions; -use oxrdf::{BlankNode, GraphName, NamedNode, NamedOrBlankNode, Quad, Term, Triple}; +use oxrdf::{BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Term, Triple}; use oxrdfio::{ JsonLdProfileSet, LoadedDocument, RdfFormat, RdfParser, RdfSyntaxError, SliceQuadParser, }; @@ -25,7 +26,10 @@ use representation::{ use representation::{OBJECT_COL_NAME, SUBJECT_COL_NAME}; use std::collections::HashMap; use std::fs::File; +use std::io::Cursor; use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; use std::time::Instant; use tracing::{debug, instrument}; @@ -35,6 +39,7 @@ const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; pub enum ExtendedRdfFormat { Normal(RdfFormat), CIMXML, + HDT, } impl Triplestore { @@ -70,6 +75,8 @@ impl Triplestore { ExtendedRdfFormat::Normal(RdfFormat::JsonLd { profile: JsonLdProfileSet::empty(), }) + } else if path.extension() == Some("hdt".as_ref()) { + ExtendedRdfFormat::HDT } else { todo!("Have not implemented file format {:?}", path); }; @@ -152,84 +159,23 @@ impl Triplestore { } else { matches!(rdf_format, ExtendedRdfFormat::Normal(RdfFormat::NTriples)) }; - let mut readers = if matches!( - rdf_format, - ExtendedRdfFormat::Normal(RdfFormat::NTriples) - | ExtendedRdfFormat::Normal(RdfFormat::Turtle) - ) && parallel - { - let threads = if let Ok(threads) = std::thread::available_parallelism() { - threads.get() - } else { - 1 - }; - - let mut readers = vec![]; - if rdf_format == ExtendedRdfFormat::Normal(RdfFormat::Turtle) { - let mut parser = TurtleParser::new(); - for (k, v) in prefixes { - parser = parser.with_prefix(k, v.as_str()).unwrap(); - } - if !checked { - parser = parser.lenient(); - } - if let Some(base_iri) = base_iri { - parser = parser.with_base_iri(base_iri).unwrap(); - } - for r in parser.split_slice_for_parallel_parsing(use_slice, threads) { - readers.push(MyFromSliceQuadReader { - parser: MyFromSliceQuadReaderKind::TurtlePar(r), - }); - } - } else if rdf_format == ExtendedRdfFormat::Normal(RdfFormat::NTriples) { - let mut parser = NTriplesParser::new(); - if !checked { - parser = parser.lenient(); - } - for r in parser.split_slice_for_parallel_parsing(use_slice, threads) { - readers.push(MyFromSliceQuadReader { - parser: MyFromSliceQuadReaderKind::NTriplesPar(r), - }); - } - } - readers + let hdt; + let mut readers = if rdf_format == ExtendedRdfFormat::HDT { + hdt = Hdt::read(Cursor::new(use_slice)) + .map_err(|e| TriplestoreError::HDTError(e.to_string()))?; + vec![hdt_reader(&hdt)] + } else if parallel && rdf_format == ExtendedRdfFormat::Normal(RdfFormat::Turtle) { + parallel_turtle_readers(use_slice, prefixes, checked, base_iri) + } else if parallel && rdf_format == ExtendedRdfFormat::Normal(RdfFormat::NTriples) { + parallel_ntriples_readers(use_slice, checked) } else { - let use_format = match rdf_format { - ExtendedRdfFormat::Normal(n) => n, - ExtendedRdfFormat::CIMXML => RdfFormat::RdfXml, - }; - let mut parser = RdfParser::from(use_format.clone()); - if !checked { - parser = parser.lenient(); - } - if let Some(base_iri) = &base_iri { - parser = parser.with_base_iri(base_iri).unwrap(); - } - let mut for_slice = parser.for_slice(use_slice); - if matches!(use_format, RdfFormat::JsonLd { .. }) { - for_slice = for_slice.with_document_loader(move |url| { - if let Some(doc) = known_contexts.get(url) { - Ok(LoadedDocument { - url: url.to_string(), - content: doc.clone().into_bytes(), - format: RdfFormat::JsonLd { - profile: JsonLdProfileSet::empty(), - }, - }) - } else { - Err(Box::new(TriplestoreError::MissingContext(url.to_string()))) - } - }); - } - if matches!(rdf_format, ExtendedRdfFormat::CIMXML) { - vec![MyFromSliceQuadReader { - parser: MyFromSliceQuadReaderKind::CIMXML(for_slice, base_iri.clone()), - }] - } else { - vec![MyFromSliceQuadReader { - parser: MyFromSliceQuadReaderKind::Other(for_slice), - }] - } + vec![rdf_parser_reader( + use_slice, + &rdf_format, + checked, + base_iri, + known_contexts, + )] }; debug!("Effective parallelization for reading is {}", readers.len()); @@ -508,6 +454,101 @@ fn create_predicate_map<'a>( Ok((out_r, graph_predicate_map)) } +fn hdt_reader(hdt: &Hdt) -> MyFromSliceQuadReader<'_> { + MyFromSliceQuadReader { + parser: MyFromSliceQuadReaderKind::HDT(Box::new( + hdt.triples_all().map(hdt_string_triple_to_quad), + )), + } +} + +fn parallel_turtle_readers<'a>( + slice: &'a [u8], + prefixes: &HashMap, + checked: bool, + base_iri: Option, +) -> Vec> { + let threads = std::thread::available_parallelism() + .map(|t| t.get()) + .unwrap_or(1); + let mut parser = TurtleParser::new(); + for (k, v) in prefixes { + parser = parser.with_prefix(k, v.as_str()).unwrap(); + } + if !checked { + parser = parser.lenient(); + } + if let Some(base_iri) = base_iri { + parser = parser.with_base_iri(base_iri).unwrap(); + } + parser + .split_slice_for_parallel_parsing(slice, threads) + .into_iter() + .map(|r| MyFromSliceQuadReader { + parser: MyFromSliceQuadReaderKind::TurtlePar(r), + }) + .collect() +} + +fn parallel_ntriples_readers(slice: &[u8], checked: bool) -> Vec> { + let threads = std::thread::available_parallelism() + .map(|t| t.get()) + .unwrap_or(1); + let mut parser = NTriplesParser::new(); + if !checked { + parser = parser.lenient(); + } + parser + .split_slice_for_parallel_parsing(slice, threads) + .into_iter() + .map(|r| MyFromSliceQuadReader { + parser: MyFromSliceQuadReaderKind::NTriplesPar(r), + }) + .collect() +} + +fn rdf_parser_reader<'a>( + slice: &'a [u8], + rdf_format: &ExtendedRdfFormat, + checked: bool, + base_iri: Option, + known_contexts: HashMap, +) -> MyFromSliceQuadReader<'a> { + let use_format = match rdf_format { + ExtendedRdfFormat::Normal(n) => *n, + ExtendedRdfFormat::CIMXML => RdfFormat::RdfXml, + ExtendedRdfFormat::HDT => unreachable!("HDT is handled in read_triples"), + }; + let mut parser = RdfParser::from(use_format); + if !checked { + parser = parser.lenient(); + } + if let Some(base_iri) = &base_iri { + parser = parser.with_base_iri(base_iri).unwrap(); + } + let mut for_slice = parser.for_slice(slice); + if matches!(use_format, RdfFormat::JsonLd { .. }) { + for_slice = for_slice.with_document_loader(move |url| { + let Some(doc) = known_contexts.get(url) else { + return Err(Box::new(TriplestoreError::MissingContext(url.to_string()))); + }; + Ok(LoadedDocument { + url: url.to_string(), + content: doc.clone().into_bytes(), + format: RdfFormat::JsonLd { + profile: JsonLdProfileSet::empty(), + }, + }) + }); + } + let parser = if matches!(rdf_format, ExtendedRdfFormat::CIMXML) { + MyFromSliceQuadReaderKind::CIMXML(for_slice, base_iri) + } else { + MyFromSliceQuadReaderKind::Other(for_slice) + }; + MyFromSliceQuadReader { parser } +} + //Adapted from proposed change to https://github.com/oxigraph/ #[must_use] pub struct MyFromSliceQuadReader<'a> { @@ -519,6 +560,7 @@ pub enum MyFromSliceQuadReaderKind<'a> { CIMXML(SliceQuadParser<'a>, Option), TurtlePar(SliceTurtleParser<'a>), NTriplesPar(SliceNTriplesParser<'a>), + HDT(Box + Send + 'a>), } impl Iterator for MyFromSliceQuadReader<'_> { @@ -545,10 +587,39 @@ impl Iterator for MyFromSliceQuadReader<'_> { Ok(triple) => Ok(triple.in_graph(GraphName::default())), Err(e) => Err(e.into()), }, + MyFromSliceQuadReaderKind::HDT(iter) => Ok(iter.next()?), }) } } +fn hdt_string_triple_to_quad(t: [Arc; 3]) -> Quad { + let [s, p, o] = t; + let subject = if let Some(label) = s.strip_prefix("_:") { + NamedOrBlankNode::BlankNode(BlankNode::new_unchecked(label)) + } else { + NamedOrBlankNode::NamedNode(NamedNode::new_unchecked(s.as_ref())) + }; + let predicate = NamedNode::new_unchecked(p.as_ref()); + Quad::new( + subject, + predicate, + hdt_object_string_to_term(&o), + GraphName::DefaultGraph, + ) +} + +fn hdt_object_string_to_term(o: &str) -> Term { + if o.starts_with('"') { + return Literal::from_str(o) + .unwrap_or_else(|_| Literal::new_simple_literal(o)) + .into(); + } + if let Some(label) = o.strip_prefix("_:") { + return BlankNode::new_unchecked(label).into(); + } + NamedNode::new_unchecked(o).into() +} + fn get_or_insert_dt( base_rdfnode_type_ref: BaseRDFNodeTypeRef, type_map: &mut HashMap, diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index 619bec6f..6ede13d6 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -1,5 +1,6 @@ use super::Triplestore; use crate::errors::TriplestoreError; +use hdt::Hdt; use oxrdf::NamedNode; use oxrdfio::{RdfFormat, RdfSerializer}; use polars::prelude::{by_name, col}; @@ -15,7 +16,9 @@ use representation::{ LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME, SUBJECT_COL_NAME, }; use std::collections::HashMap; -use std::io::Write; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; mod fast_ntriples; mod pretty_turtle; @@ -132,6 +135,38 @@ impl Triplestore { Ok(()) } + pub fn write_hdt( + &mut self, + buf: &mut W, + graph: &NamedGraph, + ) -> Result<(), TriplestoreError> { + self.check_graph_exists(graph)?; + // The hdt crate can only build an HDT dataset from an N-Triples file, + // so serialize to a temporary file first and convert it. + let tmp_path = std::env::temp_dir().join(format!("maplib_hdt_{}.nt", uuid::Uuid::new_v4())); + let _cleanup = RemoveFileOnDrop(tmp_path.clone()); + self.write_hdt_via_ntriples(buf, graph, &tmp_path) + } + + fn write_hdt_via_ntriples( + &mut self, + buf: &mut W, + graph: &NamedGraph, + tmp_path: &Path, + ) -> Result<(), TriplestoreError> { + let file = File::create(tmp_path).map_err(|e| TriplestoreError::HDTError(e.to_string()))?; + let mut writer = BufWriter::new(file); + self.write_triples(&mut writer, RdfFormat::NTriples, graph, &HashMap::new())?; + writer + .into_inner() + .map_err(|e| TriplestoreError::HDTError(e.to_string()))?; + let hdt = Hdt::read_nt(tmp_path).map_err(|e| TriplestoreError::HDTError(e.to_string()))?; + let mut buf = BufWriter::new(buf); + hdt.write(&mut buf) + .map_err(|e| TriplestoreError::HDTError(e.to_string()))?; + Ok(()) + } + pub(crate) fn check_graph_exists(&self, graph: &NamedGraph) -> Result<(), TriplestoreError> { if !self.graph_triples_map.contains_key(graph) { Err(TriplestoreError::GraphDoesNotExist(graph.to_string())) @@ -141,6 +176,14 @@ impl Triplestore { } } +struct RemoveFileOnDrop(std::path::PathBuf); + +impl Drop for RemoveFileOnDrop { + fn drop(&mut self) { + let _ = std::fs::remove_file(&self.0); + } +} + pub fn convert_datelike_to_string(df: &mut DataFrame, c: &str) { match df.column(c).unwrap().dtype() { DataType::Date => { diff --git a/py_maplib/maplib/__init__.pyi b/py_maplib/maplib/__init__.pyi index b3260b0f..8941b1d1 100644 --- a/py_maplib/maplib/__init__.pyi +++ b/py_maplib/maplib/__init__.pyi @@ -828,7 +828,7 @@ class Model: def read( self, file_path: Union[str, Path], - format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld"] = None, + format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld", "hdt"] = None, base_iri: str = None, transient: bool = False, parallel: bool = None, @@ -849,7 +849,7 @@ class Model: >>> m.read("my_triples.ttl") :param file_path: The path of the file containing triples - :param format: One of "ntriples", "turtle", "rdf/xml", "json-ld" or "cim/xml", otherwise it is inferred from the file extension. + :param format: One of "ntriples", "turtle", "rdf/xml", "json-ld", "cim/xml" or "hdt", otherwise it is inferred from the file extension. :param base_iri: Base iri :param transient: Should these triples be included when writing the graph to the file system? :param parallel: Parse triples in parallel, currently only NTRiples and Turtle. Assumes all prefixes are in the beginning of the document. Defaults to true only for NTriples. @@ -954,7 +954,7 @@ class Model: def write( self, file_path: Union[str, Path], - format=LiteralType["ntriples", "turtle", "rdf/xml"], + format=LiteralType["ntriples", "turtle", "rdf/xml", "hdt"], graph: str = None, prefixes: Dict[str, str] = None, ) -> None: @@ -966,7 +966,8 @@ class Model: >>> m.write("my_triples.nt", format="ntriples") :param file_path: The path of the file containing triples - :param format: One of "ntriples", "turtle", "rdf/xml". + :param format: One of "ntriples", "turtle", "rdf/xml", "hdt". + HDT is written via a temporary N-Triples file; literals with special characters are stored N-Triples-escaped, following the Rust hdt crate. :param graph: The IRI of the graph to write. :param prefixes: The prefixes that will be used in turtle serialization. """ @@ -1075,8 +1076,8 @@ class Model: def size(self, graph:str=None) -> int: """ - Get the number of triples in a graph. - + Get the number of triples in a graph. + :param graph: The named graph we are returning the size for :return: The inferred N-Tuples. """ @@ -1144,4 +1145,4 @@ class VirtualizedDatabase: :param:database: An instance of a class containing a query method. :param:resource_sql_map: A dict providing a sqlalchemy Select for each resource. :param:sql_dialect: The SQL dialect accepted by the query method. - """ \ No newline at end of file + """ diff --git a/py_maplib/src/lib.rs b/py_maplib/src/lib.rs index ad01c3b4..5097f647 100644 --- a/py_maplib/src/lib.rs +++ b/py_maplib/src/lib.rs @@ -1549,6 +1549,12 @@ fn reads_mutex( let graph = parse_optional_named_node(graph)?; let named_graph = NamedGraph::from_maybe_named_node(graph.as_ref()); let format = resolve_format(format).map_err(PyMaplibError::from)?; + if format == ExtendedRdfFormat::HDT { + return Err(PyMaplibError::FunctionArgumentError( + "HDT is a binary format, use read() instead of reads()".to_string(), + ) + .into()); + } inner .reads( s, @@ -1574,18 +1580,34 @@ fn write_triples_mutex( prefixes: Option>, ) -> PyResult<()> { let format = if let Some(format) = format { - resolve_normal_format(&format).map_err(PyMaplibError::from)? + resolve_format(&format).map_err(PyMaplibError::from)? } else { - RdfFormat::NTriples + ExtendedRdfFormat::Normal(RdfFormat::NTriples) }; + if format == ExtendedRdfFormat::CIMXML { + return Err(PyMaplibError::FunctionArgumentError( + "Use write_cim_xml to write CIM XML".to_string(), + ) + .into()); + } let path_buf = PathBuf::from(file_path); let mut actual_file = File::create(path_buf.as_path()) .map_err(|x| PyMaplibError::from(MaplibError::FileCreateIOError(x)))?; let graph = parse_optional_named_node(graph)?; let named_graph = NamedGraph::from_maybe_named_node(graph.as_ref()); - inner - .write_triples(&mut actual_file, &named_graph, format, prefixes.as_ref()) - .unwrap(); + match format { + ExtendedRdfFormat::Normal(format) => { + inner + .write_triples(&mut actual_file, &named_graph, format, prefixes.as_ref()) + .unwrap(); + } + ExtendedRdfFormat::HDT => { + inner + .write_hdt(&mut actual_file, &named_graph) + .map_err(PyMaplibError::from)?; + } + ExtendedRdfFormat::CIMXML => unreachable!("rejected above"), + } Ok(()) } @@ -1657,6 +1679,12 @@ fn writes_mutex( prefixes: Option>, ) -> PyResult { let format = if let Some(format) = format { + if format.eq_ignore_ascii_case("hdt") { + return Err(PyMaplibError::FunctionArgumentError( + "HDT is a binary format, use write() instead of writes()".to_string(), + ) + .into()); + } resolve_normal_format(&format).map_err(PyMaplibError::from)? } else { RdfFormat::NTriples @@ -1928,6 +1956,7 @@ fn resolve_normal_format(format: &str) -> Result { fn resolve_format(format: &str) -> Result { match format.to_lowercase().as_str() { "cim" | "cim/xml" | "cimxml" => Ok(ExtendedRdfFormat::CIMXML), + "hdt" => Ok(ExtendedRdfFormat::HDT), _ => match resolve_normal_format(format) { Ok(o) => Ok(ExtendedRdfFormat::Normal(o)), Err(e) => Err(e), diff --git a/py_maplib/tests/test_hdt.py b/py_maplib/tests/test_hdt.py new file mode 100644 index 00000000..495b257c --- /dev/null +++ b/py_maplib/tests/test_hdt.py @@ -0,0 +1,171 @@ +import pathlib + +import polars as pl +import pytest +import rdflib +from rdflib.compare import isomorphic + +from maplib import Model + +pl.Config.set_fmt_str_lengths(300) + +PATH_HERE = pathlib.Path(__file__).parent +TESTDATA_PATH = PATH_HERE / "testdata" + + +def model_as_rdflib_graph(m: Model) -> rdflib.Graph: + g = rdflib.Graph() + g.parse(data=m.writes(format="ntriples"), format="nt") + return g + + +def test_write_read_hdt_round_trip_is_isomorphic(tmp_path): + m = Model() + m.read(str(TESTDATA_PATH / "read_ntriples.nt")) + hdt_path = tmp_path / "out.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path), format="hdt") + + assert isomorphic(model_as_rdflib_graph(m), model_as_rdflib_graph(m2)) + + +def test_read_hdt_infers_format_from_file_extension(tmp_path): + m = Model() + m.read(str(TESTDATA_PATH / "read_ntriples.nt")) + hdt_path = tmp_path / "out.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path)) + res = m2.query( + """ + SELECT ?v ?o WHERE { + ?s ?v ?o . + } + """ + ) + assert res.height == 8 + + +def test_hdt_round_trip_preserves_typed_and_language_literals(tmp_path): + m = Model() + m.reads( + """ + "plain" . + "hello"@en . + "42"^^ . + "1.5"^^ . + "line1\\nline2 \\"quoted\\"" . + . + """, + format="ntriples", + ) + hdt_path = tmp_path / "literals.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path)) + + assert isomorphic(model_as_rdflib_graph(m), model_as_rdflib_graph(m2)) + + +def test_empty_graph_hdt_round_trip(tmp_path): + m = Model() + m.reads(" .", format="ntriples") + m.update("DELETE WHERE { ?s ?p ?o }") + hdt_path = tmp_path / "empty.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path), format="hdt") + res = m2.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }") + assert res.height == 0 + + +def test_writes_with_hdt_format_raises_since_hdt_is_binary(): + m = Model() + m.reads(" .", format="ntriples") + with pytest.raises(Exception, match="binary"): + m.writes(format="hdt") + + +def test_reads_with_hdt_format_raises_since_hdt_is_binary(): + m = Model() + with pytest.raises(Exception, match="binary"): + m.reads("anything", format="hdt") + + +def test_hdt_round_trip_preserves_object_position_blank_nodes(tmp_path): + m = Model() + m.reads( + """ + _:b1 . + _:b1 _:b2 . + _:b2 "leaf" . + """, + format="ntriples", + ) + hdt_path = tmp_path / "blanks.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path)) + + assert isomorphic(model_as_rdflib_graph(m), model_as_rdflib_graph(m2)) + + +def test_hdt_round_trip_preserves_backslashes_in_literals(tmp_path): + m = Model() + m.reads( + " \"C:\\\\new\\\\path\" .", + format="ntriples", + ) + hdt_path = tmp_path / "backslash.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path)) + res = m2.query("SELECT ?o WHERE { ?s ?p ?o }") + assert res.get_column("o").to_list() == ["C:\\new\\path"] + + +def test_reading_truncated_hdt_file_raises_normal_exception(tmp_path): + m = Model() + m.read(str(TESTDATA_PATH / "read_ntriples.nt")) + hdt_path = tmp_path / "out.hdt" + m.write(str(hdt_path), format="hdt") + + content = hdt_path.read_bytes() + truncated_path = tmp_path / "truncated.hdt" + truncated_path.write_bytes(content[: len(content) // 2]) + + m2 = Model() + with pytest.raises(Exception, match="HDT"): + m2.read(str(truncated_path)) + + +def test_write_cim_xml_format_does_not_truncate_existing_file(tmp_path): + m = Model() + m.reads(" .", format="ntriples") + out_path = tmp_path / "out.xml" + out_path.write_text("precious") + with pytest.raises(Exception, match="write_cim_xml"): + m.write(str(out_path), format="cim/xml") + assert out_path.read_text() == "precious" + + +def test_hdt_round_trip_preserves_language_tags(tmp_path): + m = Model() + m.reads( + ' "hello"@en-US .', + format="ntriples", + ) + hdt_path = tmp_path / "lang.hdt" + m.write(str(hdt_path), format="hdt") + + m2 = Model() + m2.read(str(hdt_path)) + + assert m.writes(format="ntriples").strip() == m2.writes(format="ntriples").strip() From e6cf67fd1e4720929d4bb1064303e6dde55986c7 Mon Sep 17 00:00:00 2001 From: Chris Kaczmarek Date: Fri, 12 Jun 2026 14:54:35 +0200 Subject: [PATCH 2/2] feat: build HDT in memory when writing, without a temporary file --- lib/triplestore/src/triples_write.rs | 56 ++--- .../src/triples_write/hdt_write.rs | 206 ++++++++++++++++++ py_maplib/maplib/__init__.pyi | 2 +- 3 files changed, 227 insertions(+), 37 deletions(-) create mode 100644 lib/triplestore/src/triples_write/hdt_write.rs diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index 6ede13d6..cc6b05d9 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -1,6 +1,5 @@ use super::Triplestore; use crate::errors::TriplestoreError; -use hdt::Hdt; use oxrdf::NamedNode; use oxrdfio::{RdfFormat, RdfSerializer}; use polars::prelude::{by_name, col}; @@ -16,11 +15,10 @@ use representation::{ LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME, SUBJECT_COL_NAME, }; use std::collections::HashMap; -use std::fs::File; -use std::io::{BufWriter, Write}; -use std::path::Path; +use std::io::Write; mod fast_ntriples; +mod hdt_write; mod pretty_turtle; mod serializers; @@ -141,30 +139,24 @@ impl Triplestore { graph: &NamedGraph, ) -> Result<(), TriplestoreError> { self.check_graph_exists(graph)?; - // The hdt crate can only build an HDT dataset from an N-Triples file, - // so serialize to a temporary file first and convert it. - let tmp_path = std::env::temp_dir().join(format!("maplib_hdt_{}.nt", uuid::Uuid::new_v4())); - let _cleanup = RemoveFileOnDrop(tmp_path.clone()); - self.write_hdt_via_ntriples(buf, graph, &tmp_path) - } - - fn write_hdt_via_ntriples( - &mut self, - buf: &mut W, - graph: &NamedGraph, - tmp_path: &Path, - ) -> Result<(), TriplestoreError> { - let file = File::create(tmp_path).map_err(|e| TriplestoreError::HDTError(e.to_string()))?; - let mut writer = BufWriter::new(file); - self.write_triples(&mut writer, RdfFormat::NTriples, graph, &HashMap::new())?; - writer - .into_inner() - .map_err(|e| TriplestoreError::HDTError(e.to_string()))?; - let hdt = Hdt::read_nt(tmp_path).map_err(|e| TriplestoreError::HDTError(e.to_string()))?; - let mut buf = BufWriter::new(buf); - hdt.write(&mut buf) - .map_err(|e| TriplestoreError::HDTError(e.to_string()))?; - Ok(()) + let mut builder = hdt_write::HdtBuilder::new(); + for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { + for ((subject_type, object_type), tt) in df_map { + for (lf, _) in tt.get_lazy_frames(&None, &None)? { + let triples = global_df_as_triples( + lf.collect().unwrap(), + subject_type.clone(), + object_type.clone(), + predicate, + self.global_cats.clone(), + ); + for t in &triples { + builder.add_triple(t); + } + } + } + } + builder.finish(buf) } pub(crate) fn check_graph_exists(&self, graph: &NamedGraph) -> Result<(), TriplestoreError> { @@ -176,14 +168,6 @@ impl Triplestore { } } -struct RemoveFileOnDrop(std::path::PathBuf); - -impl Drop for RemoveFileOnDrop { - fn drop(&mut self) { - let _ = std::fs::remove_file(&self.0); - } -} - pub fn convert_datelike_to_string(df: &mut DataFrame, c: &str) { match df.column(c).unwrap().dtype() { DataType::Date => { diff --git a/lib/triplestore/src/triples_write/hdt_write.rs b/lib/triplestore/src/triples_write/hdt_write.rs new file mode 100644 index 00000000..da6d2fec --- /dev/null +++ b/lib/triplestore/src/triples_write/hdt_write.rs @@ -0,0 +1,206 @@ +use crate::errors::TriplestoreError; +use hdt::containers::rdf::{Id, Literal as HdtLiteral, Term as HdtTerm, Triple as HdtTriple}; +use hdt::containers::ControlInfo; +use hdt::dict_sect_pfc::DictSectPFC; +use hdt::four_sect_dict::FourSectDict; +use hdt::header::Header; +use hdt::triples::{TripleId, TriplesBitmap}; +use hdt::IdKind; +use oxrdf::{NamedOrBlankNode, Term, Triple}; +use std::collections::hash_map::Entry; +use std::collections::{BTreeSet, HashMap}; +use std::io::{BufWriter, Write}; + +const BLOCK_SIZE: usize = 16; +const BASE_IRI: &str = "https://github.com/DataTreehouse/maplib"; + +pub(super) struct HdtBuilder { + term_ids: HashMap, + roles: Vec, + triples: Vec<[usize; 3]>, +} + +#[derive(Default, Clone, Copy)] +struct TermRoles { + subject: bool, + predicate: bool, + object: bool, +} + +impl HdtBuilder { + pub(super) fn new() -> Self { + Self { + term_ids: HashMap::new(), + roles: Vec::new(), + triples: Vec::new(), + } + } + + pub(super) fn add_triple(&mut self, t: &Triple) { + let s = self.intern(subject_dict_string(&t.subject)); + let p = self.intern(t.predicate.as_str().to_owned()); + let o = self.intern(object_dict_string(&t.object)); + self.roles[s].subject = true; + self.roles[p].predicate = true; + self.roles[o].object = true; + self.triples.push([s, p, o]); + } + + fn intern(&mut self, term: String) -> usize { + let next_id = self.term_ids.len(); + match self.term_ids.entry(term) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + e.insert(next_id); + self.roles.push(TermRoles::default()); + next_id + } + } + } + + pub(super) fn finish(self, buf: &mut W) -> Result<(), TriplestoreError> { + let dict = self.build_dict(); + let encoded = self.encode_triples(&dict)?; + let triples = TriplesBitmap::from_triples(&encoded); + let header = statistics_header(&dict, encoded.len()); + + let mut writer = BufWriter::new(buf); + ControlInfo::global() + .write(&mut writer) + .map_err(hdt_error)?; + header.write(&mut writer).map_err(hdt_error)?; + dict.write(&mut writer).map_err(hdt_error)?; + triples.write(&mut writer).map_err(hdt_error)?; + writer.flush().map_err(hdt_error)?; + Ok(()) + } + + fn build_dict(&self) -> FourSectDict { + let mut shared = BTreeSet::new(); + let mut subjects = BTreeSet::new(); + let mut predicates = BTreeSet::new(); + let mut objects = BTreeSet::new(); + for (term, &id) in &self.term_ids { + let roles = &self.roles[id]; + if roles.predicate { + predicates.insert(term.as_str()); + } + if roles.subject && roles.object { + shared.insert(term.as_str()); + } else if roles.subject { + subjects.insert(term.as_str()); + } else if roles.object { + objects.insert(term.as_str()); + } + } + FourSectDict { + shared: DictSectPFC::compress(&shared, BLOCK_SIZE), + subjects: DictSectPFC::compress(&subjects, BLOCK_SIZE), + predicates: DictSectPFC::compress(&predicates, BLOCK_SIZE), + objects: DictSectPFC::compress(&objects, BLOCK_SIZE), + } + } + + fn encode_triples(&self, dict: &FourSectDict) -> Result, TriplestoreError> { + let mut term_by_id = vec![""; self.term_ids.len()]; + for (term, &id) in &self.term_ids { + term_by_id[id] = term.as_str(); + } + let mut encoded = Vec::with_capacity(self.triples.len()); + for [s, p, o] in &self.triples { + let triple_id: TripleId = [ + dict.string_to_id(term_by_id[*s], IdKind::Subject), + dict.string_to_id(term_by_id[*p], IdKind::Predicate), + dict.string_to_id(term_by_id[*o], IdKind::Object), + ]; + if triple_id.contains(&0) { + return Err(TriplestoreError::HDTError(format!( + "term of ({}, {}, {}) missing from the HDT dictionary", + term_by_id[*s], term_by_id[*p], term_by_id[*o] + ))); + } + encoded.push(triple_id); + } + encoded.sort_unstable(); + encoded.dedup(); + Ok(encoded) + } +} + +fn subject_dict_string(subject: &NamedOrBlankNode) -> String { + match subject { + NamedOrBlankNode::NamedNode(nn) => nn.as_str().to_owned(), + NamedOrBlankNode::BlankNode(bn) => bn.to_string(), + } +} + +fn object_dict_string(object: &Term) -> String { + match object { + Term::NamedNode(nn) => nn.as_str().to_owned(), + Term::BlankNode(bn) => bn.to_string(), + Term::Literal(lit) => lit.to_string(), + } +} + +fn statistics_header(dict: &FourSectDict, num_triples: usize) -> Header { + use hdt::vocab::*; + + let mut body = BTreeSet::new(); + let base = Id::Named(BASE_IRI.to_owned()); + let stats_id = Id::Blank("statistics".to_owned()); + let pub_id = Id::Blank("publicationInformation".to_owned()); + let format_id = Id::Blank("format".to_owned()); + let dict_id = Id::Blank("dictionary".to_owned()); + let triples_id = Id::Blank("triples".to_owned()); + + let distinct_subjects = dict.subjects.num_strings() + dict.shared.num_strings(); + let distinct_objects = dict.objects.num_strings() + dict.shared.num_strings(); + + insert_literal(&mut body, &base, RDF_TYPE, HDT_CONTAINER); + insert_literal(&mut body, &base, RDF_TYPE, VOID_DATASET); + insert_literal(&mut body, &base, VOID_TRIPLES, num_triples); + insert_literal(&mut body, &base, VOID_PROPERTIES, dict.predicates.num_strings()); + insert_literal(&mut body, &base, VOID_DISTINCT_SUBJECTS, distinct_subjects); + insert_literal(&mut body, &base, VOID_DISTINCT_OBJECTS, distinct_objects); + + insert_id(&mut body, &base, HDT_STATISTICAL_INFORMATION, &stats_id); + insert_id(&mut body, &base, HDT_STATISTICAL_INFORMATION, &pub_id); + insert_id(&mut body, &base, HDT_FORMAT_INFORMATION, &format_id); + insert_id(&mut body, &format_id, HDT_DICTIONARY, &dict_id); + insert_id(&mut body, &format_id, HDT_TRIPLES, &triples_id); + + insert_literal(&mut body, &dict_id, HDT_DICT_SHARED_SO, dict.shared.num_strings()); + insert_literal(&mut body, &dict_id, HDT_DICT_MAPPING, "1"); + insert_literal(&mut body, &dict_id, HDT_DICT_SIZE_STRINGS, dict.size_in_bytes()); + insert_literal(&mut body, &dict_id, HDT_DICT_BLOCK_SIZE, BLOCK_SIZE); + + insert_literal(&mut body, &triples_id, DC_TERMS_FORMAT, HDT_TYPE_BITMAP); + insert_literal(&mut body, &triples_id, HDT_NUM_TRIPLES, num_triples); + insert_literal(&mut body, &triples_id, HDT_TRIPLES_ORDER, "SPO"); + + let mut serialized_body = Vec::new(); + for triple in &body { + writeln!(serialized_body, "{triple}").unwrap(); + } + Header { + format: "ntriples".to_owned(), + length: serialized_body.len(), + body, + } +} + +fn insert_literal(body: &mut BTreeSet, s: &Id, p: &str, o: impl ToString) { + body.insert(HdtTriple::new( + s.clone(), + p.to_owned(), + HdtTerm::Literal(HdtLiteral::new(o.to_string())), + )); +} + +fn insert_id(body: &mut BTreeSet, s: &Id, p: &str, o: &Id) { + body.insert(HdtTriple::new(s.clone(), p.to_owned(), HdtTerm::Id(o.clone()))); +} + +fn hdt_error(e: impl std::fmt::Display) -> TriplestoreError { + TriplestoreError::HDTError(e.to_string()) +} diff --git a/py_maplib/maplib/__init__.pyi b/py_maplib/maplib/__init__.pyi index 8941b1d1..e22da8d5 100644 --- a/py_maplib/maplib/__init__.pyi +++ b/py_maplib/maplib/__init__.pyi @@ -967,7 +967,7 @@ class Model: :param file_path: The path of the file containing triples :param format: One of "ntriples", "turtle", "rdf/xml", "hdt". - HDT is written via a temporary N-Triples file; literals with special characters are stored N-Triples-escaped, following the Rust hdt crate. + HDT is built in memory; literals with special characters are stored N-Triples-escaped, following the Rust hdt crate. :param graph: The IRI of the graph to write. :param prefixes: The prefixes that will be used in turtle serialization. """