diff --git a/.gitignore b/.gitignore index 2ef8385..9b38ae9 100644 --- a/.gitignore +++ b/.gitignore @@ -67,4 +67,6 @@ Thumbs.db # Project specific build/ -*.log \ No newline at end of file +*.log +cuik_molmaker/ +scratch/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 10a129f..e6a40de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,6 +202,7 @@ add_library(cuik_molmaker_core SHARED src/features.cpp src/float_features.cpp src/one_hot.cpp + src/reaction_features.cpp ) # Add include directories for core library diff --git a/src/cuik_molmaker_cpp.cpp b/src/cuik_molmaker_cpp.cpp index d0d5481..5540a15 100644 --- a/src/cuik_molmaker_cpp.cpp +++ b/src/cuik_molmaker_cpp.cpp @@ -64,4 +64,31 @@ PYBIND11_MODULE(cuik_molmaker_cpp, m) { m.def("list_all_atom_float_features", &list_all_atom_float_features, "Returns a list of all atom float features."); m.def("list_all_bond_features", &list_all_bond_features, "Returns a list of all bond features."); + + // Reaction featurization (CGR) + m.def("reaction_mode_names_to_array", + &reaction_mode_names_to_array, + "Convert reaction mode name strings to int64 array (mirrors atom_onehot_feature_names_to_array)."); + m.def( + "batch_reaction_featurizer", + [](const std::vector& reac_smiles_list, + const std::vector& prod_smiles_list, + const py::array_t& atom_property_list_onehot, + const py::array_t& atom_property_list_float, + const py::array_t& bond_property_list, + bool keep_h, + bool add_h, + bool offset_carbon, + int64_t mode_int) { + return batch_reaction_featurizer(reac_smiles_list, + prod_smiles_list, + atom_property_list_onehot, + atom_property_list_float, + bond_property_list, + keep_h, + add_h, + offset_carbon, + ReactionMode(mode_int)); + }, + "Featurize a batch of reactions (CGR) and return 5 NumPy arrays."); } diff --git a/src/features.h b/src/features.h index 5ab00ed..4ec760a 100644 --- a/src/features.h +++ b/src/features.h @@ -12,6 +12,7 @@ #include #include #include +#include #include // RDKit headers @@ 
-313,6 +314,39 @@ struct GraphData { std::unique_ptr mol; }; +//! Condensed Graph of Reaction featurization modes, matching chemprop's RxnMode enum +enum class ReactionMode { + REAC_DIFF, //!< First half = reactant feats; second half = prod - reac diff + REAC_PROD, //!< First half = reactant feats; second half = product feats + PROD_DIFF, //!< First half = product feats; second half = prod - reac diff + REAC_DIFF_BALANCE, //!< Like REAC_DIFF but unmatched atoms copy own feats (diff = 0) + REAC_PROD_BALANCE, //!< Like REAC_PROD but unmatched atoms copy own feats + PROD_DIFF_BALANCE, //!< Like PROD_DIFF but unmatched atoms copy own feats + UNKNOWN +}; + +//! Data representing a reaction (two molecules + atom correspondence) before CGR featurization. +//! Both GraphData members retain their RDKit mol pointers — required by one_hot.cpp features. +struct CompactReaction { + GraphData reac; //!< Reactant side (owns RDKit mol + CompactAtom/Bond caches) + GraphData prod; //!< Product side + + //! Atom mapping: reactant atom index → product atom index (built from atom-map numbers) + std::unordered_map r2p_idx_map; + //! Inverse: product atom index → reactant atom index + std::unordered_map p2r_idx_map; + + //! Reactant atoms with no matching product atom (map num absent on product side) + std::vector reac_only_idxs; + //! Product atoms with no matching reactant atom; these become CGR nodes n_reac..n_cgr-1 + std::vector prod_only_idxs; + + //! Bond lookup for O(1) cross-referencing. Key = (min_atom_idx << 32) | max_atom_idx + //! using the *side-local* (reactant or product) atom indices. + std::unordered_map reac_bond_lookup; + std::unordered_map prod_bond_lookup; +}; + //! 
Computes the total dimension of atom features based on the property lists CUIK_EXPORT size_t compute_atom_dim(const py::array_t& atom_property_list_onehot, const py::array_t& atom_property_list_float); @@ -436,3 +470,38 @@ CUIK_EXPORT std::vector batch_mol_featurizer(const std::vector parse_rxn_side_mol(const std::string& smiles, bool keep_h, bool add_h); + +//! Parses a reaction SMILES pair into a CompactReaction (atom correspondence + both GraphData). +//! Both reac_smi and prod_smi must contain atom-map numbers. +//! keep_h / add_h semantics match chemprop's _ReactionDatapointMixin.from_smi exactly. +CUIK_EXPORT CompactReaction parse_reaction(const std::string& reac_smi, + const std::string& prod_smi, + bool keep_h, + bool add_h); + +//! Converts reaction mode name strings to a NumPy int64 array (mirrors atom_onehot_feature_names_to_array). +CUIK_EXPORT py::array_t reaction_mode_names_to_array(const std::vector& modes); + +//! Featurizes a batch of reactions as Condensed Graphs of Reaction (CGR). +//! Mirrors batch_mol_featurizer in interface and return convention (5 arrays). +//! @param reac_smiles_list Reactant SMILES (atom-mapped); parallel to prod_smiles_list +//! @param prod_smiles_list Product SMILES (atom-mapped) +//! @param keep_h If true, retain explicit mapped [H:n] atoms (required for E2/SN2) +//! @param add_h If true, add unmapped Hs via RDKit::MolOps::addHs (after parsing) +//! @param mode CGR featurization mode (which combination of reac/prod/diff) +//! 
@return 5 arrays: [atom_feats, bond_feats, edge_index, rev_edge_index, batch] +CUIK_EXPORT std::vector batch_reaction_featurizer(const std::vector& reac_smiles_list, + const std::vector& prod_smiles_list, + const py::array_t& atom_property_list_onehot, + const py::array_t& atom_property_list_float, + const py::array_t& bond_property_list, + bool keep_h, + bool add_h, + bool offset_carbon, + ReactionMode mode); diff --git a/src/one_hot.cpp b/src/one_hot.cpp index 77e5106..e85bd92 100644 --- a/src/one_hot.cpp +++ b/src/one_hot.cpp @@ -177,6 +177,27 @@ size_t get_one_hot_atom_feature_size(AtomOneHotFeature feature) { } } +// Returns the 0-based index within the atomic-number one-hot block for a given atomicNum. +// Mirrors the logic in each ATOMIC_NUM* case of get_one_hot_atom_feature. +// For unrecognized atomic numbers, returns the "other" slot (last valid index). +size_t get_atomic_num_onehot_index(uint8_t atomicNum, AtomOneHotFeature feature) { + switch (feature) { + case AtomOneHotFeature::ATOMIC_NUM: { + // 1-indexed, atomicNum in [1,100] → index [0,99]; unknown → 100 + size_t idx = size_t(atomicNum); + --idx; + return (idx >= atomicNumCount) ? atomicNumCount : idx; + } + case AtomOneHotFeature::ATOMIC_NUM_COMMON: + return atomicNumCommonLookup[size_t(atomicNum)]; // returns atomicNumCommonCount for unknowns + case AtomOneHotFeature::ATOMIC_NUM_ORGANIC: + return atomicNumOrganicLookup[size_t(atomicNum)]; // returns atomicNumOrganicCount for unknowns + default: + assert(0 && "get_atomic_num_onehot_index called with non-atomic-num feature"); + return 0; + } +} + // Fills in a particular atom `feature`'s one-hot encoding into `data`, for all atoms. // See the declaration in one_hot.h for more details. template diff --git a/src/one_hot.h b/src/one_hot.h index 7645d3b..9878c20 100644 --- a/src/one_hot.h +++ b/src/one_hot.h @@ -15,6 +15,12 @@ //! `data` argument. Implementation is in one_hot.cpp size_t get_one_hot_atom_feature_size(AtomOneHotFeature feature); +//! 
Returns the 0-based index within the one-hot block for the given atomic number. +//! Used by the CGR num_only representation: all-zero except the atomic-number index. +//! Matches the "other/unknown" fallback slot for unrecognized atomic numbers, +//! exactly mirroring chemprop's MultiHotAtomFeaturizer.num_only(). Implementation in one_hot.cpp +size_t get_atomic_num_onehot_index(uint8_t atomicNum, AtomOneHotFeature feature); + //! Fills in a particular atom `feature`'s one-hot encoding into `data`, for all atoms. //! Template type `T` can be `int16_t` (FP16), `float`, or `double`. //! Implementation is in one_hot.cpp diff --git a/src/reaction_features.cpp b/src/reaction_features.cpp new file mode 100644 index 0000000..d5cd979 --- /dev/null +++ b/src/reaction_features.cpp @@ -0,0 +1,708 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! @file CGR (Condensed Graph of Reaction) featurization. +//! Implements parse_rxn_side_mol, parse_reaction, reaction_mode_names_to_array, +//! and batch_reaction_featurizer (all declared in features.h). + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "features.h" +#include "float_features.h" +#include "one_hot.h" + +namespace py = pybind11; + +static constexpr uint32_t NO_IDX = std::numeric_limits::max(); + +// --------------------------------------------------------------------------- +// Internal: populate CompactAtom/Bond arrays from an already-parsed mol. +// Mirrors read_graph() in features.cpp but accepts a pre-built mol. 
+// --------------------------------------------------------------------------- +static void populate_graph_arrays(GraphData& gd) { + const RDKit::ROMol& mol = *gd.mol; + const size_t num_atoms = gd.num_atoms; + const size_t num_bonds = gd.num_bonds; + + gd.atoms = std::unique_ptr(new CompactAtom[num_atoms]); + for (size_t i = 0; i < num_atoms; ++i) { + const RDKit::Atom* a = mol.getAtomWithIdx(i); + gd.atoms[i] = CompactAtom{uint8_t(a->getAtomicNum()), + uint8_t(a->getTotalDegree()), + int8_t(a->getFormalCharge()), + uint8_t(a->getChiralTag()), + uint8_t(a->getTotalNumHs()), + uint8_t(a->getHybridization()), + a->getIsAromatic(), + float(a->getMass())}; + } + + gd.bonds = std::unique_ptr(new CompactBond[num_bonds]); + const RDKit::RingInfo* const ringInfo = mol.getRingInfo(); + for (size_t i = 0; i < num_bonds; ++i) { + const RDKit::Bond* b = mol.getBondWithIdx(i); + gd.bonds[i] = CompactBond{uint8_t(b->getBondType()), + b->getIsConjugated(), + ringInfo->numBondRings(i) != 0, + uint8_t(b->getStereo()), + b->getBeginAtomIdx(), + b->getEndAtomIdx()}; + } +} + +// --------------------------------------------------------------------------- +// Internal: bond lookup map — key = (min_atom_idx << 32) | max_atom_idx → bond_idx +// --------------------------------------------------------------------------- +static std::unordered_map build_bond_lookup(const GraphData& gd) { + std::unordered_map lookup; + lookup.reserve(gd.num_bonds); + for (size_t i = 0; i < gd.num_bonds; ++i) { + uint32_t a = gd.bonds[i].beginAtomIdx; + uint32_t b = gd.bonds[i].endAtomIdx; + if (a > b) + std::swap(a, b); + lookup[(uint64_t(a) << 32) | uint64_t(b)] = uint32_t(i); + } + return lookup; +} + +// --------------------------------------------------------------------------- +// parse_rxn_side_mol +// --------------------------------------------------------------------------- +std::unique_ptr parse_rxn_side_mol(const std::string& smiles, bool keep_h, bool add_h) { + RDKit::SmilesParserParams 
params; + params.removeHs = !keep_h; // keep_h=true keeps explicit [H:n] atoms + std::unique_ptr mol{RDKit::SmilesToMol(smiles, params)}; + if (!mol) + return mol; + // Do NOT clearProp(molAtomMapNumber) — needed to build r2p_idx_map. + // Do NOT reorder atoms — reactions preserve SMILES parse order. + if (add_h) + RDKit::MolOps::addHs(*mol); + return mol; +} + +// --------------------------------------------------------------------------- +// parse_reaction +// --------------------------------------------------------------------------- +CompactReaction parse_reaction(const std::string& reac_smi, const std::string& prod_smi, bool keep_h, bool add_h) { + std::unique_ptr reac_mol = parse_rxn_side_mol(reac_smi, keep_h, add_h); + std::unique_ptr prod_mol = parse_rxn_side_mol(prod_smi, keep_h, add_h); + if (!reac_mol) + throw std::runtime_error("Failed to parse reactant SMILES: " + reac_smi); + if (!prod_mol) + throw std::runtime_error("Failed to parse product SMILES: " + prod_smi); + + const size_t n_reac_atoms = reac_mol->getNumAtoms(); + const size_t n_prod_atoms = prod_mol->getNumAtoms(); + + // Build mapno → product atom index + std::unordered_map mapno_to_prod_idx; + mapno_to_prod_idx.reserve(n_prod_atoms); + for (const auto* atom : prod_mol->atoms()) { + int map_num = atom->getAtomMapNum(); + if (map_num > 0) + mapno_to_prod_idx[map_num] = atom->getIdx(); + } + + // Build r2p / p2r maps; classify reactant atoms as matched or reac-only + std::unordered_map r2p_idx_map, p2r_idx_map; + std::vector reac_only_idxs; + r2p_idx_map.reserve(n_reac_atoms); + p2r_idx_map.reserve(n_reac_atoms); + + for (const auto* atom : reac_mol->atoms()) { + uint32_t r_idx = atom->getIdx(); + int map_num = atom->getAtomMapNum(); + auto it = (map_num > 0) ? 
mapno_to_prod_idx.find(map_num) : mapno_to_prod_idx.end(); + if (it != mapno_to_prod_idx.end()) { + r2p_idx_map[r_idx] = it->second; + p2r_idx_map[it->second] = r_idx; + } else { + reac_only_idxs.push_back(r_idx); + } + } + + // Classify product atoms: those without a reactant counterpart are product-only + std::vector prod_only_idxs; + for (const auto* atom : prod_mol->atoms()) { + if (p2r_idx_map.find(atom->getIdx()) == p2r_idx_map.end()) + prod_only_idxs.push_back(atom->getIdx()); + } + + // Build GraphData for each side (mol is moved in; arrays populated from it) + GraphData reac_gd{n_reac_atoms, + std::unique_ptr(), + reac_mol->getNumBonds(), + std::unique_ptr(), + std::move(reac_mol)}; + GraphData prod_gd{n_prod_atoms, + std::unique_ptr(), + prod_mol->getNumBonds(), + std::unique_ptr(), + std::move(prod_mol)}; + + populate_graph_arrays(reac_gd); + populate_graph_arrays(prod_gd); + + auto reac_bond_lookup = build_bond_lookup(reac_gd); + auto prod_bond_lookup = build_bond_lookup(prod_gd); + + return CompactReaction{std::move(reac_gd), + std::move(prod_gd), + std::move(r2p_idx_map), + std::move(p2r_idx_map), + std::move(reac_only_idxs), + std::move(prod_only_idxs), + std::move(reac_bond_lookup), + std::move(prod_bond_lookup)}; +} + +// --------------------------------------------------------------------------- +// reaction_mode_names_to_array +// --------------------------------------------------------------------------- +static const std::unordered_map rxn_mode_name_to_enum{ + { std::string("REAC_DIFF"), int64_t(ReactionMode::REAC_DIFF)}, + { std::string("REAC_PROD"), int64_t(ReactionMode::REAC_PROD)}, + { std::string("PROD_DIFF"), int64_t(ReactionMode::PROD_DIFF)}, + {std::string("REAC_DIFF_BALANCE"), int64_t(ReactionMode::REAC_DIFF_BALANCE)}, + {std::string("REAC_PROD_BALANCE"), int64_t(ReactionMode::REAC_PROD_BALANCE)}, + {std::string("PROD_DIFF_BALANCE"), int64_t(ReactionMode::PROD_DIFF_BALANCE)}, +}; + +py::array_t reaction_mode_names_to_array(const 
std::vector& modes) { + const size_t n = modes.size(); + std::unique_ptr out(new int64_t[n ? n : 1]); + for (size_t i = 0; i < n; ++i) { + auto it = rxn_mode_name_to_enum.find(modes[i]); + out[i] = (it != rxn_mode_name_to_enum.end()) ? it->second : int64_t(ReactionMode::UNKNOWN); + } + const int64_t dims[1] = {int64_t(n)}; + return py_array_from_array(std::move(out), dims, 1); +} + +// --------------------------------------------------------------------------- +// Internal: fill all-atom feature temp array using existing all-atoms functions. +// Result: buf[atomIdx * single_fdim .. (atomIdx+1)*single_fdim - 1] = full feature vector for atomIdx. +// --------------------------------------------------------------------------- +static void fill_all_atom_features(const GraphData& gd, + std::vector& buf, + size_t single_fdim, + const py::array_t& atom_property_list_onehot, + const py::array_t& atom_property_list_float, + bool offset_carbon) { + buf.assign(gd.num_atoms * single_fdim, 0.0f); + if (gd.num_atoms == 0) + return; + + const size_t n_onehot = (atom_property_list_onehot.ndim() == 1) ? size_t(atom_property_list_onehot.shape(0)) : 0; + const size_t n_float = (atom_property_list_float.ndim() == 1) ? size_t(atom_property_list_float.shape(0)) : 0; + const int64_t* oh_ptr = (n_onehot > 0) ? static_cast(atom_property_list_onehot.data()) : nullptr; + const int64_t* fl_ptr = (n_float > 0) ? static_cast(atom_property_list_float.data()) : nullptr; + + float* p = buf.data(); + for (size_t i = 0; i < n_onehot; ++i) { + auto feat = AtomOneHotFeature(oh_ptr[i]); + size_t fsz = get_one_hot_atom_feature(gd, p, feat, single_fdim); + p += fsz; + } + for (size_t i = 0; i < n_float; ++i) { + get_atom_float_feature(gd, p, AtomFloatFeature(fl_ptr[i]), single_fdim, offset_carbon); + ++p; + } +} + +// --------------------------------------------------------------------------- +// Internal: build a num_only feature vector into out[0..single_fdim-1]. 
+// All zeros except a single 1 at the atomic-number one-hot index for atomicNum (the block is taken to start at offset 0 of `out`); `out` must hold single_fdim floats and first_onehot_feat is passed through to get_atomic_num_onehot_index. +// Mirrors chemprop's MultiHotAtomFeaturizer.num_only(atom). If the computed index is >= single_fdim (e.g. the caller fell back to ATOMIC_NUM with an empty one-hot list) the vector stays all-zero instead of writing past the buffer. +// --------------------------------------------------------------------------- +static void build_num_only(uint8_t atomicNum, float* out, size_t single_fdim, AtomOneHotFeature first_onehot_feat) { + std::fill(out, out + single_fdim, 0.0f); + const size_t idx = get_atomic_num_onehot_index(atomicNum, first_onehot_feat); + if (idx < single_fdim) out[idx] = 1.0f; // bounds guard: the index comes from the feature enum, not from the buffer size +} + +// --------------------------------------------------------------------------- +// CGR atom feature computation (fills output for one reaction). +// +// CGR atom layout: +// nodes 0..n_reac-1 = reactant atoms (in RDKit parse order) +// nodes n_reac..n_cgr-1 = product-only atoms (in prod_only_idxs order) +// +// Feature layout per CGR node (cgr_atom_fdim = single_fdim + second_len): +// [first_half: single_fdim values | second_half: second_len values] +// +// second_len = single_fdim - atomic_num_block_w +// (strips the entire atomic-num one-hot block from the second side) +// +// Diff direction (for *_DIFF modes): ALWAYS prod_feats - reac_feats.
+// --------------------------------------------------------------------------- +static void fill_cgr_atom_features(const CompactReaction& rxn, + ReactionMode mode, + float* cgr_out, + size_t cgr_atom_fdim, + size_t single_fdim, + size_t atomic_num_block_w, + const py::array_t& atom_property_list_onehot, + const py::array_t& atom_property_list_float, + bool offset_carbon) { + const size_t n_reac = rxn.reac.num_atoms; + const size_t n_cgr = n_reac + rxn.prod_only_idxs.size(); + const size_t second_len = single_fdim - atomic_num_block_w; + + // Mode flags + const bool is_balance = (mode == ReactionMode::REAC_DIFF_BALANCE || mode == ReactionMode::REAC_PROD_BALANCE || + mode == ReactionMode::PROD_DIFF_BALANCE); + const bool prod_first = (mode == ReactionMode::PROD_DIFF || mode == ReactionMode::PROD_DIFF_BALANCE); + const bool use_diff = (mode == ReactionMode::REAC_DIFF || mode == ReactionMode::PROD_DIFF || + mode == ReactionMode::REAC_DIFF_BALANCE || mode == ReactionMode::PROD_DIFF_BALANCE); + + // 1. Pre-compute full feature vectors for all reactant and product atoms. + // reac_buf[r * single_fdim .. (r+1)*single_fdim-1] = full feature vector for reactant atom r. + std::vector reac_buf, prod_buf; + fill_all_atom_features(rxn.reac, + reac_buf, + single_fdim, + atom_property_list_onehot, + atom_property_list_float, + offset_carbon); + fill_all_atom_features(rxn.prod, + prod_buf, + single_fdim, + atom_property_list_onehot, + atom_property_list_float, + offset_carbon); + + // Determine the first onehot feature (needed for num_only). + AtomOneHotFeature first_feat = AtomOneHotFeature::ATOMIC_NUM; + if (atom_property_list_onehot.ndim() == 1 && atom_property_list_onehot.shape(0) > 0) + first_feat = AtomOneHotFeature(static_cast(atom_property_list_onehot.data())[0]); + + // Scratch buffers for num_only vectors (reused each iteration). + std::vector num_only_reac(single_fdim, 0.0f); + std::vector num_only_prod(single_fdim, 0.0f); + + // 2. 
For each CGR node, mix reac and prod feature vectors. + for (size_t u = 0; u < n_cgr; ++u) { + float* atom_out = cgr_out + u * cgr_atom_fdim; + + // Determine reac_feats and prod_feats for this CGR node. + const float* reac_feats = nullptr; + const float* prod_feats = nullptr; + + if (u < n_reac) { + // Reactant atom + reac_feats = reac_buf.data() + u * single_fdim; + auto it = rxn.r2p_idx_map.find(uint32_t(u)); + bool has_prod = (it != rxn.r2p_idx_map.end()); + if (has_prod) { + prod_feats = prod_buf.data() + it->second * single_fdim; + } else { + // Reac-only: product side + if (is_balance) { + prod_feats = reac_feats; // copy own feats + } else { + build_num_only(rxn.reac.atoms[u].atomicNum, num_only_prod.data(), single_fdim, first_feat); + prod_feats = num_only_prod.data(); + } + } + } else { + // Product-only atom + uint32_t p_idx = rxn.prod_only_idxs[u - n_reac]; + prod_feats = prod_buf.data() + p_idx * single_fdim; + if (is_balance) { + reac_feats = prod_feats; // copy own feats + } else { + build_num_only(rxn.prod.atoms[p_idx].atomicNum, num_only_reac.data(), single_fdim, first_feat); + reac_feats = num_only_reac.data(); + } + } + + // Write first half: reac side for REAC_* modes, prod side for PROD_* modes. + const float* first_src = prod_first ? prod_feats : reac_feats; + std::copy(first_src, first_src + single_fdim, atom_out); + + // Write second half (length = second_len), starting at atom_out[single_fdim]. + // Strips atomic_num_block_w from the start of the second side. + // Diff = prod - reac (always, regardless of which is "first"). 
+ float* second_out = atom_out + single_fdim; + if (use_diff) { + for (size_t k = 0; k < second_len; ++k) + second_out[k] = prod_feats[atomic_num_block_w + k] - reac_feats[atomic_num_block_w + k]; + } else { + // REAC_PROD / REAC_PROD_BALANCE: second half = prod feats (atomic-num block stripped) + std::copy(prod_feats + atomic_num_block_w, prod_feats + atomic_num_block_w + second_len, second_out); + } + } +} + +// --------------------------------------------------------------------------- +// Internal: write one-hot + float features for a single bond into buf[0..single_bond_fdim-1]. +// bond_idx == NO_IDX means the bond doesn't exist on that side (IS_NULL featurization). +// --------------------------------------------------------------------------- +static void fill_single_bond_feats(const GraphData& gd, + uint32_t bond_idx, + float* buf, + size_t single_bond_fdim, + const py::array_t& bond_property_list) { + const size_t n_props = (bond_property_list.ndim() == 1) ? size_t(bond_property_list.shape(0)) : 0; + const int64_t* props = (n_props > 0) ? static_cast(bond_property_list.data()) : nullptr; + const bool is_null = (bond_idx == NO_IDX); + + float* p = buf; + for (size_t i = 0; i < n_props; ++i) { + auto feat = BondFeature(props[i]); + switch (feat) { + case BondFeature::IS_NULL: + *p++ = is_null ? 
1.0f : 0.0f; + break; + case BondFeature::TYPE_ONE_HOT: { + // SINGLE→0, DOUBLE→1, TRIPLE→2, other(AROMATIC etc.)→3 + size_t fsz = get_one_hot_bond_feature_size(feat); + std::fill(p, p + fsz, 0.0f); + if (!is_null) { + uint8_t bt = gd.bonds[bond_idx].bondType; + size_t slot; + if (bt == 1) + slot = 0; // RDKit::Bond::SINGLE + else if (bt == 2) + slot = 1; // RDKit::Bond::DOUBLE + else if (bt == 3) + slot = 2; // RDKit::Bond::TRIPLE + else + slot = 3; // AROMATIC or other + p[slot] = 1.0f; + } + p += fsz; + break; + } + case BondFeature::STEREO_ONE_HOT: { + // STEREONONE=0, STEREOANY=1, STEREOZ=2, STEREOE=3, STEREOCIS=4, STEREOTRANS=5, other=6 + size_t fsz = get_one_hot_bond_feature_size(feat); + std::fill(p, p + fsz, 0.0f); + if (!is_null) { + uint8_t st = gd.bonds[bond_idx].stereo; + size_t slot = (size_t(st) < fsz) ? size_t(st) : fsz - 1; + p[slot] = 1.0f; + } + p += fsz; + break; + } + case BondFeature::IN_RING: + *p++ = (!is_null && gd.bonds[bond_idx].isInRing) ? 1.0f : 0.0f; + break; + case BondFeature::CONJUGATED: + *p++ = (!is_null && gd.bonds[bond_idx].isConjugated) ? 1.0f : 0.0f; + break; + case BondFeature::TYPE_FLOAT: + *p++ = is_null ? 0.0f : float(gd.bonds[bond_idx].bondType); + break; + default: + *p++ = 0.0f; + break; + } + } + (void)single_bond_fdim; +} + +// --------------------------------------------------------------------------- +// Internal: write CGR bond features for one undirected bond pair into out[0..cgr_bond_fdim-1]. +// cgr_bond_fdim = 2 * single_bond_fdim. +// Diff direction: ALWAYS prod - reac. 
+// --------------------------------------------------------------------------- +static void write_cgr_bond_feats(const CompactReaction& rxn, // reaction whose reac / prod GraphData supply the bond records + uint32_t b_reac_idx, // reactant-side bond index, or NO_IDX when the bond is absent on that side + uint32_t b_prod_idx, // product-side bond index, or NO_IDX when the bond is absent on that side + ReactionMode mode, // selects which side fills the first half and whether the second half is a diff + float* out, // receives 2 * single_bond_fdim floats + size_t single_bond_fdim, // per-side bond feature width + const py::array_t& bond_property_list) { // bond feature ids, forwarded unchanged to fill_single_bond_feats + const bool prod_first = (mode == ReactionMode::PROD_DIFF || mode == ReactionMode::PROD_DIFF_BALANCE); + const bool use_diff = (mode == ReactionMode::REAC_DIFF || mode == ReactionMode::PROD_DIFF || + mode == ReactionMode::REAC_DIFF_BALANCE || mode == ReactionMode::PROD_DIFF_BALANCE); + + // Fill per-side feature vectors into zero-initialized scratch buffers (one heap allocation per side per call) + std::vector reac_bf(single_bond_fdim, 0.0f); + std::vector prod_bf(single_bond_fdim, 0.0f); + fill_single_bond_feats(rxn.reac, b_reac_idx, reac_bf.data(), single_bond_fdim, bond_property_list); + fill_single_bond_feats(rxn.prod, b_prod_idx, prod_bf.data(), single_bond_fdim, bond_property_list); + // Note: BALANCE adjustment is applied by the caller (enumerate_cgr_bonds) before calling this function. + // The b_reac_idx / b_prod_idx passed in are already the post-balance-adjusted indices. + // No BALANCE copying is done here. + + // First half: product feats for PROD_DIFF / PROD_DIFF_BALANCE, reactant feats for every other mode + const float* first_src = prod_first ? prod_bf.data() : reac_bf.data(); + std::copy(first_src, first_src + single_bond_fdim, out); + + // Second half: diff = prod - reac (always), or prod feats for REAC_PROD modes (any mode not listed in use_diff, UNKNOWN included, takes the non-diff branch) + float* second_out = out + single_bond_fdim; + if (use_diff) { + for (size_t k = 0; k < single_bond_fdim; ++k) + second_out[k] = prod_bf[k] - reac_bf[k]; // ALWAYS prod - reac + } else { + // REAC_PROD / REAC_PROD_BALANCE: second half = prod feats + std::copy(prod_bf.begin(), prod_bf.end(), second_out); + } +} + +// --------------------------------------------------------------------------- +// Internal: bond enumeration result for one reaction.
+// --------------------------------------------------------------------------- +struct BondEnumResult { + size_t num_directed; // total directed edges (undirected * 2) + std::unique_ptr bond_feats; // [num_directed, cgr_bond_fdim] + std::unique_ptr edge_index; // [2 * num_directed]: sources then dests + std::unique_ptr rev_edge_index; // [num_directed]: reverse edge index (local) +}; + +// --------------------------------------------------------------------------- +// Internal: enumerate CGR bonds for one reaction via the O(n²) scan that +// mirrors Python's CondensedGraphOfReactionFeaturizer ordering exactly. +// atom_offset: global atom index offset for this reaction (added to CGR node indices). +// --------------------------------------------------------------------------- +static BondEnumResult enumerate_cgr_bonds(const CompactReaction& rxn, + ReactionMode mode, + size_t cgr_bond_fdim, + size_t single_bond_fdim, + const py::array_t& bond_property_list, + size_t atom_offset) { + const size_t n_reac = rxn.reac.num_atoms; + const size_t n_cgr = n_reac + rxn.prod_only_idxs.size(); + + // Helper: get product-side atom index for CGR node u (NO_IDX if u has no product atom) + auto get_p = [&](size_t u) -> uint32_t { + if (u >= n_reac) + return rxn.prod_only_idxs[u - n_reac]; + auto it = rxn.r2p_idx_map.find(uint32_t(u)); + return (it != rxn.r2p_idx_map.end()) ? it->second : NO_IDX; + }; + + // Helper: look up a bond in a lookup map; returns NO_IDX if not found + auto lookup_bond = [](const std::unordered_map& lut, uint32_t a, uint32_t b) -> uint32_t { + if (a == NO_IDX || b == NO_IDX) + return NO_IDX; + if (a > b) + std::swap(a, b); + auto it = lut.find((uint64_t(a) << 32) | uint64_t(b)); + return (it != lut.end()) ? it->second : NO_IDX; + }; + + // First pass: count undirected bonds + size_t n_undirected = 0; + for (size_t u = 0; u < n_cgr; ++u) { + uint32_t r_u = (u < n_reac) ? 
uint32_t(u) : NO_IDX; + uint32_t p_u = get_p(u); + for (size_t v = u + 1; v < n_cgr; ++v) { + uint32_t r_v = (v < n_reac) ? uint32_t(v) : NO_IDX; + uint32_t p_v = get_p(v); + if (lookup_bond(rxn.reac_bond_lookup, r_u, r_v) == NO_IDX && + lookup_bond(rxn.prod_bond_lookup, p_u, p_v) == NO_IDX) + continue; + ++n_undirected; + } + } + + const size_t n_directed = 2 * n_undirected; + std::unique_ptr bond_feats(new float[n_directed * cgr_bond_fdim]()); + std::unique_ptr edge_index(new int64_t[2 * n_directed]); + std::unique_ptr rev_edge_index(new int64_t[n_directed]); + + const bool is_balance = (mode == ReactionMode::REAC_DIFF_BALANCE || mode == ReactionMode::REAC_PROD_BALANCE || + mode == ReactionMode::PROD_DIFF_BALANCE); + + // Second pass: fill arrays + size_t ep = 0; // undirected bond index + for (size_t u = 0; u < n_cgr; ++u) { + uint32_t r_u = (u < n_reac) ? uint32_t(u) : NO_IDX; + uint32_t p_u = get_p(u); + for (size_t v = u + 1; v < n_cgr; ++v) { + uint32_t r_v = (v < n_reac) ? uint32_t(v) : NO_IDX; + uint32_t p_v = get_p(v); + uint32_t b_reac = lookup_bond(rxn.reac_bond_lookup, r_u, r_v); + uint32_t b_prod = lookup_bond(rxn.prod_bond_lookup, p_u, p_v); + if (b_reac == NO_IDX && b_prod == NO_IDX) + continue; + + // Apply BALANCE bond copying — mirrors Python's _get_bonds logic exactly: + // * Both product-only (u>=n_reac && v>=n_reac): b_reac = b_prod + // * Both reactant-only (u= n_reac); + bool v_prod_only = (v >= n_reac); + if (u_prod_only && v_prod_only) { + b_reac = b_prod; // both product-only + } else if (!u_prod_only && !v_prod_only) { + bool u_matched = (p_u != NO_IDX); + bool v_matched = (p_v != NO_IDX); + if (!u_matched && !v_matched) + b_prod = b_reac; // both reactant-only + // else: at least one is matched → no copy (null stays null) + } + // mixed case (one reactant atom, one product-only): no copy + } + + // Directed edges: fwd (u→v) at position 2*ep, rev (v→u) at 2*ep+1 + size_t fwd = 2 * ep, rev = 2 * ep + 1; + + // Bond features (same for both 
directions) + write_cgr_bond_feats(rxn, + b_reac, + b_prod, + mode, + bond_feats.get() + fwd * cgr_bond_fdim, + single_bond_fdim, + bond_property_list); + std::copy(bond_feats.get() + fwd * cgr_bond_fdim, + bond_feats.get() + fwd * cgr_bond_fdim + cgr_bond_fdim, + bond_feats.get() + rev * cgr_bond_fdim); + + // edge_index: [sources | dests], interleaved fwd/rev + edge_index[fwd] = int64_t(u + atom_offset); // fwd src + edge_index[rev] = int64_t(v + atom_offset); // rev src + edge_index[n_directed + fwd] = int64_t(v + atom_offset); // fwd dst + edge_index[n_directed + rev] = int64_t(u + atom_offset); // rev dst + + // rev_edge_index: fwd and rev are each other's reverse (local indices) + rev_edge_index[fwd] = int64_t(rev); + rev_edge_index[rev] = int64_t(fwd); + + ++ep; + } + } + + return BondEnumResult{n_directed, std::move(bond_feats), std::move(edge_index), std::move(rev_edge_index)}; +} + +// --------------------------------------------------------------------------- +// batch_reaction_featurizer +// --------------------------------------------------------------------------- +std::vector batch_reaction_featurizer(const std::vector& reac_smiles_list, + const std::vector& prod_smiles_list, + const py::array_t& atom_property_list_onehot, + const py::array_t& atom_property_list_float, + const py::array_t& bond_property_list, + bool keep_h, + bool add_h, + bool offset_carbon, + ReactionMode mode) { + if (reac_smiles_list.size() != prod_smiles_list.size()) + throw std::runtime_error("reac_smiles_list and prod_smiles_list must have the same length"); + const size_t n_rxns = reac_smiles_list.size(); + + // Parse all reactions + std::vector reactions; + reactions.reserve(n_rxns); + for (size_t i = 0; i < n_rxns; ++i) + reactions.push_back(parse_reaction(reac_smiles_list[i], prod_smiles_list[i], keep_h, add_h)); + + // Feature dimensions + const size_t single_atom_fdim = compute_atom_dim(atom_property_list_onehot, atom_property_list_float); + // atomic_num_block_w = size 
of the atomic-num one-hot block (including "other" slot) + // = get_one_hot_atom_feature_size(first_onehot_feature) + size_t atomic_num_block_w = 0; + if (atom_property_list_onehot.ndim() == 1 && atom_property_list_onehot.shape(0) > 0) { + auto f = AtomOneHotFeature(static_cast(atom_property_list_onehot.data())[0]); + atomic_num_block_w = get_one_hot_atom_feature_size(f); + } + const size_t second_atom_len = single_atom_fdim - atomic_num_block_w; + const size_t cgr_atom_fdim = single_atom_fdim + second_atom_len; + const size_t single_bond_fdim = compute_bond_dim(bond_property_list); + const size_t cgr_bond_fdim = 2 * single_bond_fdim; + + // Total CGR atom count across all reactions + size_t total_cgr_atoms = 0; + for (const auto& rxn : reactions) + total_cgr_atoms += rxn.reac.num_atoms + rxn.prod_only_idxs.size(); + + // Allocate atom feature array and batch array + std::unique_ptr atom_data(new float[total_cgr_atoms * cgr_atom_fdim]()); + std::unique_ptr batch_data(new int64_t[total_cgr_atoms ? 
total_cgr_atoms : 1]); + + size_t atom_offset = 0; + for (size_t i = 0; i < n_rxns; ++i) { + const CompactReaction& rxn = reactions[i]; + const size_t n_cgr = rxn.reac.num_atoms + rxn.prod_only_idxs.size(); + fill_cgr_atom_features(rxn, + mode, + atom_data.get() + atom_offset * cgr_atom_fdim, + cgr_atom_fdim, + single_atom_fdim, + atomic_num_block_w, + atom_property_list_onehot, + atom_property_list_float, + offset_carbon); + for (size_t k = 0; k < n_cgr; ++k) + batch_data[atom_offset + k] = int64_t(i); + atom_offset += n_cgr; + } + + // Enumerate bonds per reaction + std::vector bond_results; + bond_results.reserve(n_rxns); + size_t total_directed = 0; + atom_offset = 0; + for (size_t i = 0; i < n_rxns; ++i) { + const CompactReaction& rxn = reactions[i]; + bond_results.push_back( + enumerate_cgr_bonds(rxn, mode, cgr_bond_fdim, single_bond_fdim, bond_property_list, atom_offset)); + total_directed += bond_results.back().num_directed; + atom_offset += rxn.reac.num_atoms + rxn.prod_only_idxs.size(); + } + + // Assemble global bond_feats, edge_index, rev_edge_index + std::unique_ptr bond_data(new float[total_directed * cgr_bond_fdim + 1]()); + std::unique_ptr edge_index(new int64_t[2 * total_directed + 1]); + std::unique_ptr rev_edge_index(new int64_t[total_directed + 1]); + + size_t bond_offset = 0; + for (size_t i = 0; i < n_rxns; ++i) { + const BondEnumResult& br = bond_results[i]; + const size_t n = br.num_directed; + // Bond features + std::copy(br.bond_feats.get(), + br.bond_feats.get() + n * cgr_bond_fdim, + bond_data.get() + bond_offset * cgr_bond_fdim); + // Source indices + std::copy(br.edge_index.get(), br.edge_index.get() + n, edge_index.get() + bond_offset); + // Dest indices (second half of local array → second half of global array) + std::copy(br.edge_index.get() + n, br.edge_index.get() + 2 * n, edge_index.get() + total_directed + bond_offset); + // rev_edge_index: shift local indices by bond_offset + for (size_t k = 0; k < n; ++k) + 
rev_edge_index[bond_offset + k] = br.rev_edge_index[k] + int64_t(bond_offset); + bond_offset += n; + } + + // Wrap in py::array_t + const int64_t atom_dims[2] = {int64_t(total_cgr_atoms), int64_t(cgr_atom_fdim)}; + const int64_t bond_dims[2] = {int64_t(total_directed), int64_t(cgr_bond_fdim)}; + const int64_t ei_dims[2] = {int64_t(2), int64_t(total_directed)}; + const int64_t rev_dims[1] = {int64_t(total_directed)}; + const int64_t batch_dims[1] = {int64_t(total_cgr_atoms)}; + + return { + py_array_from_array(std::move(atom_data), atom_dims, 2), + py_array_from_array(std::move(bond_data), bond_dims, 2), + py_array_from_array(std::move(edge_index), ei_dims, 2), + py_array_from_array(std::move(rev_edge_index), rev_dims, 1), + py_array_from_array(std::move(batch_data), batch_dims, 1), + }; +} diff --git a/tests/data/sample_rxns_100.csv b/tests/data/sample_rxns_100.csv new file mode 100644 index 0000000..de597c4 --- /dev/null +++ b/tests/data/sample_rxns_100.csv @@ -0,0 +1,111 @@ +rxn_smiles +[F:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([H:11])[N+:21]([O-:22])[O:23].[H-:2]>>[C:4](=[C:5]([H:11])[N+:21]([O-:22])[O:23])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[F-:1].[H:2][H:3] +[F:1][C:5]([C:4]([H:3])([H:31])[H:41])([N:11]([H:12])[H:13])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5]([N:11]([H:12])[H:13])[N:21]([H:22])[H:23])([H:31])[H:41].[F-:1].[H:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([H:11])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[Br-:1].[C:4](=[C:5](/[H:11])[C:21]([H:22])([H:23])[H:24])(\[H:31])[C:41]([H:42])([H:43])[H:44].[H:2][H:3] +[Cl:1][C@:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([H:11])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[C:4](=[C:5](/[H:11])[C:21]([H:22])([H:23])[H:24])(\[N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43].[Cl-:1].[H:2][H:3] +[Br-:2].[Br:1][C:5]([C:4]([H:3])([H:31])[H:41])([H:11])[H:21]>>[Br-:1].[Br:2][H:3].[C:4](=[C:5]([H:11])[H:21])([H:31])[H:41] 
+[Cl:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5](/[C:11]#[N:12])[N:21]([H:22])[H:23])(\[C:31]#[N:32])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[H:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([H:31])[N+:41]([O-:42])[O:43])([N+:11]([O-:12])[O:13])[H:21].[H-:2]>>[Br-:1].[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[H:21])(\[H:31])[N+:41]([O-:42])[O:43].[H:2][H:3] +[Cl:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:2]>>[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[H:21])(\[C:31]#[N:32])[N:41]([H:42])[H:43].[Cl-:1].[F:2][H:3] +[F:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23])(\[N:31]([H:32])[H:33])[H:41].[F-:1].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[H:41])([N:11]([H:12])[H:13])[H:21].[F-:2]>>[C:4](=[C:5]([N:11]([H:12])[H:13])[H:21])([H:31])[H:41].[Cl-:1].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24])(\[C:31]([H:32])([H:33])[H:34])[H:41].[Cl-:1].[F:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([H:11])[C:21]([H:22])([H:23])[H:24].[Cl-:2]>>[Br-:1].[C:4](=[C:5]([H:11])[C:21]([H:22])([H:23])[H:24])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43].[Cl:2][H:3] +[Cl-:2].[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[H:21]>>[C:4](=[C:5](/[C:11]#[N:12])[H:21])(\[H:31])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[Cl:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[N:21]([H:22])[H:23].[F-:2]>>[Br-:1].[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[N:21]([H:22])[H:23])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[F:2][H:3] 
+[Br:1][C:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([H:11])[H:21].[F-:2]>>[Br-:1].[C:4](=[C:5]([H:11])[H:21])([N:31]([H:32])[H:33])[H:41].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:2]>>[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[H:21])(\[H:31])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[H:41])([H:11])[C:21]#[N:22].[H-:2]>>[C:4](=[C:5]([H:11])[C:21]#[N:22])([H:31])[H:41].[Cl-:1].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([C:11]#[N:12])[N+:21]([O-:22])[O:23].[H-:2]>>[C:4](=[C:5](/[C:11]#[N:12])[N+:21]([O-:22])[O:23])(\[N:31]([H:32])[H:33])[H:41].[Cl-:1].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([H:11])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5](/[H:11])[N:21]([H:22])[H:23])(\[H:31])[C:41]#[N:42].[Cl-:1].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[N+:21]([O-:22])[O:23].[F-:2]>>[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[N+:21]([O-:22])[O:23])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43].[Cl-:1].[F:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([C:11]([H:12])([H:13])[H:14])[N:21]([H:22])[H:23].[H-:2]>>[Br-:1].[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[N:21]([H:22])[H:23])(\[H:31])[C:41]#[N:42].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:2]>>[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[H:21])(\[H:31])[N:41]([H:42])[H:43].[Cl-:1].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[C:4](=[C:5]([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[F:2][H:3] +[Br-:2].[Br:1][C:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([H:11])[H:21]>>[Br-:1].[Br:2][H:3].[C:4](=[C:5]([H:11])[H:21])([C:31]#[N:32])[N:41]([H:42])[H:43] 
+[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]#[N:12])[H:21].[F-:2]>>[C:4](=[C:5](/[C:11]#[N:12])[H:21])(\[C:31]([H:32])([H:33])[H:34])[H:41].[Cl-:1].[F:2][H:3] +[Br:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[F-:2]>>[Br-:1].[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[C:21]#[N:22])(\[N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23].[F-:2]>>[C:4](=[C:5]([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[F:2][H:3] +[Br:1][C@:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Br-:1].[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24])(\[N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24])(\[H:31])[N:41]([H:42])[H:43].[Cl-:1].[F:2][H:3] +[Br:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Br-:1].[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24])(\[C:31]([H:32])([H:33])[H:34])[H:41].[F:2][H:3] +[Br:1][C:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([H:11])[H:21].[H-:2]>>[Br-:1].[C:4](=[C:5]([H:11])[H:21])([H:31])[N:41]([H:42])[H:43].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[F-:2]>>[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43].[Cl-:1].[F:2][H:3] 
+[Cl:1][C@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[H:21].[H-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[H:21])(\[H:31])[N:41]([H:42])[H:43].[Cl-:1].[H:2][H:3] +[F:1][C:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24])([H:31])[N:41]([H:42])[H:43].[F-:1].[H:2][H:3] +[Br:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23].[H-:2]>>[Br-:1].[C:4](=[C:5](/[N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23])(\[C:31]([H:32])([H:33])[H:34])[C:41]#[N:42].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[H-:2]>>[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[C:21]#[N:22])(\[H:31])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[H:2][H:3] +[F:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[H-:2]>>[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43].[F-:1].[H:2][H:3] +[Cl:1][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([N+:11]([O-:12])[O:13])[N+:21]([O-:22])[O:23].[H-:2]>>[C:4](=[C:5]([N+:11]([O-:12])[O:13])[N+:21]([O-:22])[O:23])([C:31]([H:32])([H:33])[H:34])[H:41].[Cl-:1].[H:2][H:3] +[Br:1][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[Br-:1].[C:4](=[C:5]([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[H:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23])(\[H:31])[C:41]#[N:42].[Cl-:1].[H:2][H:3] 
+[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[C:4](=[C:5](/[C:11]#[N:12])[C:21]([H:22])([H:23])[H:24])(\[H:31])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[H:2][H:3] +[F:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([H:11])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[C:4](=[C:5](/[H:11])[C:21]([H:22])([H:23])[H:24])(\[C:31]([H:32])([H:33])[H:34])[H:41].[F-:1].[H:2][H:3] +[Br:1][C:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([H:11])[H:21].[H-:2]>>[Br-:1].[C:4](=[C:5]([H:11])[H:21])([H:31])[C:41]([H:42])([H:43])[H:44].[H:2][H:3] +[F:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43])([H:11])[C:21]#[N:22].[H-:2]>>[C:4](=[C:5](/[H:11])[C:21]#[N:22])(\[C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43].[F-:1].[H:2][H:3] +[Cl:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[N+:41]([O-:42])[O:43])([C:11]#[N:12])[N:21]([H:22])[H:23].[F-:2]>>[C:4](=[C:5](/[C:11]#[N:12])[N:21]([H:22])[H:23])(\[N:31]([H:32])[H:33])[N+:41]([O-:42])[O:43].[Cl-:1].[F:2][H:3] +[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[H-:2]>>[C:4](=[C:5](/[C:11]([H:12])([H:13])[H:14])[C:21]#[N:22])(\[C:31]([H:32])([H:33])[H:34])[H:41].[Cl-:1].[H:2][H:3] +[Br:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[H:21].[Cl-:2]>>[Br-:1].[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[H:21])(\[C:31]#[N:32])[N:41]([H:42])[H:43].[Cl:2][H:3] +[Br-:2].[Cl:1][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([H:11])[H:21]>>[Br:2][H:3].[C:4](=[C:5]([H:11])[H:21])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44].[Cl-:1] +[Cl:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23].[H-:2]>>[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23])(\[N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44].[Cl-:1].[H:2][H:3] 
+[Br:1][C@:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[H:21].[Cl-:2]>>[Br-:1].[C:4](=[C:5](/[N+:11]([O-:12])[O:13])[H:21])(\[N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43].[Cl:2][H:3] +[F-:2].[F:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[H:21]>>[F-:1].[F:2][C@@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[H:21] +[Cl-:2].[F:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22]>>[Cl:2][C@@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[F-:1] +[Cl:1][C:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([H:11])[H:21].[H-:2]>>[Cl-:1].[H:2][C:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([H:11])[H:21] +[Br:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]#[N:42])([H:11])[N:21]([H:22])[H:23].[F-:2]>>[Br-:1].[F:2][C@@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]#[N:42])([H:11])[N:21]([H:22])[H:23] +[Br-:2].[Cl:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N+:41]([O-:42])[O:43])([C:11]#[N:12])[H:21]>>[Br:2][C@@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N+:41]([O-:42])[O:43])([C:11]#[N:12])[H:21].[Cl-:1] +[Br:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[Br-:1].[H:2][C@@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[C:21]([H:22])([H:23])[H:24] +[Br:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([H:11])[C:21]#[N:22].[Cl-:2]>>[Br-:1].[Cl:2][C@@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([H:11])[C:21]#[N:22] +[Cl:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N+:41]([O-:42])[O:43])([C:11]#[N:12])[H:21].[F-:2]>>[Cl-:1].[F:2][C@@:5]([C@:4]([H:3])([C:31]#[N:32])[N+:41]([O-:42])[O:43])([C:11]#[N:12])[H:21] 
+[F:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23].[H-:2]>>[F-:1].[H:2][C@@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23] +[F:1][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]([H:12])([H:13])[H:14])[N+:21]([O-:22])[O:23].[H-:2]>>[F-:1].[H:2][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]([H:12])([H:13])[H:14])[N+:21]([O-:22])[O:23] +[Br:1][C:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([H:11])[H:21].[F-:2]>>[Br-:1].[F:2][C:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([H:11])[H:21] +[Br-:2].[Cl:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([N+:11]([O-:12])[O:13])[C:21]#[N:22]>>[Br:2][C@@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([N+:11]([O-:12])[O:13])[C:21]#[N:22].[Cl-:1] +[F-:2].[F:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23]>>[F-:1].[F:2][C@@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23] +[F-:2].[F:1][C@:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22]>>[F-:1].[F:2][C@@:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22] +[Cl:1][C:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Cl-:1].[F:2][C:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24] +[Cl:1][C:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]#[N:12])[C:21]#[N:22].[F-:2]>>[Cl-:1].[F:2][C:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]#[N:12])[C:21]#[N:22] 
+[Cl-:2].[Cl:1][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[C:21]#[N:22]>>[Cl-:1].[Cl:2][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]#[N:12])[C:21]#[N:22] +[Br:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Br-:1].[F:2][C@@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[N:41]([H:42])[H:43])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24] +[Cl:1][C:5]([C:4]([H:3])([C:31]#[N:32])[C:41]#[N:42])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Cl-:1].[F:2][C:5]([C:4]([H:3])([C:31]#[N:32])[C:41]#[N:42])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24] +[Br:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([N:11]([H:12])[H:13])[H:21].[H-:2]>>[Br-:1].[H:2][C:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]#[N:42])([N:11]([H:12])[H:13])[H:21] +[Cl-:2].[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[H:21]>>[Cl-:1].[Cl:2][C@@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[H:21] +[Cl-:2].[F:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]([H:12])([H:13])[H:14])[H:21]>>[Cl:2][C@@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:1] +[Br:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N+:41]([O-:42])[O:43])([H:11])[C:21]([H:22])([H:23])[H:24].[Cl-:2]>>[Br-:1].[Cl:2][C@@:5]([C@:4]([H:3])([C:31]#[N:32])[N+:41]([O-:42])[O:43])([H:11])[C:21]([H:22])([H:23])[H:24] +[Br-:2].[F:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[H:21]>>[Br:2][C@@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:1] 
+[F-:2].[F:1][C:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([N:11]([H:12])[H:13])[N:21]([H:22])[H:23]>>[F-:1].[F:2][C:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([N:11]([H:12])[H:13])[N:21]([H:22])[H:23] +[Cl-:2].[Cl:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24]>>[Cl-:1].[Cl:2][C@@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24] +[F:1][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[H-:2]>>[F-:1].[H:2][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22] +[Br:1][C@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23].[Cl-:2]>>[Br-:1].[Cl:2][C@@:5]([C:4]([H:3])([H:31])[C:41]#[N:42])([N+:11]([O-:12])[O:13])[N:21]([H:22])[H:23] +[F-:2].[F:1][C@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[H:21]>>[F-:1].[F:2][C@@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([N+:11]([O-:12])[O:13])[H:21] +[Cl-:2].[Cl:1][C@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([H:11])[N+:21]([O-:22])[O:23]>>[Cl-:1].[Cl:2][C@@:5]([C:4]([H:3])([N:31]([H:32])[H:33])[H:41])([H:11])[N+:21]([O-:22])[O:23] +[F-:2].[F:1][C:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([H:11])[H:21]>>[F-:1].[F:2][C:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([H:11])[H:21] +[Br:1][C@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[H:21].[F-:2]>>[Br-:1].[F:2][C@@:5]([C@:4]([H:3])([C:31]#[N:32])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[H:21] +[Cl-:2].[Cl:1][C@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24]>>[Cl-:1].[Cl:2][C@@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24] +[Br-:2].[Br:1][C@:5]([C:4]([H:3])([C:31]#[N:32])[H:41])([C:11]#[N:12])[H:21]>>[Br-:1].[Br:2][C@@:5]([C:4]([H:3])([C:31]#[N:32])[H:41])([C:11]#[N:12])[H:21] 
+[Br-:2].[Cl:1][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24]>>[Br:2][C:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]([H:12])([H:13])[H:14])[C:21]([H:22])([H:23])[H:24].[Cl-:1] +[Cl:1][C@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[H:21].[H-:2]>>[Cl-:1].[H:2][C:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[H:21] +[Br:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([N:11]([H:12])[H:13])[C:21]#[N:22].[F-:2]>>[Br-:1].[F:2][C@@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([N:11]([H:12])[H:13])[C:21]#[N:22] +[Cl-:2].[F:1][C@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([H:11])[C:21]([H:22])([H:23])[H:24]>>[Cl:2][C@@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([H:11])[C:21]([H:22])([H:23])[H:24].[F-:1] +[Br:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]#[N:42])([N:11]([H:12])[H:13])[C:21]([H:22])([H:23])[H:24].[H-:2]>>[Br-:1].[H:2][C@@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]#[N:42])([N:11]([H:12])[H:13])[C:21]([H:22])([H:23])[H:24] +[F-:2].[F:1][C@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43])([N:11]([H:12])[H:13])[H:21]>>[F-:1].[F:2][C@@:5]([C@:4]([H:3])([C:31]([H:32])([H:33])[H:34])[N:41]([H:42])[H:43])([N:11]([H:12])[H:13])[H:21] +[Br:1][C@:5]([C:4]([H:3])([C:31]#[N:32])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24].[F-:2]>>[Br-:1].[F:2][C@@:5]([C:4]([H:3])([C:31]#[N:32])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24] +[Cl-:2].[F:1][C@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22]>>[Cl:2][C@@:5]([C:4]([H:3])([H:31])[N:41]([H:42])[H:43])([C:11]([H:12])([H:13])[H:14])[C:21]#[N:22].[F-:1] +[F:1][C@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23].[H-:2]>>[F-:1].[H:2][C@@:5]([C:4]([H:3])([H:31])[C:41]([H:42])([H:43])[H:44])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23] 
+[Br-:2].[Cl:1][C@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]#[N:12])[H:21]>>[Br:2][C@@:5]([C:4]([H:3])([C:31]([H:32])([H:33])[H:34])[H:41])([C:11]#[N:12])[H:21].[Cl-:1] +[Cl:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23].[F-:2]>>[Cl-:1].[F:2][C@@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([N:11]([H:12])[H:13])[N+:21]([O-:22])[O:23] +[F-:2].[F:1][C@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([N+:11]([O-:12])[O:13])[H:21]>>[F-:1].[F:2][C@@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([N+:11]([O-:12])[O:13])[H:21] +[Cl-:2].[F:1][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24]>>[Cl:2][C:5]([C:4]([H:3])([H:31])[H:41])([C:11]#[N:12])[C:21]([H:22])([H:23])[H:24].[F-:1] +[Cl-:2].[Cl:1][C@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([H:11])[C:21]#[N:22]>>[Cl-:1].[Cl:2][C@@:5]([C@:4]([H:3])([N:31]([H:32])[H:33])[C:41]([H:42])([H:43])[H:44])([H:11])[C:21]#[N:22] +[Br-:2].[Br:1][C@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[H:21]>>[Br-:1].[Br:2][C@@:5]([C:4]([H:3])([N+:31]([O-:32])[O:33])[H:41])([C:11]#[N:12])[H:21] +[Br-:2].[Br:1][C:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[C:41]#[N:42])([H:11])[H:21]>>[Br-:1].[Br:2][C:5]([C@:4]([H:3])([N+:31]([O-:32])[O:33])[C:41]#[N:42])([H:11])[H:21] +[CH4:1]>>[CH3:1][OH:2] +[CH3:1][Br:2]>>[CH4:1] +[CH3:2][Cl:1].[OH2:6]>>[CH3:2][OH:6].[Cl:1][H:7] +[H-:1].[C:2]([H:3])([H:4])([H:5])[Br:6]>>[H:1][C:2]([H:3])([H:4])[H:5].[Br-:6] +[C:1]([H:2])([H:3])([H:4])[OH:5].[F-:6]>>[C:1]([H:3])([H:4])=[O:5].[F:6][H:2] +[CH3:1][CH3:2]>>[CH3:1][CH3:2].[Na+:88].[Cl-:89] +[Na+:1].[CH3:2][OH:3]>>[Na:1][OH:3].[CH3:2][H:4] +[C:1](=[O:2])[OH:3].[H:4][OH:5]>>[C:1]([H:4])([OH:3])[OH:5] +[C:1]([H:2])([H:3])=[C:4]([H:5])[H:6]>>[C:1]([H:2])([H:3])([H:5])[C:4]([H:6])[OH:9] +[CH3:1][C:2](=[O:3])[OH:4].[H:5][OH:6]>>[CH3:1][C:2](=[O:3])[OH:6].[H:5][OH:4] diff --git 
a/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..af9e081 Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_ref.xz b/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_ref.xz new file mode 100644 index 0000000..89d0b7d Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_PROD_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..11b62d5 Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_ref.xz b/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_ref.xz new file mode 100644 index 0000000..afbd37e Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_REAC_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_BALANCE_ref.xz b/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_BALANCE_ref.xz new file mode 100644 index 0000000..40bdee4 Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_ref.xz b/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_ref.xz new file mode 100644 index 0000000..2cd2cc4 Binary files /dev/null and b/tests/data/sample_rxns_100_ORGANIC_REAC_PROD_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_PROD_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_RIGR_PROD_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..eda64cc Binary files /dev/null and b/tests/data/sample_rxns_100_RIGR_PROD_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_PROD_DIFF_ref.xz b/tests/data/sample_rxns_100_RIGR_PROD_DIFF_ref.xz new file mode 100644 index 0000000..80e91cf Binary 
files /dev/null and b/tests/data/sample_rxns_100_RIGR_PROD_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_REAC_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_RIGR_REAC_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..e0d892a Binary files /dev/null and b/tests/data/sample_rxns_100_RIGR_REAC_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_REAC_DIFF_ref.xz b/tests/data/sample_rxns_100_RIGR_REAC_DIFF_ref.xz new file mode 100644 index 0000000..3169b47 Binary files /dev/null and b/tests/data/sample_rxns_100_RIGR_REAC_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_REAC_PROD_BALANCE_ref.xz b/tests/data/sample_rxns_100_RIGR_REAC_PROD_BALANCE_ref.xz new file mode 100644 index 0000000..820e27e Binary files /dev/null and b/tests/data/sample_rxns_100_RIGR_REAC_PROD_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_RIGR_REAC_PROD_ref.xz b/tests/data/sample_rxns_100_RIGR_REAC_PROD_ref.xz new file mode 100644 index 0000000..a409e8e Binary files /dev/null and b/tests/data/sample_rxns_100_RIGR_REAC_PROD_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_PROD_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_V1_PROD_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..aee3d73 Binary files /dev/null and b/tests/data/sample_rxns_100_V1_PROD_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_PROD_DIFF_ref.xz b/tests/data/sample_rxns_100_V1_PROD_DIFF_ref.xz new file mode 100644 index 0000000..92f9b39 Binary files /dev/null and b/tests/data/sample_rxns_100_V1_PROD_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_REAC_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_V1_REAC_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..e560112 Binary files /dev/null and b/tests/data/sample_rxns_100_V1_REAC_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_REAC_DIFF_ref.xz b/tests/data/sample_rxns_100_V1_REAC_DIFF_ref.xz new file mode 100644 index 
0000000..ef86ded Binary files /dev/null and b/tests/data/sample_rxns_100_V1_REAC_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_REAC_PROD_BALANCE_ref.xz b/tests/data/sample_rxns_100_V1_REAC_PROD_BALANCE_ref.xz new file mode 100644 index 0000000..5e7df8a Binary files /dev/null and b/tests/data/sample_rxns_100_V1_REAC_PROD_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V1_REAC_PROD_ref.xz b/tests/data/sample_rxns_100_V1_REAC_PROD_ref.xz new file mode 100644 index 0000000..e7665c2 Binary files /dev/null and b/tests/data/sample_rxns_100_V1_REAC_PROD_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_PROD_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_V2_PROD_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..7c2fa6c Binary files /dev/null and b/tests/data/sample_rxns_100_V2_PROD_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_PROD_DIFF_ref.xz b/tests/data/sample_rxns_100_V2_PROD_DIFF_ref.xz new file mode 100644 index 0000000..d9e661d Binary files /dev/null and b/tests/data/sample_rxns_100_V2_PROD_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_REAC_DIFF_BALANCE_ref.xz b/tests/data/sample_rxns_100_V2_REAC_DIFF_BALANCE_ref.xz new file mode 100644 index 0000000..317daf9 Binary files /dev/null and b/tests/data/sample_rxns_100_V2_REAC_DIFF_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_REAC_DIFF_ref.xz b/tests/data/sample_rxns_100_V2_REAC_DIFF_ref.xz new file mode 100644 index 0000000..1fbc3b9 Binary files /dev/null and b/tests/data/sample_rxns_100_V2_REAC_DIFF_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_REAC_PROD_BALANCE_ref.xz b/tests/data/sample_rxns_100_V2_REAC_PROD_BALANCE_ref.xz new file mode 100644 index 0000000..666cc8c Binary files /dev/null and b/tests/data/sample_rxns_100_V2_REAC_PROD_BALANCE_ref.xz differ diff --git a/tests/data/sample_rxns_100_V2_REAC_PROD_ref.xz b/tests/data/sample_rxns_100_V2_REAC_PROD_ref.xz new file mode 100644 index 
0000000..ba7d0c1 Binary files /dev/null and b/tests/data/sample_rxns_100_V2_REAC_PROD_ref.xz differ diff --git a/tests/python/test_reaction_features.py b/tests/python/test_reaction_features.py new file mode 100644 index 0000000..0a55173 --- /dev/null +++ b/tests/python/test_reaction_features.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa: E501 +# SPDX-License-Identifier: Apache-2.0 + +import lzma +import os +import pickle + +import numpy as np +import pytest + +import cuik_molmaker + + +REACTION_MODES = [ + "REAC_DIFF", + "REAC_PROD", + "PROD_DIFF", + "REAC_DIFF_BALANCE", + "REAC_PROD_BALANCE", + "PROD_DIFF_BALANCE", +] + +# V1/V2/ORGANIC share the same bond features (bond_fdim=14 each, 28 total in CGR). +# RIGR uses reduced atom AND bond features (bond_fdim=2 each, 4 total in CGR). +FEATURIZER_CONFIGS = { + "V1": { + "atom_onehot": ["atomic-number", "total-degree", "formal-charge", + "chirality", "num-hydrogens", "hybridization"], + "atom_float": ["aromatic", "mass"], + "bond": ["is-null", "bond-type-onehot", "conjugated", "in-ring", "stereo"], + }, + "V2": { + "atom_onehot": ["atomic-number-common", "total-degree", "formal-charge", + "chirality", "num-hydrogens", "hybridization-expanded"], + "atom_float": ["aromatic", "mass"], + "bond": ["is-null", "bond-type-onehot", "conjugated", "in-ring", "stereo"], + }, + "ORGANIC": { + "atom_onehot": ["atomic-number-organic", "total-degree", "formal-charge", + "chirality", "num-hydrogens", "hybridization-organic"], + "atom_float": ["aromatic", "mass"], + "bond": ["is-null", "bond-type-onehot", "conjugated", "in-ring", "stereo"], + }, + "RIGR": { + "atom_onehot": ["atomic-number-common", "total-degree", "num-hydrogens"], + "atom_float": ["mass"], + "bond": ["is-null", "in-ring"], # RIGR reduces bond features too + }, +} + + +@pytest.mark.parametrize("atom_featurizer_version", list(FEATURIZER_CONFIGS.keys())) 
+@pytest.mark.parametrize("reaction_mode", REACTION_MODES) +def test_batch_reaction_featurizer(test_data_path, atom_featurizer_version, reaction_mode): + cfg = FEATURIZER_CONFIGS[atom_featurizer_version] + atom_onehot = cuik_molmaker.atom_onehot_feature_names_to_array(cfg["atom_onehot"]) + atom_float = cuik_molmaker.atom_float_feature_names_to_array(cfg["atom_float"]) + bond_feats = cuik_molmaker.bond_feature_names_to_array(cfg["bond"]) + mode_int = cuik_molmaker.reaction_mode_names_to_array([reaction_mode])[0] + + ref_file = f"sample_rxns_100_{atom_featurizer_version}_{reaction_mode}_ref.xz" + ref_path = os.path.join(test_data_path, ref_file) + with lzma.open(ref_path, "rb") as f: + ref = pickle.load(f) + + V, E, edge_index, rev_edge_index, batch = cuik_molmaker.batch_reaction_featurizer( + ref["reac_smiles"], + ref["prod_smiles"], + atom_onehot, + atom_float, + bond_feats, + True, # keep_h — required for atom-mapped reactions with explicit H + False, # add_h + False, # offset_carbon + mode_int, + ) + + np.testing.assert_allclose( + ref["V"], V, + err_msg=f"[{atom_featurizer_version}/{reaction_mode}] atom feats mismatch", + ) + np.testing.assert_allclose( + ref["E"], E, + err_msg=f"[{atom_featurizer_version}/{reaction_mode}] bond feats mismatch", + ) + np.testing.assert_allclose( + ref["edge_index"], edge_index, + err_msg=f"[{atom_featurizer_version}/{reaction_mode}] edge_index mismatch", + ) + np.testing.assert_allclose( + ref["rev_edge_index"], rev_edge_index, + err_msg=f"[{atom_featurizer_version}/{reaction_mode}] rev_edge_index mismatch", + ) + np.testing.assert_allclose( + ref["batch"], batch, + err_msg=f"[{atom_featurizer_version}/{reaction_mode}] batch mismatch", + )