Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rdkit"
version = "0.4.12"
edition = "2021"
edition = "2024"
authors = ["Xavier Lange <xrlange@gmail.com>", "Javier Pineda <javierjpineda13@gmail.com>"]
license = "MIT"
description = "High level RDKit functionality for rust"
Expand All @@ -17,7 +17,7 @@ cxx = "1"
flate2 = "1"
log = "0.4"
rdkit-sys = { path = "rdkit-sys", version = "0.4.9" }
thiserror = "1"
thiserror = "2"

[dev-dependencies]
env_logger = "0.9.0"
env_logger = "0.11"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ RDKit
---

A high-level library for performing common RDKit tasks such as SMILES parsing, molecule normalization, etc. Uses
the C++ API via bindings from [rdkit-sys](https://crates.io/crate/rdkit-sys).
the C++ API via bindings from [rdkit-sys](https://crates.io/crates/rdkit-sys).

Notice: Requires RDKit 2023.09.1 or higher (for example, the version packaged for Ubuntu Noble 24.04)

Expand Down
102 changes: 102 additions & 0 deletions benches/atom_iteration_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#![allow(soft_unstable)]
#![feature(test)]
extern crate test;

use rdkit::ROMol;

/// Drug-like molecules of varying size for realistic benchmarking.
/// These cover common pharmaceutical scaffolds and natural products.
///
/// Every benchmark in this file walks the whole set (seven entries), so each
/// reported timing is the aggregate cost across all of them rather than a
/// per-molecule figure. Adding or removing an entry therefore shifts every
/// benchmark's baseline.
const SMILES_SET: &[&str] = &[
// aspirin
"CC(=O)Oc1ccccc1C(=O)O",
// ibuprofen
"CC(C)Cc1ccc(cc1)C(C)C(=O)O",
// caffeine
"Cn1c(=O)c2c(ncn2C)n(C)c1=O",
// diazepam
"O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1C",
// atorvastatin (lipitor)
"CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O",
// taxol core
"CC1=C2C(OC(=O)c3ccccc3)C(O)C4(OC(=O)C(O)(CC(OC(=O)c5ccccc5)C1O)C24C)C(=O)c1ccc(OC)cc1",
// vancomycin fragment
"OC1C(O)C(OC2C(O)C(O)C(O)C(CO)O2)OC(CO)C1NC(=O)C1CC(O)CN1C(=O)C(NC(=O)C1CC(=O)NC(=O)C1O)C(O)c1ccc(O)cc1",
];

/// Baseline measurement: the cost of parsing every entry of `SMILES_SET`
/// into an `ROMol`, with no further work done on the parsed molecule.
///
/// Panics (via `unwrap`) if any entry fails to parse.
#[bench]
fn bench_parse_smiles(b: &mut test::bench::Bencher) {
    b.iter(|| {
        SMILES_SET.iter().for_each(|smiles| {
            let parsed = ROMol::from_smiles(smiles).unwrap();
            // Keep the optimizer from discarding the parse as dead work.
            test::black_box(parsed);
        });
    });
}

/// Featurization-style workload through the shared-borrow accessor.
///
/// Molecules are parsed once up front (outside the timed closure); the timed
/// section visits every atom of every molecule via `atom_ref` (`&self`) and
/// reads seven properties from each atom.
#[bench]
fn bench_atom_ref_all_properties(b: &mut test::bench::Bencher) {
    // Build the fixture before timing starts so parsing cost is excluded.
    let mut mols: Vec<ROMol> = Vec::with_capacity(SMILES_SET.len());
    for smiles in SMILES_SET {
        mols.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        mols.iter().for_each(|mol| {
            let atom_count = mol.num_atoms(true);
            (0..atom_count).for_each(|idx| {
                let atom = mol.atom_ref(idx);
                // Each read is wrapped in black_box so none can be elided.
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            });
        });
    });
}

/// The same seven-property workload, but through `atom_with_idx`, which
/// needs `&mut self`.
///
/// Acts as a regression guard: the timing is expected to match
/// `bench_atom_ref_all_properties`.
#[bench]
fn bench_atom_mut_all_properties(b: &mut test::bench::Bencher) {
    // Build the fixture before timing starts so parsing cost is excluded.
    let mut mols: Vec<ROMol> = Vec::with_capacity(SMILES_SET.len());
    for smiles in SMILES_SET {
        mols.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        for mol in mols.iter_mut() {
            // The atom count is read once, before any mutable atom borrow.
            let atom_count = mol.num_atoms(true);
            for idx in 0..atom_count {
                let atom = mol.atom_with_idx(idx);
                // Each read is wrapped in black_box so none can be elided.
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// Measures `ROMol::clone` in isolation.
///
/// Useful for judging how expensive it is to clone a molecule when only a
/// `&ROMol` is available but a mutable one is required.
#[bench]
fn bench_clone_molecules(b: &mut test::bench::Bencher) {
    // Build the fixture before timing starts so parsing cost is excluded.
    let mut mols: Vec<ROMol> = Vec::with_capacity(SMILES_SET.len());
    for smiles in SMILES_SET {
        mols.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        mols.iter().for_each(|mol| {
            let copy = mol.clone();
            // Keep the optimizer from discarding the clone as dead work.
            test::black_box(copy);
        });
    });
}
6 changes: 3 additions & 3 deletions rdkit-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "rdkit-sys"
authors = ["Xavier Lange (xrlange@gmail.com)", "chrissly31415"]
version = "0.4.12"
edition = "2021"
edition = "2024"
license = "MIT"
description = "RDKit CFFI library builder and bindings"
repository = "https://github.com/rdkit-rs/rdkit/tree/main/rdkit-sys"
Expand All @@ -14,9 +14,9 @@ exclude = ["rdkit-*", "*.tar.gz", "examples/"]
cxx = "1.0.109"

[build-dependencies]
env_logger = "0.10.0"
env_logger = "0.11"
cxx-build = "1.0.109"
which = "4.4.2"
which = "8"

[features]
default = []
Expand Down
12 changes: 8 additions & 4 deletions rdkit-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const CPP_VERSION_FLAG: &str = "-std=c++17";
const CPP_VERSION_FLAG: &str = "-std=c++20";

fn main() {
if std::env::var("DOCS_RS").is_ok() {
Expand Down Expand Up @@ -122,17 +122,21 @@ fn main() {
// "ChemReactions",
// "ChemTransforms",
"DataStructs",
// "Depictor",
"Depictor",
"Descriptors",
"DistGeomHelpers",
"EigenSolvers",
"FileParsers",
"Fingerprints",
"ForceField",
"ForceFieldHelpers",
// "GenericGroups",
"GraphMol",
"MolStandardize",
// "MolTransforms",
// "PartialCharges",
"PartialCharges",
"RDGeneral",
// "RDGeometryLib",
"RDGeometryLib",
// "RingDecomposerLib",
"ScaffoldNetwork",
"SmilesParse",
Expand Down
4 changes: 2 additions & 2 deletions rdkit-sys/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ trailing_comma = "Vertical"
match_block_trailing_comma = false
blank_lines_upper_bound = 1
blank_lines_lower_bound = 0
edition = "2021"
version = "One"
edition = "2024"
style_edition = "2024"
inline_attribute_width = 0
format_generated_files = true
merge_derives = true
Expand Down
23 changes: 23 additions & 0 deletions rdkit-sys/src/bridge/conformer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// cxx bridge for conformer generation and coordinate access. Each function
// declared here is implemented in the RDKit namespace by
// wrapper/src/conformer.cc and declared in wrapper/include/conformer.h.
//
// NOTE(review): none of these functions is declared as returning `Result`,
// so a C++ exception thrown inside them cannot be surfaced to Rust as an
// error. `conformer_is_3d` and `get_atom_pos` look up a conformer by id and
// (for the latter) an atom by index; presumably RDKit throws when either is
// out of range -- confirm that callers validate both before calling.
#[cxx::bridge(namespace = "RDKit")]
pub mod ffi {
unsafe extern "C++" {
include!("wrapper/include/conformer.h");

// Re-uses the ROMol type already declared by the ro_mol bridge.
pub type ROMol = crate::ro_mol_ffi::ROMol;

/// Calls `DGeomHelpers::EmbedMolecule` on `mol` with RDKit's default
/// arguments and returns its integer result (presumably the id of the
/// new conformer, negative on failure -- confirm against RDKit docs).
pub fn embed_molecule(mol: &mut SharedPtr<ROMol>) -> i32;
/// Calls `DGeomHelpers::EmbedMultipleConfs` asking for `num_confs`
/// conformers and returns the ids it reports, copied into a new vector.
pub fn embed_multiple_confs(
mol: &mut SharedPtr<ROMol>,
num_confs: u32,
) -> UniquePtr<CxxVector<i32>>;
/// Calls `RDDepict::compute2DCoords` on `mol` and returns its unsigned
/// result (presumably the id of the 2D conformer -- confirm).
pub fn compute_2d_coords(mol: &mut SharedPtr<ROMol>) -> u32;

/// Returns the number of conformers currently stored on `mol`.
pub fn mol_get_num_conformers(mol: &SharedPtr<ROMol>) -> u32;
/// Reports whether the conformer with id `conf_id` is flagged as 3D.
pub fn conformer_is_3d(mol: &SharedPtr<ROMol>, conf_id: i32) -> bool;
/// Returns the position of atom `atom_idx` in conformer `conf_id` as a
/// three-element vector in x, y, z order.
pub fn get_atom_pos(
mol: &SharedPtr<ROMol>,
conf_id: i32,
atom_idx: u32,
) -> UniquePtr<CxxVector<f64>>;
}
}
5 changes: 4 additions & 1 deletion rdkit-sys/src/bridge/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
mod conformer;
pub use conformer::ffi as conformer_ffi;

mod descriptors;
pub use descriptors::ffi as descriptors_ffi;

Expand All @@ -11,7 +14,7 @@ mod mol_standardize;
pub use mol_standardize::ffi as mol_standardize_ffi;

mod periodic_table;
pub use periodic_table::{ffi as periodic_table_ffi, PeriodicTableOps};
pub use periodic_table::{PeriodicTableOps, ffi as periodic_table_ffi};

mod ro_mol;
pub use ro_mol::ffi as ro_mol_ffi;
Expand Down
9 changes: 9 additions & 0 deletions rdkit-sys/src/bridge/mol_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,14 @@ pub mod ffi {
pub fn romol_set_hybridization(mol: &mut SharedPtr<ROMol>);

pub fn clean_up(rw_mol: &mut SharedPtr<RWMol>);

pub fn set_aromaticity(mol: &mut SharedPtr<RWMol>);
pub fn assign_stereochemistry(mol: &mut SharedPtr<ROMol>);
pub fn mol_get_formal_charge(mol: &SharedPtr<ROMol>) -> i32;

pub type ROMolVec;
pub fn get_mol_frags(mol: &SharedPtr<ROMol>) -> SharedPtr<ROMolVec>;
pub fn romol_vec_size(vec: &SharedPtr<ROMolVec>) -> u32;
pub fn romol_vec_get(vec: &SharedPtr<ROMolVec>, idx: u32) -> SharedPtr<ROMol>;
}
}
2 changes: 1 addition & 1 deletion rdkit-sys/src/bridge/periodic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub trait PeriodicTableOps {
fn getElementName(self, atomic_number: u32) -> String;
fn getValenceList(self, atomic_number: u32) -> &'static CxxVector<i32>;
}
impl<'a> PeriodicTableOps for UniquePtr<PeriodicTable> {
impl PeriodicTableOps for UniquePtr<PeriodicTable> {
fn getElementSymbol(self, atomic_number: u32) -> String {
ffi::getElementSymbol(atomic_number)
}
Expand Down
1 change: 1 addition & 0 deletions rdkit-sys/src/bridge/ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod ffi {

pub fn get_num_atoms(mol: &SharedPtr<ROMol>, onlyExplicit: bool) -> u32;
pub fn get_atom_with_idx(mol: &mut SharedPtr<ROMol>, idx: u32) -> Pin<&mut Atom>;
pub fn get_atom_with_idx_const(mol: &SharedPtr<ROMol>, idx: u32) -> Pin<&Atom>;
pub fn get_symbol(atom: Pin<&Atom>) -> String;
pub fn get_is_aromatic(atom: Pin<&Atom>) -> bool;
pub fn get_atomic_num(atom: Pin<&Atom>) -> i32;
Expand Down
4 changes: 3 additions & 1 deletion rdkit-sys/tests/test_atoms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ fn test_atoms() {

assert_eq!(
&atoms,
&["C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"]
&[
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"
]
);

let mut atom = rdkit_sys::ro_mol_ffi::get_atom_with_idx(&mut romol, 2);
Expand Down
5 changes: 4 additions & 1 deletion rdkit-sys/tests/test_ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ fn mol_to_molblock_test() {
cxx::let_cxx_string!(smiles = "CC");
let romol = rdkit_sys::ro_mol_ffi::smiles_to_mol(&smiles).unwrap();
let molblock = rdkit_sys::ro_mol_ffi::mol_to_molblock(&romol);
assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n");
assert_eq!(
molblock,
"\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"
);
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions rdkit-sys/tests/test_rw_mol.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cxx::{let_cxx_string, SharedPtr};
use cxx::{SharedPtr, let_cxx_string};
use rdkit_sys::{ro_mol_ffi::ROMol, rw_mol_ffi::RWMol};

#[test]
Expand Down Expand Up @@ -191,7 +191,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C
let ro_mol = unsafe { std::mem::transmute::<SharedPtr<RWMol>, SharedPtr<ROMol>>(rw_mol) };

let smiles = rdkit_sys::ro_mol_ffi::mol_to_smiles(&ro_mol);
assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &smiles);
assert_eq!(
"[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]",
&smiles
);
}

#[test]
Expand Down
17 changes: 17 additions & 0 deletions rdkit-sys/wrapper/include/conformer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "rust/cxx.h"
#include <GraphMol/GraphMol.h>

// Free-function wrappers around RDKit conformer APIs, shaped for the cxx
// bridge in rdkit-sys/src/bridge/conformer.rs (smart-pointer parameters,
// std::vector results behind std::unique_ptr).
namespace RDKit {

// Coordinate generation. These take a non-const shared_ptr because they add
// conformers to the molecule.
// embed_molecule: forwards to DGeomHelpers::EmbedMolecule with default arguments.
int embed_molecule(std::shared_ptr<ROMol> &mol);
// embed_multiple_confs: forwards to DGeomHelpers::EmbedMultipleConfs and
// returns the reported conformer ids as a new vector.
std::unique_ptr<std::vector<int32_t>> embed_multiple_confs(std::shared_ptr<ROMol> &mol, unsigned int num_confs);
// compute_2d_coords: forwards to RDDepict::compute2DCoords.
unsigned int compute_2d_coords(std::shared_ptr<ROMol> &mol);

// Read-only conformer queries.
// mol_get_num_conformers: number of conformers stored on the molecule.
unsigned int mol_get_num_conformers(const std::shared_ptr<ROMol> &mol);
// conformer_is_3d: is3D() flag of the conformer with id conf_id.
bool conformer_is_3d(const std::shared_ptr<ROMol> &mol, int conf_id);
// get_atom_pos: {x, y, z} of atom atom_idx in conformer conf_id.
std::unique_ptr<std::vector<double>> get_atom_pos(const std::shared_ptr<ROMol> &mol, int conf_id,
unsigned int atom_idx);

} // namespace RDKit
9 changes: 9 additions & 0 deletions rdkit-sys/wrapper/include/mol_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,13 @@ void romol_set_hybridization(std::shared_ptr<ROMol> &mol);

// pub fn clean_up(rw_mol: &mut SharedPtr<RWMol>)
void clean_up(std::shared_ptr<RWMol> &rw_mol);

void set_aromaticity(std::shared_ptr<RWMol> &mol);
void assign_stereochemistry(std::shared_ptr<ROMol> &mol);
int mol_get_formal_charge(const std::shared_ptr<ROMol> &mol);

struct ROMolVec;
std::shared_ptr<ROMolVec> get_mol_frags(const std::shared_ptr<ROMol> &mol);
unsigned int romol_vec_size(const std::shared_ptr<ROMolVec> &vec);
std::shared_ptr<ROMol> romol_vec_get(const std::shared_ptr<ROMolVec> &vec, unsigned int idx);
} // namespace RDKit
2 changes: 2 additions & 0 deletions rdkit-sys/wrapper/include/ro_mol.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ unsigned int atom_sanitize_exception_get_atom_idx(const MolSanitizeExceptionUniq

unsigned int get_num_atoms(const std::shared_ptr<ROMol> &mol, bool only_explicit);
Atom &get_atom_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx);
const Atom &get_atom_with_idx_const(const std::shared_ptr<ROMol> &mol,
unsigned int idx);
rust::String get_symbol(const Atom &atom);
bool get_is_aromatic(const Atom &atom);
int get_atomic_num(const Atom &atom);
Expand Down
27 changes: 27 additions & 0 deletions rdkit-sys/wrapper/src/conformer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "rust/cxx.h"
#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/DistGeomHelpers/Embedder.h>
#include <GraphMol/GraphMol.h>

namespace RDKit {

int embed_molecule(std::shared_ptr<ROMol> &mol) { return DGeomHelpers::EmbedMolecule(*mol); }

std::unique_ptr<std::vector<int32_t>> embed_multiple_confs(std::shared_ptr<ROMol> &mol, unsigned int num_confs) {
auto ids = DGeomHelpers::EmbedMultipleConfs(*mol, num_confs);
return std::make_unique<std::vector<int32_t>>(ids.begin(), ids.end());
}

unsigned int compute_2d_coords(std::shared_ptr<ROMol> &mol) { return RDDepict::compute2DCoords(*mol); }

unsigned int mol_get_num_conformers(const std::shared_ptr<ROMol> &mol) { return mol->getNumConformers(); }

bool conformer_is_3d(const std::shared_ptr<ROMol> &mol, int conf_id) { return mol->getConformer(conf_id).is3D(); }

std::unique_ptr<std::vector<double>> get_atom_pos(const std::shared_ptr<ROMol> &mol, int conf_id,
unsigned int atom_idx) {
const auto &pos = mol->getConformer(conf_id).getAtomPos(atom_idx);
return std::make_unique<std::vector<double>>(std::vector<double>{pos.x, pos.y, pos.z});
}

} // namespace RDKit
Loading