Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rdkit"
version = "0.4.12"
edition = "2021"
edition = "2024"
authors = ["Xavier Lange <xrlange@gmail.com>", "Javier Pineda <javierjpineda13@gmail.com>"]
license = "MIT"
description = "High level RDKit functionality for rust"
Expand All @@ -17,7 +17,7 @@ cxx = "1"
flate2 = "1"
log = "0.4"
rdkit-sys = { path = "rdkit-sys", version = "0.4.9" }
thiserror = "1"
thiserror = "2"

[dev-dependencies]
env_logger = "0.9.0"
env_logger = "0.11"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ RDKit
---

A high-level library for performing common RDKit tasks such as SMILES parsing, molecule normalization, etc. Uses
the C++ API via bindings from [rdkit-sys](https://crates.io/crate/rdkit-sys).
the C++ API via bindings from [rdkit-sys](https://crates.io/crates/rdkit-sys).

Notice: Requires RDKit 2023.09.1 or higher (for example, the version packaged with Ubuntu Noble 24.04)

Expand Down
102 changes: 102 additions & 0 deletions benches/atom_iteration_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#![allow(soft_unstable)]
#![feature(test)]
extern crate test;

use rdkit::ROMol;

// Individual benchmark inputs, named after the compound each SMILES encodes.
const ASPIRIN: &str = "CC(=O)Oc1ccccc1C(=O)O";
const IBUPROFEN: &str = "CC(C)Cc1ccc(cc1)C(C)C(=O)O";
const CAFFEINE: &str = "Cn1c(=O)c2c(ncn2C)n(C)c1=O";
const DIAZEPAM: &str = "O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1C";
// Also known as Lipitor.
const ATORVASTATIN: &str =
    "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O";
const TAXOL_CORE: &str =
    "CC1=C2C(OC(=O)c3ccccc3)C(O)C4(OC(=O)C(O)(CC(OC(=O)c5ccccc5)C1O)C24C)C(=O)c1ccc(OC)cc1";
const VANCOMYCIN_FRAGMENT: &str =
    "OC1C(O)C(OC2C(O)C(O)C(O)C(CO)O2)OC(CO)C1NC(=O)C1CC(O)CN1C(=O)C(NC(=O)C1CC(=O)NC(=O)C1O)C(O)c1ccc(O)cc1";

/// Input set shared by every benchmark in this file: drug-like molecules of
/// varying size, covering common pharmaceutical scaffolds and natural
/// products, so that timings reflect realistic inputs.
const SMILES_SET: &[&str] = &[
    ASPIRIN,
    IBUPROFEN,
    CAFFEINE,
    DIAZEPAM,
    ATORVASTATIN,
    TAXOL_CORE,
    VANCOMYCIN_FRAGMENT,
];

/// Baseline measurement: the cost of turning every entry of `SMILES_SET`
/// into an `ROMol`. Nothing but parsing happens inside the timed closure.
#[bench]
fn bench_parse_smiles(b: &mut test::bench::Bencher) {
    b.iter(|| {
        SMILES_SET.iter().for_each(|smiles| {
            // black_box keeps the optimizer from discarding the parsed molecule.
            test::black_box(ROMol::from_smiles(smiles).unwrap());
        });
    });
}

/// Featurization-style workload through shared access: every atom of every
/// molecule is fetched with `atom_ref` and seven of its properties are read.
/// Molecules are parsed once, before the timed closure starts.
#[bench]
fn bench_atom_ref_all_properties(b: &mut test::bench::Bencher) {
    // Setup (untimed): parse the whole input set up front.
    let mut molecules: Vec<ROMol> = Vec::new();
    for smiles in SMILES_SET {
        molecules.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        molecules.iter().for_each(|molecule| {
            for idx in 0..molecule.num_atoms(true) {
                let atom = molecule.atom_ref(idx);
                // Each read is fed to black_box so it cannot be optimized away.
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        });
    });
}

/// The same seven-property workload as the `atom_ref` benchmark, but each
/// atom is obtained through `atom_with_idx`, which needs mutable access to
/// the molecule. Serves as a regression guard: the two paths are expected to
/// run at the same speed.
#[bench]
fn bench_atom_mut_all_properties(b: &mut test::bench::Bencher) {
    // Setup (untimed): parse the whole input set up front.
    let mut molecules: Vec<ROMol> = Vec::new();
    for smiles in SMILES_SET {
        molecules.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        for molecule in molecules.iter_mut() {
            // The atom count is evaluated once, before the mutable lookups begin.
            for idx in 0..molecule.num_atoms(true) {
                let atom = molecule.atom_with_idx(idx);
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// Measures `clone()` in isolation. This is the price paid when only a
/// `&ROMol` is available but an operation requiring mutation is needed.
#[bench]
fn bench_clone_molecules(b: &mut test::bench::Bencher) {
    // Setup (untimed): parse the whole input set up front.
    let mut molecules: Vec<ROMol> = Vec::new();
    for smiles in SMILES_SET {
        molecules.push(ROMol::from_smiles(smiles).unwrap());
    }

    b.iter(|| {
        molecules.iter().for_each(|molecule| {
            test::black_box(molecule.clone());
        });
    });
}
6 changes: 3 additions & 3 deletions rdkit-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "rdkit-sys"
authors = ["Xavier Lange (xrlange@gmail.com)", "chrissly31415"]
version = "0.4.12"
edition = "2021"
edition = "2024"
license = "MIT"
description = "RDKit CFFI library builder and bindings"
repository = "https://github.com/rdkit-rs/rdkit/tree/main/rdkit-sys"
Expand All @@ -14,9 +14,9 @@ exclude = ["rdkit-*", "*.tar.gz", "examples/"]
cxx = "1.0.109"

[build-dependencies]
env_logger = "0.10.0"
env_logger = "0.11"
cxx-build = "1.0.109"
which = "4.4.2"
which = "8"

[features]
default = []
Expand Down
2 changes: 1 addition & 1 deletion rdkit-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const CPP_VERSION_FLAG: &str = "-std=c++17";
const CPP_VERSION_FLAG: &str = "-std=c++20";

fn main() {
if std::env::var("DOCS_RS").is_ok() {
Expand Down
4 changes: 2 additions & 2 deletions rdkit-sys/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ trailing_comma = "Vertical"
match_block_trailing_comma = false
blank_lines_upper_bound = 1
blank_lines_lower_bound = 0
edition = "2021"
version = "One"
edition = "2024"
style_edition = "2024"
inline_attribute_width = 0
format_generated_files = true
merge_derives = true
Expand Down
81 changes: 81 additions & 0 deletions rdkit-sys/src/bridge/bond.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// cxx bridge for RDKit bond access. The C++ side of every item below is
// declared in wrapper/include/bond.h, inside the `RDKit` namespace.
#[cxx::bridge(namespace = "RDKit")]
pub mod ffi {
// Bond type shared with C++: the `type BondType;` declaration in the extern
// block below ties this enum to the C++ type of the same name. No explicit
// discriminants are written, so the variant order determines the i32 values.
// NOTE(review): confirm the order matches the C++ enum of the RDKit release
// being built against.
#[repr(i32)]
#[derive(Debug, PartialEq)]
pub enum BondType {
UNSPECIFIED,
SINGLE,
DOUBLE,
TRIPLE,
QUADRUPLE,
QUINTUPLE,
HEXTUPLE,
ONEANDAHALF,
TWOANDAHALF,
THREEANDAHALF,
FOURANDAHALF,
FIVEANDAHALF,
AROMATIC,
IONIC,
HYDROGEN,
THREECENTER,
DATIVEONE,
DATIVE,
DATIVEL,
DATIVER,
OTHER,
ZERO,
}

// Bond stereo descriptor, shared with C++ in the same way as BondType.
// NOTE(review): only the variants listed here are nameable from Rust; check
// whether the targeted RDKit release defines additional values.
#[repr(i32)]
#[derive(Debug, PartialEq)]
pub enum BondStereo {
STEREONONE,
STEREOANY,
STEREOZ,
STEREOE,
STEREOCIS,
STEREOTRANS,
}

// Bond direction, shared with C++ in the same way as BondType.
#[repr(i32)]
#[derive(Debug, PartialEq)]
pub enum BondDir {
NONE,
BEGINWEDGE,
BEGINDASH,
ENDDOWNRIGHT,
ENDUPRIGHT,
EITHERDOUBLE,
UNKNOWN,
}

unsafe extern "C++" {
include!("wrapper/include/bond.h");

// ROMol is reused from the ro_mol bridge rather than declared a second time.
pub type ROMol = crate::ro_mol_ffi::ROMol;
// Opaque C++ bond type; only ever handled behind a reference.
pub type Bond;
// These three bind the shared enums above to their C++ definitions.
pub type BondType;
pub type BondStereo;
pub type BondDir;

// Bond count of `mol`. `only_heavy` is forwarded to the wrapper;
// presumably it restricts the count to bonds between heavy atoms — TODO confirm.
pub fn get_num_bonds(mol: &SharedPtr<ROMol>, only_heavy: bool) -> u32;
// Bond at index `idx` as a pinned mutable reference; needs `&mut` access to
// the SharedPtr even though the accessors below only take `Pin<&Bond>`.
pub fn get_bond_with_idx(mol: &mut SharedPtr<ROMol>, idx: u32) -> Pin<&mut Bond>;
// Index of the bond joining atoms `begin_idx` and `end_idx`. The return type
// is signed; presumably a negative value signals "no such bond" — TODO confirm
// against the C++ implementation.
pub fn get_bond_idx_between_atoms(
mol: &SharedPtr<ROMol>,
begin_idx: u32,
end_idx: u32,
) -> i32;

// Read-only accessors: each takes a shared pinned reference to a bond.
pub fn bond_get_bond_type(bond: Pin<&Bond>) -> BondType;
pub fn bond_get_bond_type_as_double(bond: Pin<&Bond>) -> f64;
pub fn bond_get_begin_atom_idx(bond: Pin<&Bond>) -> u32;
pub fn bond_get_end_atom_idx(bond: Pin<&Bond>) -> u32;
// Presumably the index of the atom at the opposite end from `this_idx` — TODO confirm.
pub fn bond_get_other_atom_idx(bond: Pin<&Bond>, this_idx: u32) -> u32;
pub fn bond_get_is_aromatic(bond: Pin<&Bond>) -> bool;
pub fn bond_get_is_conjugated(bond: Pin<&Bond>) -> bool;
pub fn bond_get_stereo(bond: Pin<&Bond>) -> BondStereo;
pub fn bond_get_bond_dir(bond: Pin<&Bond>) -> BondDir;
pub fn bond_get_idx(bond: Pin<&Bond>) -> u32;
}
}
5 changes: 4 additions & 1 deletion rdkit-sys/src/bridge/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
mod bond;
pub use bond::ffi as bond_ffi;

mod descriptors;
pub use descriptors::ffi as descriptors_ffi;

Expand All @@ -11,7 +14,7 @@ mod mol_standardize;
pub use mol_standardize::ffi as mol_standardize_ffi;

mod periodic_table;
pub use periodic_table::{ffi as periodic_table_ffi, PeriodicTableOps};
pub use periodic_table::{PeriodicTableOps, ffi as periodic_table_ffi};

mod ro_mol;
pub use ro_mol::ffi as ro_mol_ffi;
Expand Down
3 changes: 3 additions & 0 deletions rdkit-sys/src/bridge/mol_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,8 @@ pub mod ffi {
pub fn romol_set_hybridization(mol: &mut SharedPtr<ROMol>);

pub fn clean_up(rw_mol: &mut SharedPtr<RWMol>);

// Both functions mutate the molecule in place and return `Result<()>`, so a
// C++ exception thrown by the wrapper reaches Rust as an `Err` value.
pub fn sanitize_mol(mol: &mut SharedPtr<RWMol>) -> Result<()>;
pub fn kekulize_mol(mol: &mut SharedPtr<RWMol>) -> Result<()>;
}
}
2 changes: 1 addition & 1 deletion rdkit-sys/src/bridge/periodic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub trait PeriodicTableOps {
fn getElementName(self, atomic_number: u32) -> String;
fn getValenceList(self, atomic_number: u32) -> &'static CxxVector<i32>;
}
impl<'a> PeriodicTableOps for UniquePtr<PeriodicTable> {
impl PeriodicTableOps for UniquePtr<PeriodicTable> {
fn getElementSymbol(self, atomic_number: u32) -> String {
ffi::getElementSymbol(atomic_number)
}
Expand Down
1 change: 1 addition & 0 deletions rdkit-sys/src/bridge/ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod ffi {

pub fn get_num_atoms(mol: &SharedPtr<ROMol>, onlyExplicit: bool) -> u32;
pub fn get_atom_with_idx(mol: &mut SharedPtr<ROMol>, idx: u32) -> Pin<&mut Atom>;
// Shared-borrow counterpart of `get_atom_with_idx`: takes `&SharedPtr<ROMol>`
// and hands back `Pin<&Atom>`, which is what the accessors below accept.
pub fn get_atom_with_idx_const(mol: &SharedPtr<ROMol>, idx: u32) -> Pin<&Atom>;
pub fn get_symbol(atom: Pin<&Atom>) -> String;
pub fn get_is_aromatic(atom: Pin<&Atom>) -> bool;
pub fn get_atomic_num(atom: Pin<&Atom>) -> i32;
Expand Down
13 changes: 13 additions & 0 deletions rdkit-sys/src/bridge/rw_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,18 @@ pub mod ffi {
pub fn rw_mol_to_ro_mol(mol: SharedPtr<RWMol>) -> SharedPtr<ROMol>;

pub fn smarts_to_mol(smarts: &CxxString) -> Result<SharedPtr<RWMol>>;

// Molecule editing: create an RWMol and add or remove atoms and bonds in
// place. Every mutating call takes `&mut SharedPtr<RWMol>`.
// NOTE(review): the meaning of the u32 values returned by the add_* calls
// (new atom index? updated bond count?) is defined by the C++ wrapper — confirm.
pub fn new_rw_mol() -> SharedPtr<RWMol>;
pub fn rw_mol_add_atom(mol: &mut SharedPtr<RWMol>, atomic_num: u32) -> u32;
// NOTE(review): `bond_order` is a raw i32; how it maps to a bond type is
// decided on the C++ side — confirm before relying on specific values.
pub fn rw_mol_add_bond(
mol: &mut SharedPtr<RWMol>,
begin_idx: u32,
end_idx: u32,
bond_order: i32,
) -> u32;
pub fn rw_mol_remove_atom(mol: &mut SharedPtr<RWMol>, idx: u32);
// The bond to remove is identified by its two end atoms, not by a bond index.
pub fn rw_mol_remove_bond(mol: &mut SharedPtr<RWMol>, begin_idx: u32, end_idx: u32);
// Read-only query; takes a shared reference, unlike the editing calls above.
pub fn rw_mol_get_num_atoms(mol: &SharedPtr<RWMol>, only_explicit: bool) -> u32;
}
}
4 changes: 3 additions & 1 deletion rdkit-sys/tests/test_atoms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ fn test_atoms() {

assert_eq!(
&atoms,
&["C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"]
&[
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"
]
);

let mut atom = rdkit_sys::ro_mol_ffi::get_atom_with_idx(&mut romol, 2);
Expand Down
5 changes: 4 additions & 1 deletion rdkit-sys/tests/test_ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ fn mol_to_molblock_test() {
cxx::let_cxx_string!(smiles = "CC");
let romol = rdkit_sys::ro_mol_ffi::smiles_to_mol(&smiles).unwrap();
let molblock = rdkit_sys::ro_mol_ffi::mol_to_molblock(&romol);
assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n");
assert_eq!(
molblock,
"\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"
);
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions rdkit-sys/tests/test_rw_mol.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cxx::{let_cxx_string, SharedPtr};
use cxx::{SharedPtr, let_cxx_string};
use rdkit_sys::{ro_mol_ffi::ROMol, rw_mol_ffi::RWMol};

#[test]
Expand Down Expand Up @@ -191,7 +191,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C
let ro_mol = unsafe { std::mem::transmute::<SharedPtr<RWMol>, SharedPtr<ROMol>>(rw_mol) };

let smiles = rdkit_sys::ro_mol_ffi::mol_to_smiles(&ro_mol);
assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &smiles);
assert_eq!(
"[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]",
&smiles
);
}

#[test]
Expand Down
27 changes: 27 additions & 0 deletions rdkit-sys/wrapper/include/bond.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// C++ declarations backing the Rust cxx bridge in rdkit-sys/src/bridge/bond.rs.
// Every function here has a matching `pub fn` in that bridge.
#pragma once

#include "rust/cxx.h"
#include <GraphMol/GraphMol.h>

namespace RDKit {

// Lift the nested Bond enums to namespace scope so the bridge, which declares
// BondType / BondStereo / BondDir directly in namespace RDKit, can name them.
using BondType = Bond::BondType;
using BondStereo = Bond::BondStereo;
using BondDir = Bond::BondDir;

// Molecule-level bond lookups.
unsigned int get_num_bonds(const std::shared_ptr<ROMol> &mol, bool only_heavy);
// Returns a mutable reference, so the molecule pointer is taken non-const.
Bond &get_bond_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx);
// Signed return value; presumably negative when the atoms are not bonded — TODO confirm.
int get_bond_idx_between_atoms(const std::shared_ptr<ROMol> &mol, unsigned int begin_idx, unsigned int end_idx);

// Read-only accessors on a single bond.
BondType bond_get_bond_type(const Bond &bond);
double bond_get_bond_type_as_double(const Bond &bond);
unsigned int bond_get_begin_atom_idx(const Bond &bond);
unsigned int bond_get_end_atom_idx(const Bond &bond);
unsigned int bond_get_other_atom_idx(const Bond &bond, unsigned int this_idx);
bool bond_get_is_aromatic(const Bond &bond);
bool bond_get_is_conjugated(const Bond &bond);
BondStereo bond_get_stereo(const Bond &bond);
BondDir bond_get_bond_dir(const Bond &bond);
unsigned int bond_get_idx(const Bond &bond);

} // namespace RDKit
2 changes: 2 additions & 0 deletions rdkit-sys/wrapper/include/mol_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,6 @@ void romol_set_hybridization(std::shared_ptr<ROMol> &mol);

// pub fn clean_up(rw_mol: &mut SharedPtr<RWMol>)
void clean_up(std::shared_ptr<RWMol> &rw_mol);
// pub fn sanitize_mol(mol: &mut SharedPtr<RWMol>) -> Result<()>
void sanitize_mol(std::shared_ptr<RWMol> &mol);
// pub fn kekulize_mol(mol: &mut SharedPtr<RWMol>) -> Result<()>
void kekulize_mol(std::shared_ptr<RWMol> &mol);
} // namespace RDKit
2 changes: 2 additions & 0 deletions rdkit-sys/wrapper/include/ro_mol.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ unsigned int atom_sanitize_exception_get_atom_idx(const MolSanitizeExceptionUniq

unsigned int get_num_atoms(const std::shared_ptr<ROMol> &mol, bool only_explicit);
Atom &get_atom_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx);
// Const overload-by-name of get_atom_with_idx: const molecule pointer in,
// const Atom reference out.
// pub fn get_atom_with_idx_const(mol: &SharedPtr<ROMol>, idx: u32) -> Pin<&Atom>
const Atom &get_atom_with_idx_const(const std::shared_ptr<ROMol> &mol,
unsigned int idx);
rust::String get_symbol(const Atom &atom);
bool get_is_aromatic(const Atom &atom);
int get_atomic_num(const Atom &atom);
Expand Down
Loading