Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rdkit"
version = "0.4.12"
edition = "2021"
edition = "2024"
authors = ["Xavier Lange <xrlange@gmail.com>", "Javier Pineda <javierjpineda13@gmail.com>"]
license = "MIT"
description = "High level RDKit functionality for rust"
Expand All @@ -17,7 +17,7 @@ cxx = "1"
flate2 = "1"
log = "0.4"
rdkit-sys = { path = "rdkit-sys", version = "0.4.9" }
thiserror = "1"
thiserror = "2"

[dev-dependencies]
env_logger = "0.9.0"
env_logger = "0.11"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ RDKit
---

A high-level library for performing common RDKit tasks such as SMILES parsing, molecule normalization, etc. Uses
the C++ API via bindings from [rdkit-sys](https://crates.io/crate/rdkit-sys).
the C++ API via bindings from [rdkit-sys](https://crates.io/crates/rdkit-sys).

Notice: Requires RDKit 2023.09.1 or higher (for example, the version available on Ubuntu Noble 24.04)

Expand Down
102 changes: 102 additions & 0 deletions benches/atom_iteration_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#![allow(soft_unstable)]
#![feature(test)]
extern crate test;

use rdkit::ROMol;

/// Drug-like molecules of varying size for realistic benchmarking.
/// These cover common pharmaceutical scaffolds and natural products.
///
/// Every benchmark in this file walks the complete set on each iteration, so
/// adding or removing an entry changes all reported timings.
const SMILES_SET: &[&str] = &[
// aspirin
"CC(=O)Oc1ccccc1C(=O)O",
// ibuprofen
"CC(C)Cc1ccc(cc1)C(C)C(=O)O",
// caffeine
"Cn1c(=O)c2c(ncn2C)n(C)c1=O",
// diazepam
"O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1C",
// atorvastatin (lipitor)
"CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O",
// taxol core
"CC1=C2C(OC(=O)c3ccccc3)C(O)C4(OC(=O)C(O)(CC(OC(=O)c5ccccc5)C1O)C24C)C(=O)c1ccc(OC)cc1",
// vancomycin fragment
"OC1C(O)C(OC2C(O)C(O)C(O)C(CO)O2)OC(CO)C1NC(=O)C1CC(O)CN1C(=O)C(NC(=O)C1CC(=O)NC(=O)C1O)C(O)c1ccc(O)cc1",
];

/// Baseline measurement: the cost of parsing every entry of `SMILES_SET`
/// into an `ROMol`, with nothing else inside the timed closure.
#[bench]
fn bench_parse_smiles(b: &mut test::bench::Bencher) {
    b.iter(|| {
        SMILES_SET.iter().for_each(|smiles| {
            // Panics on a parse failure: the fixture strings are expected to be valid.
            let parsed = ROMol::from_smiles(smiles).unwrap();
            test::black_box(parsed);
        });
    });
}

/// Featurization-style workload through the shared-reference accessor:
/// for each molecule, fetch every atom with `atom_ref` and read seven
/// properties from it.
#[bench]
fn bench_atom_ref_all_properties(b: &mut test::bench::Bencher) {
    // Parse once up front so the timed closure covers only atom access.
    let molecules = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        for molecule in molecules.iter() {
            let atom_count = molecule.num_atoms(true);
            for idx in 0..atom_count {
                let atom = molecule.atom_ref(idx);
                // `black_box` keeps each read from being optimized away.
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// The same seven-property workload, but going through `atom_with_idx`,
/// which needs mutable access to the molecule.
/// Serves as a regression guard: timings should match the `atom_ref` variant.
#[bench]
fn bench_atom_mut_all_properties(b: &mut test::bench::Bencher) {
    // Parse once up front so the timed closure covers only atom access.
    let mut molecules = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        for molecule in molecules.iter_mut() {
            let atom_count = molecule.num_atoms(true);
            for idx in 0..atom_count {
                let atom = molecule.atom_with_idx(idx);
                // `black_box` keeps each read from being optimized away.
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// Measures cloning in isolation. This is the price a caller pays when it
/// only holds a `&ROMol` but needs a molecule it can mutate.
#[bench]
fn bench_clone_molecules(b: &mut test::bench::Bencher) {
    // Parsing happens outside the timed closure; only `clone` is measured.
    let molecules = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        molecules.iter().for_each(|molecule| {
            test::black_box(molecule.clone());
        });
    });
}
6 changes: 3 additions & 3 deletions rdkit-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "rdkit-sys"
authors = ["Xavier Lange (xrlange@gmail.com)", "chrissly31415"]
version = "0.4.12"
edition = "2021"
edition = "2024"
license = "MIT"
description = "RDKit CFFI library builder and bindings"
repository = "https://github.com/rdkit-rs/rdkit/tree/main/rdkit-sys"
Expand All @@ -14,9 +14,9 @@ exclude = ["rdkit-*", "*.tar.gz", "examples/"]
cxx = "1.0.109"

[build-dependencies]
env_logger = "0.10.0"
env_logger = "0.11"
cxx-build = "1.0.109"
which = "4.4.2"
which = "8"

[features]
default = []
Expand Down
2 changes: 1 addition & 1 deletion rdkit-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const CPP_VERSION_FLAG: &str = "-std=c++17";
const CPP_VERSION_FLAG: &str = "-std=c++20";

fn main() {
if std::env::var("DOCS_RS").is_ok() {
Expand Down
4 changes: 2 additions & 2 deletions rdkit-sys/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ trailing_comma = "Vertical"
match_block_trailing_comma = false
blank_lines_upper_bound = 1
blank_lines_lower_bound = 0
edition = "2021"
version = "One"
edition = "2024"
style_edition = "2024"
inline_attribute_width = 0
format_generated_files = true
merge_derives = true
Expand Down
19 changes: 19 additions & 0 deletions rdkit-sys/src/bridge/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,24 @@ pub mod ffi {
pub fn explicit_bit_vect_to_u64_vec(
bitvect: &SharedPtr<ExplicitBitVect>,
) -> UniquePtr<CxxVector<u64>>;

// Configurable fingerprints
/// Binds the C++ wrapper of the same name, which forwards `radius` and
/// `n_bits` to `MorganFingerprints::getFingerprintAsBitVect`.
pub fn morgan_fingerprint_mol_with_params(
mol: &SharedPtr<ROMol>,
radius: u32,
n_bits: u32,
) -> SharedPtr<ExplicitBitVect>;
/// Binds the C++ wrapper of the same name, which forwards `min_path`,
/// `max_path` and `fp_size` to `RDKFingerprintMol`.
pub fn rdk_fingerprint_mol_with_params(
mol: &SharedPtr<ROMol>,
min_path: u32,
max_path: u32,
fp_size: u32,
) -> SharedPtr<ExplicitBitVect>;
/// Binds the C++ wrapper of the same name, which forwards `fp_size` to
/// `PatternFingerprintMol`.
pub fn pattern_fingerprint_mol_with_params(
mol: &SharedPtr<ROMol>,
fp_size: u32,
) -> SharedPtr<ExplicitBitVect>;
/// Binds the C++ wrapper around `MACCSFingerprints::getFingerprintAsBitVect`;
/// takes no size parameter.
pub fn maccs_fingerprint_mol(mol: &SharedPtr<ROMol>) -> SharedPtr<ExplicitBitVect>;
/// Returns the value of `getNumBits()` on the given bit vector.
pub fn explicit_bit_vect_num_bits(bitvect: &SharedPtr<ExplicitBitVect>) -> u32;
}
}
2 changes: 1 addition & 1 deletion rdkit-sys/src/bridge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod mol_standardize;
pub use mol_standardize::ffi as mol_standardize_ffi;

mod periodic_table;
pub use periodic_table::{ffi as periodic_table_ffi, PeriodicTableOps};
pub use periodic_table::{PeriodicTableOps, ffi as periodic_table_ffi};

mod ro_mol;
pub use ro_mol::ffi as ro_mol_ffi;
Expand Down
2 changes: 1 addition & 1 deletion rdkit-sys/src/bridge/periodic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub trait PeriodicTableOps {
fn getElementName(self, atomic_number: u32) -> String;
fn getValenceList(self, atomic_number: u32) -> &'static CxxVector<i32>;
}
impl<'a> PeriodicTableOps for UniquePtr<PeriodicTable> {
impl PeriodicTableOps for UniquePtr<PeriodicTable> {
fn getElementSymbol(self, atomic_number: u32) -> String {
ffi::getElementSymbol(atomic_number)
}
Expand Down
1 change: 1 addition & 0 deletions rdkit-sys/src/bridge/ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod ffi {

pub fn get_num_atoms(mol: &SharedPtr<ROMol>, onlyExplicit: bool) -> u32;
pub fn get_atom_with_idx(mol: &mut SharedPtr<ROMol>, idx: u32) -> Pin<&mut Atom>;
pub fn get_atom_with_idx_const(mol: &SharedPtr<ROMol>, idx: u32) -> Pin<&Atom>;
pub fn get_symbol(atom: Pin<&Atom>) -> String;
pub fn get_is_aromatic(atom: Pin<&Atom>) -> bool;
pub fn get_atomic_num(atom: Pin<&Atom>) -> i32;
Expand Down
4 changes: 3 additions & 1 deletion rdkit-sys/tests/test_atoms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ fn test_atoms() {

assert_eq!(
&atoms,
&["C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"]
&[
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"
]
);

let mut atom = rdkit_sys::ro_mol_ffi::get_atom_with_idx(&mut romol, 2);
Expand Down
5 changes: 4 additions & 1 deletion rdkit-sys/tests/test_ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ fn mol_to_molblock_test() {
cxx::let_cxx_string!(smiles = "CC");
let romol = rdkit_sys::ro_mol_ffi::smiles_to_mol(&smiles).unwrap();
let molblock = rdkit_sys::ro_mol_ffi::mol_to_molblock(&romol);
assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n");
assert_eq!(
molblock,
"\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"
);
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions rdkit-sys/tests/test_rw_mol.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cxx::{let_cxx_string, SharedPtr};
use cxx::{SharedPtr, let_cxx_string};
use rdkit_sys::{ro_mol_ffi::ROMol, rw_mol_ffi::RWMol};

#[test]
Expand Down Expand Up @@ -191,7 +191,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C
let ro_mol = unsafe { std::mem::transmute::<SharedPtr<RWMol>, SharedPtr<ROMol>>(rw_mol) };

let smiles = rdkit_sys::ro_mol_ffi::mol_to_smiles(&ro_mol);
assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &smiles);
assert_eq!(
"[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]",
&smiles
);
}

#[test]
Expand Down
11 changes: 11 additions & 0 deletions rdkit-sys/wrapper/include/fingerprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,15 @@ std::shared_ptr<ExplicitBitVect> copy_explicit_bit_vect(const std::shared_ptr<Ex
unsigned int get_num_on_bits(const std::shared_ptr<ExplicitBitVect> &bitvect);

std::unique_ptr<std::vector<uint64_t>> explicit_bit_vect_to_u64_vec(const std::shared_ptr<ExplicitBitVect> &bitvect);

// Configurable fingerprints
// Each function below hands the bit vector it obtains to a std::shared_ptr.

// Morgan fingerprint as a bit vector; radius and n_bits are passed through to
// MorganFingerprints::getFingerprintAsBitVect.
std::shared_ptr<ExplicitBitVect> morgan_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int radius, unsigned int n_bits);
// RDKit fingerprint; min_path, max_path and fp_size are passed through to
// RDKFingerprintMol.
std::shared_ptr<ExplicitBitVect> rdk_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int min_path, unsigned int max_path,
unsigned int fp_size);
// Pattern fingerprint; fp_size is passed through to PatternFingerprintMol.
std::shared_ptr<ExplicitBitVect> pattern_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int fp_size);
// MACCS fingerprint via MACCSFingerprints::getFingerprintAsBitVect; no size parameter.
std::shared_ptr<ExplicitBitVect> maccs_fingerprint_mol(const std::shared_ptr<ROMol> &mol);
// Value of getNumBits() on the given bit vector.
unsigned int explicit_bit_vect_num_bits(const std::shared_ptr<ExplicitBitVect> &bitvect);
} // namespace RDKit
2 changes: 2 additions & 0 deletions rdkit-sys/wrapper/include/ro_mol.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ unsigned int atom_sanitize_exception_get_atom_idx(const MolSanitizeExceptionUniq

unsigned int get_num_atoms(const std::shared_ptr<ROMol> &mol, bool only_explicit);
Atom &get_atom_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx);
const Atom &get_atom_with_idx_const(const std::shared_ptr<ROMol> &mol,
unsigned int idx);
rust::String get_symbol(const Atom &atom);
bool get_is_aromatic(const Atom &atom);
int get_atomic_num(const Atom &atom);
Expand Down
25 changes: 25 additions & 0 deletions rdkit-sys/wrapper/src/fingerprint.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "rust/cxx.h"
#include <DataStructs/ExplicitBitVect.h>
#include <GraphMol/Fingerprints/Fingerprints.h>
#include <GraphMol/Fingerprints/MACCS.h>
#include <GraphMol/Fingerprints/MorganFingerprints.h>

namespace RDKit {
Expand Down Expand Up @@ -28,4 +29,28 @@ std::unique_ptr<std::vector<uint64_t>> explicit_bit_vect_to_u64_vec(const std::s
std::vector<uint64_t> *bytes_heap = new std::vector<uint64_t>(bytes);
return std::unique_ptr<std::vector<uint64_t>>(bytes_heap);
}

std::shared_ptr<ExplicitBitVect> morgan_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int radius, unsigned int n_bits) {
return std::shared_ptr<ExplicitBitVect>(MorganFingerprints::getFingerprintAsBitVect(*mol, radius, n_bits));
}

std::shared_ptr<ExplicitBitVect> rdk_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int min_path, unsigned int max_path,
unsigned int fp_size) {
return std::shared_ptr<ExplicitBitVect>(RDKFingerprintMol(*mol, min_path, max_path, fp_size));
}

std::shared_ptr<ExplicitBitVect> pattern_fingerprint_mol_with_params(const std::shared_ptr<ROMol> &mol,
unsigned int fp_size) {
return std::shared_ptr<ExplicitBitVect>(PatternFingerprintMol(*mol, fp_size));
}

std::shared_ptr<ExplicitBitVect> maccs_fingerprint_mol(const std::shared_ptr<ROMol> &mol) {
return std::shared_ptr<ExplicitBitVect>(MACCSFingerprints::getFingerprintAsBitVect(*mol));
}

unsigned int explicit_bit_vect_num_bits(const std::shared_ptr<ExplicitBitVect> &bitvect) {
return bitvect->getNumBits();
}
} // namespace RDKit
4 changes: 4 additions & 0 deletions rdkit-sys/wrapper/src/ro_mol.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ unsigned int get_num_atoms(const std::shared_ptr<ROMol> &mol, bool only_explicit

Atom &get_atom_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx) { return *mol->getAtomWithIdx(idx); }

// Read-only counterpart of get_atom_with_idx: takes the molecule by const
// reference and returns the atom at `idx` as a const reference.
const Atom &get_atom_with_idx_const(const std::shared_ptr<ROMol> &mol, unsigned int idx) {
  const Atom *atom = mol->getAtomWithIdx(idx);
  return *atom;
}

rust::String get_symbol(const Atom &atom) { return atom.getSymbol(); }

bool get_is_aromatic(const Atom &atom) { return atom.getIsAromatic(); }
Expand Down
4 changes: 2 additions & 2 deletions rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ trailing_comma = "Vertical"
match_block_trailing_comma = false
blank_lines_upper_bound = 1
blank_lines_lower_bound = 0
edition = "2021"
version = "One"
edition = "2024"
style_edition = "2024"
inline_attribute_width = 0
format_generated_files = true
merge_derives = true
Expand Down
2 changes: 1 addition & 1 deletion src/descriptors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl Properties {
let names = rdkit_sys::descriptors_ffi::get_property_names(&self.ptr);
let computed = rdkit_sys::descriptors_ffi::compute_properties(&self.ptr, &ro_mol.ptr);

assert!(names.len() != 0);
assert!(!names.is_empty());
assert!(computed.len() == names.len());

names
Expand Down
10 changes: 10 additions & 0 deletions src/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,26 @@ pub struct Fingerprint(pub BitVec<u8, bitvec::order::Lsb0>);

impl Fingerprint {
/// Builds a `Fingerprint` from an RDKit `ExplicitBitVect`.
///
/// The bit vector is fetched as `u64` words, cut back to the bit count
/// that `explicit_bit_vect_num_bits` reports, and then re-packed into the
/// `u8`-backed `BitVec` this type wraps.
pub fn new(ptr: SharedPtr<rdkit_sys::fingerprint_ffi::ExplicitBitVect>) -> Self {
    let num_bits = rdkit_sys::fingerprint_ffi::explicit_bit_vect_num_bits(&ptr) as usize;

    // Copy the words out of the C++-owned vector into a Rust-owned Vec.
    let words: Vec<u64> = rdkit_sys::fingerprint_ffi::explicit_bit_vect_to_u64_vec(&ptr)
        .iter()
        .copied()
        .collect();

    // Keep only the first `num_bits` bits of the word-backed vector.
    let mut word_bits = bitvec::vec::BitVec::<u64, Lsb0>::from_vec(words);
    word_bits.truncate(num_bits);

    // Move the bits into byte-sized storage.
    let mut byte_bits = bitvec::vec::BitVec::<u8, Lsb0>::new();
    byte_bits.append(&mut word_bits);

    Fingerprint(byte_bits)
}

/// Returns the length of the wrapped bit vector, i.e. `self.0.len()`.
pub fn len(&self) -> usize {
self.0.len()
}

/// Returns `true` when the wrapped bit vector holds no bits, i.e. `self.0.is_empty()`.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}

pub fn tanimoto_distance(&self, other: &Fingerprint) -> f32 {
let and = self.0.clone() & &other.0;
let or = self.0.clone() | &other.0;
Expand Down
Loading