From 86dca9502c06af2e3b036756663f4d7d6a7ac184 Mon Sep 17 00:00:00 2001 From: ntampellini Date: Thu, 12 Mar 2026 19:04:51 -0400 Subject: [PATCH 1/4] first commit --- CHANGELOG.md | 5 + docs/operators_keywords.rst | 46 +-- firecode/__main__.py | 1 - firecode/ase_manipulations.py | 94 +----- firecode/embedder_options.py | 9 +- firecode/optimization_methods.py | 28 +- firecode/pka.py | 5 +- firecode/solvents.py | 264 +++++++++------ firecode/standalone_optimizer.py | 5 +- firecode/tests/C2H4.xyz | 14 +- firecode/tests/test_suite.py | 53 ++- firecode/thermochemistry.py | 535 +++++++++++++++++++++++++++++++ firecode/units.py | 14 + firecode/vib_analysis.py | 0 14 files changed, 849 insertions(+), 224 deletions(-) create mode 100644 firecode/thermochemistry.py create mode 100644 firecode/vib_analysis.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e09242b..39f4e34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ + +## FIRECODE 1.7.0 πŸ”₯ (WIP) +- Implemented vibrational analysis via ASE including quasi-RRHO thermochemistry (as in GoodVibes) to get free energies. +- Restructured solvent module. + ## FIRECODE 1.6.0 πŸ”₯ (March 11 2026) - Refreshed constraints handling in operators. - Added non-inline constraints specification after a molecule line. Syntax: "B/A/D i1 i2 [i3] [i4] [auto/value]" diff --git a/docs/operators_keywords.rst b/docs/operators_keywords.rst index 9195654..76feab8 100644 --- a/docs/operators_keywords.rst +++ b/docs/operators_keywords.rst @@ -23,26 +23,26 @@ Here is a list of the currently available operators: and will constrain the specified distances (both "fixed" and not). Generates a new ``mol_crest_confs.xyz`` file with the crest-optimized conformers. The default level is GFN2//GFN-FF (see CREST docs). Charge and multiplicity are passed from the molecule or keyword line. - + :: - + mtd> molecule.xyz 4A 8A charge=-1 - ``rdkit_search>`` - Performs a conformational search with the `ETKDGv3 algorithm `__ via RDKit. Generates a new ``mol_rdkit_confs.xyz`` file with the generated conformers. - ``refine>`` - Acts on a multimolecular input file, pruning similar structures and optimizing it - at the chosen theory level. Soft, non-fixed constraints ("interactions") are enforced during a + at the chosen theory level. Soft, non-fixed constraints ("interactions") are enforced during a first loose optimization, and subsequently relaxed. - ``neb>`` - Runs a NEB (Nudged Elastic Band) procedure on the provided structures. - The implementation is a climbing image nudged elastic band (CI-NEB) TS search. + The implementation is a climbing image nudged elastic band (CI-NEB) TS search. The operator should be used on a single file that contains two or more structures. The first and last structures will be used as start and endpoints. If three are provided, the intermediate structure will be used as a TS guess in the interpolation. If more are provided, they will all be used in place of (or to aid) interpolation to reach the desired number of images (set with the ``IMAGES=n`` or NEB(IMAGES=n) keywords). Default is 7. - + - ``scan>`` - Runs a scan of the desired distance or dihedral angle, based on the number of indices provided. For distance scans, the atomic indices will be approached if they are not bound, and separated if they are. This could be chained with, for example, a NEB calculation, where the scan trajectory will be used as a starting @@ -53,8 +53,8 @@ Here is a list of the currently available operators: neb> scan> mol.xyz 3 4 For dihedral scans, the dihedral is rotated the full 360Β° in both clockwise and counterclockwise directions. - Maxima above a certain threshold (set with ``KCAL=n``) are scanned again in smaller steps. All maxima for the second mode accurate - scan are saved to firecode_maxima_mol.xyz. The optimized geometries for the initial scans are saved with the relative + Maxima above a certain threshold (set with ``KCAL=n``) are scanned again in smaller steps. All maxima for the second mode accurate + scan are saved to firecode_maxima_mol.xyz. The optimized geometries for the initial scans are saved with the relative names. This version of the ``scan>`` operator is terminating and cannot be chained. Deprecated (legacy): @@ -80,16 +80,16 @@ General keywords Keywords are case-insensitive and are divided by at least one blank space. Some of them are self-sufficient (*i.e.* ``NCI``), while some others require an -additional input (*i.e.* ``STEPS=10`` or ``DIST(a=1.8, b=2, c=1.34)``). +additional input (*i.e.* ``STEPS=10`` or ``DIST(a=1.8, b=2, c=1.34)``). Commas are used to divide keyword arguments where more than one is accepted, like in ``DIST``. -- **CALC** - Overrides default calculator in ``settings.py``. +- **CALC** - Overrides default calculator in ``settings.py``. Available calculators are ORCA, XTB, TBLITE, AIMNET2 and UMA. Syntax: ``CALC=ORCA`` - **CHARGE** - Specify the charge to be used in optimizations. -- **CONFS** - Override the maximum number of conformers to be considered +- **CONFS** - Override the maximum number of conformers to be considered for the refinement or embedding of each molecule (default is 1000). Syntax: ``CONFS=10000`` - **CRESTNCI** - crest_search> runs: passes the "--nci" argument to CREST, running @@ -114,7 +114,7 @@ one is accepted, like in ``DIST``. Value can only be ``XTB`` or None. Syntax: ``FFCALC=XTB`` - **FFLEVEL** - Manually set the theory level to be used for force field - calculations. Default is GFN-FF for XTB. Support for other FF calculators has been discontinued. + calculations. Default is GFN-FF for XTB. Support for other FF calculators has been discontinued. Standard values can be modified by running the module with the -s flag (>>> python -m firecode -s). @@ -132,7 +132,7 @@ one is accepted, like in ``DIST``. - **LEVEL** - Manually set the theory level to be used. White spaces, if needed, can be expressed in input files with underscores. Syntax (ORCA): - ``LEVEL(B3LYP_def2-TZVP)``. Defaults can be found in settings.py, and can be modified by running + ``LEVEL(B3LYP_def2-TZVP)``. Defaults can be found in settings.py, and can be modified by running the module with the -s flag (``>>> firecode -s``). - **NEB** - Set NEB options. Syntax and default values: ``NEB(images=7, preopt=true, ci=true)``. @@ -141,6 +141,8 @@ one is accepted, like in ``DIST``. refine bonding distances. Set by default with the ``SHRINK`` keyword and for monomolecular TSs. +- **P** - Pressure, in atm (default 1.0). Syntax: ``P=100.0`` + - **PKA** - Specify the reference pKa for a compound in multimolecular pKa calculation runs. Syntax: ``PKA(mol.xyz)=11`` @@ -149,21 +151,25 @@ one is accepted, like in ``DIST``. ``settings.py``. Syntax: ``PROCS=32`` - **RMSD** - RMSD threshold (Angstroms) for structure pruning. - The smaller, the more retained structures (default is 0.25 A). + The smaller, the more retained structures (default is 0.25 A). Syntax: ``THRESH=n`` +- **T** - Temperature, in Kelvin (default 298.15). Syntax: ``T=300.0`` + +- **T_C** - Temperature, in Celsius (default 25.0). Syntax: ``T_C=25.0`` + Embedding-specific keywords (legacy) ++++++++++++++++++++++++++++++++++++ - **BYPASS** - Debug keyword. Used to skip all pruning steps and directly output all the embedded geometries. - + - **CLASHES** - Manually specify the max number of clashes and/or the distance threshold at which two atoms are considered clashing. The more forgiving (higher number, smaller dist), the more structures will reach the geometry optimization step. Default values are num=0 and dist=1.5 (A). Syntax: ``CLASHES(num=3,dist=1.2)`` - + - **DEEP** - Performs a deeper search, retaining more starting points for calculations and smaller turning angles. Equivalent to ``THRESH=0.3 STEPS=72 CLASHES=(num=1,dist=1.3)``. **Use with care!** @@ -172,18 +178,18 @@ Embedding-specific keywords (legacy) (C=C and C=N) during the embed. Likely to be useful only for monomolecular embeds, where molecular distortion is often important, and undesired isomerization processes can occur. - + - **NOOPT** - Skip the optimization steps, directly writing structures to file after compenetration and similarity pruning. Dihedral embeds: performs rigid scans instead of relaxed ones. - + - **RIGID** - Only applies to "cyclical"/"chelotropic" embeds. Avoid bending structures to better build TSs. - **ROTRANGE** - Only applies to "cyclical"/"chelotropic" embeds. Manually specify the rotation range to be explored around the structure pivot. Default is 45. Syntax: ``ROTRANGE=90`` - + - **SHRINK** - Exaggerate orbital dimensions during embed, scaling them by a specified factor. If used as a single keyword (``SHRINK``), orbital dimensions are scaled by a factor of one and a half. A syntax @@ -206,9 +212,9 @@ Embedding-specific keywords (legacy) default ``STEPS=24`` will perform 15Β° turns. For "cyclical" and "chelotropic" embeds, the rotation range to be explored is +-\ ``ROTRANGE`` degrees. Therefore the default values, equivalent to - ``ROTRANGE=45 STEPS=5``, will sample five equally spaced positions between + ``ROTRANGE=45 STEPS=5``, will sample five equally spaced positions between +45 and -45 degrees (going through zero). - **SUPRAFAC** - Only retain suprafacial orbital configurations in cyclical TSs. Thought for Diels-Alder and other cycloaddition - reactions. \ No newline at end of file + reactions. diff --git a/firecode/__main__.py b/firecode/__main__.py index 8ee6aef..624e44c 100644 --- a/firecode/__main__.py +++ b/firecode/__main__.py @@ -29,7 +29,6 @@ def main() -> None: - # Redirect stdout and stderr to handle encoding errors sys.stdout = TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") diff --git a/firecode/ase_manipulations.py b/firecode/ase_manipulations.py index 75ec623..ddf8b47 100644 --- a/firecode/ase_manipulations.py +++ b/firecode/ase_manipulations.py @@ -26,7 +26,6 @@ import time from dataclasses import dataclass from functools import wraps -from shutil import rmtree from subprocess import getoutput from typing import TYPE_CHECKING, Any, Callable, Iterable, ParamSpec, Sequence, TypeVar, cast @@ -37,8 +36,6 @@ from ase.constraints import FixInternals from ase.mep import DyNEB from ase.optimize import FIRE, LBFGS -from ase.thermochemistry import IdealGasThermo -from ase.vibrations import Vibrations from prism_pruner.algebra import dihedral, normalize from prism_pruner.graph_manipulations import d_min_bond, find_paths from prism_pruner.rmsd import rmsd_and_max @@ -48,13 +45,12 @@ from firecode.calculators._xtb import xtb_gsolv from firecode.settings import DEFAULT_LEVELS from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float, Array3D_float, MaybeNone -from firecode.units import EH_TO_EV, EH_TO_KCAL, EV_TO_KCAL +from firecode.units import EV_TO_KCAL from firecode.utils import ( HiddenPrints, NewFolderContext, cartesian_product, read_xyz, - suppress_stdout_stderr, write_xyz, ) @@ -867,7 +863,6 @@ def ase_popt( ase_calc = Opt_func_dispatcher(calculator).get_ase_calc(method, solvent) ase_atoms.calc = ase_calc - ase_calc.verbosity = 0 # type: ignore[union-attr] ase_atoms = set_charge_and_mult_on_ase_atoms(ase_atoms, charge, mult) maxiter = maxiter or 750 @@ -929,7 +924,7 @@ def ase_popt( with NewFolderContext(title, delete_after=(not debug)): t_start_opt = time.perf_counter() - with suppress_stdout_stderr(): + with HiddenPrints(): with LBFGS(ase_atoms, maxstep=0.2, trajectory=traj) as opt: opt.run(fmax=fmax, steps=maxiter) iterations = opt.nsteps @@ -1034,91 +1029,6 @@ def is_linear(atoms: Atoms, tol: float = 1e-3) -> bool: return cast("bool", s[1] < tol) -def ase_vib( - atoms: Array1D_str, - coords: Array2D_float, - ase_calc: ASECalculator, - charge: int, - mult: int, - temp: float = 298.15, - title: str = "temp", - return_gcorr: bool = True, - write_log: bool = True, -) -> float: - """returns: G(corr) or Free energy, in kcal/mol.""" - ase_atoms = Atoms(atoms, positions=coords) - ase_atoms = set_charge_and_mult_on_ase_atoms(ase_atoms, charge=charge, mult=mult) - - ase_calc.verbosity = 0 # type: ignore[attr-defined] - ase_atoms.calc = ase_calc - - vib = Vibrations(ase_atoms, delta=0.005) # type: ignore[no-untyped-call] - - with open(f"vib_{title}.out", "w", encoding="utf-8") as f: - with HiddenPrints(): - f.write("--> FIRECODE ASE Frequency calculation report\n\n") - - # tighten convergence - f.write("--> Tightening geom. opt. convergence to fmax=1E-4\n") - opt = LBFGS(ase_atoms, maxstep=0.01) - opt.run(fmax=1e-4) # type: ignore[no-untyped-call] - energy = ase_atoms.get_potential_energy() # type: ignore[no-untyped-call] - - # remove cache folder - if "vib" in os.listdir(): - rmtree(os.path.join(os.getcwd(), "vib")) - - # run vibrational analysis - vib.run() # type: ignore[no-untyped-call] - - # get energies (frequencies) and print summary - vib_energies = vib.get_energies() # type: ignore[no-untyped-call] - vib.summary(log=f) # type: ignore[no-untyped-call] - - if len(atoms) == 1: - geometry = "monatomic" - elif is_linear(ase_atoms): - geometry = "linear" - else: - geometry = "nonlinear" - - # compute thermo - thermo = IdealGasThermo( # type: ignore[no-untyped-call] - vib_energies=vib_energies, - potentialenergy=energy, - atoms=ase_atoms, - spin=(mult - 1) / 2, - geometry=geometry, - symmetrynumber=1, - ignore_imag_modes=True, - ) - - EE = energy / EH_TO_EV # was eV, now Eh - G = thermo.get_gibbs_energy(temperature=temp, pressure=101325.0) / EH_TO_EV # type: ignore[no-untyped-call] - H = thermo.get_enthalpy(temperature=temp) / EH_TO_EV # type: ignore[no-untyped-call] - S = thermo.get_entropy(temperature=temp, pressure=101325.0) / EH_TO_EV # type: ignore[no-untyped-call] - gcorr = G - EE - - f.write("\n--> What follows mocks an ORCA output for scraping purposes:\n\n") - f.write(f"T: {temp:.2f} K ({temp - 273.15:.2f} Β°C)\n") - f.write(f"FINAL SINGLE POINT ENERGY {EE:.8f} Eh\n") - f.write(f"FINAL GIBBS FREE ENERGY {G:.8f} Eh\n") - f.write(f"G-E(el) ... {gcorr:.8f} Eh {gcorr * EH_TO_KCAL:.2f} kcal/mol\n") - f.write(f"Total enthalpy ... {H:.8f} Eh\n") - f.write(f"Final entropy term ... {S:.8f} Eh/K\n") - - del vib - if os.path.isdir("vib"): - rmtree("vib") - - if not write_log: - os.remove(f"vib_{title}.out") - - if return_gcorr: - return cast("float", gcorr * EH_TO_KCAL) - return cast("float", G * EH_TO_KCAL) - - def fsm_operator(embedder: Embedder, optimize_endpoints: bool = True) -> str: """Connect two structures using the Freezing String Method and return the resulting list of coordinates.""" assert len(embedder.objects) == 1, "FSM requires a single input structure." diff --git a/firecode/embedder_options.py b/firecode/embedder_options.py index cd2c054..5ff24e4 100644 --- a/firecode/embedder_options.py +++ b/firecode/embedder_options.py @@ -82,6 +82,7 @@ # scrambled. Default is 1. Syntax: `NEWBONDS=1` "NOOPT": 1, # Skip the optimization steps, directly writing structures to file. "ONLYREFINED": 1, # Discard structures that do not successfully refine bonding distances. + "P": 1, # Pressure, in atmospheres. "PKA": 1, # Set reference pKa for a specific compound "PROCS": 1, # Set the number of parallel cores to be used by ORCA "REFINE": 1, # Same as calling refine> on a single file @@ -177,7 +178,8 @@ class Options: scramble_check: bool = False charge: int = 0 mult: int = 1 - T: float = 298.15 + T: float = 298.15 # in K + P: float = 1.0 # in atm ff_opt: bool = FF_OPT_BOOL ff_calc: str | None = FF_CALC @@ -567,6 +569,11 @@ def t_c(self, options: Options, *args: Any) -> None: options.T = float(kw.split("=")[1]) + 273.15 self.embedder.log(f"--> Set temperature to {options.T:.2f} K ({options.T - 273.15:.2f} Β°C)") + def p(self, options: Options, *args: Any) -> None: + kw = self.keywords_simple[self.keywords.index("P")] + options.P = float(kw.split("=")[1]) + self.embedder.log(f"--> Set pressure to {options.P:.1f} atm") + def set_options(self) -> None: for kw in self.sorted_keywords(): setter_function = getattr(self, kw.lower()) diff --git a/firecode/optimization_methods.py b/firecode/optimization_methods.py index 74bd6a7..a39f720 100644 --- a/firecode/optimization_methods.py +++ b/firecode/optimization_methods.py @@ -34,6 +34,7 @@ from firecode.ase_manipulations import ase_popt, ase_popt_with_alpb from firecode.calculators._xtb import xtb_opt from firecode.settings import DEFAULT_LEVELS +from firecode.solvents import epsilon_dict, to_xtb_solvents from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float, Array3D_float, MaybeNone from firecode.utils import loadbar, molecule_check, scramble_check @@ -120,6 +121,7 @@ def load_aimnet2_calc( def load_tblite_calc(self, method: str | None, solvent: str | None) -> ASECalculator: try: from tblite.ase import TBLite + from tblite.exceptions import TBLiteValueError except ImportError as e: print(e) @@ -131,7 +133,6 @@ def load_tblite_calc(self, method: str | None, solvent: str | None) -> ASECalcul ) ) - solvation = None if solvent is None else ("alpb", solvent) method = method or DEFAULT_LEVELS["TBLITE"] # tblite is picky with names @@ -142,7 +143,30 @@ def load_tblite_calc(self, method: str | None, solvent: str | None) -> ASECalcul } method = synonyms.get(method, method) - self.ase_calc = TBLite(method=method, solvation=solvation) + + if solvent is None: + self.ase_calc = TBLite(method=method) + return self.ase_calc + + try: + if solvent in epsilon_dict: + epsilon = epsilon_dict[solvent] + + # add ALPB solvation via solvent epsilon + self.ase_calc = TBLite(method=method, solvation=("alpb", epsilon)) + + else: + # translate if needed + xtb_solvent_name = to_xtb_solvents.get(solvent, solvent) + + # add ALPB solvation via solvent name + self.ase_calc = TBLite(method=method, solvation=("alpb", xtb_solvent_name)) + + except TBLiteValueError: + print( + "--> WARNING: TBLITE was not able to set up ALPB solvation correctly. Defaulted to vacuum." + ) + self.ase_calc = TBLite(method=method) return self.ase_calc diff --git a/firecode/pka.py b/firecode/pka.py index 5368bb9..0c5bf15 100644 --- a/firecode/pka.py +++ b/firecode/pka.py @@ -28,8 +28,8 @@ from prism_pruner.algebra import normalize from prism_pruner.graph_manipulations import graphize -from firecode.ase_manipulations import ase_vib from firecode.optimization_methods import optimize, refine_structures +from firecode.thermochemistry import ase_vib from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float, Array3D_float from firecode.utils import charge_to_str, loadbar, write_xyz @@ -334,7 +334,8 @@ def get_free_energies( ), charge=charge, mult=mult, - temp=embedder.options.T, + T_K=embedder.options.T, + C_mol_L=1.0, title=f"{title}_conf{s}", return_gcorr=False, ) diff --git a/firecode/solvents.py b/firecode/solvents.py index 5e03d8b..ccd79ce 100644 --- a/firecode/solvents.py +++ b/firecode/solvents.py @@ -20,112 +20,186 @@ """ -import sys - -xtb_solvents = [ - "acetone", - "acetonitrile", - "aniline", - "benzaldehyde", - "benzene", - "ch2cl2", - "chcl3", - "cs2", - "dioxane", - "dmf", - "dmso", - "ether", - "ethylacetate", - "furane", - "hexadecane", - "hexane", - "methanol", - "nitromethane", - "octanol", - "octanolwet", - "phenol", - "toluene", - "thf", - "water", - "none", # This is required for the ASE get_calc function -] - -xtb_solvents = xtb_solvents + ["" for _ in range(3 - len(xtb_solvents) % 3)] -gap = 18 -xtb_supported = "".join( - [ - ( - f"{xtb_solvents[i]}{' ' * (gap - len(xtb_solvents[i]))}" - f"{xtb_solvents[i + 1]}{' ' * (gap - len(xtb_solvents[i + 1]))}" - f"{xtb_solvents[i + 2]}\n" - ) - for i, _ in enumerate(xtb_solvents) - if i % 3 == 0 - ] -) - -epsilon_dict = { - "aceticacid": 6.15, - "acetone": 20.7, - "acetonitrile": 37.5, - "aniline": 7.06, - "benzaldehyde": 17.9, - "benzene": 2.28, - "chloroform": 4.8, - "cs2": 2.63, - "ch2cl2": 8.93, - "dioxane": 2.25, - "dmf": 36.71, - "dmso": 46.68, - "et2o": 4.27, - "dimethylether": 6.18, - "ethanol": 24.3, - "methanol": 32.63, - "ethylacetate": 6.02, - "furan": 2.94, - "hexadecane": 2.05, - "octanol": 10.30, - "phenol": 12.4, - "toluene": 2.38, - "thf": 7.58, - "water": 80.1, +from firecode.units import AVOGADRO_NA, A3_TO_mL + +to_xtb_solvents = { + "acetone": "acetone", + "mecn": "acetonitrile", + "aniline": "aniline", + "benzaldehyde": "benzaldehyde", + "benzene": "benzene", + "ch2cl2": "ch2cl2", + "chcl3": "chcl3", + "cs2": "cs2", + "dioxane": "dioxane", + "dmf": "dmf", + "dmso": "dmso", + "et2o": "ether", + "ethylacetate": "ethylacetate", + "furane": "furane", + "hexadecane": "hexadecane", + "hexane": "hexane", + "methanol": "methanol", + "meno2": "nitromethane", + "octanol": "octanol", + "octanolwet": "octanolwet", + "phenol": "phenol", + "toluene": "toluene", + "thf": "thf", + "h2o": "water", } +# convention: from long to short, from unusual to short solvent_synonyms = { - "ch3cooh": "aceticacid", - "ch3cn": "acetonitrile", - "chcl3": "chloroform", + "ch3cooh": "acoh", + "aceticacid": "acoh", + "ch3cn": "mecn", + "acetonitrile": "mecn", + "chloroform": "chcl3", "dcm": "ch2cl2", "dichloromethane": "ch2cl2", "carbondisuphide": "cs2", "carbondisulfide": "cs2", "diethylether": "et2o", - "etoh": "ethanol", - "ch3oh": "methanol", - "meoh": "methanol", - "h2o": "water", - "etoac": "ethylacetate", + "ethanol": "etoh", + "ch3oh": "meoh", + "methanol": "meoh", + "water": "h2o", + "ethylacetate": "etoac", + "phme": "toluene", + "phh": "benzene", + "dimethylformamide": "dmf", + "dimethylether": "me2o", + "triethylamine": "et3n", + "nitromethane": "meno2", } -new_theory_level = { - "ORCA": lambda theory_level, solvent: f"! CPCM\n%cpcm\nepsilon {epsilon_dict[solvent]}\nend", - # 'XTB':lambda theory_level, _: '', +# name: {molarity (M), molecular_volume(Γ…^3)} +# see https://organicchemistrydata.org/solvents/ +solvent_data = { + "none": { + "MW": 0.0, # g/mol + "density": 0.0, # g/mL + "molarity": 1.0, # mol/L + "molecular_volume": 1.0, # Γ…^3 + }, + "h2o": { + "molarity": 55.6, + "molecular_volume": 27.944, + "epsilon": 80.1, + }, + "toluene": { + "molarity": 9.4, + "molecular_volume": 149.070, + "epsilon": 2.38, + }, + "dmf": { + "molarity": 12.9, + "molecular_volume": 77.442, + "epsilon": 36.71, + }, + "acoh": { + "molarity": 17.4, + "molecular_volume": 86.10, + "epsilon": 6.15, + }, + "chcl3": { + "molarity": 12.5, + "molecular_volume": 97.0, + "epsilon": 4.8, + }, + "acetone": { + "MW": 58.08, + "density": 0.7845, + "epsilon": 20.7, + }, + "mecn": { + "MW": 41.052, + "density": 0.7857, + "epsilon": 37.5, + }, + "benzene": { + "MW": 78.11, + "density": 0.8765, + "epsilon": 2.28, + }, + "cs2": { + "MW": 76.13, + "density": 1.266, + "epsilon": 2.63, + }, + "dioxane": { + "MW": 88.11, + "density": 1.033, + "epsilon": 2.25, + }, + "dmso": { + "MW": 78.13, + "density": 1.092, + "epsilon": 46.68, + }, + "et2o": { + "MW": 74.12, + "density": 0.713, + "epsilon": 4.27, + }, + "me2o": { + "MW": 46.07, + "density": 0.735, + "epsilon": 6.18, + }, + "etoh": { + "MW": 46.07, + "density": 0.789, + "epsilon": 24.3, + }, + "meoh": { + "MW": 32.04, + "density": 0.791, + "epsilon": 32.63, + }, + "etoac": { + "MW": 88.11, + "density": 0.895, + "epsilon": 6.02, + }, + "thf": { + "MW": 72.106, + "density": 0.8833, + "epsilon": 7.58, + }, + "mtbe": { + "MW": 88.15, + "density": 0.741, + }, + "phcf3": { + "MW": 146.11, + "density": 1.19, + "epsilon": 9.18, + }, + "et3n": { + "MW": 101.19, + "density": 0.7255, + "epsilon": 2.4, + }, + "ch2cl2": { + "MW": 84.93, + "density": 1.33, + "epsilon": 8.93, + }, } +# estimate molarity or molecular_volume from +# MW and density if we are missing expt. data +for data_dict in solvent_data.values(): + if data_dict.get("molarity") is None: + data_dict["molarity"] = 1000 / (data_dict["density"] * data_dict["MW"]) -def get_solvent_line(solvent: str, calculator: str, theory_level: str) -> str: - """Get the solvent line for a specific external calculator.""" - if solvent is None: - return "" - - if solvent in solvent_synonyms: - solvent = solvent_synonyms[solvent] - - if solvent not in epsilon_dict: - print(f"Solvent '{solvent}' not recognized. Implemented solvents are:") - for s in epsilon_dict: - print(" " + s) - print("Please note that not all solvents will work with all calculators.") - sys.exit(1) + if data_dict.get("molecular_volume") is None: + data_dict["molecular_volume"] = ( + data_dict["MW"] / data_dict["density"] / AVOGADRO_NA / A3_TO_mL + ) - return new_theory_level[calculator](theory_level, solvent) +epsilon_dict = { + solvent: data["epsilon"] for solvent, data in solvent_data.items() if "epsilon" in data +} diff --git a/firecode/standalone_optimizer.py b/firecode/standalone_optimizer.py index 2a70833..7c0aee5 100644 --- a/firecode/standalone_optimizer.py +++ b/firecode/standalone_optimizer.py @@ -320,7 +320,7 @@ def standalone_optimize(optimizer: OptimizerOptions) -> None: """ if optimizer.free_energy: print("--> Requested free energy calculation - performing vibrational analysis") - from firecode.ase_manipulations import ase_vib + from firecode.thermochemistry import ase_vib if optimizer.sp: print("--> Single point calculation requested (no optimization)") @@ -431,7 +431,8 @@ def standalone_optimize(optimizer: OptimizerOptions) -> None: ase_calc=optimizer.ase_calc, charge=charge, mult=mult, - temp=optimizer.T, + T_K=optimizer.T, + solvent=optimizer.solvent, title=f"{name[:-4]}", ) diff --git a/firecode/tests/C2H4.xyz b/firecode/tests/C2H4.xyz index 0666259..a00214c 100644 --- a/firecode/tests/C2H4.xyz +++ b/firecode/tests/C2H4.xyz @@ -1,8 +1,8 @@ 6 -Title -C -1.08800 -0.88350 -0.00000 -H -0.55280 -1.81000 -0.00000 -H -2.15800 -0.88370 0.00000 -C -0.41070 0.29030 0.00000 -H -0.94590 1.21690 -0.00000 -H 0.65930 0.29060 0.00000 +Energy = -601.410936641382 kcal/mol +C -1.077364 -0.865009 -0.000000 +H -0.569017 -1.809963 0.000000 +H -2.149881 -0.897752 0.000000 +C -0.421333 0.271877 0.000000 +H -0.929692 1.216830 -0.000000 +H 0.651186 0.304617 -0.000000 diff --git a/firecode/tests/test_suite.py b/firecode/tests/test_suite.py index 36b78aa..f82c743 100644 --- a/firecode/tests/test_suite.py +++ b/firecode/tests/test_suite.py @@ -1,12 +1,13 @@ """Tests for FIRECODE.""" +import json from pathlib import Path import pytest from firecode.embedder import Embedder from firecode.optimization_methods import Opt_func_dispatcher -from firecode.utils import FolderContext, HiddenPrints, clean_directory, read_xyz +from firecode.utils import FolderContext, HiddenPrints, NewFolderContext, clean_directory, read_xyz HERE = Path(__file__).resolve().parent @@ -64,7 +65,7 @@ def run_firecode_input(name: str) -> None: clean_directory( to_remove_startswith=["firecode"], - to_remove_endswith=[".log", ".out", ".svg"], + to_remove_endswith=[".log", ".out", ".svg", ".json"], to_remove_contains=[ "clockwise", "_confs", @@ -160,3 +161,51 @@ def test_operator_fsm() -> None: def test_operator_pka() -> None: """Tests the pka operator.""" run_firecode_input("operator_pka") + + +@pytest.mark.codecov +def test_solvent_names() -> None: + """Tests the solvent names.""" + from firecode.solvents import epsilon_dict, solvent_data, solvent_synonyms, to_xtb_solvents + + for solvent in solvent_synonyms: + if solvent in solvent_data: + corrected = solvent_synonyms[solvent] + raise KeyError(f'"{solvent}" in solvent_data should be named "{corrected}"') + + err = [] + for translated_solvent in solvent_synonyms.values(): + if translated_solvent not in to_xtb_solvents: + if translated_solvent not in epsilon_dict: + err.append( + f'Solvent "{translated_solvent}" has no epsilon value defined in solvent_data: ' + "it should then be a key in to_xtb_solvents." + ) + + if err: + raise Exception("\n".join(err)) + + +@pytest.mark.codecov +def test_vib_analysis() -> None: + """Tests the ase_vib function.""" + from firecode.thermochemistry import ase_vib + + mol = read_xyz(str(HERE / "C2H4.xyz")) + solvent = "toluene" + ase_calc = Opt_func_dispatcher("XTB").get_ase_calc("GFN-FF", solvent) + with NewFolderContext(str(HERE / "temp_C2H4_vib")): + _ = ase_vib( + atoms=mol.atoms, + coords=mol.coords[0], + ase_calc=ase_calc, + charge=0, + mult=1, + solvent=solvent, + ) + + with open(str(HERE / "temp_C2H4_vib/temp_thermo.json"), "rb") as f: + thermo_dict = json.loads(f.read()) + + assert all([f >= 0 for f in thermo_dict["freqs_cm1"]]) + assert len(thermo_dict["freqs_cm1"]) == len(mol.atoms) * 3 diff --git a/firecode/thermochemistry.py b/firecode/thermochemistry.py new file mode 100644 index 0000000..71f6217 --- /dev/null +++ b/firecode/thermochemistry.py @@ -0,0 +1,535 @@ +# coding=utf-8 +"""FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles +Copyright (C) 2021-2026 NicolΓ² Tampellini + +SPDX-License-Identifier: LGPL-3.0-or-later + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see +https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text. + +""" + +from __future__ import annotations + +import json +import math +import os +from shutil import rmtree +from time import perf_counter +from typing import TYPE_CHECKING, Any, Optional, cast + +import numpy as np +from ase import Atoms +from ase.optimize import LBFGS +from ase.vibrations import Vibrations +from prettytable import PrettyTable +from prism_pruner.utils import time_to_string + +from firecode.ase_manipulations import set_charge_and_mult_on_ase_atoms +from firecode.solvents import solvent_data, solvent_synonyms +from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float +from firecode.units import ( + AVOGADRO_NA, + EH_TO_EV, + EH_TO_KCAL, + EV_TO_WAVENUMS, + KB__J_K, + A3_TO_mL, + AMU__kg, + ANGSTROEM_TO_m, + C__cm_s, + KB__eV_K, + PLANCK_h__J_s, + theta_per_cm1_K, +) +from firecode.utils import HiddenPrints + +if TYPE_CHECKING: + from ase.calculators.calculator import Calculator as ASECalculator + +# Frequencies below this are considered belonging to proper +# transition states and excluded from thermochemical analysis, +# while the (_TS_THR_CM_1 < v < 0) range will be treated as positive. +_TS_THR_CM_1: float = -50 + + +def _free_space_mL_per_L(solvent: str | None = None) -> float: + """Return accessible free space (mL per L) for a solute in bulk solvent. + Based on Shakhnovich & Whitesides (J. Org. Chem. 1998, 63, 3821) and + the GoodVibes implementation. + + """ + if not solvent: + return 1000.0 + + # standardize solvent name + solvent = solvent_synonyms.get(solvent.lower(), solvent.lower()) + + if solvent not in solvent_data: + raise NotImplementedError( + f'Unknown "{solvent}" passed as solvent. Currently ' + f"parametrized solvents for quasi-RRHO: {list(solvent_data.keys())}" + ) + + molarity = solvent_data[solvent]["molarity"] + mol_volume = solvent_data[solvent]["molecular_volume"] + + # v_free (Γ…^3 per molecule) for accessible volume + v_free = ( + 8.0 * ((1e27 / (molarity * AVOGADRO_NA)) ** (1.0 / 3.0) - mol_volume ** (1.0 / 3.0)) ** 3 + ) + + # Convert to mL free space per liter of bulk solvent + freespace_mL_per_L = v_free * molarity * AVOGADRO_NA * A3_TO_mL + + return float(freespace_mL_per_L) + + +def rotational_constants_cm1_from_I(I_amuA2: Array1D_float) -> Array1D_float: + I_SI = I_amuA2 * AMU__kg * (ANGSTROEM_TO_m**2) + B_cm = [] + for I in I_SI: + B_cm.append(0.0 if I <= 0 else PLANCK_h__J_s / (8.0 * math.pi**2 * C__cm_s * I)) + return np.array([B_cm[0], B_cm[1], B_cm[2]]) + + +def classify_geometry(I_amuA2: Array1D_float) -> str: + """Return one of: 'atom', 'linear', 'nonlinear' + - 'atom' if all principal moments are (near) zero + - 'linear' if one moment is ~0 but not all (diatomics, etc.) + - 'nonlinear' otherwise + """ + abs_I_amuA2 = np.abs(I_amuA2) + imax = float(np.max(abs_I_amuA2)) + # Treat as atom if all moments are essentially zero + if imax < 1e-12: + return "atom" + if (float(np.min(abs_I_amuA2)) / imax) < 1e-6: + return "linear" + return "nonlinear" + + +def rrho_thermo( + atoms: Atoms, + freqs_cm1: Array1D_float, + T_K: float = 298.15, + P_atm: float = 1.0, + symmetry_number: int = 1, + E_el_Eh: float = 0.0, + mult: int = 1, + qrrho: bool = True, + cutoff_cm1: Optional[float] = None, + qrrho_ref_cm1: float = 100.0, + qrrho_alpha: float = 4.0, + conc_mol_L: float | None = None, + solv: Optional[str] = None, +) -> dict[str, Any]: + # Remove 3/5/6 zero modes + I = atoms.get_moments_of_inertia() # type:ignore[no-untyped-call] + + match geom := classify_geometry(I): + case "atom": + start = 3 + case "linear": + start = 5 + case _: # 'nonlinear' + start = 6 + + vib_cm_all = [] + for i, f in enumerate(freqs_cm1): + if i < start: + continue + + # genuine TS imaginary mode β€” exclude entirely + if f < _TS_THR_CM_1: + continue + + # small negative = numerical noise β€” treat as positive + if abs(f) > 1e-3: + vib_cm_all.append(abs(f)) + + assert len(vib_cm_all) == len(freqs_cm1) - start, ( + f"Frequency mismatch: expected {len(freqs_cm1)}, read {len(vib_cm_all)}" + ) + + if cutoff_cm1 is None: + cutoff_cm1 = 1.0 if qrrho else 35.0 + vib_cm = [f for f in vib_cm_all if f > cutoff_cm1] + + mass_amu = float(np.sum(atoms.get_masses())) + mass_kg = mass_amu * AMU__kg + + B_A, B_B, B_C = rotational_constants_cm1_from_I(I) + theta = [theta_per_cm1_K * b for b in (abs(B_A), abs(B_B), abs(B_C))] + theta = [t if t > 0 else 1e-30 for t in theta] + + # vib_e = [f / EV_TO_WAVENUMS for f in vib_cm] + # ZPE_eV = 0.5 * sum(vib_e) # GoodVibes approximation: all modes contribute equally + + # In Grimme's original formulation (Angew. Chem. 2012), the ZPE should technically + # be interpolated as well β€” a free rotor has no zero-point energy, so the + # contribution should be (w Γ— 0.5 * hΞ½) per mode. The strict expression is: + + # ZPE = Ξ£ w_i Γ— (Β½hΞ½_i) + + # Added to vib_cm loop, see below. + + # Rot/trans energies (unchanged) + Erot_eV = 0.0 if geom == "atom" else ((1.0 if geom == "linear" else 1.5) * KB__eV_K * T_K) + Etrans_eV = 1.5 * KB__eV_K * T_K + + # ---------- Translational entropy: pressure OR concentration ---------- + # (Sackur-Tetrode) + # S/k = ln( (2Ο€ m kT)^{3/2} / (h^3 n) ) + 5/2 where n is number density [1/m^3]. + # - Gas phase: n = P / (kT) + # - Solution: n = (conc [mol/L]) * 1000 [L/m^3] * Na / (free_space_fraction), + # with free-space from Shakhnovich–Whitesides. + lambda_factor = ((2.0 * math.pi * mass_kg * KB__J_K * T_K) ** 1.5) / (PLANCK_h__J_s**3) + + if conc_mol_L is not None: + free_mL_per_L = _free_space_mL_per_L(solv) + # free-space fraction in a liter: + free_frac = max(free_mL_per_L / 1000.0, 1e-9) # avoid zero + number_density = conc_mol_L * 1000.0 * AVOGADRO_NA / free_frac # 1/m^3 + else: + P_Pa = P_atm * 101325.0 + number_density = P_Pa / (KB__J_K * T_K) + + S_trans_over_k = math.log(lambda_factor / number_density) + 2.5 + TS_trans_eV = KB__eV_K * T_K * S_trans_over_k + + # ---------- Rotational entropy ---------- + if geom == "atom": + S_rot_over_k = 0.0 + elif geom == "linear": + theta_rot = theta[1] + S_rot_over_k = math.log(T_K / (symmetry_number * theta_rot)) + 1.0 + + # adding Herzberg linear correction + S_rot_over_k += math.log(1.0 + theta_rot / (3.0 * T_K)) + else: + prod_theta = theta[0] * theta[1] * theta[2] + + # classical term + S_rot_over_k = ( + math.log(math.sqrt(math.pi) * (T_K**1.5) / (symmetry_number * math.sqrt(prod_theta))) + + 1.5 + ) + + # Herzberg formula: Add Euler-Maclaurin correction to ln(q_rot): + euler_maclaurin = (theta[0] + theta[1] + theta[2]) / (12.0 * T_K) + S_rot_over_k += math.log(1.0 + euler_maclaurin) + + TS_rot_eV = KB__eV_K * T_K * S_rot_over_k + + # ---------- Vibrational entropy + thermal vibrational energy ---------- + I_SI = I * AMU__kg * (ANGSTROEM_TO_m**2) + I_av = float(np.mean(I_SI)) if np.any(I_SI > 0) else 1e-46 + + ZPE_eV = 0.0 + S_vib_over_k = 0.0 + Evib_corr_eV = 0.0 + + for f_cm in vib_cm: + e_eV = f_cm / EV_TO_WAVENUMS + x = e_eV / (KB__eV_K * T_K) if T_K > 0 else float("inf") + + if T_K > 0: + S_HO_over_k = (x / math.expm1(x)) - math.log1p(-math.exp(-x)) + E_th_HO_eV = e_eV / math.expm1(x) + else: + S_HO_over_k = 0.0 + E_th_HO_eV = 0.0 + + if not qrrho: + # full weight + ZPE_eV += 0.5 * e_eV + + S_vib_over_k += S_HO_over_k + Evib_corr_eV += E_th_HO_eV + continue + + # qRRHO + w = 1.0 / (1.0 + (qrrho_ref_cm1 / f_cm) ** qrrho_alpha) + + nu_Hz = C__cm_s * f_cm + muK = PLANCK_h__J_s / (8.0 * math.pi**2 * nu_Hz) + muEff = (muK * I_av) / (muK + I_av) + + S_FR_over_k = 0.5 + 0.5 * math.log( + (8.0 * math.pi**2 * muEff * KB__J_K * T_K) / (PLANCK_h__J_s**2) + ) + + # qRRHO: ZPE interpolated, free rotor contributes no ZPE + ZPE_eV += w * (0.5 * e_eV) + S_vib_over_k += w * S_HO_over_k + (1.0 - w) * S_FR_over_k + Evib_corr_eV += w * E_th_HO_eV + (1.0 - w) * (0.5 * KB__eV_K * T_K) + + # Ignoring electronic entropy: beyond the + # scope of this implementation + TS_el_eV = 0.0 + + E_el_eV = E_el_Eh * EH_TO_EV + + U_total_eV = E_el_eV + ZPE_eV + Evib_corr_eV + Erot_eV + Etrans_eV + Hcorr_eV = KB__eV_K * T_K + H_total_eV = U_total_eV + Hcorr_eV + TS_vib_eV = KB__eV_K * T_K * S_vib_over_k + TS_tot_eV = TS_el_eV + TS_vib_eV + TS_rot_eV + TS_trans_eV + G_total_eV = H_total_eV - TS_tot_eV + G_minus_Eel_eV = G_total_eV - E_el_eV + + # ---- conversions (unchanged) ---- + ZPE_Eh = ZPE_eV / EH_TO_EV + Evib_corr_Eh = Evib_corr_eV / EH_TO_EV + Erot_Eh = Erot_eV / EH_TO_EV + Etrans_Eh = Etrans_eV / EH_TO_EV + U_total_Eh = U_total_eV / EH_TO_EV + H_total_Eh = H_total_eV / EH_TO_EV + Hcorr_Eh = Hcorr_eV / EH_TO_EV + TS_el_Eh = TS_el_eV / EH_TO_EV + TS_vib_Eh = TS_vib_eV / EH_TO_EV + TS_rot_Eh = TS_rot_eV / EH_TO_EV + TS_trans_Eh = TS_trans_eV / EH_TO_EV + G_total_Eh = G_total_eV / EH_TO_EV + G_minus_Eel_Eh = G_minus_Eel_eV / EH_TO_EV + + # symmetry sweep + rot_table = [] + for sn in range(1, 13): + if geom == "atom": + TS_rot_sn_Eh = 0.0 + elif geom == "linear": + S_rot_sn = math.log(T_K / (sn * (theta[1]))) + 1.0 + S_rot_sn += math.log(1.0 + theta_rot / (3.0 * T_K)) # adding Herzberg linear correction + TS_rot_sn_Eh = (KB__eV_K * T_K * S_rot_sn) / EH_TO_EV + else: + prod_theta = theta[0] * theta[1] * theta[2] + S_rot_sn = ( + math.log(math.sqrt(math.pi) * (T_K**1.5) / (sn * math.sqrt(prod_theta))) + 1.5 + ) + S_rot_sn += math.log(1.0 + euler_maclaurin) # adding Herzberg linear correction + TS_rot_sn_Eh = (KB__eV_K * T_K * S_rot_sn) / EH_TO_EV + rot_table.append((sn, TS_rot_sn_Eh)) + + return dict( + freqs_cm1=[float(f) for f in freqs_cm1], + mass_amu=float(np.sum(atoms.get_masses())), + rotconsts_cm1=(B_A, B_B, B_C), + ZPE_Eh=ZPE_Eh, + Evib_corr_Eh=Evib_corr_Eh, + Erot_Eh=Erot_Eh, + Etrans_Eh=Etrans_Eh, + U_total_Eh=U_total_Eh, + H_total_Eh=H_total_Eh, + Hcorr_Eh=Hcorr_Eh, + TS_el_Eh=TS_el_Eh, + TS_vib_Eh=TS_vib_Eh, + TS_rot_Eh=TS_rot_Eh, + TS_trans_Eh=TS_trans_Eh, + G_total_Eh=G_total_Eh, + G_minus_Eel_Eh=G_minus_Eel_Eh, + rot_table_Eh=rot_table, + qrrho=qrrho, + cutoff_cm1=float(cutoff_cm1), + qrrho_ref_cm1=float(qrrho_ref_cm1) if qrrho else None, + qrrho_alpha=float(qrrho_alpha) if qrrho else None, + conc_mol_L=conc_mol_L, + solv=(solv or "none"), + mult=int(mult), + ) + + +def ase_vib( + atoms: Array1D_str, + coords: Array2D_float, + ase_calc: ASECalculator, + charge: int, + mult: int, + T_K: float = 298.15, + P_atm: float = 1.0, + C_mol_L: float | None = None, + solvent: str | None = None, + title: str = "temp", + return_gcorr: bool = True, + write_log: bool = True, +) -> float: + """returns: G(corr) or Free energy, in kcal/mol.""" + ase_atoms = Atoms(atoms, positions=coords) + ase_atoms = set_charge_and_mult_on_ase_atoms(ase_atoms, charge=charge, mult=mult) + + ase_atoms.calc = ase_calc + + vib = Vibrations(ase_atoms, delta=0.005) # type: ignore[no-untyped-call] + + with open(f"vib_{title}.out", "w", encoding="utf-8") as f: + f.write("--> FIRECODE ASE Frequency calculation report\n") + f.write(f"charge={charge}, mult={mult}, C={C_mol_L} mol/L, P={P_atm} atm\n") + f.write( + f"Concentration {'not ' if C_mol_L is None else ''}provided: reference " + f"state used is {'gas ' if C_mol_L is None else 'solution'} phase.\n\n" + ) + + # tighten convergence + t_start = perf_counter() + f.write("--> Tightening geom. opt. convergence to fmax=1E-4\n") + opt = LBFGS(ase_atoms, maxstep=0.01) + with HiddenPrints(): + opt.run(fmax=1e-4) # type: ignore[no-untyped-call] + energy = ase_atoms.get_potential_energy() # type: ignore[no-untyped-call] + f.write( + f"Structure optimized to fmax=1E-4 in {time_to_string(perf_counter() - t_start)}\n\n" + ) + + # remove cache folder + if "vib" in os.listdir(): + rmtree(os.path.join(os.getcwd(), "vib")) + + # run vibrational analysis + t_start = perf_counter() + vib.run() # type: ignore[no-untyped-call] + f.write( + f"Vibrational frequencies calculated in {time_to_string(perf_counter() - t_start)}\n\n" + ) + + # get energies (frequencies) and print summary + freqs_complex = vib.get_energies() * EV_TO_WAVENUMS # type: ignore[no-untyped-call] + + # clamp small negatives and sort freqs + freqs = cleanup_freqs(ase_atoms, freqs_complex) + + table = PrettyTable() + table.field_names = ["Mode", "Freq. (cm^-1)"] + for i, freq in enumerate(freqs): + table.add_row([i, round(freq, 1)]) + + f.write("\n" + table.get_string() + "\n") + + # run quasi-RRHO + thermo_dict = rrho_thermo( + atoms=ase_atoms, + freqs_cm1=freqs, + T_K=T_K, + P_atm=P_atm, + symmetry_number=1, # detect_symmetry_number(atoms)? + E_el_Eh=energy / EH_TO_EV, + mult=1, + conc_mol_L=C_mol_L, + solv=solvent, + ) + + # save the raw thermo data as .json + with open(f"{title}_thermo.json", "w") as ff: + ff.write(json.dumps(thermo_dict, indent=4)) + + EE = energy / EH_TO_EV # was eV, now Eh + G = thermo_dict["G_total_Eh"] + Gcorr = thermo_dict["G_minus_Eel_Eh"] + H = thermo_dict["H_total_Eh"] + S = (H - G) / T_K # Eh/K + + f.write("\n--> What follows mocks an ORCA output for scraping:\n\n") + f.write(f"Number of atoms ... {len(atoms)}\n") + f.write(f"Temperature ...: {T_K:.2f} K ({T_K - 273.15:.2f} Β°C)\n\n") + + f.write("VIBRATIONAL FREQUENCIES\n") + f.write("-------------------------------------\n") + for i, freq in enumerate(freqs): + f.write(f" {i:>4}: {freq:4.2f} cm**-1\n") + + f.write(f"\nFINAL SINGLE POINT ENERGY {EE:.8f} Eh\n") + f.write(f"FINAL GIBBS FREE ENERGY {G:.8f} Eh\n") + f.write(f"G-E(el) ... {Gcorr:.8f} Eh {Gcorr * EH_TO_KCAL:.2f} kcal/mol\n") + f.write(f"Total enthalpy ... {H:.8f} Eh\n") + f.write(f"Final entropy term ... {S:.8f} Eh/K\n") + f.write("\n\n*** ORCA TERMINATED NORMALLY ***\n") + + del vib + if os.path.isdir("vib"): + rmtree("vib") + + if not write_log: + os.remove(f"vib_{title}.out") + + if return_gcorr: + return cast("float", Gcorr * EH_TO_KCAL) + return cast("float", G * EH_TO_KCAL) + + +def cleanup_freqs( + atoms: Atoms, + freqs_complex: list[complex], + scaling_factor: float = 1.0, +) -> Array1D_float: + """Return sorted and cleaned up frequencies.""" + # Geometry class -> rigid-body count + I = atoms.get_moments_of_inertia() # type:ignore[no-untyped-call] + + match classify_geometry(I): # "atom", "linear" or "nonlinear" + case "atom": + zero_first = 3 + case "linear": + zero_first = 5 + case _: # "nonlinear" + zero_first = 6 + + freqs_cm = [] + for f in np.asarray(freqs_complex): + if np.iscomplexobj(f) and abs(f.imag) > 1e-8: + freqs_cm.append(-float(abs(f.imag))) # imaginary -> negative + else: + freqs_cm.append(float(np.real(f))) + freqs = np.array(freqs_cm, dtype=float) + + nmode = len(freqs) + idx_all = np.arange(nmode) + + # Identify the most negative as "imag" if any + idx_imag = int(np.argmin(freqs)) if np.any(freqs < 0.0) else None + + # See if one looks like a real TS imgainary freq + ts = freqs[idx_imag] < _TS_THR_CM_1 if idx_imag is not None else False + + # Choose rigid-body indices as those with the smallest |f|, excluding the chosen imag + idx_sorted_abs = np.argsort(np.abs(freqs)) + rigid = [i for i in idx_sorted_abs if i != idx_imag][:zero_first] + + # Build permutation: + # rigid zeros, then (if TS) the imaginary, then the rest by increasing |f| + perm = [] + perm.extend(rigid) + if ts: + perm.append(idx_imag) + placed = set(perm) + rest = [i for i in idx_all if i not in placed] + rest_sorted = sorted(rest, key=lambda i: abs(freqs[i])) + perm.extend(rest_sorted) + + # Apply permutation to freqs and modes + freqs = freqs[perm] + # modes_mw = modes_mw[:, perm] + + # Post-facto cleanup: + # - force the first zero_first to exactly 0.0 + # - clamp tiny |f| < |_TS_THR_CM_1| to +abs(f) for the rest + freqs[:zero_first] = 0.0 + for i in range(zero_first, nmode): + if abs(freqs[i]) < abs(_TS_THR_CM_1): + freqs[i] = abs(freqs[i]) + + # Scale if needed + if scaling_factor != 1.0: + freqs *= scaling_factor + + return freqs diff --git a/firecode/units.py b/firecode/units.py index cee7076..6136909 100644 --- a/firecode/units.py +++ b/firecode/units.py @@ -26,3 +26,17 @@ EV_TO_KCAL = 23.060541945329334 EV_TO_WAVENUMS = 8065.544 R = 0.001985877534 # kcal/(mol*K) +AVOGADRO_NA = 6.02214076e23 + +KB__J_K = 1.380649e-23 +PLANCK_h__J_s = 6.62607015e-34 +C__m_s = 2.99792458e8 +C__cm_s = 2.99792458e10 +AMU__kg = 1.66053906660e-27 +ANGSTROEM_TO_m = 1e-10 +A3_TO_mL = 1e-24 + +# Conversions +J_TO_EV = 1.602176634e-19 +KB__eV_K = 8.617333262145e-5 # eV/K +theta_per_cm1_K = 1.438776877 # theta (K) = 1.438776877 * wavenumber(cm^-1) diff --git a/firecode/vib_analysis.py b/firecode/vib_analysis.py new file mode 100644 index 0000000..e69de29 From b38fa45b28efca0c1b1f762d0bbb70e3768bc60a Mon Sep 17 00:00:00 2001 From: ntampellini Date: Fri, 13 Mar 2026 12:12:40 -0400 Subject: [PATCH 2/4] expanded orca.out mock, xtb solvent names fix --- firecode/calculators/_xtb.py | 128 ++++++++++++++++------------------- firecode/thermochemistry.py | 17 +++-- firecode/utils.py | 7 +- 3 files changed, 76 insertions(+), 76 deletions(-) diff --git a/firecode/calculators/_xtb.py b/firecode/calculators/_xtb.py index 8a1bbc5..1432004 100644 --- a/firecode/calculators/_xtb.py +++ b/firecode/calculators/_xtb.py @@ -30,6 +30,7 @@ from prism_pruner.algebra import normalize from firecode.graph_manipulations import get_sum_graph +from firecode.solvents import to_xtb_solvents from firecode.typing_ import Array1D_str, Array2D_float, Array3D_float from firecode.units import EH_TO_KCAL from firecode.utils import NewFolderContext, clean_directory, read_xyz, write_xyz @@ -255,11 +256,11 @@ def xtb_opt( flags += f" -P {procs}" if solvent is not None: - if solvent == "methanol": + if solvent == "meoh": flags += " --gbsa methanol" else: - flags += f" --alpb {solvent}" + flags += f" --alpb {to_xtb_solvents.get(solvent, solvent)}" elif method.upper() in ("GFN-FF", "GFNFF"): flags += " --alpb ch2cl2" @@ -301,20 +302,18 @@ def xtb_opt( energy = energy_grepper(f"{title}.out", "TOTAL ENERGY", 3) # clean_directory((f'{title}.inp', f'{title}.xyz', f"{title}.out", trajname, outname)) - for filename in ( - "gfnff_topo", - "charges", - "wbo", - "xtbrestart", - "xtbtopo.mol", - ".xtboptok", - "gfnff_adjacency", - "gfnff_charges", - ): - try: - os.remove(filename) - except FileNotFoundError: - pass + clean_directory( + to_remove=( + "gfnff_topo", + "charges", + "wbo", + "xtbrestart", + "xtbtopo.mol", + ".xtboptok", + "gfnff_adjacency", + "gfnff_charges", + ), + ) return coords, energy, True @@ -452,7 +451,7 @@ def xtb_get_free_energy( flags += " --gbsa methanol" else: - flags += f" --alpb {solvent}" + flags += f" --alpb {to_xtb_solvents.get(solvent, solvent)}" try: with open("temp_hess.log", "w") as outfile: @@ -477,26 +476,23 @@ def xtb_get_free_energy( os.system(f"cat {outfile}") raise e - clean_directory() - for filename in ( - "gfnff_topo", - "charges", - "wbo", - "xtbrestart", - "xtbtopo.mol", - ".xtboptok", - "hessian", - "g98.out", - "vibspectrum", - "wbo", - "xtbhess.xyz", - "charges", - "temp_hess.log", - ): - try: - os.remove(filename) - except FileNotFoundError: - pass + clean_directory( + to_remove=( + "gfnff_topo", + "charges", + "wbo", + "xtbrestart", + "xtbtopo.mol", + ".xtboptok", + "hessian", + "g98.out", + "vibspectrum", + "wbo", + "xtbhess.xyz", + "charges", + "temp_hess.log", + ), + ) return result @@ -682,7 +678,7 @@ def crest_mtd_search( flags += " --gbsa methanol" else: - flags += f" --alpb {solvent}" + flags += f" --alpb {to_xtb_solvents.get(solvent, solvent)}" if kcal is None: kcal = 10 @@ -706,20 +702,18 @@ def crest_mtd_search( # clean_directory((f'{title}.inp', f'{title}.xyz', f"{title}.out")) - for filename in ( - "gfnff_topo", - "charges", - "wbo", - "xtbrestart", - "xtbtopo.mol", - ".xtboptok", - "gfnff_adjacency", - "gfnff_charges", - ): - try: - os.remove(filename) - except FileNotFoundError: - pass + clean_directory( + to_remove=( + "gfnff_topo", + "charges", + "wbo", + "xtbrestart", + "xtbtopo.mol", + ".xtboptok", + "gfnff_adjacency", + "gfnff_charges", + ), + ) return new_coords @@ -753,9 +747,9 @@ def xtb_gsolv( flags += f" --chrg {charge}" if mult != 1: - flags += f" --uhf {int(int(mult) - 1)}" + flags += f" --uhf {int(mult - 1)}" - flags += f" --{model} {solvent}" + flags += f" --{model} {to_xtb_solvents.get(solvent, solvent)}" try: with open(f"{title}.out", "w") as f: @@ -775,19 +769,17 @@ def xtb_gsolv( gsolv = energy_grepper(f"{title}.out", "-> Gsolv", 3) clean_directory((f"{title}.inp", f"{title}.xyz", f"{title}.out", outname)) - for filename in ( - "gfnff_topo", - "charges", - "wbo", - "xtbrestart", - "xtbtopo.mol", - ".xtboptok", - "gfnff_adjacency", - "gfnff_charges", - ): - try: - os.remove(filename) - except FileNotFoundError: - pass + clean_directory( + to_remove=( + "gfnff_topo", + "charges", + "wbo", + "xtbrestart", + "xtbtopo.mol", + ".xtboptok", + "gfnff_adjacency", + "gfnff_charges", + ) + ) return gsolv diff --git a/firecode/thermochemistry.py b/firecode/thermochemistry.py index 71f6217..0187172 100644 --- a/firecode/thermochemistry.py +++ b/firecode/thermochemistry.py @@ -441,7 +441,8 @@ def ase_vib( f.write("\n--> What follows mocks an ORCA output for scraping:\n\n") f.write(f"Number of atoms ... {len(atoms)}\n") - f.write(f"Temperature ...: {T_K:.2f} K ({T_K - 273.15:.2f} Β°C)\n\n") + f.write(f"Total Charge ... ... {charge}\n\n") + f.write(f"Temperature ...: {T_K:.2f} K ({T_K - 273.15:.2f} Β°C)\n") f.write("VIBRATIONAL FREQUENCIES\n") f.write("-------------------------------------\n") @@ -450,10 +451,16 @@ def ase_vib( f.write(f"\nFINAL SINGLE POINT ENERGY {EE:.8f} Eh\n") f.write(f"FINAL GIBBS FREE ENERGY {G:.8f} Eh\n") - f.write(f"G-E(el) ... {Gcorr:.8f} Eh {Gcorr * EH_TO_KCAL:.2f} kcal/mol\n") - f.write(f"Total enthalpy ... {H:.8f} Eh\n") - f.write(f"Final entropy term ... {S:.8f} Eh/K\n") - f.write("\n\n*** ORCA TERMINATED NORMALLY ***\n") + f.write(f"G-E(el) ... {Gcorr:.8f} Eh {Gcorr * EH_TO_KCAL:.2f} kcal/mol\n\n") + + kB_times_T = (KB__eV_K / EH_TO_EV) * T_K + f.write(f"Thermal Enthalpy correction ... {kB_times_T:.8f} Eh\n") + f.write(f"Total correction {H - EE - kB_times_T:.8f} Eh\n") + f.write(f"Total enthalpy ... {H:.8f} Eh\n\n") + + f.write(f"Final entropy term ... {S:.8f} Eh/K\n\n") + + f.write("*** ORCA TERMINATED NORMALLY ***\n") del vib if os.path.isdir("vib"): diff --git a/firecode/utils.py b/firecode/utils.py index 5ebba8c..2dcb256 100644 --- a/firecode/utils.py +++ b/firecode/utils.py @@ -759,12 +759,13 @@ def __enter__(self) -> None: os.chdir(self.new_folder_name) - def __exit__(self, *args: object) -> None: + def __exit__(self, exc_type: type, exc_val: Exception, exc_tb: object) -> None: # get out of working folder os.chdir(os.path.dirname(os.getcwd())) - # and eventually delete it - if self.delete_after: + # only delete if instructed to + # and no unhandled exception occurred + if self.delete_after and exc_type is None: shutil.rmtree(self.new_folder_name) From 14545ac06f4edddc67052fee2c19ad5a52d60f81 Mon Sep 17 00:00:00 2001 From: ntampellini Date: Sun, 15 Mar 2026 16:47:13 -0400 Subject: [PATCH 3/4] expanded printout and themo settings --- .gitignore | 1 + CHANGELOG.md | 5 +- firecode/ase_manipulations.py | 3 +- firecode/embedder.py | 202 ++++++++++++------ firecode/embedder_options.py | 21 ++ firecode/pka.py | 48 +---- firecode/solvents.py | 66 +++--- firecode/standalone_optimizer.py | 8 +- .../operator_rdkit_search.txt | 2 +- firecode/tests/test_suite.py | 1 + firecode/thermochemistry.py | 81 ++++++- 11 files changed, 282 insertions(+), 156 deletions(-) diff --git a/.gitignore b/.gitignore index bee573f..84cdf2f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,6 @@ firecode.egg-info/ firecode/calculators/uma-s-1p1.pt firecode/calculators/uma-s-1p2.pt +firecode/calculators/esen_sm_conserving_all.pt coverage.xml .coverage diff --git a/CHANGELOG.md b/CHANGELOG.md index 39f4e34..b32ae56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,11 @@ ## FIRECODE 1.7.0 πŸ”₯ (WIP) +- Restructured and expanded solvent module. - Implemented vibrational analysis via ASE including quasi-RRHO thermochemistry (as in GoodVibes) to get free energies. -- Restructured solvent module. +- Added FREQ keyword, that computes free energies at the end of the geometry optimization cycles. +- Added T (temperature, in K), T_C (temperature, in Β°C), P (pressure, in atm), and C (concentration, in mol/L) keywords. + ## FIRECODE 1.6.0 πŸ”₯ (March 11 2026) - Refreshed constraints handling in operators. diff --git a/firecode/ase_manipulations.py b/firecode/ase_manipulations.py index ddf8b47..ae9487f 100644 --- a/firecode/ase_manipulations.py +++ b/firecode/ase_manipulations.py @@ -586,7 +586,8 @@ def ase_neb( opt.run(fmax=0.05, steps=200 + opt.nsteps) energies = [image.get_total_energy() * EV_TO_KCAL for image in images] # type: ignore[no-untyped-call] - print(f"--> Updated temporary MEP at {title}_MEP_temp_pre_CI.xyz") + if verbose_print: + print(f"--> Updated temporary MEP at {title}_MEP_temp_pre_CI.xyz") ase_dump(f"{title}_MEP_temp_pre_CI.xyz", atoms, neb.images, energies) if climbing_image: diff --git a/firecode/embedder.py b/firecode/embedder.py index d42d0d1..5a5ab31 100644 --- a/firecode/embedder.py +++ b/firecode/embedder.py @@ -200,6 +200,20 @@ def log(self, string: str = "", p: bool = True) -> None: string += "\n" self.logfile.write(string) + def log_warnings(self) -> None: + """Logs the non-fatal errors (warnings) at the end of a run.""" + if self.warnings: + self.log() + self.log("{:*^76}".format(" W A R N I N G S ")) + self.log("{:*^76}".format(" your run generated these non-fatal warnings ")) + self.log() + + for warning in self.warnings: + self.log(auto_newline(warning, max_line_len=65)) + self.log() + + self.log("*" * 76) + def debuglog(self, string: str = "") -> None: if self.options.debug and self.debug_logfile is not None: string += "\n" @@ -240,7 +254,7 @@ def write_banner_and_info(self) -> None: .β–’β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β–‘β–’β–‘β–‘β–‘ β–‘β–‘β–‘β–‘β–’ * β–’ . ▒░░░▒░╔═════════════════════════════════════════════╗░░░ β–‘β–‘β–‘ β–‘β–‘β–‘β–‘β–’ β–’β–‘ β–‘β•‘ β•‘β–‘β–’β–‘β–‘ β–‘β–‘β–‘β–‘β–‘β–’ - +β–’ β–’β–‘β–‘β–‘β–‘β–‘β•‘ nicolo.tampellini@yale.edu β•‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–’ β–’ + +β–’ β–’β–‘β–‘β–‘β–‘β–‘β•‘ ntamp @ mit.edu β•‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–’ β–’ β–’β–‘β–‘β–‘β–‘β•‘ β•‘β–‘β–‘ β–‘β–‘β–‘β–’ .. β–’ β–’β–‘β–’β–‘β–‘β•‘ Version πŸ”₯{0:^24}β•‘β–‘β–’β–‘β–‘β–‘ β–‘β–‘β–‘β–’ * . . β–’ β–‘β–‘β–‘β–‘β–‘β•‘ User πŸ”₯{1:^24}β•‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–’ + @@ -736,7 +750,7 @@ def _read_pairings(self) -> None: self.internal_angle_dih_constraints.append(emb_constr) def get_str_all_constraints(self, filename: str | None = None) -> str: - """Return a string detailing all constraints associated with a molecule.""" + """Return a string detailing all constraints associated with a molecule, or all.""" s = "" if filename is None: @@ -1663,11 +1677,22 @@ def normal_termination(self) -> None: relative energies of the first 10 structures, if possible. """ + # rename the final ensemble to clearly + # indicate successful execution + if hasattr(self, "outname") and self.outname in os.listdir(): + os.rename(self.outname, f"firecode_final_ensemble_{self.stamp}.xyz") + + # log warning to end of log + self.log_warnings() + + # remove undesired files clean_directory() + self.log( f"\n--> FIRECODE normal termination: total time {time_to_string(time.perf_counter() - self.t_start_run, verbose=True)}." ) + # write some info on the final ensemble if hasattr(self, "structures"): show = 10 if len(self.structures) > 0 and hasattr(self, "energies"): @@ -1943,6 +1968,20 @@ def fitness_refining(self, threshold: float = 5.0, verbose: bool = False) -> Non self.zero_candidates_check() + def get_num_active_constraints(self, only_fixed_constraints: bool = True) -> int: + """Get the number of active constraints.""" + constr_str = self.get_str_all_constraints() + if constr_str == "": + n_constr = 0 + else: + n_constr = len(constr_str[1:-1].split()) + if only_fixed_constraints: + n_constr -= len( + [letter for letter in self.pairings_table.keys() if letter.islower()] + ) + + return n_constr + def force_field_refining( self, conv_thr: str = "tight", @@ -2140,13 +2179,11 @@ def force_field_refining( ################################################# PRUNING: ENERGY - _, sequence = zip( - *sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]) - ) - self.energies = self.scramble(self.energies, sequence) - self.structures = self.scramble(self.structures, sequence) - self.constrained_indices = self.scramble(self.constrained_indices, sequence) - # sorting structures based on energy + # sort structures based on energy + sorted_indices = np.argsort(self.energies) + self.energies = self.energies[sorted_indices] + self.structures = self.structures[sorted_indices] + self.constrained_indices = self.constrained_indices[sorted_indices] if self.options.debug: self.dump_status( @@ -2242,16 +2279,7 @@ def optimization_refining( "TBLITE": int(self.avail_cpus // 4), }[self.options.calculator] - # get number of constraints for this run - constr_str = self.get_str_all_constraints() - if constr_str == "": - n_constr = 0 - else: - n_constr = len(constr_str[1:-1].split()) - if only_fixed_constraints: - n_constr -= len( - [letter for letter in self.pairings_table.keys() if letter.islower()] - ) + n_constr = self.get_num_active_constraints(only_fixed_constraints=only_fixed_constraints) self.log( f"--> {task} ({self.options.theory_level}{f'/{self.options.solvent}' if self.options.solvent is not None else ''}" @@ -2432,13 +2460,11 @@ def optimization_refining( ################################################# PRUNING: ENERGY - _, sequence = zip( - *sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]) - ) - self.energies = self.scramble(self.energies, sequence) - self.structures = self.scramble(self.structures, sequence) - self.constrained_indices = self.scramble(self.constrained_indices, sequence) - # sorting structures based on energy + # sort structures based on energy + sorted_indices = np.argsort(self.energies) + self.energies = self.energies[sorted_indices] + self.structures = self.structures[sorted_indices] + self.constrained_indices = self.constrained_indices[sorted_indices] if self.options.debug: self.dump_status( @@ -2512,16 +2538,7 @@ def optimization_refining_serial( else: solvent_line = "vacuum" - # get number of constraints for this run - constr_str = self.get_str_all_constraints() - if constr_str == "": - n_constr = 0 - else: - n_constr = len(constr_str[1:-1].split()) - if only_fixed_constraints: - n_constr -= len( - [letter for letter in self.pairings_table.keys() if letter.islower()] - ) + n_constr = self.get_num_active_constraints(only_fixed_constraints=only_fixed_constraints) self.log( f"--> {task} ({self.options.theory_level}/{solvent_line}) level via {self.options.calculator}, single thread - [{n_constr} constraints]" @@ -2688,13 +2705,11 @@ def optimization_refining_serial( ################################################# PRUNING: ENERGY - _, sequence = zip( - *sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]) - ) - self.energies = self.scramble(self.energies, sequence) - self.structures = self.scramble(self.structures, sequence) - self.constrained_indices = self.scramble(self.constrained_indices, sequence) - # sorting structures based on energy + # sort structures based on energy + sorted_indices = np.argsort(self.energies) + self.energies = self.energies[sorted_indices] + self.structures = self.structures[sorted_indices] + self.constrained_indices = self.constrained_indices[sorted_indices] if self.options.debug: self.dump_status( @@ -2746,6 +2761,54 @@ def optimization_refining_serial( if not only_fixed_constraints: self.energies.fill(0) + def vibrational_analysis(self) -> None: + """Perform a vibrational analysis of each one of the Embedder structures.""" + from firecode.thermochemistry import get_free_energies + + self.log( + f"\n--> Frequency calc. / Thermochemical analysis ({self.options.theory_level}" + f"{f'/{self.options.solvent}' if self.options.solvent is not None else ''}" + f" level with {self.options.calculator} via ASE" + ) + + if self.get_num_active_constraints(only_fixed_constraints=True) != 0: + self.warn( + "--> WARNING! Vibrational analysis is being performed with one or more " + "fixed constraints! The thermochemical data should be interpreted carefully!" + ) + + title = self.objects[0].rootname if len(self.objects) == 1 else "structures" + + self.energies = get_free_energies( + embedder=self, + atoms=self.atoms, + structures=self.structures, + charge=self.options.charge, + mult=self.options.mult, + title=title, + logfunction=self.log, + ) + + # sort structures based on energy + sorted_indices = np.argsort(self.energies) + self.energies = self.energies[sorted_indices] + self.structures = self.structures[sorted_indices] + self.constrained_indices = self.constrained_indices[sorted_indices] + + self.outname = f"firecode_vib_ensemble_{self.stamp}.xyz" + with open(self.outname, "w") as f: + for i, (structure, energy) in enumerate( + zip(align_structures(self.structures), self.rel_energies()) + ): + write_xyz( + self.atoms, + structure, + f, + title=f"Structure {i + 1} - Rel. G. = {round(energy, 3)} kcal/mol ({self.options.theory_level})", + ) + + self.log(f"--> Wrote {len(self.structures)} optimized structures to {self.outname}") + def write_mol_info(self) -> None: """Writes information about the firecode molecules read from the input file.""" head = "" @@ -2910,20 +2973,6 @@ def write_run_options(self) -> None: self.log(f" - {line}") - def log_warnings(self) -> None: - """Logs the non-fatal errors (warnings) at the end of a run.""" - if self.warnings: - self.log() - self.log("{:*^76}".format(" W A R N I N G S ")) - self.log("{:*^76}".format(" your run generated these non-fatal warnings ")) - self.log() - - for warning in self.warnings: - self.log(auto_newline(warning, max_line_len=65)) - self.log() - - self.log("*" * 76) - def run(self) -> None: """Run the firecode program.""" self.write_mol_info() @@ -2949,46 +2998,57 @@ def run(self) -> None: self.log("\n--> Dry run requested: exiting.") self.normal_termination() - try: # except KeyboardInterrupt - try: # except ZeroCandidatesError() + try: + try: + # if this is an embed run, generate candidates self.generate_candidates() if self.options.bypass: self.write_structures("unoptimized", energies=False) self.normal_termination() + # remove structures with excessively close contacts self.compenetration_refining() + + # remove similar structures self.similarity_refining( rmsd=True if self.embed == "refine" else False, verbose=True ) + # perform geometry optimizations if self.options.optimization: + # perform initial ff optimization if self.options.ff_opt: - # perform safe optimization only for embeds + # embeds do a first pass with prevent_scrambling if len(self.objects) > 1 and self.options.ff_calc == "XTB": self.force_field_refining(conv_thr="loose", prevent_scrambling=True) + # non-embeds do a first pass with looser convergence + # if there are a lot of structures or if there is + # some temporary (non-fixed) constraint elif len(self.structures) > 500 or self.temporary_constraints_present(): self.force_field_refining(conv_thr="loose") self.force_field_refining(conv_thr="tight") + # If we just optimized at a (FF) level and the final + # optimization level is the same, avoid repeating it if not ( self.options.ff_opt and self.options.theory_level == self.options.ff_level ): - # If we just optimized at a (FF) level and the final - # optimization level is the same, avoid repeating it - + # do a first pass with looser convergence + # if there are a lot of structures or if there is + # some temporary (non-fixed) constraint + # (both fixed constraints and interactions enforced) if len(self.structures) > 500 or self.temporary_constraints_present(): self.optimization_refining(conv_thr="loose") - # final partial optimization (with both fixed constraints and interactions enforced) + # final uncompromised optimization (only fixed constraints) self.optimization_refining(conv_thr="tight", only_fixed_constraints=True) - # final uncompromised optimization (with only fixed constraints active) else: + # writing an output for "refine" runs with NOOPT self.write_structures("unoptimized", energies=False) - # accounting for output in "refine" runs with NOOPT except ZeroCandidatesError: t_end_run = time.perf_counter() @@ -3009,12 +3069,14 @@ def run(self) -> None: clean_directory() sys.exit(0) - ##################### POST FIRECODE - NEB + ##################### POST-REFINEMENT - self.log_warnings() - self.normal_termination() + if self.options.freq: + self.vibrational_analysis() - ################################################ END + ##################### END + + self.normal_termination() except KeyboardInterrupt: print("\n\nKeyboardInterrupt requested by user. Quitting.") diff --git a/firecode/embedder_options.py b/firecode/embedder_options.py index 5ff24e4..b1471b7 100644 --- a/firecode/embedder_options.py +++ b/firecode/embedder_options.py @@ -45,8 +45,10 @@ keywords_dict = { "BYPASS": 1, # Debug keyword. Used to skip all pruning steps and # directly output all the embedded geometries. + "C": 1, # concentration, in mol/L "CALC": 1, # Manually overrides the calculator in "settings.py" "CHARGE": 1, # Specifies charge for the embedding + "CONC": 1, # concentration, in mol/L "CONFS": 1, # Maximum number of conformers generated by csearch "CLASHES": 1, # Manually specify the max number of clashes and/or # the distance threshold at which two atoms are considered @@ -68,6 +70,7 @@ "FFCALC": 1, # Manually overrides the force field calculator in "settings.py" "FFLEVEL": 1, # Manually set the theory level to be used. # . Syntax: `FFLEVEL=UFF + "FREQ": 1, # Run frequency calculation after geometry optimization. "IMAGES": 1, # Number of images to be used in neb>/NEB jobs "KCAL": 1, # Trim output structures to a given value of relative energy. # Syntax: `KCAL=n`, where n can be an integer or float. @@ -172,6 +175,7 @@ class Options: max_newbonds: int = 0 optimization: bool = True + freq: bool = False calculator: str = CALCULATOR theory_level: str | MaybeNone = None # set later in _calculator_setup() solvent: str | None = None @@ -180,6 +184,7 @@ class Options: mult: int = 1 T: float = 298.15 # in K P: float = 1.0 # in atm + C: float = 1.0 # in mol/L ff_opt: bool = FF_OPT_BOOL ff_calc: str | None = FF_CALC @@ -574,6 +579,22 @@ def p(self, options: Options, *args: Any) -> None: options.P = float(kw.split("=")[1]) self.embedder.log(f"--> Set pressure to {options.P:.1f} atm") + def c(self, options: Options, *args: Any) -> None: + kw = self.keywords_simple[self.keywords.index("C")] + options.C = float(kw.split("=")[1]) + self.embedder.log(f"--> Set concentration to {options.C:.1f} mol/L") + + def conc(self, options: Options, *args: Any) -> None: + kw = self.keywords_simple[self.keywords.index("CONC")] + options.C = float(kw.split("=")[1]) + self.embedder.log(f"--> Set concentration to {options.C:.1f} mol/L") + + def freq(self, options: Options, *args: Any) -> None: + options.freq = True + self.embedder.log( + "--> FREQ keyword: will run a freqency calculation for each structure after refinement." + ) + def set_options(self) -> None: for kw in self.sorted_keywords(): setter_function = getattr(self, kw.lower()) diff --git a/firecode/pka.py b/firecode/pka.py index 0c5bf15..d181bb0 100644 --- a/firecode/pka.py +++ b/firecode/pka.py @@ -29,9 +29,9 @@ from prism_pruner.graph_manipulations import graphize from firecode.optimization_methods import optimize, refine_structures -from firecode.thermochemistry import ase_vib +from firecode.thermochemistry import get_free_energies from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float, Array3D_float -from firecode.utils import charge_to_str, loadbar, write_xyz +from firecode.utils import charge_to_str, write_xyz if TYPE_CHECKING: from firecode.embedder import Embedder @@ -304,47 +304,3 @@ def pka_routine(filename: str, embedder: Embedder, search: bool = True) -> None: embedder.objects[mol_index].pka_data = ("B -> BH+", e_BH - e_B) embedder.log() - - -def get_free_energies( - embedder: Embedder, - atoms: Array1D_str, - structures: Array3D_float, - charge: int, - mult: int, - title: str = "temp", -) -> Array1D_float: - """ """ - - free_energies = [] - - for s, structure in enumerate(structures): - loadbar( - s, - len(structures), - f"{title} Performing vibrational analysis {s + 1}/{len(structures)} ", - ) - - free_energies.append( - ase_vib( - atoms, - structure, - ase_calc=embedder.dispatcher.get_ase_calc( - embedder.options.theory_level, embedder.options.solvent - ), - charge=charge, - mult=mult, - T_K=embedder.options.T, - C_mol_L=1.0, - title=f"{title}_conf{s}", - return_gcorr=False, - ) - ) - - loadbar( - len(structures), - len(structures), - f"{title} Performing vibrational analysis {len(structures)}/{len(structures)} ", - ) - - return np.array(free_energies) diff --git a/firecode/solvents.py b/firecode/solvents.py index ccd79ce..1428620 100644 --- a/firecode/solvents.py +++ b/firecode/solvents.py @@ -22,6 +22,38 @@ from firecode.units import AVOGADRO_NA, A3_TO_mL +# from any solvent name to the standard FIRECODE name. +# Convention: short over long (methanol->meoh) +# but common over short (phme->toluene) +# No spaces in solvent name! +solvent_synonyms = { + "ch3cooh": "acoh", + "aceticacid": "acoh", + "ch3cn": "mecn", + "acetonitrile": "mecn", + "chloroform": "chcl3", + "dcm": "ch2cl2", + "dichloromethane": "ch2cl2", + "carbondisuphide": "cs2", + "carbondisulfide": "cs2", + "diethylether": "et2o", + "ethanol": "etoh", + "ch3oh": "meoh", + "methanol": "meoh", + "water": "h2o", + "ethylacetate": "etoac", + "phme": "toluene", + "phh": "benzene", + "dimethylformamide": "dmf", + "dimethylether": "me2o", + "triethylamine": "et3n", + "nitromethane": "meno2", + "dimethylacetamide": "dmac", +} + +# Convert from standard to XTB names. +# do not remove dummy (s : s) entries, +# as keys are used for checking purposes to_xtb_solvents = { "acetone": "acetone", "mecn": "acetonitrile", @@ -35,11 +67,11 @@ "dmf": "dmf", "dmso": "dmso", "et2o": "ether", - "ethylacetate": "ethylacetate", + "etoac": "ethylacetate", "furane": "furane", "hexadecane": "hexadecane", "hexane": "hexane", - "methanol": "methanol", + "meoh": "methanol", "meno2": "nitromethane", "octanol": "octanol", "octanolwet": "octanolwet", @@ -49,31 +81,6 @@ "h2o": "water", } -# convention: from long to short, from unusual to short -solvent_synonyms = { - "ch3cooh": "acoh", - "aceticacid": "acoh", - "ch3cn": "mecn", - "acetonitrile": "mecn", - "chloroform": "chcl3", - "dcm": "ch2cl2", - "dichloromethane": "ch2cl2", - "carbondisuphide": "cs2", - "carbondisulfide": "cs2", - "diethylether": "et2o", - "ethanol": "etoh", - "ch3oh": "meoh", - "methanol": "meoh", - "water": "h2o", - "ethylacetate": "etoac", - "phme": "toluene", - "phh": "benzene", - "dimethylformamide": "dmf", - "dimethylether": "me2o", - "triethylamine": "et3n", - "nitromethane": "meno2", -} - # name: {molarity (M), molecular_volume(Γ…^3)} # see https://organicchemistrydata.org/solvents/ solvent_data = { @@ -187,6 +194,11 @@ "density": 1.33, "epsilon": 8.93, }, + "dmac": { + "MW": 87.122, + "density": 0.937, + "epsilon": 37.78, + }, } # estimate molarity or molecular_volume from diff --git a/firecode/standalone_optimizer.py b/firecode/standalone_optimizer.py index 7c0aee5..406655e 100644 --- a/firecode/standalone_optimizer.py +++ b/firecode/standalone_optimizer.py @@ -425,7 +425,7 @@ def standalone_optimize(optimizer: OptimizerOptions) -> None: ) t_start = perf_counter() - energy += ase_vib( + freqs, gcorr = ase_vib( mol.atoms, coords, ase_calc=optimizer.ase_calc, @@ -436,8 +436,12 @@ def standalone_optimize(optimizer: OptimizerOptions) -> None: title=f"{name[:-4]}", ) + energy += gcorr + num_neg = np.count_nonzero(freqs < 0.0) elapsed = perf_counter() - t_start - print(f"Calculated vibrational frequencies in {time_to_string(elapsed)}\n") + print( + f"Calculated vibrational frequencies ({num_neg} negatives) in {time_to_string(elapsed)}\n" + ) energies.append(energy) names_confs.append(name[:-4] + f"_conf{c_n + 1}") diff --git a/firecode/tests/operator_rdkit_search/operator_rdkit_search.txt b/firecode/tests/operator_rdkit_search/operator_rdkit_search.txt index 16d28a7..a25c129 100644 --- a/firecode/tests/operator_rdkit_search/operator_rdkit_search.txt +++ b/firecode/tests/operator_rdkit_search/operator_rdkit_search.txt @@ -1,2 +1,2 @@ -calc=xtb level=gfn-ff ffopt=off +calc=xtb level=gfn-ff ffopt=off freq refine> rdkit_search> opt> butane.xyz diff --git a/firecode/tests/test_suite.py b/firecode/tests/test_suite.py index f82c743..ed6eb81 100644 --- a/firecode/tests/test_suite.py +++ b/firecode/tests/test_suite.py @@ -72,6 +72,7 @@ def run_firecode_input(name: str) -> None: "_scan_max.xyz", "_scan.xyz", "_opt", + "_thermo", "CREST", "NEB", "FSM", diff --git a/firecode/thermochemistry.py b/firecode/thermochemistry.py index 0187172..b79dbce 100644 --- a/firecode/thermochemistry.py +++ b/firecode/thermochemistry.py @@ -27,7 +27,7 @@ import os from shutil import rmtree from time import perf_counter -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, Callable, Optional, cast import numpy as np from ase import Atoms @@ -38,7 +38,7 @@ from firecode.ase_manipulations import set_charge_and_mult_on_ase_atoms from firecode.solvents import solvent_data, solvent_synonyms -from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float +from firecode.typing_ import Array1D_float, Array1D_str, Array2D_float, Array3D_float from firecode.units import ( AVOGADRO_NA, EH_TO_EV, @@ -53,11 +53,13 @@ PLANCK_h__J_s, theta_per_cm1_K, ) -from firecode.utils import HiddenPrints +from firecode.utils import HiddenPrints, NewFolderContext, clean_directory, loadbar if TYPE_CHECKING: from ase.calculators.calculator import Calculator as ASECalculator + from firecode.embedder import Embedder + # Frequencies below this are considered belonging to proper # transition states and excluded from thermochemical analysis, # while the (_TS_THR_CM_1 < v < 0) range will be treated as positive. @@ -364,8 +366,8 @@ def ase_vib( title: str = "temp", return_gcorr: bool = True, write_log: bool = True, -) -> float: - """returns: G(corr) or Free energy, in kcal/mol.""" +) -> tuple[Array1D_float, float]: + """returns: tuple of Array of frequencies and either G(corr) or Free energy, in kcal/mol.""" ase_atoms = Atoms(atoms, positions=coords) ase_atoms = set_charge_and_mult_on_ase_atoms(ase_atoms, charge=charge, mult=mult) @@ -439,7 +441,7 @@ def ase_vib( H = thermo_dict["H_total_Eh"] S = (H - G) / T_K # Eh/K - f.write("\n--> What follows mocks an ORCA output for scraping:\n\n") + f.write("\n--> What follows mocks an ORCA output:\n\n") f.write(f"Number of atoms ... {len(atoms)}\n") f.write(f"Total Charge ... ... {charge}\n\n") f.write(f"Temperature ...: {T_K:.2f} K ({T_K - 273.15:.2f} Β°C)\n") @@ -470,8 +472,8 @@ def ase_vib( os.remove(f"vib_{title}.out") if return_gcorr: - return cast("float", Gcorr * EH_TO_KCAL) - return cast("float", G * EH_TO_KCAL) + return freqs, cast("float", Gcorr * EH_TO_KCAL) + return freqs, cast("float", G * EH_TO_KCAL) def cleanup_freqs( @@ -540,3 +542,66 @@ def cleanup_freqs( freqs *= scaling_factor return freqs + + +def get_free_energies( + embedder: Embedder, + atoms: Array1D_str, + structures: Array3D_float, + charge: int, + mult: int, + title: str = "temp", + logfunction: Callable[[str], None] | None = None, +) -> Array1D_float: + """Run vibrational analysis on a set of structures.""" + free_energies = [] + + with NewFolderContext(f"{title}_thermo", delete_after=False): + for s, structure in enumerate(structures): + loadbar( + s, + len(structures), + f"{title} Performing vibrational analysis {s + 1}/{len(structures)} ", + ) + + t_start = perf_counter() + + freqs, free_energy = ase_vib( + atoms, + structure, + ase_calc=embedder.dispatcher.get_ase_calc( + embedder.options.theory_level, embedder.options.solvent + ), + charge=charge, + mult=mult, + T_K=embedder.options.T, + C_mol_L=embedder.options.C, + title=f"{title}_conf{s}", + return_gcorr=False, + ) + + free_energies.append(free_energy) + + if logfunction is not None: + elapsed = perf_counter() - t_start + match num_neg := np.count_nonzero(freqs < 0.0): + case 0: + exit_str = "GS" + case 1: + exit_str = "TS" + case _: + exit_str = "??" + + logfunction( + f" - {title} - {exit_str} ({num_neg} negative freqs. - {time_to_string(elapsed)})" + ) + + loadbar( + len(structures), + len(structures), + f"{title} Performing vibrational analysis {len(structures)}/{len(structures)} ", + ) + + clean_directory(to_remove=("gfnff_topo",)) + + return np.array(free_energies) From d8c3cb17f3315ddcbec8b19e837df2a4783c8498 Mon Sep 17 00:00:00 2001 From: ntampellini Date: Mon, 16 Mar 2026 14:02:20 -0400 Subject: [PATCH 4/4] tweaked reference states --- firecode/__main__.py | 2 +- firecode/embedder_options.py | 14 ++++++++++---- firecode/standalone_optimizer.py | 1 + firecode/thermochemistry.py | 20 +++++++++++++------- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/firecode/__main__.py b/firecode/__main__.py index 624e44c..7dd67d0 100644 --- a/firecode/__main__.py +++ b/firecode/__main__.py @@ -14,7 +14,7 @@ https://github.com/ntampellini/firecode -Nicolo' Tampellini - nicolo.tampellini@yale.edu +Nicolo' Tampellini - ntamp@mit.edu """ diff --git a/firecode/embedder_options.py b/firecode/embedder_options.py index b1471b7..9770edd 100644 --- a/firecode/embedder_options.py +++ b/firecode/embedder_options.py @@ -183,7 +183,7 @@ class Options: charge: int = 0 mult: int = 1 T: float = 298.15 # in K - P: float = 1.0 # in atm + P: float | None = None # in atm C: float = 1.0 # in mol/L ff_opt: bool = FF_OPT_BOOL ff_calc: str | None = FF_CALC @@ -577,17 +577,23 @@ def t_c(self, options: Options, *args: Any) -> None: def p(self, options: Options, *args: Any) -> None: kw = self.keywords_simple[self.keywords.index("P")] options.P = float(kw.split("=")[1]) - self.embedder.log(f"--> Set pressure to {options.P:.1f} atm") + self.embedder.log( + f"--> Set pressure to {options.P:.1f} atm and global reference state to gas." + ) def c(self, options: Options, *args: Any) -> None: kw = self.keywords_simple[self.keywords.index("C")] options.C = float(kw.split("=")[1]) - self.embedder.log(f"--> Set concentration to {options.C:.1f} mol/L") + self.embedder.log( + f"--> Set concentration to {options.C:.1f} mol/L and global reference state to solution." + ) def conc(self, options: Options, *args: Any) -> None: kw = self.keywords_simple[self.keywords.index("CONC")] options.C = float(kw.split("=")[1]) - self.embedder.log(f"--> Set concentration to {options.C:.1f} mol/L") + self.embedder.log( + f"--> Set concentration to {options.C:.1f} mol/L and global reference state to solution." + ) def freq(self, options: Options, *args: Any) -> None: options.freq = True diff --git a/firecode/standalone_optimizer.py b/firecode/standalone_optimizer.py index 406655e..449b7b8 100644 --- a/firecode/standalone_optimizer.py +++ b/firecode/standalone_optimizer.py @@ -433,6 +433,7 @@ def standalone_optimize(optimizer: OptimizerOptions) -> None: mult=mult, T_K=optimizer.T, solvent=optimizer.solvent, + C_mol_L=1.0, title=f"{name[:-4]}", ) diff --git a/firecode/thermochemistry.py b/firecode/thermochemistry.py index b79dbce..4519bc8 100644 --- a/firecode/thermochemistry.py +++ b/firecode/thermochemistry.py @@ -126,7 +126,8 @@ def rrho_thermo( atoms: Atoms, freqs_cm1: Array1D_float, T_K: float = 298.15, - P_atm: float = 1.0, + P_atm: float | None = 1.0, + conc_mol_L: float | None = None, symmetry_number: int = 1, E_el_Eh: float = 0.0, mult: int = 1, @@ -134,7 +135,6 @@ def rrho_thermo( cutoff_cm1: Optional[float] = None, qrrho_ref_cm1: float = 100.0, qrrho_alpha: float = 4.0, - conc_mol_L: float | None = None, solv: Optional[str] = None, ) -> dict[str, Any]: # Remove 3/5/6 zero modes @@ -199,12 +199,15 @@ def rrho_thermo( # with free-space from Shakhnovich–Whitesides. lambda_factor = ((2.0 * math.pi * mass_kg * KB__J_K * T_K) ** 1.5) / (PLANCK_h__J_s**3) + # if user provided a concentration, + # reference state is a solution if conc_mol_L is not None: free_mL_per_L = _free_space_mL_per_L(solv) # free-space fraction in a liter: free_frac = max(free_mL_per_L / 1000.0, 1e-9) # avoid zero number_density = conc_mol_L * 1000.0 * AVOGADRO_NA / free_frac # 1/m^3 else: + P_atm = P_atm or 1.0 P_Pa = P_atm * 101325.0 number_density = P_Pa / (KB__J_K * T_K) @@ -360,14 +363,17 @@ def ase_vib( charge: int, mult: int, T_K: float = 298.15, - P_atm: float = 1.0, + P_atm: float | None = 1.0, C_mol_L: float | None = None, solvent: str | None = None, title: str = "temp", return_gcorr: bool = True, write_log: bool = True, ) -> tuple[Array1D_float, float]: - """returns: tuple of Array of frequencies and either G(corr) or Free energy, in kcal/mol.""" + """returns: tuple of Array of frequencies and either G(corr) or Free energy, in kcal/mol. + + If pressure is provided, a gas reference state will be used. + """ ase_atoms = Atoms(atoms, positions=coords) ase_atoms = set_charge_and_mult_on_ase_atoms(ase_atoms, charge=charge, mult=mult) @@ -375,7 +381,7 @@ def ase_vib( vib = Vibrations(ase_atoms, delta=0.005) # type: ignore[no-untyped-call] - with open(f"vib_{title}.out", "w", encoding="utf-8") as f: + with open(f"{title}.out", "w", encoding="utf-8") as f: f.write("--> FIRECODE ASE Frequency calculation report\n") f.write(f"charge={charge}, mult={mult}, C={C_mol_L} mol/L, P={P_atm} atm\n") f.write( @@ -469,7 +475,7 @@ def ase_vib( rmtree("vib") if not write_log: - os.remove(f"vib_{title}.out") + os.remove(f"{title}.out") if return_gcorr: return freqs, cast("float", Gcorr * EH_TO_KCAL) @@ -593,7 +599,7 @@ def get_free_energies( exit_str = "??" logfunction( - f" - {title} - {exit_str} ({num_neg} negative freqs. - {time_to_string(elapsed)})" + f" - {title} conf. {s:>4d} - {exit_str} ({num_neg} negative freqs. - {time_to_string(elapsed)})" ) loadbar(