Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 175 additions & 80 deletions rmgpy/data/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2835,82 +2835,10 @@ def get_ring_groups_from_comments(self, thermo_data):

return ring_groups, polycyclic_groups

def extract_source_from_comments(self, species):
"""
`species`: A species object containing thermo data and thermo data comments

Parses the verbose string of comments from the thermo data of the species object,
and extracts the thermo sources.

Returns a dictionary with keys of 'Library', 'QM', 'ADS', and/or 'GAV'.
Commonly, species thermo are estimated using only one of these sources.
However, a radical can be estimated with more than one type of source, for
instance a saturated library value and a GAV HBI correction, or a QM saturated value
and a GAV HBI correction. Adsorbates can be estimated using a single library
for the adsorbate or a combination of a gas phase library for the
gas phase portion and an adsorption correction.

source = {'Library': String_Name_of_Library_Used,
'QM': String_of_Method_Used,
'GAV': Dictionary_of_Groups_Used,
'ADS': Dictionary_of_Adsorption_Group_Used,
}

The Dictionary_of_Groups_Used looks like
{'groupType':[List of tuples containing (Entry, Weight)]
"""
comment = species.thermo.comment
tokens = comment.split()

source = {}

if comment.startswith('Thermo library'):
# Store name of the library source, which is the 3rd token in the comments
source['Library'] = tokens[2]

elif comment.startswith('QM'):
# Store the level of the calculation, which is the 2nd token in the comments
source['QM'] = tokens[1]

elif comment.startswith('Gas phase thermo'):
# Handle adsorption correction thermo data of the following format:
# Library example
# Gas phase thermo for C(T) from Thermo library: primaryThermoLibrary.
# Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(Cq*)

# GAV example
# Gas phase thermo for [CH]CC from Thermo group additivity estimation: group(Cs-CsCsHH) + group(Cs-CsHHH) + group(Cs-CsHHH) + radical(CCJ2_triplet).
# Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C=*RCR3)"

comment = comment.replace(r'\n', ' ')
comment = comment.replace('\n', ' ')
if 'Adsorption correction:' not in comment:
raise ValueError(f'adsorption correction in unrecognized format {comment}')

# Handle the gas-phase portion first
gas_comment = comment.split('Adsorption correction: + ')[0].strip()
if gas_comment.endswith('.'):
gas_comment = gas_comment[:-1] # delete the . at the end if it exists
gas_comment = gas_comment[gas_comment.find('from ', len('Gas phase thermo for ')) + len('from '):]
dummy_gas_phase_species = Species()
dummy_gas_phase_species.thermo = NASA()
dummy_gas_phase_species.thermo.comment = gas_comment
source = self.extract_source_from_comments(dummy_gas_phase_species)

# This is an adsorption correction
# comment is split into two parts: the gas phase, and the surface adsorption correction
ads_correction_comment = comment.split('Adsorption correction: +')[-1].strip()
dummy_adsorption_correction_species = Species()
dummy_adsorption_correction_species.thermo = NASA()
dummy_adsorption_correction_species.thermo.comment = ads_correction_comment
source['ADS'] = self.extract_source_from_comments(dummy_adsorption_correction_species)['GAV']

return source

# Check for group additivity contributions to the thermo in this species

# The contribution of the groups can be either additive or subtracting
# after changes to the polycyclic algorithm
def _parse_gav_groups(self, comment):
"""Extract the groups from the comment"""
groups = {}

comment = comment.replace(' + ', ' +')
comment = comment.replace(' - ', ' -')
Expand All @@ -2920,10 +2848,12 @@ def extract_source_from_comments(self, species):
# groups are still split by spaces
comment = comment.replace(')\n+', ') +')
comment = comment.replace(')\n-', ') -')
# `Thermo group additivity estimation:\nadsorptionPt111(...)` shows up in
# adsorbate comments - keep the trailing colon separated from the group token.
comment = comment.replace(':\n', ': ')
comment = comment.replace('\n', '')
tokens = comment.split(' ')

groups = {}
group_types = list(self.groups.keys())

regex = r"\((.*)\)" # only hit outermost parentheses
Expand Down Expand Up @@ -2951,22 +2881,187 @@ def extract_source_from_comments(self, species):

if groups:
# Indicate that group additivity is used when it is either an HBI correction
# onto a thermo library or QM value, or if the entire molecule is estimated using group additivity
# onto a thermo library or QM value, or if the entire molecule is estimated using group additivity
# Save the groups into the source dictionary

# Convert groups back into tuples
for groupType, groupDict in groups.items():
groups[groupType] = list(groupDict.items())

source['GAV'] = groups
return groups

def _parse_library_source(self, comment, library_species):
    """
    Identify the thermo library named in `comment` and retrieve the entry
    that matches `library_species` from that library.

    `comment`: A thermo comment containing "Thermo library: library_name".
    Anything before the marker is ignored, and the name may be separated
    from the marker by any whitespace (including an unfortunate line break).
    `library_species`: The species to look up in the named library.

    Returns a tuple of (library_name, library_entry).

    Raises ValueError if the marker is missing or no library name follows it,
    and DatabaseError if the library holds no match for `library_species`.
    """
    marker = 'Thermo library:'
    split_loc = comment.find(marker)
    if split_loc == -1:
        raise ValueError(f"Expected 'Thermo library:' in comment, got {comment}")

    # The library name is the first whitespace-delimited token after the marker.
    # Reading it relative to the marker (instead of as the third token overall)
    # gives a clear error when the name is missing and still works when there
    # is no space between the marker and the name.
    remainder = comment[split_loc + len(marker):].split()
    if not remainder:
        raise ValueError(f"Expected a library name after 'Thermo library:' in comment, got {comment}")
    library_name = remainder[0]

    results = self.get_thermo_data_from_library(library_species, self.libraries[library_name])
    if results is None:
        raise DatabaseError(f"Could not find a library match for {library_species} in library {library_name}")

    # results unpacks to (data, thermo_library, library_entry); only the entry is needed here
    _, _, library_entry = results
    return (library_name, library_entry)


def _parse_adsorption_correction(self, comment):
    """
    Extract the adsorption correction group from the adsorption portion of a
    thermo comment.

    `comment`: The part of the thermo comment that starts at
    'Adsorption correction:', which looks like
    "Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C-XR2CR3)"
    but may have an unfortunate line break in the middle.

    Returns the groups dictionary produced by `_parse_gav_groups` for this
    comment, which holds exactly one group type.

    Raises ValueError if the comment does not have the expected number of
    tokens, if no group could be parsed from it, or if more than one group
    type was parsed.
    """
    # A well-formed correction has 8 whitespace-delimited tokens,
    # or 9 if a line break split one of them
    tokens = comment.split()
    if len(tokens) not in (8, 9):
        raise ValueError(f"Expected 8 or 9 tokens in adsorption correction comment, got {len(tokens)}: {comment}")

    ads_groups = self._parse_gav_groups(comment)

    # An adsorption correction that names no recognizable group must not be
    # recorded as an empty 'ADS' source; fail loudly like the gas-phase parsing does
    if not ads_groups:
        raise ValueError(f"No adsorption correction group was found in the adsorption correction portion of the comment: {comment}")

    if len(ads_groups) > 1:
        raise ValueError("Only adsorption corrections should be present in the adsorption correction portion of the comment. Found: {}".format(ads_groups))

    return ads_groups

def extract_source_from_comments(self, species):
    """
    `species`: A species object containing thermo data and thermo data comments

    Parses the verbose string of comments from the thermo data of the species object,
    and extracts the thermo sources.

    Returns a dictionary with keys of 'Library', 'QM', 'ADS', and/or 'GAV'.
    Commonly, species thermo are estimated using only one of these sources.
    However, a radical can be estimated with more than one type of source, for
    instance a saturated library value and a GAV HBI correction, or a QM saturated value
    and a GAV HBI correction. Adsorbates can be estimated using a single library
    for the adsorbate or a combination of a gas phase library for the
    gas phase portion and an adsorption correction.

    source = {'Library': (String_Name_of_Library_Used, Library_Entry_Used),
              'QM': String_of_Method_Used,
              'GAV': Dictionary_of_Groups_Used,
              'ADS': Dictionary_of_Adsorption_Group_Used,
              }

    The Dictionary_of_Groups_Used looks like
    {'groupType': [List of tuples containing (Entry, Weight)]}

    Raises ValueError if the comment is inconsistent (group additivity is
    announced but no groups are found, groups are found that cannot be a
    radical correction on top of a library value, or a library value is
    combined with groups for a species that is not a radical) or if no
    source at all could be identified.
    """

    # TODO: solvent, electrocat, LSR
    gav_marker = 'Thermo group additivity estimation:'
    ads_marker = 'Adsorption correction:'

    source = {}

    comment = species.thermo.comment
    tokens = comment.split()

    has_library = 'Thermo library' in comment
    has_qm = 'QM' in tokens
    is_adsorbate = 'Gas phase thermo' in comment and ads_marker in comment

    # the biggest thing to split on first is the adsorption correction
    if is_adsorbate:
        # The source options here are:
        # (Library(gas-phase species), Adsorption correction)
        # (GAV(gas-phase species), Adsorption correction)
        # (Library(gas-phase species), GAV(radical correction), Adsorption correction)
        # and the QM analogues of the library cases, although QM is dead/sleeping

        # split the comment into the gas phase thermo portion and the adsorption correction portion
        split_loc = comment.find(ads_marker)
        gas_comment = comment[:split_loc].strip()
        if gas_comment.endswith('.'):
            gas_comment = gas_comment[:-1]  # the period that closed the gas-phase sentence
        source['ADS'] = self._parse_adsorption_correction(comment[split_loc:].strip())
    else:
        # gas phase only, source options are:
        # (Library), (QM), (GAV), (Library, GAV), (QM, GAV)
        gas_comment = comment

    # Always look for groups in the gas-phase text instead of trusting the marker:
    # a radical (HBI) correction on top of a library value is written as
    # "Thermo library: name + radical(...)" and carries no group additivity marker.
    # The adsorption portion is excluded because it looks identical to a group estimate.
    groups = self._parse_gav_groups(gas_comment)
    has_gav_marker = gav_marker in gas_comment
    if has_gav_marker and not groups:
        raise ValueError("No groups were found in the comments but 'Thermo group additivity estimation:' was in the comment. Comment: {}".format(comment))
    if groups and not has_gav_marker and 'radical' not in groups:
        raise ValueError("Groups were found in the comments but 'Thermo group additivity estimation:' was not in the comment. Comment: {}".format(comment))

    if groups:
        source['GAV'] = groups

    if has_library:
        if groups:
            # Library value plus groups means the library species is the saturated
            # version of the (desorbed) radical, with the groups as the radical correction
            if 'radical' not in groups:
                raise ValueError("Method only valid for radicals, but no radical groups were found. Comment: {}".format(comment))

            if is_adsorbate:
                # get_desorbed_molecules already returns a list of copied Molecule objects
                molecule = species.molecule[0].get_desorbed_molecules()[0]
            else:
                molecule = deepcopy(species.molecule[0])
            # raised explicitly (not asserted) so the check survives python -O
            if not molecule.is_radical():
                raise ValueError("Method only valid for radicals.")
            # saturate_radicals returns a dictionary but modifies the molecule in place,
            # so the return value can be ignored
            molecule.saturate_radicals()
            library_species = Species(molecule=[molecule])
        elif is_adsorbate:
            # the library species is the desorbed gas-phase molecule of the adsorbate
            library_species = Species(molecule=species.molecule[0].get_desorbed_molecules())
        else:
            library_species = species
        source['Library'] = self._parse_library_source(gas_comment, library_species)

    if has_qm:
        # whatever token comes immediately after 'QM' is the method used
        source['QM'] = tokens[tokens.index('QM') + 1]

    # Perform a sanity check that this molecule is estimated by at least one method
    if not source:
        raise ValueError('Species {0} thermo appears to not be estimated using any methods.'.format(species))

    return source


class ThermoCentralDatabaseInterface(object):
"""
A class for interfacing with RMG online thermo central database.
Expand Down
Loading
Loading