Conformer
API¶
rdworks.conf.Conf
¶
Container for 3D conformers.
Source code in src/rdworks/conf.py
class Conf:
"""Container for 3D conformers."""
def __init__(
self,
molecule: str | Chem.Mol | None = None,
name: str = "",
compressed: bool = False,
) -> None:
"""Initialize.
Args:
molecule (Chem.Mol | MolBlock string): Molecule for 3D conformer.
name (str): Name prefix of the generated conformers. Defaults to ''.
compressed (bool): whether the MolBlock string is compressed or not.
Defaults to False.
Raises:
ValueError: if `molecule` is not rdkit.Chem.Mol object.
"""
assert isinstance(molecule, str | Chem.Mol) or molecule is None
self.name = name
self.rdmol = None # must contain one and only one rdkit conformer
self.natoms = 0
self.charge = 0 # molecular formal charge
self.spin = 1 # molecular spin multiplicity for ASE
# Molecular spin multiplicity describes the number of possible orientations of
# spin angular momentum for a given molecule, essentially indicating the total
# number of unpaired electrons.
# Spin Angular Momentum (S): up (+1/2) or down (-1/2)
# Spin Multiplicity (2S + 1)
# 0 unpaired electron has S = 0, 2S + 1 = 0, called a singlet.
# 1 unpaired electron has S = 1/2, 2S + 1 = 2, called a doublet (radical).
# 2 unpaired electrons has S = 1, 2S + 1 = 3, called a triplet.
self.smiles = ""
self.molblock = ""
self.symbols = []
self.numbers = []
self.positions = np.array([]) # (natoms, 3)
self.props = {}
if molecule is None:
return
if isinstance(molecule, str): # 3-D MolBLock string
if compressed:
molecule = rdworks.utils.decompress_string(molecule)
try:
self.rdmol = Chem.MolFromMolBlock(
molecule, sanitize=False, removeHs=False, strictParsing=True
)
except:
ValueError(f"Conf() Error: invalid MolBlock string")
elif isinstance(molecule, Chem.Mol): # 3-D
try:
self.rdmol = molecule
except:
ValueError(f"Conf() Error: invalid Chem.Mol object")
self._update()
def _update(self) -> None:
self.smiles = Chem.MolToSmiles(self.rdmol)
self.molblock = Chem.MolToMolBlock(self.rdmol)
self.symbols = [a.GetSymbol() for a in self.rdmol.GetAtoms()]
self.numbers = [a.GetAtomicNum() for a in self.rdmol.GetAtoms()]
self.positions = self.rdmol.GetConformer().GetPositions() # np.ndarray
# check hydrogens
num_atoms = self.rdmol.GetNumAtoms()
tot_atoms = self.rdmol.GetNumAtoms(onlyExplicit=False)
assert num_atoms == tot_atoms, "Conf() Error: missing hydrogens"
self.natoms = num_atoms
self.charge = rdmolops.GetFormalCharge(self.rdmol)
self.props.update(
{
"atoms": self.natoms,
"charge": self.charge,
}
)
assert self.rdmol.GetConformer().Is3D(), "Conf() Error: not 3D"
def __str__(self) -> str:
"""Returns a string representation.
Returns:
str: string representation.
"""
return f"<rdworks.Conf({self.rdmol} name={self.name} atoms={self.natoms})>"
##################################################
### Properties
##################################################
@property
def COG(self) -> np.array:
"""Returns the center of geometry (COG).
Returns:
np.array: the center of geometry.
"""
xyz = []
for i in range(0, self.natoms):
pos = self.rdmol.GetConformer().GetAtomPositions(i)
xyz.append([pos.x, pos.y, pos.z])
return np.mean(xyz, axis=0)
@property
def Rg(self) -> float:
"""Returns the radius of gyration (Rg).
Returns:
float: the radius of gyration.
"""
xyz = []
for i in range(0, self.natoms):
pos = self.rdmol.GetConformer().GetAtomPositions(i)
xyz.append([pos.x, pos.y, pos.z])
xyz = np.array(xyz)
cog = np.mean(xyz, axis=0)
a = xyz - cog
b = np.einsum("ij,ij->i", a, a)
return np.sqrt(np.mean(b))
@property
def SASA(self) -> dict:
"""Calculate Solvent Accessible Surface Area (total, polar, apolar).
Returns:
tuple[float, float, float]: (total, polar, apolar)
"""
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in self.rdmol.GetAtoms()]
# CalcSASA signature: CalcSASA(mol, radii, confId=conf_id) in modern RDKit
total_sasa = rdFreeSASA.CalcSASA(self.rdmol, radii=radii, confIdx=-1)
# try to read per-atom SASA values set by CalcSASA into atoms
atom_sasas = [atom.GetDoubleProp("SASA") for atom in self.rdmol.GetAtoms()]
# Now compute polar/apolar sums: polar = N,O and Hs attached to them
polar_idx = set()
for atom in self.rdmol.GetAtoms():
if atom.GetAtomicNum() in (7, 8): # N or O
polar_idx.add(atom.GetIdx())
# include attached hydrogens
for nbr in atom.GetNeighbors():
if nbr.GetAtomicNum() == 1:
polar_idx.add(nbr.GetIdx())
# default numeric 0 for None entries (defensive)
numeric_atom_sasas = [0.0 if v is None else float(v) for v in atom_sasas]
sasa_polar = sum(numeric_atom_sasas[i] for i in polar_idx)
sasa_total = sum(numeric_atom_sasas)
sasa_apolar = sasa_total - sasa_polar
return rdworks.utils.recursive_round(
{
"sasa_total": sasa_total,
"sasa_polar": sasa_polar,
"sasa_apolar": sasa_apolar,
},
decimals=2,
)
def copy(self) -> Self:
"""Returns a copy of self.
Returns:
Self: `rdworks.Conf` object.
"""
return copy.deepcopy(self)
def rename(self, name: str) -> Self:
"""Rename and returns self.
Args:
name (str): a new name for conformers.
Raises:
ValueError: if `name` is not given.
Returns:
Self: `rdworks.Conf` object.
"""
if not name:
raise ValueError("rdworks.Conf.rename() expects a name")
self.name = name
self.rdmol.SetProp("_Name", name)
return self
def sync(self, coord: np.ndarray | list) -> Self:
"""Synchronize the conformer coordinates with the provided `coord`.
Args:
coord (np.array): 3D coordinates.
Raises:
ValueError: if `coord` does not have the correct shape (natoms, 3).
Returns:
Self: `rdworks.Conf` object.
"""
if isinstance(coord, np.ndarray) and coord.shape != (self.natoms, 3):
raise ValueError(f"`coord.shape` should be ({self.natoms},3)")
elif isinstance(coord, list) and len(coord) != self.natoms:
raise ValueError(f"`coord` should be length of {self.natoms}")
for i, a in enumerate(self.rdmol.GetAtoms()):
self.rdmol.GetConformer().SetAtomPosition(a.GetIdx(), coord[i])
return self
def get_torsion_angle_atoms(self, strict: bool = True) -> list[tuple]:
"""Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
Args:
strict (bool): whether to exclude amide/imide/ester/acid bonds.
Returns:
[(i, j, k, l), ...]
"""
return [d[:4] for d in get_torsion_angle_atom_indices(self.rdmol, strict)]
def get_torsion_angle(self, i: int, j: int, k: int, l: int) -> float:
"""Get dihedral angle (i-j-k-l) in degrees.
Args:
i (int): atom index
j (int): atom index
k (int): atom index
l (int): atom index
Returns:
float: dihedral angle in degrees.
"""
degree = rdMolTransforms.GetDihedralDeg(self.rdmol.GetConformer(), i, j, k, l)
return degree
def set_torsion_angle(self, i: int, j: int, k: int, l: int, degree: float) -> Self:
"""Set dihedral angle (i-j-k-l) in degrees.
Args:
i (int): atom index
j (int): atom index
k (int): atom index
l (int): atom index
degree (float): dihedral angle in degrees
Returns:
Self: modified Conf object
"""
rdMolTransforms.SetDihedralDeg(self.rdmol.GetConformer(), i, j, k, l, degree)
return self
def protonate(self, atom_indices: list[int]) -> Self:
"""Protonate given non-hydrogen atoms.
Args:
atom_indices (list[int]): atom indices of non-hydrogen atoms to protonate.
Returns:
Self: self.
"""
for idx in atom_indices:
atom = self.rdmol.GetAtomWithIdx(idx)
h = atom.GetNumExplicitHs()
c = atom.GetFormalCharge()
atom.SetNumExplicitHs(h + 1)
atom.SetFormalCharge(c + 1)
Chem.SanitizeMol(self.rdmol)
self.rdmol = Chem.AddHs(self.rdmol, addCoords=True)
# The Chem.AddHs function in RDKit returns a new Mol object with hydrogens added to the molecule.
# It modifies the input molecule by adding hydrogens,
# but the original molecule remains unchanged.
self._update()
return self
def deprotonate(self, atom_indices: list[int]) -> Self:
"""Deprotonate given non-hydrogen atoms.
Args:
atom_indices (list[int]): atom indices of non-hydrogen atoms to deprotonate.
Returns:
Self: self.
"""
for idx in atom_indices:
bonded_H_idx = None
atom = self.rdmol.GetAtomWithIdx(idx)
h = atom.GetNumExplicitHs()
if h - 1 >= 0:
atom.SetNumExplicitHs(h - 1) # (h-1) must be unsigned int
c = atom.GetFormalCharge()
atom.SetFormalCharge(c - 1)
neighbors = atom.GetNeighbors()
for neighbor in neighbors:
if neighbor.GetAtomicNum() == 1:
bonded_H_idx = neighbor.GetIdx()
break
if bonded_H_idx is not None:
edit_mol = Chem.EditableMol(self.rdmol)
edit_mol.RemoveAtom(bonded_H_idx)
self.rdmol = edit_mol.GetMol()
Chem.SanitizeMol(self.rdmol)
self._update()
return self
##################################################
### Endpoint methods
##################################################
def has_acceptable_bond_lengths(self, tolerance: float = 0.25) -> bool:
"""Check bond length.
Args:
tolerance (float, optional): tolerance from the sum of
van der Waals radii of bonded atoms. Defaults to 0.25 (A).
Returns:
bool: True if all bond lengths are accceptable.
"""
pt = Chem.GetPeriodicTable()
for bond in self.rdmol.GetBonds():
idx1 = bond.GetBeginAtomIdx()
idx2 = bond.GetEndAtomIdx()
nuc1 = self.rdmol.GetAtomWithIdx(idx1).GetAtomicNum()
nuc2 = self.rdmol.GetAtomWithIdx(idx2).GetAtomicNum()
sum_radii = pt.GetRvdw(nuc1) + pt.GetRvdw(nuc2) # (A)
# from mendeleev import element
# sum_radii = (element(nuc1).vdw_radius + element(nuc2).vdw_radius) * pm2angstrom
bond_length = rdMolTransforms.GetBondLength(
self.rdmol.GetConformer(), idx1, idx2
)
if abs(bond_length - sum_radii) > tolerance:
return False
return True
def singlepoint(
self, calculator: str | Callable = "MMFF94", water: str | None = None
) -> Self:
"""Get potential energy and set `E_tot(kcal/mol)` in the self.props.
Args:
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, xTB, or ASE calculator.
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
and primarily relies on data from quantum mechanical calculations.
It's often used in molecular dynamics simulations.
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
This makes it better suited for geometry optimization studies where a static,
time-averaged structure is desired. The "s" stands for "static".
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
molecular mechanics calculations. It's a tool for geometry optimization,
energy minimization, and exploring molecular conformations in 3D space.
UFF is often used to refine conformers generated by other methods,
such as random conformer generation, to produce more physically plausible
and stable structures.
'xTB' - GFN2-xTB
Args for xTB:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
float | None: potential energy in kcal/mol or None.
"""
if isinstance(calculator, str):
if calculator.lower() == "xTB".lower():
results = GFN2xTB(self.rdmol).singlepoint(water=water)
# results = SimpleNamespace(
# PE = datadict['total energy'] * hartree2kcalpermol,
# Gsolv = Gsolv,
# charges = datadict['partial charges'],
# wbo = Wiberg_bond_orders,
# )
PE = results.PE
elif (
calculator.lower() == "MMFF94".lower()
or calculator.lower() == "MMFF".lower()
):
mp = Chem.rdForceFieldHelpers.MMFFGetMoleculeProperties(
self.rdmol, mmffVariant="MMFF94"
)
ff = Chem.rdForceFieldHelpers.MMFFGetMoleculeForceField(self.rdmol, mp)
PE = ff.CalcEnergy()
elif calculator.lower() == "MMFF94s".lower():
mp = Chem.rdForceFieldHelpers.MMFFGetMoleculeProperties(
self.rdmol, mmffVariant="MMFF94s"
)
ff = Chem.rdForceFieldHelpers.MMFFGetMoleculeForceField(self.rdmol, mp)
PE = ff.CalcEnergy()
elif calculator.lower() == "UFF".lower():
ff = Chem.rdForceFieldHelpers.UFFGetMoleculeForceField(self.rdmol)
PE = ff.CalcEnergy()
else:
raise ValueError("Unsupported calculator")
self.props.update({"E_tot(kcal/mol)": PE})
return self
else:
try:
ase_atoms = ase.Atoms(symbols=self.symbols, positions=self.positions)
ase_atoms.info["charge"] = self.charge
ase_atoms.info["spin"] = self.spin
ase_atoms.calc = calculator
PE = ase_atoms.get_potential_energy() # np.array
if isinstance(PE, float):
PE = rdworks.units.ev2kcalpermol * PE
elif isinstance(PE, np.ndarray | list):
PE = rdworks.units.ev2kcalpermol * float(
PE[0]
) # np.float64 to float
self.props.update({"E_tot(kcal/mol)": PE})
return self
except:
raise RuntimeError("ASE calculator error")
def optimize(
self,
calculator: str | Callable = "MMFF94",
fmax: float = 0.05,
max_iter: int = 1000,
water: str | None = None,
) -> Self:
"""Optimize 3D geometry.
Args:
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, xTB or ASE calculator.
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
and primarily relies on data from quantum mechanical calculations.
It's often used in molecular dynamics simulations.
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
This makes it better suited for geometry optimization studies where a static,
time-averaged structure is desired. The "s" stands for "static".
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
molecular mechanics calculations. It's a tool for geometry optimization,
energy minimization, and exploring molecular conformations in 3D space.
UFF is often used to refine conformers generated by other methods,
such as random conformer generation, to produce more physically plausible
and stable structures.
fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: self
"""
if isinstance(calculator, str):
PE_start = self.singlepoint(calculator).props.get("E_tot(kcal/mol)")
PE_final = None
if calculator.lower() == "xTB".lower():
results = GFN2xTB(self.rdmol).optimize(water=water)
# results = SimpleNamespace(
# PE = datadict['total energy'] * hartree2kcalpermol,
# charges = datadict['partial charges'],
# wbo = Wiberg_bond_orders,
# geometry = rdmol_opt,
# )
try:
self.rdmol = results.geometry
PE_final = results.PE
retcode = 0
except:
retcode = 1
elif (
calculator.lower() == "MMFF94".lower()
or calculator.lower() == "MMFF".lower()
):
retcode = Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(
self.rdmol, mmffVariant="MMFF94", maxIters=max_iter
)
# returns 0 if the optimization converged
elif calculator.lower() == "MMFF94s".lower():
retcode = Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(
self.rdmol, mmffVariant="MMFF94s", maxIters=max_iter
)
# returns 0 if the optimization converged
elif calculator.lower() == "UFF".lower():
retcode = Chem.rdForceFieldHelpers.UFFOptimizeMolecule(
self.rdmol, maxIters=max_iter
)
# returns 0 if the optimization converged
if PE_final is None:
PE_final = self.singlepoint(calculator).props.get("E_tot(kcal/mol)")
self.props.update(
{
"E_tot_init(kcal/mol)": PE_start, # energy before optimization
"E_tot(kcal/mol)": PE_final, # energy after optimization
"Converged": retcode == 0, # True or False
}
)
return self
else:
# assuming ASE calculator
with io.StringIO() as logfile:
ase_atoms = ase.Atoms(symbols=self.symbols, positions=self.positions)
ase_atoms.info["charge"] = self.charge
ase_atoms.info["spin"] = self.spin
ase_atoms.calc = calculator
FIRE(ase_atoms, logfile=logfile).run(fmax=fmax)
lines = [
l.strip().split()[1:]
for l in logfile.getvalue().split("\n")
if l.startswith("FIRE")
]
data = [(float(e), float(f)) for (_, _, e, f) in lines]
self.props.update(
{
"E_tot_init(kcal/mol)": data[0][0]
* rdworks.units.ev2kcalpermol, # energy before optimization
"E_tot(kcal/mol)": data[-1][0]
* rdworks.units.ev2kcalpermol, # energy after optimization
"Converged": data[-1][1] < fmax, # True or False
}
)
# update atomic coordinates
return self.sync(ase_atoms.get_positions())
def calculate_torsion_energies_one(
self,
calculator: str | Callable,
indices: tuple[int],
simplify: bool = True,
fmax: float = 0.05,
interval: float = 20.0,
use_converged_only: bool = True,
water: str | None = None,
) -> Self:
"""Calculate potential energy profile for one torsion angle.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB' or ASE calculator.
indices (tuple[int]): atom indices (i,j,k,l) for a torsion angle
simplify (bool, optional): whether to use fragementation. Defaults to True.
fmax (float, optional): convergence limit for optimize. Defaults to 0.05.
interval (float, optional): angle intervals. Defaults to 20.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: modified self.
"""
ref_conf = self.copy()
data = {
"indices": indices,
"angle": [],
"init": [],
"last": [],
"Converged": [],
}
if simplify:
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(
ref_conf.rdmol, indices
)
frag_conf = Conf(frag)
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = frag_conf.copy()
conf.set_torsion_angle(*frag_ijkl, angle) # atoms bonded to `l` move.
conf = conf.optimize(calculator, fmax=fmax, water=water)
data["angle"].append(angle)
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot_init(kcal/mol)"])
data["last"].append(conf.props["E_tot(kcal/mol)"])
data["Converged"].append(conf.props["Converged"])
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
# to serialize the molecule
data["frag"] = Chem.MolToMolBlock(frag_cleaned)
data["frag_indices"] = frag_ijkl
else:
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = ref_conf.copy()
conf.set_torsion_angle(*indices, angle) # atoms bonded to `l` move.
conf = conf.optimize(calculator, fmax=fmax, water=water)
data["angle"].append(angle)
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot_init(kcal/mol)"])
data["last"].append(conf.props["E_tot(kcal/mol)"])
data["Converged"].append(conf.props["Converged"])
# Post-processing
if use_converged_only:
data["angle"] = list(itertools.compress(data["angle"], data["Converged"]))
data["init"] = list(itertools.compress(data["init"], data["Converged"]))
data["last"] = list(itertools.compress(data["last"], data["Converged"]))
relax = np.array(data["init"]) - np.median(data["last"])
E_rel = relax - np.min(relax)
torsion_energy_profile = {
"indices": data["indices"],
"angle": np.round(
np.array(data["angle"]), 1
).tolist(), # np.ndarray -> list for serialization
"E_rel(kcal/mol)": np.round(
E_rel, 2
).tolist(), # np.ndarray -> list for serialization
}
if simplify:
torsion_energy_profile.update(
{
"frag": data.get("frag", None),
"frag_indices": data.get("frag_indices", None),
}
)
self.props["torsion"] = torsion_energy_profile
return self
def __calculate_torsion_energies_batch(
self,
batchoptimizer: Callable,
indices: tuple,
simplify: bool = True,
fmax: float = 0.05,
interval: float = 20.0,
use_converged_only: bool = True,
batchsize_atoms: int = 16 * 1024,
) -> Self:
"""Calculate potential energy profile for torsion angles in a batch.
Args:
batchoptimizer (Callable): BatchOptimizer.
indices (tuple): atom indices (i,j,k,l) for a torsion angle
simplify (bool, optional): whether to use fragementation. Defaults to True.
fmax (float, optional): convergence limit for optimize. Defaults to 0.05.
interval (float, optional): angle intervals. Defaults to 20.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
Returns:
Self: modified self.
"""
ref_conf = self.copy()
data = {
"indices": indices,
"angle": [],
"init": [],
"last": [],
"Converged": [],
}
confs = []
if simplify:
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(
ref_conf.rdmol, indices
)
frag_conf = Conf(frag)
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = frag_conf.copy()
conf.set_torsion_angle(*frag_ijkl, angle) # atoms bonded to `l` move.
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
# to serialize the molecule
data["frag"] = Chem.MolToMolBlock(frag_cleaned)
data["frag_indices"] = frag_ijkl
data["angle"].append(angle)
confs.append(conf)
else:
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = ref_conf.copy()
conf.set_torsion_angle(*indices, angle) # atoms bonded to `l` move.
data["angle"].append(angle)
confs.append(conf)
batches = prepare_batches(confs, batchsize_atoms)
for batch in batches:
optimized = batchoptimizer(batch.rdmols).run()
for rdmol in optimized.mols:
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(float(rdmol.GetProp("E_tot_init(kcal/mol)")))
data["last"].append(float(rdmol.GetProp("E_tot(kcal/mol)")))
data["Converged"].append(
True if rdmol.GetProp("Converged") == "True" else False
)
# Post-processing
if use_converged_only:
data["angle"] = list(itertools.compress(data["angle"], data["Converged"]))
data["init"] = list(itertools.compress(data["init"], data["Converged"]))
data["last"] = list(itertools.compress(data["last"], data["Converged"]))
relax = np.array(data["init"]) - np.median(data["last"])
E_rel = relax - np.min(relax)
torsion_energy_profile = {
"indices": data["indices"],
"angle": np.round(
np.array(data["angle"]), 1
).tolist(), # np.ndarray -> list for serialization
"E_rel(kcal/mol)": np.round(
E_rel, 2
).tolist(), # np.ndarray -> list for serialization
}
if simplify:
torsion_energy_profile.update(
{
"frag": data.get("frag", None),
"frag_indices": data.get("frag_indices", None),
}
)
self.props["torsion"] = torsion_energy_profile
return self
def calculate_torsion_energies(
self,
calculator: str | Callable,
torsion_angle_idx: int | None = None,
simplify: bool = True,
fmax: float = 0.05,
interval: float = 20.0,
use_converged_only: bool = True,
batchsize_atoms: int = 0,
water: str | None = None,
) -> Self:
"""Calculates potential energy profiles for each torsion angle using ASE or BatchOptimizer.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
torsion_angle_idx (int | None): key to the torsion indices to calculate. Defaults to None (all).
simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
batchsize_atoms (int, optional): maximum number of atoms in a single batch.
Setting any number smaller than conf.natoms to disable batch optimization.
Defaults to 0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: modified self.
"""
torsion_angle_atom_indices_list = self.get_torsion_angle_atoms()
# [(5, 4, 3, 1), ...]
if isinstance(torsion_angle_idx, int) and torsion_angle_idx < len(
torsion_angle_atom_indices_list
):
indices = {
torsion_angle_idx: torsion_angle_atom_indices_list[torsion_angle_idx]
}
# atom indices for a single torsion angle
# ex. {0: (5,4,3,1)}
else:
indices = {k: v for k, v in enumerate(torsion_angle_atom_indices_list)}
# atom indices for all torsion angles
# ex. {0: (5,4,3,1), 1: ...}
conf = self.copy()
torsion_energies = []
if batchsize_atoms >= conf.natoms:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__calculate_torsion_energies_batch(
calculator,
ijkl,
simplify,
fmax,
interval,
use_converged_only,
batchsize_atoms=batchsize_atoms,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
else:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.calculate_torsion_energies_one(
calculator,
ijkl,
simplify,
fmax,
interval,
use_converged_only,
water=water,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
self.props["torsion"] = torsion_energies
return self
def __sp_torsion_energies(
self,
calculator: str | Callable,
indices: tuple[int],
simplify: bool = True,
interval: float = 20.0,
water: str | None = None,
) -> Self:
"""Single-Point based potential energy profile for one torsion angle.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB' or ASE calculator.
indices (tuple[int]): atom indices (i,j,k,l) for a torsion angle
simplify (bool, optional): whether to use fragementation. Defaults to True.
interval (float, optional): angle intervals. Defaults to 20.0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa', 'alpb', 'cpcmx')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
cpcmx: Extended Conductor-like Polarizable Continuum Solvation Model (CPCM-X).
Defaults to None.
Returns:
Self: modified self.
"""
ref_conf = self.copy() # optimized reference conformer
ref_conf = ref_conf.singlepoint(calculator, water=water)
data = {
"indices": indices,
"angle": [],
"init": [],
"last": [],
"Converged": [],
}
if simplify:
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(
ref_conf.rdmol, indices
)
frag_conf = Conf(frag)
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = frag_conf.copy()
conf.set_torsion_angle(*frag_ijkl, angle) # atoms bonded to `l` move.
conf = conf.singlepoint(calculator, water=water)
data["angle"].append(angle)
# conf.singlepoint() sets `E_tot(kcal/mol)`
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot(kcal/mol)"])
data["last"].append(ref_conf.props["E_tot(kcal/mol)"])
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
# to serialize the molecule
data["frag"] = Chem.MolToMolBlock(frag_cleaned)
data["frag_indices"] = frag_ijkl
else:
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = ref_conf.copy()
conf.set_torsion_angle(*indices, angle) # atoms bonded to `l` move.
conf = conf.singlepoint(calculator, water=water)
data["angle"].append(angle)
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot(kcal/mol)"])
data["last"].append(ref_conf.props["E_tot(kcal/mol)"])
relax = np.array(data["init"]) - np.median(data["last"])
E_rel = relax - np.min(relax)
torsion_energy_profile = {
"indices": data["indices"],
"angle": np.round(
np.array(data["angle"]), 1
).tolist(), # np.ndarray -> list for serialization
"E_rel(kcal/mol)": np.round(
E_rel, 2
).tolist(), # np.ndarray -> list for serialization
}
if simplify:
torsion_energy_profile.update(
{
"frag": data.get("frag", None),
"frag_indices": data.get("frag_indices", None),
}
)
self.props["torsion"] = torsion_energy_profile
return self
def __sp_torsion_energies_batch(
self,
batch_calculator: Callable,
indices: tuple,
simplify: bool = True,
interval: float = 20.0,
water: str | None = None,
batchsize_atoms: int = 16 * 1024,
) -> Self:
"""Calculate Single-Point potential energy profile for torsion angles in a batch.
Args:
calculator (Callable): BatchSinglePoint calculator.
indices (tuple): atom indices (i,j,k,l) for a torsion angle
simplify (bool, optional): whether to use fragementation. Defaults to True.
interval (float, optional): angle intervals. Defaults to 20.0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa', 'alpb', 'cpcmx')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
cpcmx: Extended Conductor-like Polarizable Continuum Solvation Model (CPCM-X).
Defaults to None.
Returns:
Self: modified self.
"""
ref_conf = self.copy() # optimized reference conformer
evaluated_batch = batch_calculator([ref_conf.rdmol]).run()
for rdmol in evaluated_batch.mols:
ref_conf.props["E_tot(kcal/mol)"] = float(rdmol.GetProp("E_tot(kcal/mol)"))
data = {
"indices": indices,
"angle": [],
"init": [],
"last": [],
"Converged": [],
}
confs = []
if simplify:
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(
ref_conf.rdmol, indices
)
frag_conf = Conf(frag)
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = frag_conf.copy()
conf.set_torsion_angle(*frag_ijkl, angle) # atoms bonded to `l` move.
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
# to serialize the molecule
data["frag"] = Chem.MolToMolBlock(frag_cleaned)
data["frag_indices"] = frag_ijkl
data["angle"].append(angle)
confs.append(conf)
else:
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = ref_conf.copy()
conf.set_torsion_angle(*indices, angle) # atoms bonded to `l` move.
data["angle"].append(angle)
confs.append(conf)
batches = prepare_batches(confs, batchsize_atoms)
for batch in batches:
evaluated_batch = batch_calculator(batch.rdmols).run()
for rdmol in evaluated_batch.mols:
# batch single point calculator sets 'E_tot(kcal/mol)'
# batch optimizer updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(float(rdmol.GetProp("E_tot(kcal/mol)")))
data["last"].append(ref_conf.props["E_tot(kcal/mol)"])
relax = np.array(data["init"]) - np.median(data["last"])
E_rel = relax - np.min(relax)
torsion_energy_profile = {
"indices": data["indices"],
"angle": np.round(
np.array(data["angle"]), 1
).tolist(), # np.ndarray -> list for serialization
"E_rel(kcal/mol)": np.round(
E_rel, 2
).tolist(), # np.ndarray -> list for serialization
}
if simplify:
torsion_energy_profile.update(
{
"frag": data.get("frag", None),
"frag_indices": data.get("frag_indices", None),
}
)
self.props["torsion"] = torsion_energy_profile
return self
def calculate_sp_torsion_energies(
self,
calculator: str | Callable,
torsion_angle_idx: int | None = None,
simplify: bool = True,
interval: float = 20.0,
water: str | None = None,
batchsize_atoms: int = 0,
) -> Self:
"""Calculates Single-Point potential energy profiles for each torsion angle using ASE or BatchSinglePoint.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
torsion_angle_idx (int | None): key to the torsion indices to calculate. Defaults to None (all).
simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
batchsize_atoms (int, optional): maximum number of atoms in a single batch.
Setting any number smaller than conf.natoms to disable batch optimization.
Defaults to 0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
cpcmx: Extended Conductor-like Polarizable Continuum Solvation Model (CPCM-X).
Defaults to None.
Returns:
Self: modified self.
"""
torsion_angle_atom_indices_list = self.get_torsion_angle_atoms()
# [(5, 4, 3, 1), ...]
if isinstance(torsion_angle_idx, int) and torsion_angle_idx < len(
torsion_angle_atom_indices_list
):
indices = {
torsion_angle_idx: torsion_angle_atom_indices_list[torsion_angle_idx]
}
# atom indices for a single torsion angle
# ex. {0: (5,4,3,1)}
else:
indices = {k: v for k, v in enumerate(torsion_angle_atom_indices_list)}
# atom indices for all torsion angles
# ex. {0: (5,4,3,1), 1: ...}
conf = self.copy()
torsion_energies = []
if batchsize_atoms >= conf.natoms:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__sp_torsion_energies_batch(
calculator,
ijkl,
simplify,
interval,
water=water,
batchsize_atoms=batchsize_atoms,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
else:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__sp_torsion_energies(
calculator, ijkl, simplify, interval, water
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
self.props["torsion"] = torsion_energies
return self
def dumps(self, key: str = "") -> str:
"""Returns JSON dumps of the `props`.
Args:
key (str): a key for the `props` dictionary. Defaults to '' (all).
Returns:
str: JSON dumps.
"""
if key:
return json.dumps({key: self.props[key]})
else:
return json.dumps(self.props)
def serialize(self, decimals: int = 3) -> str:
"""Serialize information necessary to rebuild a Conf object.
Args:
decimals (int, optional): number of decimal places for float data type. Defaults to 3.
Examples:
```python
serialized = conf1.serialize()
conf2 = Conf().deserialize(serialized)
assert conf1 == conf2
```
Returns:
str: serialized string for json.loads()
"""
serialized = rdworks.utils.serialize(
{
"name": self.name,
"natoms": self.natoms,
"charge": self.charge,
"spin": self.spin,
"props": rdworks.utils.recursive_round(self.props, decimals),
"molblock": self.molblock,
}
)
return serialized
def deserialize(self, serialized: str) -> Self:
"""De-serialize information and rebuild a Conf object.
Examples:
```python
serialized = conf1.serialize()
conf2 = Conf().deserialize(serialized)
assert conf1 == conf2
```
Args:
serialized (str): serialized string.
Returns:
Self: modified self.
"""
data = rdworks.utils.deserialize(serialized)
self.name = data["name"]
self.natoms = int(data["natoms"])
self.charge = int(data["charge"])
self.spin = int(data["spin"])
self.props = data["props"]
self.rdmol = Chem.MolFromMolBlock(
data["molblock"], sanitize=False, removeHs=False
)
self._update()
return self
def to_geometry(self) -> str:
"""Returns geometry input for psi4 or other quantum chemistry software.
Each line has atom symbol and its X, Y, and Z coordinates.
Unit is Angstrom.
O 0.0 0.0 0.0
H 0.757 0.586 0.0
H -0.757 0.586 0.0
Example:
import psi4
geometry = conf.to_geometry()
mol = psi4.geometry(geometry)
Returns:
str: geometry input for psi4 or other quantum chemistry software.
"""
lines = [
f"{e:5} {x:.3f} {y:.3f} {z:.3f}"
for e, (x, y, z) in zip(self.symbols, self.positions)
]
return "\n".join(lines)
def to_xyz(self) -> str:
"""Returns XYZ formatted strings.
Returns:
str: XYZ formatted strings.
"""
lines = [f"{self.natoms}", " "]
for e, (x, y, z) in zip(self.symbols, self.positions):
lines.append(f"{e:5} {x:23.14f} {y:23.14f} {z:23.14f}")
return "\n".join(lines)
def to_turbomole(self, bohr: bool = False) -> str:
"""Returns TURBOMOLE coord file formatted strings.
Turbomole coord file format:
- It starts with the keyword `$coord`.
- Each line after the $coord line specifies an atom, consisting of:
- Three real numbers representing the Cartesian coordinates (x, y, z).
- A string for the element name.
- Optional: an "f" label at the end to indicate that the atom's coordinates are frozen during optimization.
- Coordinates can be given in Bohr (default), Ångström (`$coord angs`), or fractional coordinates (`$coord frac`).
- Optional data groups like periodicity (`$periodic`), lattice parameters (`$lattice`), and cell parameters (`$cell`) can also be included.
- Regarding precision:
The precision of the coordinates is crucial for accurate calculations, especially geometry optimizations.
Tools like the TURBOMOLEOptimizer might check for differences in atomic positions with a tolerance of 1e-13.
Args:
bohr (bool): whether to use Bohr units of the coordinates. Defaults to False.
Otherwise, Angstrom units will be used.
Returns:
str: TURBOMOLE coord formatted file.
"""
if bohr:
lines = ["$coord"]
else:
lines = ["$coord angs"]
for (x, y, z), e in zip(self.positions, self.symbols):
lines.append(f"{x:20.15f} {y:20.15f} {z:20.15f} {e}")
lines.append("$end")
return "\n".join(lines)
def to_sdf(self, props: bool = True) -> str:
"""Returns the SDF-formatted strings.
Args:
props (bool, optional): include `props as SDF properties. Defaults to True.
Returns:
str: strings in the SDF format.
"""
in_memory = io.StringIO()
with Chem.SDWriter(in_memory) as f:
rdmol = Chem.Mol(self.rdmol)
rdmol.SetProp("_Name", self.name)
if props:
for k, v in self.props.items():
rdmol.SetProp(k, str(v))
f.write(rdmol)
return in_memory.getvalue()
def to_png(
self,
width: int = 300,
height: int = 300,
legend: str = "",
atom_index: bool = False,
highlight_atoms: list[int] | None = None,
highlight_bonds: list[int] | None = None,
redraw: bool = False,
coordgen: bool = False,
trim: bool = True,
) -> Image.Image:
"""Draw 2D molecule in PNG format.
Args:
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
legend (str, optional): legend. Defaults to ''.
atom_index (bool, optional): whether to show atom index. Defaults to False.
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
redraw (bool, optional): whether to redraw. Defaults to False.
coordgen (bool, optional): whether to use coordgen. Defaults to False.
trim (bool, optional): whether to trim white margins. Default to True.
Returns:
Image.Image: output PIL Image object.
"""
return render_png(
self.rdmol,
width=width,
height=height,
legend=legend,
atom_index=atom_index,
highlight_atoms=highlight_atoms,
highlight_bonds=highlight_bonds,
redraw=redraw,
coordgen=coordgen,
trim=trim,
)
def to_svg(
self,
width: int = 300,
height: int = 300,
legend: str = "",
atom_index: bool = False,
highlight_atoms: list[int] | None = None,
highlight_bonds: list[int] | None = None,
redraw: bool = False,
coordgen: bool = False,
optimize: bool = True,
) -> str:
"""Draw 2D molecule in SVG format.
Examples:
For Jupyternotebook, wrap the output with SVG:
>>> from IPython.display import SVG
>>> SVG(libr[0].to_svg())
Args:
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
legend (str, optional): legend. Defaults to ''.
atom_index (bool, optional): whether to show atom index. Defaults to False.
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
redraw (bool, optional): whether to redraw. Defaults to False.
coordgen (bool, optional): whether to use coordgen. Defaults to False.
optimize (bool, optional): whether to optimize SVG string. Defaults to True.
Returns:
str: SVG string
"""
return render_svg(
self.rdmol,
width=width,
height=height,
legend=legend,
atom_index=atom_index,
highlight_atoms=highlight_atoms,
highlight_bonds=highlight_bonds,
redraw=redraw,
coordgen=coordgen,
optimize=optimize,
)
Attributes¶
COG
property
¶
Returns the center of geometry (COG).
Returns:
-
array–np.array: the center of geometry.
Rg
property
¶
Returns the radius of gyration (Rg).
Returns:
-
float(float) –the radius of gyration.
SASA
property
¶
Calculate Solvent Accessible Surface Area (total, polar, apolar).
Returns:
-
dict–tuple[float, float, float]: (total, polar, apolar)
charge = 0
instance-attribute
¶
molblock = ''
instance-attribute
¶
name = name
instance-attribute
¶
natoms = 0
instance-attribute
¶
numbers = []
instance-attribute
¶
positions = np.array([])
instance-attribute
¶
props = {}
instance-attribute
¶
rdmol = molecule
instance-attribute
¶
smiles = ''
instance-attribute
¶
spin = 1
instance-attribute
¶
symbols = []
instance-attribute
¶
Functions¶
calculate_sp_torsion_energies(calculator, torsion_angle_idx=None, simplify=True, interval=20.0, water=None, batchsize_atoms=0)
¶
Calculates Single-Point potential energy profiles for each torsion angle using ASE or BatchSinglePoint.
Parameters:
-
calculator(str | Callable) –'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
-
torsion_angle_idx(int | None, default:None) –key to the torsion indices to calculate. Defaults to None (all).
-
simplify(bool, default:True) –whether to use fragment surrogate. Defaults to True.
-
fmax(float) –fmax of ASE optimizer. Defaults to 0.05.
-
interval(float, default:20.0) –interval of torsion angles in degree. Defaults to 15.0.
-
use_converged_only(bool) –whether to use only converged data. Defaults to True.
-
batchsize_atoms(int, default:0) –maximum number of atoms in a single batch. Setting any number smaller than conf.natoms to disable batch optimization. Defaults to 0.
Args for xTB calculator
water (str, optional): water solvation model (choose 'gbsa' or 'alpb') alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann). gbsa: generalized Born (GB) model with Surface Area contributions. cpcmx: Extended Conductor-like Polarizable Continuum Solvation Model (CPCM-X). Defaults to None.
Returns:
-
Self(Self) –modified self.
Source code in src/rdworks/conf.py
def calculate_sp_torsion_energies(
self,
calculator: str | Callable,
torsion_angle_idx: int | None = None,
simplify: bool = True,
interval: float = 20.0,
water: str | None = None,
batchsize_atoms: int = 0,
) -> Self:
"""Calculates Single-Point potential energy profiles for each torsion angle using ASE or BatchSinglePoint.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
torsion_angle_idx (int | None): key to the torsion indices to calculate. Defaults to None (all).
simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
batchsize_atoms (int, optional): maximum number of atoms in a single batch.
Setting any number smaller than conf.natoms to disable batch optimization.
Defaults to 0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
cpcmx: Extended Conductor-like Polarizable Continuum Solvation Model (CPCM-X).
Defaults to None.
Returns:
Self: modified self.
"""
torsion_angle_atom_indices_list = self.get_torsion_angle_atoms()
# [(5, 4, 3, 1), ...]
if isinstance(torsion_angle_idx, int) and torsion_angle_idx < len(
torsion_angle_atom_indices_list
):
indices = {
torsion_angle_idx: torsion_angle_atom_indices_list[torsion_angle_idx]
}
# atom indices for a single torsion angle
# ex. {0: (5,4,3,1)}
else:
indices = {k: v for k, v in enumerate(torsion_angle_atom_indices_list)}
# atom indices for all torsion angles
# ex. {0: (5,4,3,1), 1: ...}
conf = self.copy()
torsion_energies = []
if batchsize_atoms >= conf.natoms:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__sp_torsion_energies_batch(
calculator,
ijkl,
simplify,
interval,
water=water,
batchsize_atoms=batchsize_atoms,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
else:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__sp_torsion_energies(
calculator, ijkl, simplify, interval, water
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
self.props["torsion"] = torsion_energies
return self
calculate_torsion_energies(calculator, torsion_angle_idx=None, simplify=True, fmax=0.05, interval=20.0, use_converged_only=True, batchsize_atoms=0, water=None)
¶
Calculates potential energy profiles for each torsion angle using ASE or BatchOptimizer.
Parameters:
-
calculator(str | Callable) –'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
-
torsion_angle_idx(int | None, default:None) –key to the torsion indices to calculate. Defaults to None (all).
-
simplify(bool, default:True) –whether to use fragment surrogate. Defaults to True.
-
fmax(float, default:0.05) –fmax of ASE optimizer. Defaults to 0.05.
-
interval(float, default:20.0) –interval of torsion angles in degree. Defaults to 15.0.
-
use_converged_only(bool, default:True) –whether to use only converged data. Defaults to True.
-
batchsize_atoms(int, default:0) –maximum number of atoms in a single batch. Setting any number smaller than conf.natoms to disable batch optimization. Defaults to 0.
Args for xTB calculator
water (str, optional): water solvation model (choose 'gbsa' or 'alpb') alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann). gbsa: generalized Born (GB) model with Surface Area contributions. Defaults to None.
Returns:
-
Self(Self) –modified self.
Source code in src/rdworks/conf.py
def calculate_torsion_energies(
self,
calculator: str | Callable,
torsion_angle_idx: int | None = None,
simplify: bool = True,
fmax: float = 0.05,
interval: float = 20.0,
use_converged_only: bool = True,
batchsize_atoms: int = 0,
water: str | None = None,
) -> Self:
"""Calculates potential energy profiles for each torsion angle using ASE or BatchOptimizer.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB', ASE calculator, or BatchOptimizer.
torsion_angle_idx (int | None): key to the torsion indices to calculate. Defaults to None (all).
simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
batchsize_atoms (int, optional): maximum number of atoms in a single batch.
Setting any number smaller than conf.natoms to disable batch optimization.
Defaults to 0.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: modified self.
"""
torsion_angle_atom_indices_list = self.get_torsion_angle_atoms()
# [(5, 4, 3, 1), ...]
if isinstance(torsion_angle_idx, int) and torsion_angle_idx < len(
torsion_angle_atom_indices_list
):
indices = {
torsion_angle_idx: torsion_angle_atom_indices_list[torsion_angle_idx]
}
# atom indices for a single torsion angle
# ex. {0: (5,4,3,1)}
else:
indices = {k: v for k, v in enumerate(torsion_angle_atom_indices_list)}
# atom indices for all torsion angles
# ex. {0: (5,4,3,1), 1: ...}
conf = self.copy()
torsion_energies = []
if batchsize_atoms >= conf.natoms:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.__calculate_torsion_energies_batch(
calculator,
ijkl,
simplify,
fmax,
interval,
use_converged_only,
batchsize_atoms=batchsize_atoms,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
else:
for torsion_angle_idx, ijkl in indices.items():
conf = conf.calculate_torsion_energies_one(
calculator,
ijkl,
simplify,
fmax,
interval,
use_converged_only,
water=water,
)
conf.props["torsion"].update({"torsion_angle_idx": torsion_angle_idx})
torsion_energies.append(conf.props["torsion"])
self.props["torsion"] = torsion_energies
return self
calculate_torsion_energies_one(calculator, indices, simplify=True, fmax=0.05, interval=20.0, use_converged_only=True, water=None)
¶
Calculate potential energy profile for one torsion angle.
Parameters:
-
calculator(str | Callable) –'MMFF', 'UFF', 'xTB' or ASE calculator.
-
indices(tuple[int]) –atom indices (i,j,k,l) for a torsion angle
-
simplify(bool, default:True) –whether to use fragementation. Defaults to True.
-
fmax(float, default:0.05) –convergence limit for optimize. Defaults to 0.05.
-
interval(float, default:20.0) –angle intervals. Defaults to 20.0.
-
use_converged_only(bool, default:True) –whether to use only converged data. Defaults to True.
Args for xTB calculator
water (str, optional): water solvation model (choose 'gbsa' or 'alpb') alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann). gbsa: generalized Born (GB) model with Surface Area contributions. Defaults to None.
Returns:
-
Self(Self) –modified self.
Source code in src/rdworks/conf.py
def calculate_torsion_energies_one(
self,
calculator: str | Callable,
indices: tuple[int],
simplify: bool = True,
fmax: float = 0.05,
interval: float = 20.0,
use_converged_only: bool = True,
water: str | None = None,
) -> Self:
"""Calculate potential energy profile for one torsion angle.
Args:
calculator (str | Callable): 'MMFF', 'UFF', 'xTB' or ASE calculator.
indices (tuple[int]): atom indices (i,j,k,l) for a torsion angle
simplify (bool, optional): whether to use fragementation. Defaults to True.
fmax (float, optional): convergence limit for optimize. Defaults to 0.05.
interval (float, optional): angle intervals. Defaults to 20.0.
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: modified self.
"""
ref_conf = self.copy()
data = {
"indices": indices,
"angle": [],
"init": [],
"last": [],
"Converged": [],
}
if simplify:
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(
ref_conf.rdmol, indices
)
frag_conf = Conf(frag)
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = frag_conf.copy()
conf.set_torsion_angle(*frag_ijkl, angle) # atoms bonded to `l` move.
conf = conf.optimize(calculator, fmax=fmax, water=water)
data["angle"].append(angle)
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot_init(kcal/mol)"])
data["last"].append(conf.props["E_tot(kcal/mol)"])
data["Converged"].append(conf.props["Converged"])
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
# to serialize the molecule
data["frag"] = Chem.MolToMolBlock(frag_cleaned)
data["frag_indices"] = frag_ijkl
else:
for angle in np.arange(-180.0, 180.0, interval):
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
conf = ref_conf.copy()
conf.set_torsion_angle(*indices, angle) # atoms bonded to `l` move.
conf = conf.optimize(calculator, fmax=fmax, water=water)
data["angle"].append(angle)
# conf.optimize() updates coordinates and conf.props:
# `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
data["init"].append(conf.props["E_tot_init(kcal/mol)"])
data["last"].append(conf.props["E_tot(kcal/mol)"])
data["Converged"].append(conf.props["Converged"])
# Post-processing
if use_converged_only:
data["angle"] = list(itertools.compress(data["angle"], data["Converged"]))
data["init"] = list(itertools.compress(data["init"], data["Converged"]))
data["last"] = list(itertools.compress(data["last"], data["Converged"]))
relax = np.array(data["init"]) - np.median(data["last"])
E_rel = relax - np.min(relax)
torsion_energy_profile = {
"indices": data["indices"],
"angle": np.round(
np.array(data["angle"]), 1
).tolist(), # np.ndarray -> list for serialization
"E_rel(kcal/mol)": np.round(
E_rel, 2
).tolist(), # np.ndarray -> list for serialization
}
if simplify:
torsion_energy_profile.update(
{
"frag": data.get("frag", None),
"frag_indices": data.get("frag_indices", None),
}
)
self.props["torsion"] = torsion_energy_profile
return self
copy()
¶
deprotonate(atom_indices)
¶
Deprotonate given non-hydrogen atoms.
Parameters:
-
atom_indices(list[int]) –atom indices of non-hydrogen atoms to deprotonate.
Returns:
-
Self(Self) –self.
Source code in src/rdworks/conf.py
def deprotonate(self, atom_indices: list[int]) -> Self:
"""Deprotonate given non-hydrogen atoms.
Args:
atom_indices (list[int]): atom indices of non-hydrogen atoms to deprotonate.
Returns:
Self: self.
"""
for idx in atom_indices:
bonded_H_idx = None
atom = self.rdmol.GetAtomWithIdx(idx)
h = atom.GetNumExplicitHs()
if h - 1 >= 0:
atom.SetNumExplicitHs(h - 1) # (h-1) must be unsigned int
c = atom.GetFormalCharge()
atom.SetFormalCharge(c - 1)
neighbors = atom.GetNeighbors()
for neighbor in neighbors:
if neighbor.GetAtomicNum() == 1:
bonded_H_idx = neighbor.GetIdx()
break
if bonded_H_idx is not None:
edit_mol = Chem.EditableMol(self.rdmol)
edit_mol.RemoveAtom(bonded_H_idx)
self.rdmol = edit_mol.GetMol()
Chem.SanitizeMol(self.rdmol)
self._update()
return self
deserialize(serialized)
¶
De-serialize information and rebuild a Conf object.
Examples:
Args: serialized (str): serialized string.
Returns:
-
Self(Self) –modified self.
Source code in src/rdworks/conf.py
def deserialize(self, serialized: str) -> Self:
"""De-serialize information and rebuild a Conf object.
Examples:
```python
serialized = conf1.serialize()
conf2 = Conf().deserialize(serialized)
assert conf1 == conf2
```
Args:
serialized (str): serialized string.
Returns:
Self: modified self.
"""
data = rdworks.utils.deserialize(serialized)
self.name = data["name"]
self.natoms = int(data["natoms"])
self.charge = int(data["charge"])
self.spin = int(data["spin"])
self.props = data["props"]
self.rdmol = Chem.MolFromMolBlock(
data["molblock"], sanitize=False, removeHs=False
)
self._update()
return self
dumps(key='')
¶
Returns JSON dumps of the props.
Parameters:
-
key(str, default:'') –a key for the
propsdictionary. Defaults to '' (all).
Returns:
-
str(str) –JSON dumps.
Source code in src/rdworks/conf.py
get_torsion_angle(i, j, k, l)
¶
Get dihedral angle (i-j-k-l) in degrees.
Parameters:
-
i(int) –atom index
-
j(int) –atom index
-
k(int) –atom index
-
l(int) –atom index
Returns:
-
float(float) –dihedral angle in degrees.
Source code in src/rdworks/conf.py
def get_torsion_angle(self, i: int, j: int, k: int, l: int) -> float:
"""Get dihedral angle (i-j-k-l) in degrees.
Args:
i (int): atom index
j (int): atom index
k (int): atom index
l (int): atom index
Returns:
float: dihedral angle in degrees.
"""
degree = rdMolTransforms.GetDihedralDeg(self.rdmol.GetConformer(), i, j, k, l)
return degree
get_torsion_angle_atoms(strict=True)
¶
Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
Parameters:
-
strict(bool, default:True) –whether to exclude amide/imide/ester/acid bonds.
Returns:
-
list[tuple]–[(i, j, k, l), ...]
Source code in src/rdworks/conf.py
def get_torsion_angle_atoms(self, strict: bool = True) -> list[tuple]:
"""Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
Args:
strict (bool): whether to exclude amide/imide/ester/acid bonds.
Returns:
[(i, j, k, l), ...]
"""
return [d[:4] for d in get_torsion_angle_atom_indices(self.rdmol, strict)]
has_acceptable_bond_lengths(tolerance=0.25)
¶
Check bond length.
Parameters:
-
tolerance(float, default:0.25) –tolerance from the sum of van der Waals radii of bonded atoms. Defaults to 0.25 (A).
Returns:
-
bool(bool) –True if all bond lengths are accceptable.
Source code in src/rdworks/conf.py
def has_acceptable_bond_lengths(self, tolerance: float = 0.25) -> bool:
"""Check bond length.
Args:
tolerance (float, optional): tolerance from the sum of
van der Waals radii of bonded atoms. Defaults to 0.25 (A).
Returns:
bool: True if all bond lengths are accceptable.
"""
pt = Chem.GetPeriodicTable()
for bond in self.rdmol.GetBonds():
idx1 = bond.GetBeginAtomIdx()
idx2 = bond.GetEndAtomIdx()
nuc1 = self.rdmol.GetAtomWithIdx(idx1).GetAtomicNum()
nuc2 = self.rdmol.GetAtomWithIdx(idx2).GetAtomicNum()
sum_radii = pt.GetRvdw(nuc1) + pt.GetRvdw(nuc2) # (A)
# from mendeleev import element
# sum_radii = (element(nuc1).vdw_radius + element(nuc2).vdw_radius) * pm2angstrom
bond_length = rdMolTransforms.GetBondLength(
self.rdmol.GetConformer(), idx1, idx2
)
if abs(bond_length - sum_radii) > tolerance:
return False
return True
optimize(calculator='MMFF94', fmax=0.05, max_iter=1000, water=None)
¶
Optimize 3D geometry.
Parameters:
-
calculator(str | Callable, default:'MMFF94') –MMFF94 (= MMFF), MMFF94s, UFF, xTB or ASE calculator.
MMFF94orMMFF- Intended for general use, including organic molecules and proteins, and primarily relies on data from quantum mechanical calculations. It's often used in molecular dynamics simulations.MMFF94s- A "static" variant of MMFF94, with adjusted parameters for out-of-plane bending and dihedral torsions to favor planar geometries for specific nitrogen atoms. This makes it better suited for geometry optimization studies where a static, time-averaged structure is desired. The "s" stands for "static".UFF- UFF refers to the "Universal Force Field," a force field model used for molecular mechanics calculations. It's a tool for geometry optimization, energy minimization, and exploring molecular conformations in 3D space. UFF is often used to refine conformers generated by other methods, such as random conformer generation, to produce more physically plausible and stable structures. -
fmax(float, default:0.05) –fmax for the calculator convergence. Defaults to 0.05.
-
max_iter(int, default:1000) –max iterations for the calculator. Defaults to 1000.
Args for xTB calculator
water (str, optional): water solvation model (choose 'gbsa' or 'alpb') alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann). gbsa: generalized Born (GB) model with Surface Area contributions. Defaults to None.
Returns:
-
Self(Self) –self
Source code in src/rdworks/conf.py
def optimize(
self,
calculator: str | Callable = "MMFF94",
fmax: float = 0.05,
max_iter: int = 1000,
water: str | None = None,
) -> Self:
"""Optimize 3D geometry.
Args:
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, xTB or ASE calculator.
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
and primarily relies on data from quantum mechanical calculations.
It's often used in molecular dynamics simulations.
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
This makes it better suited for geometry optimization studies where a static,
time-averaged structure is desired. The "s" stands for "static".
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
molecular mechanics calculations. It's a tool for geometry optimization,
energy minimization, and exploring molecular conformations in 3D space.
UFF is often used to refine conformers generated by other methods,
such as random conformer generation, to produce more physically plausible
and stable structures.
fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
Args for xTB calculator:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
Self: self
"""
if isinstance(calculator, str):
PE_start = self.singlepoint(calculator).props.get("E_tot(kcal/mol)")
PE_final = None
if calculator.lower() == "xTB".lower():
results = GFN2xTB(self.rdmol).optimize(water=water)
# results = SimpleNamespace(
# PE = datadict['total energy'] * hartree2kcalpermol,
# charges = datadict['partial charges'],
# wbo = Wiberg_bond_orders,
# geometry = rdmol_opt,
# )
try:
self.rdmol = results.geometry
PE_final = results.PE
retcode = 0
except:
retcode = 1
elif (
calculator.lower() == "MMFF94".lower()
or calculator.lower() == "MMFF".lower()
):
retcode = Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(
self.rdmol, mmffVariant="MMFF94", maxIters=max_iter
)
# returns 0 if the optimization converged
elif calculator.lower() == "MMFF94s".lower():
retcode = Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(
self.rdmol, mmffVariant="MMFF94s", maxIters=max_iter
)
# returns 0 if the optimization converged
elif calculator.lower() == "UFF".lower():
retcode = Chem.rdForceFieldHelpers.UFFOptimizeMolecule(
self.rdmol, maxIters=max_iter
)
# returns 0 if the optimization converged
if PE_final is None:
PE_final = self.singlepoint(calculator).props.get("E_tot(kcal/mol)")
self.props.update(
{
"E_tot_init(kcal/mol)": PE_start, # energy before optimization
"E_tot(kcal/mol)": PE_final, # energy after optimization
"Converged": retcode == 0, # True or False
}
)
return self
else:
# assuming ASE calculator
with io.StringIO() as logfile:
ase_atoms = ase.Atoms(symbols=self.symbols, positions=self.positions)
ase_atoms.info["charge"] = self.charge
ase_atoms.info["spin"] = self.spin
ase_atoms.calc = calculator
FIRE(ase_atoms, logfile=logfile).run(fmax=fmax)
lines = [
l.strip().split()[1:]
for l in logfile.getvalue().split("\n")
if l.startswith("FIRE")
]
data = [(float(e), float(f)) for (_, _, e, f) in lines]
self.props.update(
{
"E_tot_init(kcal/mol)": data[0][0]
* rdworks.units.ev2kcalpermol, # energy before optimization
"E_tot(kcal/mol)": data[-1][0]
* rdworks.units.ev2kcalpermol, # energy after optimization
"Converged": data[-1][1] < fmax, # True or False
}
)
# update atomic coordinates
return self.sync(ase_atoms.get_positions())
protonate(atom_indices)
¶
Protonate given non-hydrogen atoms.
Parameters:
-
atom_indices(list[int]) –atom indices of non-hydrogen atoms to protonate.
Returns:
-
Self(Self) –self.
Source code in src/rdworks/conf.py
def protonate(self, atom_indices: list[int]) -> Self:
"""Protonate given non-hydrogen atoms.
Args:
atom_indices (list[int]): atom indices of non-hydrogen atoms to protonate.
Returns:
Self: self.
"""
for idx in atom_indices:
atom = self.rdmol.GetAtomWithIdx(idx)
h = atom.GetNumExplicitHs()
c = atom.GetFormalCharge()
atom.SetNumExplicitHs(h + 1)
atom.SetFormalCharge(c + 1)
Chem.SanitizeMol(self.rdmol)
self.rdmol = Chem.AddHs(self.rdmol, addCoords=True)
# The Chem.AddHs function in RDKit returns a new Mol object with hydrogens added to the molecule.
# It modifies the input molecule by adding hydrogens,
# but the original molecule remains unchanged.
self._update()
return self
rename(name)
¶
Rename and returns self.
Parameters:
-
name(str) –a new name for conformers.
Raises:
-
ValueError–if
nameis not given.
Returns:
-
Self(Self) –rdworks.Confobject.
Source code in src/rdworks/conf.py
def rename(self, name: str) -> Self:
"""Rename and returns self.
Args:
name (str): a new name for conformers.
Raises:
ValueError: if `name` is not given.
Returns:
Self: `rdworks.Conf` object.
"""
if not name:
raise ValueError("rdworks.Conf.rename() expects a name")
self.name = name
self.rdmol.SetProp("_Name", name)
return self
serialize(decimals=3)
¶
Serialize information necessary to rebuild a Conf object.
Parameters:
-
decimals(int, default:3) –number of decimal places for float data type. Defaults to 3.
Examples:
Returns:
-
str(str) –serialized string for json.loads()
Source code in src/rdworks/conf.py
def serialize(self, decimals: int = 3) -> str:
"""Serialize information necessary to rebuild a Conf object.
Args:
decimals (int, optional): number of decimal places for float data type. Defaults to 3.
Examples:
```python
serialized = conf1.serialize()
conf2 = Conf().deserialize(serialized)
assert conf1 == conf2
```
Returns:
str: serialized string for json.loads()
"""
serialized = rdworks.utils.serialize(
{
"name": self.name,
"natoms": self.natoms,
"charge": self.charge,
"spin": self.spin,
"props": rdworks.utils.recursive_round(self.props, decimals),
"molblock": self.molblock,
}
)
return serialized
set_torsion_angle(i, j, k, l, degree)
¶
Set dihedral angle (i-j-k-l) in degrees.
Parameters:
-
i(int) –atom index
-
j(int) –atom index
-
k(int) –atom index
-
l(int) –atom index
-
degree(float) –dihedral angle in degrees
Returns:
-
Self(Self) –modified Conf object
Source code in src/rdworks/conf.py
def set_torsion_angle(self, i: int, j: int, k: int, l: int, degree: float) -> Self:
"""Set dihedral angle (i-j-k-l) in degrees.
Args:
i (int): atom index
j (int): atom index
k (int): atom index
l (int): atom index
degree (float): dihedral angle in degrees
Returns:
Self: modified Conf object
"""
rdMolTransforms.SetDihedralDeg(self.rdmol.GetConformer(), i, j, k, l, degree)
return self
singlepoint(calculator='MMFF94', water=None)
¶
Get potential energy and set E_tot(kcal/mol) in the self.props.
Parameters:
-
calculator(str | Callable, default:'MMFF94') –MMFF94 (= MMFF), MMFF94s, UFF, xTB, or ASE calculator.
MMFF94orMMFF- Intended for general use, including organic molecules and proteins, and primarily relies on data from quantum mechanical calculations. It's often used in molecular dynamics simulations.MMFF94s- A "static" variant of MMFF94, with adjusted parameters for out-of-plane bending and dihedral torsions to favor planar geometries for specific nitrogen atoms. This makes it better suited for geometry optimization studies where a static, time-averaged structure is desired. The "s" stands for "static".UFF- UFF refers to the "Universal Force Field," a force field model used for molecular mechanics calculations. It's a tool for geometry optimization, energy minimization, and exploring molecular conformations in 3D space. UFF is often used to refine conformers generated by other methods, such as random conformer generation, to produce more physically plausible and stable structures. 'xTB' - GFN2-xTB
Args for xTB
water (str, optional): water solvation model (choose 'gbsa' or 'alpb') alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann). gbsa: generalized Born (GB) model with Surface Area contributions. Defaults to None.
Returns:
-
Self–float | None: potential energy in kcal/mol or None.
Source code in src/rdworks/conf.py
def singlepoint(
self, calculator: str | Callable = "MMFF94", water: str | None = None
) -> Self:
"""Get potential energy and set `E_tot(kcal/mol)` in the self.props.
Args:
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, xTB, or ASE calculator.
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
and primarily relies on data from quantum mechanical calculations.
It's often used in molecular dynamics simulations.
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
This makes it better suited for geometry optimization studies where a static,
time-averaged structure is desired. The "s" stands for "static".
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
molecular mechanics calculations. It's a tool for geometry optimization,
energy minimization, and exploring molecular conformations in 3D space.
UFF is often used to refine conformers generated by other methods,
such as random conformer generation, to produce more physically plausible
and stable structures.
'xTB' - GFN2-xTB
Args for xTB:
water (str, optional): water solvation model (choose 'gbsa' or 'alpb')
alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
gbsa: generalized Born (GB) model with Surface Area contributions.
Defaults to None.
Returns:
float | None: potential energy in kcal/mol or None.
"""
if isinstance(calculator, str):
if calculator.lower() == "xTB".lower():
results = GFN2xTB(self.rdmol).singlepoint(water=water)
# results = SimpleNamespace(
# PE = datadict['total energy'] * hartree2kcalpermol,
# Gsolv = Gsolv,
# charges = datadict['partial charges'],
# wbo = Wiberg_bond_orders,
# )
PE = results.PE
elif (
calculator.lower() == "MMFF94".lower()
or calculator.lower() == "MMFF".lower()
):
mp = Chem.rdForceFieldHelpers.MMFFGetMoleculeProperties(
self.rdmol, mmffVariant="MMFF94"
)
ff = Chem.rdForceFieldHelpers.MMFFGetMoleculeForceField(self.rdmol, mp)
PE = ff.CalcEnergy()
elif calculator.lower() == "MMFF94s".lower():
mp = Chem.rdForceFieldHelpers.MMFFGetMoleculeProperties(
self.rdmol, mmffVariant="MMFF94s"
)
ff = Chem.rdForceFieldHelpers.MMFFGetMoleculeForceField(self.rdmol, mp)
PE = ff.CalcEnergy()
elif calculator.lower() == "UFF".lower():
ff = Chem.rdForceFieldHelpers.UFFGetMoleculeForceField(self.rdmol)
PE = ff.CalcEnergy()
else:
raise ValueError("Unsupported calculator")
self.props.update({"E_tot(kcal/mol)": PE})
return self
else:
try:
ase_atoms = ase.Atoms(symbols=self.symbols, positions=self.positions)
ase_atoms.info["charge"] = self.charge
ase_atoms.info["spin"] = self.spin
ase_atoms.calc = calculator
PE = ase_atoms.get_potential_energy() # np.array
if isinstance(PE, float):
PE = rdworks.units.ev2kcalpermol * PE
elif isinstance(PE, np.ndarray | list):
PE = rdworks.units.ev2kcalpermol * float(
PE[0]
) # np.float64 to float
self.props.update({"E_tot(kcal/mol)": PE})
return self
except:
raise RuntimeError("ASE calculator error")
sync(coord)
¶
Synchronize the conformer coordinates with the provided coord.
Parameters:
-
coord(array) –3D coordinates.
Raises:
-
ValueError–if
coorddoes not have the correct shape (natoms, 3).
Returns:
-
Self(Self) –rdworks.Confobject.
Source code in src/rdworks/conf.py
def sync(self, coord: np.ndarray | list) -> Self:
"""Synchronize the conformer coordinates with the provided `coord`.
Args:
coord (np.array): 3D coordinates.
Raises:
ValueError: if `coord` does not have the correct shape (natoms, 3).
Returns:
Self: `rdworks.Conf` object.
"""
if isinstance(coord, np.ndarray) and coord.shape != (self.natoms, 3):
raise ValueError(f"`coord.shape` should be ({self.natoms},3)")
elif isinstance(coord, list) and len(coord) != self.natoms:
raise ValueError(f"`coord` should be length of {self.natoms}")
for i, a in enumerate(self.rdmol.GetAtoms()):
self.rdmol.GetConformer().SetAtomPosition(a.GetIdx(), coord[i])
return self
to_geometry()
¶
Returns geometry input for psi4 or other quantum chemistry software.
Each line has atom symbol and its X, Y, and Z coordinates. Unit is Angstrom.
O 0.0 0.0 0.0 H 0.757 0.586 0.0 H -0.757 0.586 0.0
Example
import psi4 geometry = conf.to_geometry() mol = psi4.geometry(geometry)
Returns:
-
str(str) –geometry input for psi4 or other quantum chemistry software.
Source code in src/rdworks/conf.py
def to_geometry(self) -> str:
"""Returns geometry input for psi4 or other quantum chemistry software.
Each line has atom symbol and its X, Y, and Z coordinates.
Unit is Angstrom.
O 0.0 0.0 0.0
H 0.757 0.586 0.0
H -0.757 0.586 0.0
Example:
import psi4
geometry = conf.to_geometry()
mol = psi4.geometry(geometry)
Returns:
str: geometry input for psi4 or other quantum chemistry software.
"""
lines = [
f"{e:5} {x:.3f} {y:.3f} {z:.3f}"
for e, (x, y, z) in zip(self.symbols, self.positions)
]
return "\n".join(lines)
to_png(width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, trim=True)
¶
Draw 2D molecule in PNG format.
Parameters:
-
width(int, default:300) –width. Defaults to 300.
-
height(int, default:300) –height. Defaults to 300.
-
legend(str, default:'') –legend. Defaults to ''.
-
atom_index(bool, default:False) –whether to show atom index. Defaults to False.
-
highlight_atoms(list[int] | None, default:None) –atom(s) to highlight. Defaults to None.
-
highlight_bonds(list[int] | None, default:None) –bond(s) to highlight. Defaults to None.
-
redraw(bool, default:False) –whether to redraw. Defaults to False.
-
coordgen(bool, default:False) –whether to use coordgen. Defaults to False.
-
trim(bool, default:True) –whether to trim white margins. Default to True.
Returns:
-
Image–Image.Image: output PIL Image object.
Source code in src/rdworks/conf.py
def to_png(
self,
width: int = 300,
height: int = 300,
legend: str = "",
atom_index: bool = False,
highlight_atoms: list[int] | None = None,
highlight_bonds: list[int] | None = None,
redraw: bool = False,
coordgen: bool = False,
trim: bool = True,
) -> Image.Image:
"""Draw 2D molecule in PNG format.
Args:
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
legend (str, optional): legend. Defaults to ''.
atom_index (bool, optional): whether to show atom index. Defaults to False.
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
redraw (bool, optional): whether to redraw. Defaults to False.
coordgen (bool, optional): whether to use coordgen. Defaults to False.
trim (bool, optional): whether to trim white margins. Default to True.
Returns:
Image.Image: output PIL Image object.
"""
return render_png(
self.rdmol,
width=width,
height=height,
legend=legend,
atom_index=atom_index,
highlight_atoms=highlight_atoms,
highlight_bonds=highlight_bonds,
redraw=redraw,
coordgen=coordgen,
trim=trim,
)
to_sdf(props=True)
¶
Returns the SDF-formatted strings.
Parameters:
-
props(bool, default:True) –include `props as SDF properties. Defaults to True.
Returns:
-
str(str) –strings in the SDF format.
Source code in src/rdworks/conf.py
def to_sdf(self, props: bool = True) -> str:
"""Returns the SDF-formatted strings.
Args:
props (bool, optional): include `props as SDF properties. Defaults to True.
Returns:
str: strings in the SDF format.
"""
in_memory = io.StringIO()
with Chem.SDWriter(in_memory) as f:
rdmol = Chem.Mol(self.rdmol)
rdmol.SetProp("_Name", self.name)
if props:
for k, v in self.props.items():
rdmol.SetProp(k, str(v))
f.write(rdmol)
return in_memory.getvalue()
to_svg(width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, optimize=True)
¶
Draw 2D molecule in SVG format.
Examples:
For Jupyternotebook, wrap the output with SVG:
Parameters:
-
width(int, default:300) –width. Defaults to 300.
-
height(int, default:300) –height. Defaults to 300.
-
legend(str, default:'') –legend. Defaults to ''.
-
atom_index(bool, default:False) –whether to show atom index. Defaults to False.
-
highlight_atoms(list[int] | None, default:None) –atom(s) to highlight. Defaults to None.
-
highlight_bonds(list[int] | None, default:None) –bond(s) to highlight. Defaults to None.
-
redraw(bool, default:False) –whether to redraw. Defaults to False.
-
coordgen(bool, default:False) –whether to use coordgen. Defaults to False.
-
optimize(bool, default:True) –whether to optimize SVG string. Defaults to True.
Returns:
-
str(str) –SVG string
Source code in src/rdworks/conf.py
def to_svg(
self,
width: int = 300,
height: int = 300,
legend: str = "",
atom_index: bool = False,
highlight_atoms: list[int] | None = None,
highlight_bonds: list[int] | None = None,
redraw: bool = False,
coordgen: bool = False,
optimize: bool = True,
) -> str:
"""Draw 2D molecule in SVG format.
Examples:
For Jupyternotebook, wrap the output with SVG:
>>> from IPython.display import SVG
>>> SVG(libr[0].to_svg())
Args:
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
legend (str, optional): legend. Defaults to ''.
atom_index (bool, optional): whether to show atom index. Defaults to False.
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
redraw (bool, optional): whether to redraw. Defaults to False.
coordgen (bool, optional): whether to use coordgen. Defaults to False.
optimize (bool, optional): whether to optimize SVG string. Defaults to True.
Returns:
str: SVG string
"""
return render_svg(
self.rdmol,
width=width,
height=height,
legend=legend,
atom_index=atom_index,
highlight_atoms=highlight_atoms,
highlight_bonds=highlight_bonds,
redraw=redraw,
coordgen=coordgen,
optimize=optimize,
)
to_turbomole(bohr=False)
¶
Returns TURBOMOLE coord file formatted strings.
Turbomole coord file format:
- It starts with the keyword `$coord`.
- Each line after the $coord line specifies an atom, consisting of:
- Three real numbers representing the Cartesian coordinates (x, y, z).
- A string for the element name.
- Optional: an "f" label at the end to indicate that the atom's coordinates are frozen during optimization.
- Coordinates can be given in Bohr (default), Ångström (`$coord angs`), or fractional coordinates (`$coord frac`).
- Optional data groups like periodicity (`$periodic`), lattice parameters (`$lattice`), and cell parameters (`$cell`) can also be included.
- Regarding precision:
The precision of the coordinates is crucial for accurate calculations, especially geometry optimizations.
Tools like the TURBOMOLEOptimizer might check for differences in atomic positions with a tolerance of 1e-13.
Parameters:
-
bohr(bool, default:False) –whether to use Bohr units of the coordinates. Defaults to False. Otherwise, Angstrom units will be used.
Returns:
-
str(str) –TURBOMOLE coord formatted file.
Source code in src/rdworks/conf.py
def to_turbomole(self, bohr: bool = False) -> str:
"""Returns TURBOMOLE coord file formatted strings.
Turbomole coord file format:
- It starts with the keyword `$coord`.
- Each line after the $coord line specifies an atom, consisting of:
- Three real numbers representing the Cartesian coordinates (x, y, z).
- A string for the element name.
- Optional: an "f" label at the end to indicate that the atom's coordinates are frozen during optimization.
- Coordinates can be given in Bohr (default), Ångström (`$coord angs`), or fractional coordinates (`$coord frac`).
- Optional data groups like periodicity (`$periodic`), lattice parameters (`$lattice`), and cell parameters (`$cell`) can also be included.
- Regarding precision:
The precision of the coordinates is crucial for accurate calculations, especially geometry optimizations.
Tools like the TURBOMOLEOptimizer might check for differences in atomic positions with a tolerance of 1e-13.
Args:
bohr (bool): whether to use Bohr units of the coordinates. Defaults to False.
Otherwise, Angstrom units will be used.
Returns:
str: TURBOMOLE coord formatted file.
"""
if bohr:
lines = ["$coord"]
else:
lines = ["$coord angs"]
for (x, y, z), e in zip(self.positions, self.symbols):
lines.append(f"{x:20.15f} {y:20.15f} {z:20.15f} {e}")
lines.append("$end")
return "\n".join(lines)
to_xyz()
¶
Returns XYZ formatted strings.
Returns:
-
str(str) –XYZ formatted strings.