Utils
Graph.add_node(node_for_adding, **attr)[source]¶
Add a single node node_for_adding and update node attributes.
Parameters:
node_for_adding: node
A node can be any hashable Python object except None.
attr : keyword arguments, optional
Set or change node attributes using key=value.
Examples:
G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
G.add_node(1)
G.add_node("Hello")
K3 = nx.Graph([(0, 1), (1, 2), (2, 0)])
G.add_node(K3)
G.number_of_nodes()
>>> 3
G.add_node(1, size=10)
G.add_node(3, weight=0.4, UTM=("13S", 382871, 3972649))
Graph.add_edge(u_of_edge, v_of_edge, **attr)[source]¶
Add an edge between u and v.
The nodes u and v will be automatically added if they are not already in the graph.
Edge attributes can be specified with keywords or by directly accessing the edge's attribute dictionary.
Parameters:
u_of_edge, v_of_edge: nodes
Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects.
attr: keyword arguments, optional
Edge data (or labels or objects) can be assigned using keyword arguments.
Examples:
G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
e = (1, 2)
G.add_edge(1, 2) # explicit two-node form
G.add_edge(*e) # single edge as tuple of two nodes
G.add_edges_from([(1, 2)]) # add edges from iterable container
G.add_edge(1, 2, weight=3)
G.add_edge(1, 3, weight=7, capacity=15, length=342.7)
G.add_edge(1, 2)
G[1][2].update({0: 5})
G.edges[1, 2].update({0: 5})
Isomorphism¶
In NetworkX, the node_label parameter in the vf2pp_all_isomorphisms function is the name of a node attribute
that is used to compare nodes when checking for isomorphism between two graphs.
The default value for node_label is None, which means that node attributes are not considered in the comparison.
If a node doesn't have the node_label attribute, the default_label value is used instead.
API¶
rdworks.utils
¶
Functions¶
QT(rmsd_matrix, threshold)
¶
Perform QT clustering.
Parameters:
-
rmsd_matrix(ndarray) –pairwise rmsd matrix.
-
threshold(float) –quality threshold (A)
Returns:
-
tuple(tuple) –(cluster assignment, centroid indices)
Source code in src/rdworks/utils.py
def QT(rmsd_matrix: np.ndarray, threshold: float) -> tuple:
    """Cluster items with the quality threshold (QT) algorithm.

    Args:
        rmsd_matrix (np.ndarray): pairwise rmsd matrix.
        threshold (float): quality threshold (A).

    Returns:
        tuple: (cluster assignment, centroid indices)
    """
    num_items = rmsd_matrix.shape[0]
    found_clusters = []
    # `found_clusters` is handed to the helper and read back afterwards, e.g.
    # [{6, 7, 11}, {4, 5, 8}, {0}, {1}, {10}, {9}, {2}, {3}]
    _QT_clustering(rmsd_matrix, set(list(range(num_items))), threshold, found_clusters)

    # Invert "cluster -> members" into "member -> cluster index".
    labels = [None] * num_items
    for label, members in enumerate(found_clusters):
        for member in members:
            labels[member] = label

    return labels, centroid_medoid(labels, rmsd_matrix)
compress_string(data)
¶
compress string to base64-encoded string.
Parameters:
-
data(str) –original string.
Returns:
-
str(str) –base64-encoded compressed string.
Source code in src/rdworks/utils.py
def compress_string(data: str) -> str:
    """Compress a string with zlib and return it as base64 text.

    Args:
        data (str): original string.

    Returns:
        str: base64-encoded compressed string.
    """
    raw = data.encode("utf-8")
    return base64.b64encode(zlib.compress(raw)).decode("utf-8")
compute(fn, largs, **kwargs)
¶
Source code in src/rdworks/utils.py
def _call_with_args(fn: Callable, args: tuple):
    """Call ``fn`` with ``args`` unpacked as positional arguments."""
    return fn(*args)


def compute(fn: Callable, largs: list, **kwargs) -> list:
    """Apply ``fn`` to every argument tuple in ``largs`` and collect the results.

    Each item of ``largs`` is unpacked into positional arguments, i.e. the
    call is ``fn(*item)``, in both the serial and the parallel code path.

    Args:
        fn (Callable): function to call. It must be picklable when
            ``max_workers > 1`` because it is sent to worker processes.
        largs (list): list of argument tuples, one per call.
        **kwargs: optional settings:
            max_workers (int): number of worker processes. Defaults to 1 (serial).
            chunksize (int): chunk size for the process pool. Defaults to 10.
            progress (bool): whether to show a tqdm progress bar. Defaults to False.
            desc (str): progress bar label. Defaults to "Progress".

    Returns:
        list: results in the same order as ``largs``.
    """
    from functools import partial

    max_workers = kwargs.get("max_workers", 1)
    chunksize = kwargs.get("chunksize", 10)
    progress = kwargs.get("progress", False)
    desc = kwargs.get("desc", "Progress")
    n = len(largs)

    if max_workers > 1:
        # executor.map() passes each item of `largs` as ONE argument, so wrap
        # `fn` to unpack the tuple exactly like the serial branch does.
        task = partial(_call_with_args, fn)
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            pending = executor.map(task, largs, chunksize=chunksize)
            if progress:
                pending = tqdm(pending, desc=desc, total=n)
            return list(pending)

    items = tqdm(largs, desc=desc, total=n) if progress else largs
    return [fn(*larg) for larg in items]
convert_tril_to_symm(lower_triangle_values)
¶
Converts lower triangle values to a symmetric full matrix.
Parameters:
-
lower_triangle_values(list) –list of lower triangle matrix values.
Returns:
-
ndarray–np.ndarray: numpy array of a symmetric full matrix.
Source code in src/rdworks/utils.py
def convert_tril_to_symm(lower_triangle_values: list) -> np.ndarray:
    """Converts lower triangle values to a symmetric full matrix.

    The values fill the strict lower triangle (diagonal excluded) in the
    order given by ``np.tril_indices(n, k=-1)``; the diagonal is zero.

    Args:
        lower_triangle_values (list): list of lower triangle matrix values.

    Raises:
        ValueError: if the number of values is not n*(n-1)/2 for some n.

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    count = len(lower_triangle_values)
    n = math.ceil(math.sqrt(count * 2))
    if n * (n - 1) // 2 != count:
        raise ValueError(
            f"{count} values cannot fill the strict lower triangle of a square matrix"
        )
    matrix = np.zeros((n, n))
    matrix[np.tril_indices(n, k=-1)] = lower_triangle_values
    # The diagonal and upper triangle are zero, so adding the transpose mirrors
    # every value as-is (np.maximum would turn negative values into 0).
    return matrix + matrix.transpose()
convert_triu_to_symm(upper_triangle_values)
¶
Converts upper triangle values to a symmetric full matrix.
Parameters:
-
upper_triangle_values(list) –list of upper triangle matrix values.
Returns:
-
ndarray–np.ndarray: numpy array of a symmetric full matrix.
Source code in src/rdworks/utils.py
def convert_triu_to_symm(upper_triangle_values: list) -> np.ndarray:
    """Converts upper triangle values to a symmetric full matrix.

    The values fill the strict upper triangle (diagonal excluded) in the
    order given by ``np.triu_indices(n, k=1)``; the diagonal is zero.

    Args:
        upper_triangle_values (list): list of upper triangle matrix values.

    Raises:
        ValueError: if the number of values is not n*(n-1)/2 for some n.

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    count = len(upper_triangle_values)
    n = math.ceil(math.sqrt(count * 2))
    if n * (n - 1) // 2 != count:
        raise ValueError(
            f"{count} values cannot fill the strict upper triangle of a square matrix"
        )
    matrix = np.zeros((n, n))
    matrix[np.triu_indices(n, k=1)] = upper_triangle_values
    # The diagonal and lower triangle are zero, so adding the transpose mirrors
    # every value as-is (np.maximum would turn negative values into 0).
    return matrix + matrix.transpose()
decompress_string(encoded_str)
¶
decompress base64-encoded string to original string.
Parameters:
-
encoded_str(str) –base64-encoded compressed string.
Returns:
-
str(str) –original string.
Source code in src/rdworks/utils.py
def decompress_string(encoded_str: str) -> str:
    """Decode a base64 string, decompress it and return the original text.

    Args:
        encoded_str (str): base64-encoded compressed string.

    Returns:
        str: original string.
    """
    # Base64 text must be a multiple of 4 characters long; restore any
    # padding that was stripped.
    padded = encoded_str + "=" * (-len(encoded_str) % 4)
    return zlib.decompress(base64.b64decode(padded)).decode("utf-8")
deserialize(encoded_str)
¶
Decode, decompress, and deserialize a base64 string back to Python object.
Parameters:
-
encoded_str(str) –Base64-encoded compressed JSON string
Returns:
-
Any–Deserialized Python object
Source code in src/rdworks/utils.py
def deserialize(encoded_str: str) -> Any:
    """
    Decode, decompress, and deserialize a base64 string back to Python object.

    Args:
        encoded_str: Base64-encoded compressed JSON string

    Raises:
        ValueError: if the input cannot be base64-decoded, decompressed,
            decoded as UTF-8, or parsed as JSON. The underlying error is
            attached as ``__cause__``.

    Returns:
        Deserialized Python object
    """
    try:
        # Base64 text only contains: A-Z, a-z, 0-9, +, /, =
        compressed = base64.b64decode(encoded_str.encode("utf-8"))
        json_str = zlib.decompress(compressed).decode("utf-8")
        return json.loads(json_str)
    except (zlib.error, binascii.Error, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Chain explicitly so the traceback shows which stage failed.
        raise ValueError(f"Failed to deserialize data: {e}") from e
dict_to_simplenamespace(data)
¶
fix_decimals_in_dict(in_dict, decimals=2)
¶
Fixes the decimal places of all float values in a dictionary.
Parameters:
-
dictionary–The dictionary to fix.
-
decimals(int, default:2) –The number of decimal places to fix the float values to.
Returns:
-
dict(dict) –a dictionary with the float values fixed to the specified number of decimal places.
Source code in src/rdworks/utils.py
def fix_decimals_in_dict(in_dict: dict, decimals: int = 2) -> dict:
    """Round every float value in a dictionary, descending into containers.

    Args:
        in_dict: The dictionary to fix.
        decimals (int): The number of decimal places to fix the float values to.

    Returns:
        dict: a new dictionary with the float values fixed to the specified
            number of decimal places. Non-float leaves are kept as they are.
    """

    def _fixed(value):
        if isinstance(value, float):
            return round(value, decimals)
        if isinstance(value, (list, tuple)):
            return fix_decimals_in_list(value, decimals)
        if isinstance(value, dict):
            return fix_decimals_in_dict(value, decimals)
        return value

    return {key: _fixed(value) for key, value in in_dict.items()}
fix_decimals_in_list(in_list, decimals=2)
¶
Fixes the decimal places of all float values in a list.
Parameters:
-
list–The list to fix.
-
decimals(int, default:2) –The number of decimal places to fix the float values to.
Returns:
-
list(list) –a list with the float values fixed to the specified number of decimal places.
Source code in src/rdworks/utils.py
def fix_decimals_in_list(in_list: list, decimals: int = 2) -> list:
    """Round every float item in a list, descending into containers.

    Args:
        in_list: The list (or tuple) to fix.
        decimals (int): The number of decimal places to fix the float values to.

    Returns:
        list: a new list with the float values fixed to the specified number
            of decimal places. Nested tuples come back as lists.
    """

    def _fixed(item):
        if isinstance(item, float):
            return round(item, decimals)
        if isinstance(item, dict):
            return fix_decimals_in_dict(item, decimals)
        if isinstance(item, (list, tuple)):
            return fix_decimals_in_list(item, decimals)
        return item

    return [_fixed(item) for item in in_list]
recursive_round(data, decimals=2)
¶
Recursively round float values to a given decimal places.
data: The input data, which can be a list, dictionary, or any other data type. It can contain nested lists and dictionaries. decimals: number of decimal places.
Source code in src/rdworks/utils.py
def recursive_round(data: Any, decimals: int = 2) -> Any:
    """Recursively round float values to a given decimal places.

    Python floats and numpy floating scalars (np.float64, np.float32, ...)
    are rounded and returned as plain Python floats. Lists and dictionaries
    are traversed recursively; every other value is returned unchanged.

    Args:
        data: The input data, which can be a list, dictionary, or any
            other data type. It can contain nested lists and dictionaries.
        decimals: number of decimal places.

    Raises:
        ValueError: if `decimals` is not a non-negative integer.

    Returns:
        The data with the same structure and rounded float values.
    """
    if not isinstance(decimals, int) or decimals < 0:
        raise ValueError("decimals must be a non-negative integer.")

    def _recursive_round(current_item):
        # np.float64 is a subclass of float, so both kinds are handled in one
        # branch and float() normalises numpy scalars to Python floats.
        if isinstance(current_item, (float, np.floating)):
            return round(float(current_item), decimals)
        if isinstance(current_item, list):
            return [_recursive_round(item) for item in current_item]
        if isinstance(current_item, dict):
            return {key: _recursive_round(value) for key, value in current_item.items()}
        return current_item

    return _recursive_round(data)
serialize(data)
¶
Serialize, compress, and encode data to a base64 string.
Notes
The JSON specification only supports string keys in objects. For example, after JSON-serialization/deserialization, keys of integer type are changed to string. {1: 'a', 2: 'b', 3: 'c'} --> {'1': 'a', '2': 'b', '3': 'c'} Unfortunately, this is a fundamental limitation of JSON itself. Integer keys are not valid JSON.
Parameters:
-
data(Any) –Any JSON-serializable Python object
Returns:
-
str–Base64-encoded string
Source code in src/rdworks/utils.py
def serialize(data: Any) -> str:
    """
    Serialize, compress, and encode data to a base64 string.

    Notes:
        JSON objects only allow string keys, so non-string keys do not
        survive a round trip unchanged. For example, integer keys come back
        as strings:
        {1: 'a', 2: 'b', 3: 'c'} --> {'1': 'a', '2': 'b', '3': 'c'}
        This is a limitation of JSON itself.

    Args:
        data: Any JSON-serializable Python object

    Returns:
        Base64-encoded string
    """
    # Compact JSON (no spaces after separators) -> UTF-8 bytes.
    payload = json.dumps(data, separators=(",", ":")).encode("utf-8")
    # zlib-compress, then base64 so the result is plain text made of
    # A-Z, a-z, 0-9, +, / and =.
    return base64.b64encode(zlib.compress(payload)).decode("utf-8")
rdworks.units
¶
rdworks.io
¶
Attributes¶
# Pattern tested with `.match()` against conformer names in read_mae() and
# read_sdf(); their comments give the rdworks name convention as e.g. xxxx.yy/zzz.
# NOTE(review): the `.` between the character class and the first `[0-9]+` is
# unescaped, so it matches any character, not only a literal dot — confirm.
conf_name_convention = re.compile('[a-zA-Z0-9-_.!@#$%^&*()+=]+.[0-9]+/[0-9]+')
module-attribute
¶
Classes¶
Functions¶
guess_molecular_id(rdmols)
¶
Guesses molecular ID from SDF properties.
Molecular ID is guessed by the coverage(=count of unique values divided by total count).
Priority
_Name > ID property with shortest values > any ID property
Parameters:
-
rdmols(List[Mol]) –a list of molecules.
Returns:
-
str(str | None) –property suitable for id
Source code in src/rdworks/io.py
def guess_molecular_id(rdmols: list[Chem.Mol]) -> str | None:
    """Guesses molecular ID from SDF properties.

    A property qualifies as an ID when it has a distinct value for every
    molecule (coverage = count of unique values divided by total count = 1).

    Priority:
        `_Name` > ID property with shortest values > any ID property

    Args:
        rdmols (List[Chem.Mol]): a list of molecules.

    Returns:
        str | None: property suitable for id, or None if no property
            qualifies (including when `rdmols` is empty).
    """
    values_by_property = defaultdict(list)
    for rdmol in rdmols:
        properties = rdmol.GetPropsAsDict()
        # `_Name` may be absent from the properties dict, so fetch it
        # explicitly; guard with HasProp so a molecule without a name does
        # not raise.
        if "_Name" not in properties and rdmol.HasProp("_Name"):
            values_by_property["_Name"].append(rdmol.GetProp("_Name"))
        for key, value in properties.items():
            # float is not suitable for molecular id
            if isinstance(value, float):
                continue
            # str(int) is acceptable for molecular id
            if isinstance(value, int):
                values_by_property[key].append(str(value))
            elif isinstance(value, str):
                values_by_property[key].append(value)

    total = len(rdmols)
    id_properties = [
        key for key, values in values_by_property.items() if len(set(values)) == total
    ]
    if not id_properties:
        return None
    if "_Name" in id_properties:
        return "_Name"
    # Prefer the property whose longest value is shortest; ties are broken
    # by property name so the result is deterministic.
    return min(
        id_properties,
        key=lambda key: (max(len(v) for v in values_by_property[key]), key),
    )
merge_csv(libr, path, on='name')
¶
Returns a copy of MolLibr merged with properties from on column of a .csv file.
Parameters:
-
libr(MolLibr) –library to be merged.
-
path(Union[str, Path]) –filename or path to a .csv file.
-
on(str, default:'name') –column for name. Defaults to 'name'.
Raises:
-
ValueError – if
`on` column is not found in the csv file.
Returns:
-
MolLibr(MolLibr) –a copy of library of molecules.
Source code in src/rdworks/io.py
def merge_csv(libr: MolLibr, path: str | Path, on: str = "name") -> MolLibr:
    """Returns a copy of MolLibr merged with properties from `on` column of a .csv file.

    Rows are matched to molecules by comparing the `on` column value with
    `mol.name`. Molecules without a matching row are left unchanged. If
    several rows share the same `on` value, the last one wins.

    Args:
        libr (MolLibr): library to be merged.
        path (Union[str, Path]): filename or path to a .csv file.
        on (str, optional): column for name. Defaults to 'name'.

    Raises:
        ValueError: if `on` column is not found in the csv file.

    Returns:
        MolLibr: a copy of library of molecules.
    """
    path = validate_path(path)
    df = pd.read_csv(path)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if on not in df.columns:
        raise ValueError(f"Cannot find ON column (`on=`) {on}")

    # to_dict("records") gives one {column -> value} dict per row.
    # Index the remaining columns of each row by its `on` value.
    data = {
        row[on]: {k: v for k, v in row.items() if k != on}
        for row in df.to_dict("records")
    }

    merged_libr = libr.copy()
    for mol in merged_libr:
        if mol.name in data:  # mol.props can be partly updated from csv
            mol.props.update(data[mol.name])
    return merged_libr
read_csv(path, smiles, name, std=False, **kwargs)
¶
Returns a library of molecules reading from a .csv file.
Other columns will be read as properties.
Parameters:
-
path(Union[str, Path]) –filename or path to a .csv file.
-
smiles(str) –column for SMILES.
-
name(str) –column for name.
-
std(bool, default:False) –whether to standardize the input. Defaults to False.
Raises:
-
ValueError – if
`smiles` or `name` column is not found in the csv file.
Returns:
-
MolLibr(MolLibr) –a library of molecules.
Source code in src/rdworks/io.py
def read_csv(
    path: str | Path, smiles: str, name: str, std: bool = False, **kwargs
) -> MolLibr:
    """Returns a library of molecules reading from a .csv file.

    Other columns will be read as properties.

    Args:
        path (Union[str, Path]): filename or path to a .csv file.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.
        **kwargs: forwarded to `compute()` (e.g. max_workers, chunksize, progress).

    Raises:
        ValueError: if `smiles` or `name` column is not found in the csv file.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    df = pd.read_csv(path)
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")

    # One (SMILES, name, std) argument tuple per row; the loop variables are
    # named differently from the `smiles`/`name` column-name parameters.
    largs = [
        (row_smiles, row_name, std)
        for row_smiles, row_name in zip(list(df[smiles]), list(df[name]))
    ]
    libr = MolLibr(compute(Mol, largs, desc="Reading CSV", **kwargs))

    # Read other columns as properties.
    # to_dict("records") gives one {column -> value} dict per row.
    id_columns = (smiles, name)
    for mol, row_dict in zip(libr, df.to_dict("records")):
        mol.props.update({k: v for k, v in row_dict.items() if k not in id_columns})
    return libr
read_dataframe(df, smiles, name, std=False)
¶
Returns rdworks.MolLibr object from a pandas DataFrame.
Parameters:
-
df(DataFrame) –pandas.DataFrame.
-
smiles(str) –column for SMILES.
-
name(str) –column for name.
-
std(bool, default:False) –whether to standardize the input. Defaults to False.
Raises:
-
TypeError – if
`df` is not a pandas DataFrame.
-
ValueError – if
`smiles` or `name` column is not found.
Returns:
-
MolLibr(MolLibr) –a library of molecules.
Source code in src/rdworks/io.py
def read_dataframe(
    df: pd.DataFrame, smiles: str, name: str, std: bool = False
) -> MolLibr:
    """Returns rdworks.MolLibr object from a pandas DataFrame.

    Args:
        df (pd.DataFrame): pandas.DataFrame.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.

    Raises:
        TypeError: if `df` is not pandas DataFrame.
        ValueError: if `smiles` or `name` column is not found.

    Returns:
        MolLibr: a library of molecules.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Expects a pandas.DataFrame object")
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")
    return MolLibr(list(df[smiles]), list(df[name]), std=std)
read_mae(path, std=False, confs=True, **kwargs)
¶
Returns a library of molecules reading from a Schrodinger Maestro file.
Parameters:
-
path(Union[str, Path]) –filename or path to the .mae or .maegz file.
-
std(bool, default:False) –whether to standardize the input. Defaults to False.
-
confs(bool, default:True) –whether to read 3D conformers. Defaults to True.
Returns:
-
MolLibr(MolLibr) –a library of molecules.
Source code in src/rdworks/io.py
def read_mae(
    path: str | Path, std: bool = False, confs: bool = True, **kwargs
) -> MolLibr:
    """Returns a library of molecules reading from a Schrodinger Maestro file.

    With `confs=True`, consecutive records that share the same canonical
    SMILES are collected as conformers of one molecule. With `confs=False`,
    every record becomes its own molecule.

    Args:
        path (Union[str, Path]): filename or path to the .mae or .maegz file.
        std (bool, optional): whether to standardize the input. Defaults to False.
        confs (bool, optional): whether to read 3D conformers. Defaults to True.
        **kwargs: forwarded to `compute()` when `confs` is False.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    # XOR with True flips the flag: hydrogens are removed unless conformers
    # are being read.
    strip_hydrogens = confs ^ True
    if path.suffix == ".maegz":
        with gzip.open(path, "rb") as gz, rdmolfiles.MaeMolSupplier(
            gz, sanitize=True, removeHs=strip_hydrogens
        ) as supplier:
            rdmols = [m for m in supplier if m is not None]
    else:
        with rdmolfiles.MaeMolSupplier(
            path, sanitize=True, removeHs=strip_hydrogens
        ) as supplier:
            rdmols = [m for m in supplier if m is not None]

    lnames = [m.GetProp("_Name") for m in rdmols]
    largs = [(rdmol, name, std) for rdmol, name in zip(rdmols, lnames)]
    obj = MolLibr()

    if not confs:
        # 2D input: one molecule per record.
        return MolLibr(compute(Mol, largs, desc="Reading Mae", **kwargs))

    # 3D input (conformers): group consecutive records by canonical SMILES.
    previous_smiles = None
    current_mol = None
    for rdmol, name in zip(rdmols, lnames):
        # rdworks name convention (e.g. xxxx.yy/zzz): keep the part before "/"
        if conf_name_convention.match(name):
            isomer_name, _ = name.split("/")
        else:
            isomer_name = name
        smiles = Chem.MolToSmiles(rdmol)  # canonicalized SMILES
        starts_new_molecule = previous_smiles is None or previous_smiles != smiles
        if starts_new_molecule:
            if current_mol:
                obj.libr.append(current_mol.rename())
            # The new molecule is built from an H-removed copy with computed
            # 2D coordinates; the original record is kept as its conformer.
            heavy_atom_mol = Chem.RemoveHs(rdmol)
            AllChem.Compute2DCoords(heavy_atom_mol)
            current_mol = Mol(heavy_atom_mol, isomer_name, std=False)
        current_mol.confs.append(Conf(rdmol))
        previous_smiles = smiles
    if current_mol:  # flush the last molecule
        obj.libr.append(current_mol.rename())
    return obj
read_sdf(path, name=None, prefix=None, std=False, confs=False, props=True, **kwargs)
¶
Returns a library of molecules reading from a SDF file.
Parameters:
-
path(Union[str, PosixPath]) –filename or path to the .sdf file.
-
name(str, default:None) –property name to be used for name. Defaults to None.
-
prefix(str, default:None) –prefix to be used for name if necessary. Defaults to None.
-
std(bool, default:False) –whether to standardize the input. Defaults to False.
-
confs(bool, default:False) –whether to read 3D conformers and keep hydrogens. Defaults to False.
-
props(bool, default:True) –whether to read SDF properties. Defaults to True.
Returns:
-
MolLibr(MolLibr) –a library of molecules.
Source code in src/rdworks/io.py
def read_sdf(
    path: str | Path,
    name: str | None = None,
    prefix: str | None = None,
    std: bool = False,
    confs: bool = False,
    props: bool = True,
    **kwargs,
) -> MolLibr:
    """Returns a library of molecules reading from a SDF file.

    With `confs=True`, consecutive records that share the same canonical
    SMILES are collected as conformers of one molecule and hydrogens are
    kept. With `confs=False`, every record becomes its own molecule.

    Args:
        path (Union[str, PosixPath]): filename or path to the .sdf file
            (a `.gz` suffix is read through gzip).
        name (str, optional): property name to be used for name. If None,
            the ID property is guessed with `guess_molecular_id()`.
            Defaults to None.
        prefix (str, optional): prefix to be used for name if necessary, i.e.
            when no ID property is available; names are then
            `{prefix}_1`, `{prefix}_2`, ... Defaults to None.
        std (bool, optional): whether to standardize the input. Defaults to False.
        confs (bool, optional): whether to read 3D conformers and keep hydrogens. Defaults to False.
        props (bool, optional): whether to read SDF properties. Defaults to True.
        **kwargs: forwarded to `compute()` when `confs` is False.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    # Hydrogens are removed unless conformers are being read.
    remove_hs = not confs
    if path.suffix == ".gz":
        with gzip.open(path, "rb") as gz:
            with Chem.ForwardSDMolSupplier(
                gz, sanitize=True, removeHs=remove_hs
            ) as gzsdf:
                rdmols = [m for m in gzsdf if m is not None]
    else:
        with Chem.SDMolSupplier(path, sanitize=True, removeHs=remove_hs) as sdf:
            rdmols = [m for m in sdf if m is not None]

    id_property = name if isinstance(name, str) else guess_molecular_id(rdmols)
    if isinstance(id_property, str):
        names = [m.GetProp(id_property) for m in rdmols]
    else:
        names = [f"{prefix}_{i}" for i, _ in enumerate(rdmols, start=1)]

    if props:
        mol_props = [m.GetPropsAsDict() for m in rdmols]
    else:
        mol_props = [None] * len(rdmols)

    if not confs:
        # reading 2D SDF: one molecule per record
        map_args = [
            (rdmol, mol_name, std, record_props)
            for rdmol, mol_name, record_props in zip(rdmols, names, mol_props)
        ]
        return MolLibr(compute(_map_sdf, map_args, desc="Reading SDF", **kwargs))

    # reading 3D SDF (conformers): group consecutive records by canonical SMILES
    obj = MolLibr()
    last_smiles = None
    new_mol = None
    for rdmol, mol_name, record_props in zip(rdmols, names, mol_props):
        # rdworks name convention (e.g. xxxx.yy/zzz): keep the part before "/"
        if conf_name_convention.match(mol_name):
            (isomer_name, _) = mol_name.split("/")
        else:
            isomer_name = mol_name
        smiles = Chem.MolToSmiles(rdmol)  # canonicalized SMILES
        if last_smiles is None or last_smiles != smiles:
            if new_mol:
                obj.libr.append(new_mol.rename())
            # start a new molecule from an H-removed copy with 2D coordinates
            rdmol_2d = Chem.RemoveHs(rdmol)
            AllChem.Compute2DCoords(rdmol_2d)
            new_mol = Mol(rdmol_2d, isomer_name, std=False)
        new_conf = Conf(rdmol)
        # With props=False the entry is None; updating with None would raise.
        if record_props:
            new_conf.props.update(record_props)
        new_mol.confs.append(new_conf)
        last_smiles = smiles
    if new_mol:  # handle the last molecule
        obj.libr.append(new_mol.rename())
    return obj
read_smi(path, std=False, **kwargs)
¶
Read a SMILES file and create a molecular library.
Parameters:
-
path(str | Path) –path to the SMILES file.
-
std(bool, default:False) –whether to standardize. Defaults to False.
Raises:
-
FileNotFoundError–when path does not exist.
Returns:
-
MolLibr(MolLibr) –a library of molecules.
Source code in src/rdworks/io.py
def read_smi(path: str | Path, std: bool = False, **kwargs) -> MolLibr:
    """Read a SMILES file and create a molecular library.

    Each non-blank line is split on whitespace; at most the first two
    fields are used, followed by `std`, as the positional arguments of `Mol`.
    Blank lines are skipped. Files with a `.gz` suffix are read through
    gzip. Both kinds of file are decoded as UTF-8.

    Args:
        path (str | Path): path to the SMILES file.
        std (bool, optional): whether to standardize. Defaults to False.
        **kwargs: forwarded to `compute()`.

    Raises:
        FileNotFoundError: when path does not exist.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    opener = gzip.open if path.suffix == ".gz" else open
    # Text mode with an explicit encoding so plain and gzipped files are
    # decoded identically, independent of the platform default.
    with opener(path, "rt", encoding="utf-8") as smi:
        # NOTE(review): a line with a single field yields (field, std), so
        # `std` lands in Mol's second positional slot — confirm intended.
        largs = [
            tuple(line.split()[:2] + [std]) for line in smi if line.strip()
        ]
    return MolLibr(compute(Mol, largs, desc="Reading SMILES", **kwargs))
validate_path(path)
¶
Prechecks a filename or path and returns it as a pathlib.Path object.
Parameters:
-
path(Union[str, PosixPath]) –filename or path.
Raises:
-
FileNotFoundError–if the path is not found.
Returns:
-
str(Path) –a string for the path.
Source code in src/rdworks/io.py
def validate_path(path: str | Path) -> Path:
    """Prechecks a filename or path and returns it as a pathlib.Path.

    Args:
        path (Union[str, Path]): filename or path. Any object accepted by
            `pathlib.Path()` (e.g. an `os.PathLike`) also works.

    Raises:
        FileNotFoundError: if the path does not exist or is not a regular file.

    Returns:
        Path: the validated path. A `Path` argument is returned as is.
    """
    if not isinstance(path, Path):
        path = Path(path)
    # is_file() is False for missing paths as well as for directories.
    if path.is_file():
        return path
    raise FileNotFoundError(f"File {path.as_posix()} does not exist.")
rdworks.view
¶
Classes¶
DescriptiveDraw
¶
Descriptive 2D Drawing
Source code in src/rdworks/view.py
class DescriptiveDraw:
    """Descriptive 2D Drawing"""

    # 60 evenly spaced angles from 0 to 2*pi (both ends included).
    _angles = np.linspace(0, np.pi * 2, 60)
    _circle_x, _circle_y = np.sin(_angles), np.cos(_angles)
    # Unit-circle outline as a (60, 2) array of (x, y) points; _draw_circle()
    # scales it by a style's "r" and shifts it to the target position.
    circle = np.vstack([_circle_x, _circle_y]).T
    # Named circle styles used by _draw_circle(). Each entry has:
    #   "r"         circle radius (multiplies the unit circle)
    #   "rgba"      RGB (0-255) and opacity (0-1), per the set_style() docstring
    #   "linewidth" outline width
    #   "fill"      whether the circle is filled
    style = {
        "aromatic": {
            "r": 0.3,
            "rgba": (136, 180, 168, 0.6),
            "linewidth": 1,
            "fill": True,
        },
        "conjugated": {
            "r": 0.1,
            "rgba": (51, 51, 51, 0.7),
            "linewidth": 1,
            "fill": True,
        },
        "HBA": {"r": 0.4, "rgba": (11, 57, 235, 0.7), "linewidth": 3, "fill": False},
        "HBD": {"r": 0.5, "rgba": (254, 97, 0, 0.7), "linewidth": 3, "fill": False},
        "ionizable": {
            "r": 0.5,
            "rgba": (254, 97, 0, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        # Bootstrap colors
        "primary": {
            "r": 0.5,
            "rgba": (13, 110, 253, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "secondary": {
            "r": 0.5,
            "rgba": (108, 117, 125, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "success": {
            "r": 0.5,
            "rgba": (25, 135, 84, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "info": {"r": 0.5, "rgba": (13, 202, 240, 0.7), "linewidth": 3, "fill": False},
        "warning": {
            "r": 0.5,
            "rgba": (255, 193, 7, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "danger": {"r": 0.5, "rgba": (220, 53, 69, 0.7), "linewidth": 3, "fill": False},
        "light": {
            "r": 0.5,
            "rgba": (248, 249, 250, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "dark": {"r": 0.5, "rgba": (33, 37, 41, 0.7), "linewidth": 3, "fill": False},
        "blue": {"r": 0.5, "rgba": (13, 110, 253, 0.7), "linewidth": 3, "fill": False},
        "indigo": {
            "r": 0.5,
            "rgba": (102, 16, 242, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "purple": {
            "r": 0.5,
            "rgba": (111, 66, 193, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "pink": {"r": 0.5, "rgba": (214, 51, 132, 0.7), "linewidth": 3, "fill": False},
        "red": {"r": 0.5, "rgba": (220, 53, 69, 0.7), "linewidth": 3, "fill": False},
        "orange": {
            "r": 0.5,
            "rgba": (253, 126, 20, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "yellow": {"r": 0.5, "rgba": (255, 193, 7, 0.7), "linewidth": 3, "fill": False},
        "green": {"r": 0.5, "rgba": (25, 135, 84, 0.7), "linewidth": 3, "fill": False},
        "teal": {"r": 0.5, "rgba": (32, 201, 151, 0.7), "linewidth": 3, "fill": False},
        "cyan": {"r": 0.5, "rgba": (13, 202, 240, 0.7), "linewidth": 3, "fill": False},
        "white": {
            "r": 0.5,
            "rgba": (255, 255, 255, 0.7),
            "linewidth": 3,
            "fill": False,
        },
        "black": {"r": 0.5, "rgba": (0, 0, 0, 0.7), "linewidth": 3, "fill": False},
    }
def __init__(self, rdmol: Chem.Mol, legend: str = "") -> None:
    """Set up the molecule copies and state used for drawing.

    Args:
        rdmol (Chem.Mol): input molecule; it is copied rather than modified.
        legend (str, optional): legend passed to the canvas when the
            molecule is drawn. Defaults to "".
    """
    self.rdmol = Chem.Mol(rdmol)  # copy of input molecule
    self.rdmolH = Chem.AddHs(rdmol)  # does not modify the input molecule object
    # Formal charges are adjusted on the working copy before it is prepared
    # for drawing.
    self._set_basic_nitrogens()
    self._set_acidic_oxygens()
    self.rdmol = Draw.PrepareMolForDrawing(self.rdmol)
    self.legend = legend
    # Conformer 0 of the prepared molecule supplies the atom positions read
    # by _get_coords().
    self.conf = self.rdmol.GetConformer(0)
    self.canvas = None  # assigned in draw()
def _set_basic_nitrogens(self) -> None:
nitrogens = [a for a in self.rdmol.GetAtoms() if a.GetSymbol() == "N"]
for atom in nitrogens:
if atom.GetIsAromatic():
continue
bonds = atom.GetBonds()
conj = any([b.GetIsConjugated() for b in bonds])
if conj:
continue
deg = atom.GetDegree()
if atom.GetExplicitValence() == deg:
atom.SetNumExplicitHs(4 - deg)
atom.SetFormalCharge(+1)
def _set_acidic_oxygens(self) -> None:
    """Assign a -1 formal charge to oxygens matched by the carboxylate SMARTS.

    ``self.rdmol`` is modified in place and sanitized afterwards.
    """
    # carboxylates
    # Only the first atom index of each substructure match is kept.
    oxygens = [
        i[0]
        for i in self.rdmol.GetSubstructMatches(
            Chem.MolFromSmarts("[$([OD1][CX3](=[OD1]))]")
        )
    ]
    for oidx in oxygens:
        atom = self.rdmol.GetAtomWithIdx(oidx)
        # atom.SetNumExplicitHs(0)
        atom.SetFormalCharge(-1)
        atom.UpdatePropertyCache()
    # NOTE(review): the original indentation was lost; sanitization is assumed
    # to run once after the loop rather than per atom — confirm.
    Chem.SanitizeMol(self.rdmol)
def _get_lone_pairs(self, atom_idx: int) -> int:
    """Count the lone pairs on an atom of the hydrogen-added molecule.

    Computed as (outer-shell electrons - explicit valence - formal charge)
    divided by two and truncated to an integer.

    Credit: AstraZeneca/Jazzy

    Args:
        atom_idx (int): atom index.

    Returns:
        int, number of lone pairs.
    """
    atom = self.rdmolH.GetAtomWithIdx(atom_idx)
    outer_electrons = PeriodicTable.GetNOuterElecs(
        Chem.GetPeriodicTable(), atom.GetSymbol()
    )
    bonded_electrons = atom.GetValence(Chem.ValenceType.EXPLICIT)
    unshared_electrons = outer_electrons - bonded_electrons - atom.GetFormalCharge()
    return int(unshared_electrons / 2)
def _get_coords(self, atom_idx: int) -> np.ndarray:
"""Get atomic coordinates
Args:
atom_idx (int): atom index
Returns:
np.ndarray: 2D coordinates
"""
atom_pos = self.conf.GetAtomPosition(atom_idx)
atom_pos = np.array([atom_pos.x, atom_pos.y])
return atom_pos
def _draw_circle(self, pos: np.ndarray, style: str) -> None:
"""Draw a circle at give position and style.
Args:
pos (np.ndarray): position
style (str): drawing style
"""
_ = DescriptiveDraw.style.get(style)
circle_ = DescriptiveDraw.circle * _.get("r") + pos
circle_2d = [Point2D(*c) for c in circle_]
color = tuple([v / 256 for v in _.get("rgba")[:3]] + [_.get("rgba")[-1]])
self.canvas.SetFillPolys(_.get("fill"))
self.canvas.SetColour(color)
self.canvas.SetLineWidth(_.get("linewidth"))
self.canvas.DrawPolygon(circle_2d)
def set_style(
self,
name: str,
rgba: tuple[float, float, float, float],
r: float = 0.52,
linewidth: int = 1,
fill: bool = False,
) -> None:
"""Set style.
Args:
name (str): name of style
rgba (tuple[float,float,float,float]): RGB(0-255) and opacity (0-1)
r (float, optional): radius of circle. Defaults to 0.52.
linewidth (int, optional): linewidth. Defaults to 1.
fill (bool, optional): whether to fill the circle. Defaults to False.
"""
self.style[name] = {"r": r, "rgba": rgba, "linewidth": linewidth, "fill": fill}
def show_styles(self) -> None:
print(f"{'Name':<16} {'r':<8} {'rgba':<24} {'linewidth':<10} {'fill':<10}")
for k, v in sorted(self.style.items()):
print(
f"{k:<16} {v['r']:<8.2f} {str(v['rgba']):<24} {v['linewidth']:<10} {v['fill']:<10}"
)
print()
def draw(
    self,
    width: int = 400,
    height: int = 400,
    aromatic: bool = False,
    conjugated: bool = False,
    HBA: bool = False,
    HBD: bool = False,
    circles: Iterable | None = None,
    style: str = "primary",
    r: float | None = None,
    rgba: Iterable | None = None,
    linewidth: int | None = None,
    fill: bool | None = None,
) -> str:
    """Draw the molecule as SVG, optionally circling atoms and bonds.

    A new SVG canvas is created and stored in ``self.canvas``; atom indices
    are always shown.

    Args:
        width (int, optional): width. Defaults to 400.
        height (int, optional): height. Defaults to 400.
        aromatic (bool): whether to highlight aromatic atoms. Defaults to False.
        conjugated (bool): whether to mark the midpoint of conjugated bonds.
            Defaults to False.
        HBA (bool): whether to circle H-bond acceptor atoms. Defaults to False.
        HBD (bool): whether to circle H-bond donor atoms. Defaults to False.
        circles (Iterable, optional): list/tuple of atom indices to circle.
            Ignored unless it is a non-empty list or tuple whose first
            element is an int. Defaults to None.
        style (str): name of the style used for ``circles``.
            Defaults to "primary".
        r (float, optional): for circles, radius overriding the style.
            Defaults to None (keep the style's value).
        rgba (Iterable, optional): for circles, RGB (0-255) and opacity (0-1)
            overriding the style. Defaults to None (keep the style's value).
        linewidth (int, optional): for circles, line width overriding the
            style. Defaults to None (keep the style's value).
        fill (bool, optional): for circles, fill flag overriding the style.
            Defaults to None (keep the style's value).

    Returns:
        str: SVG drawing text.

    Raises:
        KeyError: if ``circles`` is used and ``style`` is not a known style.
    """
    self.canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    self.canvas.drawOptions().addAtomIndices = True
    self.canvas.DrawMolecule(self.rdmol, legend=self.legend)

    if aromatic:
        for atom in self.rdmol.GetAtoms():
            if atom.GetIsAromatic():
                self._draw_circle(self._get_coords(atom.GetIdx()), "aromatic")

    if conjugated:
        for bond in self.rdmol.GetBonds():
            if bond.GetIsConjugated():
                begin_pos = self._get_coords(bond.GetBeginAtomIdx())
                end_pos = self._get_coords(bond.GetEndAtomIdx())
                # Mark the midpoint of the bond.
                self._draw_circle(begin_pos / 2 + end_pos / 2, "conjugated")

    if HBA:
        _HBA = Chem.MolFromSmarts(
            "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$("
            + "[N;v3;!$(N-*=!@[O,N,P,S])]),$([nH0,o,s;+0])]"
        )
        for match in self.rdmol.GetSubstructMatches(_HBA):
            self._draw_circle(self._get_coords(match[0]), "HBA")

    if HBD:
        _HBD = Chem.MolFromSmarts("[N&!H0&v3,N&!H0&+1&v4,O&H1&+0,S&H1&+0,n&H1&+0]")
        for match in self.rdmol.GetSubstructMatches(_HBD):
            self._draw_circle(self._get_coords(match[0]), "HBD")

    # `circles and ...` guards the circles[0] lookup against an empty sequence.
    if isinstance(circles, (list, tuple)) and circles and isinstance(circles[0], int):
        # Work on a copy so per-call overrides never leak into the named style.
        adhoc = dict(self.style[style])
        # Accept int as well as float radii; bool is excluded on purpose.
        if isinstance(r, (int, float)) and not isinstance(r, bool):
            adhoc["r"] = r
        if isinstance(linewidth, int):
            adhoc["linewidth"] = linewidth
        if isinstance(fill, bool):
            adhoc["fill"] = fill
        if isinstance(rgba, (list, tuple)):
            adhoc["rgba"] = rgba
        self.style["__adhoc__"] = adhoc
        try:
            for idx in circles:
                self._draw_circle(self._get_coords(idx), "__adhoc__")
        finally:
            # Remove the temporary style even if drawing fails.
            del self.style["__adhoc__"]

    self.canvas.FinishDrawing()
    return self.canvas.GetDrawingText()
Attributes¶
canvas = None
instance-attribute
¶
circle = np.vstack([_circle_x, _circle_y]).T
class-attribute
instance-attribute
¶
conf = self.rdmol.GetConformer(0)
instance-attribute
¶
legend = legend
instance-attribute
¶
rdmol = Draw.PrepareMolForDrawing(self.rdmol)
instance-attribute
¶
rdmolH = Chem.AddHs(rdmol)
instance-attribute
¶
style = {'aromatic': {'r': 0.3, 'rgba': (136, 180, 168, 0.6), 'linewidth': 1, 'fill': True}, 'conjugated': {'r': 0.1, 'rgba': (51, 51, 51, 0.7), 'linewidth': 1, 'fill': True}, 'HBA': {'r': 0.4, 'rgba': (11, 57, 235, 0.7), 'linewidth': 3, 'fill': False}, 'HBD': {'r': 0.5, 'rgba': (254, 97, 0, 0.7), 'linewidth': 3, 'fill': False}, 'ionizable': {'r': 0.5, 'rgba': (254, 97, 0, 0.7), 'linewidth': 3, 'fill': False}, 'primary': {'r': 0.5, 'rgba': (13, 110, 253, 0.7), 'linewidth': 3, 'fill': False}, 'secondary': {'r': 0.5, 'rgba': (108, 117, 125, 0.7), 'linewidth': 3, 'fill': False}, 'success': {'r': 0.5, 'rgba': (25, 135, 84, 0.7), 'linewidth': 3, 'fill': False}, 'info': {'r': 0.5, 'rgba': (13, 202, 240, 0.7), 'linewidth': 3, 'fill': False}, 'warning': {'r': 0.5, 'rgba': (255, 193, 7, 0.7), 'linewidth': 3, 'fill': False}, 'danger': {'r': 0.5, 'rgba': (220, 53, 69, 0.7), 'linewidth': 3, 'fill': False}, 'light': {'r': 0.5, 'rgba': (248, 249, 250, 0.7), 'linewidth': 3, 'fill': False}, 'dark': {'r': 0.5, 'rgba': (33, 37, 41, 0.7), 'linewidth': 3, 'fill': False}, 'blue': {'r': 0.5, 'rgba': (13, 110, 253, 0.7), 'linewidth': 3, 'fill': False}, 'indigo': {'r': 0.5, 'rgba': (102, 16, 242, 0.7), 'linewidth': 3, 'fill': False}, 'purple': {'r': 0.5, 'rgba': (111, 66, 193, 0.7), 'linewidth': 3, 'fill': False}, 'pink': {'r': 0.5, 'rgba': (214, 51, 132, 0.7), 'linewidth': 3, 'fill': False}, 'red': {'r': 0.5, 'rgba': (220, 53, 69, 0.7), 'linewidth': 3, 'fill': False}, 'orange': {'r': 0.5, 'rgba': (253, 126, 20, 0.7), 'linewidth': 3, 'fill': False}, 'yellow': {'r': 0.5, 'rgba': (255, 193, 7, 0.7), 'linewidth': 3, 'fill': False}, 'green': {'r': 0.5, 'rgba': (25, 135, 84, 0.7), 'linewidth': 3, 'fill': False}, 'teal': {'r': 0.5, 'rgba': (32, 201, 151, 0.7), 'linewidth': 3, 'fill': False}, 'cyan': {'r': 0.5, 'rgba': (13, 202, 240, 0.7), 'linewidth': 3, 'fill': False}, 'white': {'r': 0.5, 'rgba': (255, 255, 255, 0.7), 'linewidth': 3, 'fill': False}, 'black': {'r': 0.5, 'rgba': (0, 0, 0, 0.7), 
'linewidth': 3, 'fill': False}}
class-attribute
instance-attribute
¶
Functions¶
draw(width=400, height=400, aromatic=False, conjugated=False, HBA=False, HBD=False, circles=None, style='primary', r=None, rgba=None, linewidth=None, fill=None)
¶
Drawing SVG
Parameters:
-
width(int, default:400) –width. Defaults to 400.
-
height(int, default:400) –height. Defaults to 400.
-
aromatic(bool, default:False) –whether to highlight aromatic atoms. Defaults to False.
-
conjugated(bool, default:False) –whether to highlight conjugated bonds. Defaults to False.
-
HBA(bool, default:False) –whether to circle H-bond acceptor atoms. Defaults to False.
-
HBD(bool, default:False) –whether to circle H-bond donor atoms. Defaults to False.
-
circles(Iterable, default:None) –list/tuple of atom indices to circle. Defaults to None.
-
style(str, default:'primary') –style for circles if circles is not None.
-
r(float, default:None) –for circles, overriding style. radius of circle. Defaults to None (keep the style's radius).
-
rgba(tuple[float, float, float, float], default:None) –for circles, overriding style. RGB(0-255) and opacity (0-1)
-
linewidth(int, default:None) –for circles, overriding style. linewidth. Defaults to None (keep the style's linewidth).
-
fill(bool, default:None) –for circles, overriding style. whether to fill the circle. Defaults to None (keep the style's fill setting).
Returns:
-
str(str) –SVG drawing text.
Source code in src/rdworks/view.py
def draw(
    self,
    width: int = 400,
    height: int = 400,
    aromatic: bool = False,
    conjugated: bool = False,
    HBA: bool = False,
    HBD: bool = False,
    circles: Iterable | None = None,
    style: str = "primary",
    r: float | None = None,
    rgba: Iterable | None = None,
    linewidth: int | None = None,
    fill: bool | None = None,
) -> str:
    """Draw the molecule as SVG, optionally circling atoms and bonds.

    A new SVG canvas is created and stored in ``self.canvas``; atom indices
    are always shown.

    Args:
        width (int, optional): width. Defaults to 400.
        height (int, optional): height. Defaults to 400.
        aromatic (bool): whether to highlight aromatic atoms. Defaults to False.
        conjugated (bool): whether to mark the midpoint of conjugated bonds.
            Defaults to False.
        HBA (bool): whether to circle H-bond acceptor atoms. Defaults to False.
        HBD (bool): whether to circle H-bond donor atoms. Defaults to False.
        circles (Iterable, optional): list/tuple of atom indices to circle.
            Ignored unless it is a non-empty list or tuple whose first
            element is an int. Defaults to None.
        style (str): name of the style used for ``circles``.
            Defaults to "primary".
        r (float, optional): for circles, radius overriding the style.
            Defaults to None (keep the style's value).
        rgba (Iterable, optional): for circles, RGB (0-255) and opacity (0-1)
            overriding the style. Defaults to None (keep the style's value).
        linewidth (int, optional): for circles, line width overriding the
            style. Defaults to None (keep the style's value).
        fill (bool, optional): for circles, fill flag overriding the style.
            Defaults to None (keep the style's value).

    Returns:
        str: SVG drawing text.

    Raises:
        KeyError: if ``circles`` is used and ``style`` is not a known style.
    """
    self.canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    self.canvas.drawOptions().addAtomIndices = True
    self.canvas.DrawMolecule(self.rdmol, legend=self.legend)

    if aromatic:
        for atom in self.rdmol.GetAtoms():
            if atom.GetIsAromatic():
                self._draw_circle(self._get_coords(atom.GetIdx()), "aromatic")

    if conjugated:
        for bond in self.rdmol.GetBonds():
            if bond.GetIsConjugated():
                begin_pos = self._get_coords(bond.GetBeginAtomIdx())
                end_pos = self._get_coords(bond.GetEndAtomIdx())
                # Mark the midpoint of the bond.
                self._draw_circle(begin_pos / 2 + end_pos / 2, "conjugated")

    if HBA:
        _HBA = Chem.MolFromSmarts(
            "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$("
            + "[N;v3;!$(N-*=!@[O,N,P,S])]),$([nH0,o,s;+0])]"
        )
        for match in self.rdmol.GetSubstructMatches(_HBA):
            self._draw_circle(self._get_coords(match[0]), "HBA")

    if HBD:
        _HBD = Chem.MolFromSmarts("[N&!H0&v3,N&!H0&+1&v4,O&H1&+0,S&H1&+0,n&H1&+0]")
        for match in self.rdmol.GetSubstructMatches(_HBD):
            self._draw_circle(self._get_coords(match[0]), "HBD")

    # `circles and ...` guards the circles[0] lookup against an empty sequence.
    if isinstance(circles, (list, tuple)) and circles and isinstance(circles[0], int):
        # Work on a copy so per-call overrides never leak into the named style.
        adhoc = dict(self.style[style])
        # Accept int as well as float radii; bool is excluded on purpose.
        if isinstance(r, (int, float)) and not isinstance(r, bool):
            adhoc["r"] = r
        if isinstance(linewidth, int):
            adhoc["linewidth"] = linewidth
        if isinstance(fill, bool):
            adhoc["fill"] = fill
        if isinstance(rgba, (list, tuple)):
            adhoc["rgba"] = rgba
        self.style["__adhoc__"] = adhoc
        try:
            for idx in circles:
                self._draw_circle(self._get_coords(idx), "__adhoc__")
        finally:
            # Remove the temporary style even if drawing fails.
            del self.style["__adhoc__"]

    self.canvas.FinishDrawing()
    return self.canvas.GetDrawingText()
set_style(name, rgba, r=0.52, linewidth=1, fill=False)
¶
Set style.
Parameters:
-
name(str) –name of style
-
rgba(tuple[float, float, float, float]) –RGB(0-255) and opacity (0-1)
-
r(float, default:0.52) –radius of circle. Defaults to 0.52.
-
linewidth(int, default:1) –linewidth. Defaults to 1.
-
fill(bool, default:False) –whether to fill the circle. Defaults to False.
Source code in src/rdworks/view.py
def set_style(
    self,
    name: str,
    rgba: tuple[float, float, float, float],
    r: float = 0.52,
    linewidth: int = 1,
    fill: bool = False,
) -> None:
    """Register a circle style under ``name``, replacing any existing one.

    Args:
        name (str): name of the style.
        rgba (tuple[float,float,float,float]): RGB (0-255) and opacity (0-1).
        r (float, optional): radius of the circle. Defaults to 0.52.
        linewidth (int, optional): line width. Defaults to 1.
        fill (bool, optional): whether to fill the circle. Defaults to False.
    """
    spec = dict(r=r, rgba=rgba, linewidth=linewidth, fill=fill)
    self.style[name] = spec
show_styles()
¶
Functions¶
get_highlight_bonds(rdmol, atom_indices)
¶
Get bond indices for bonds between atom indices.
Parameters:
-
rdmol(Mol) –rdkit Chem.Mol object.
-
atom_indices(list[int]) –atom indices.
Returns:
-
list[int] | None–list[int]: bond indices.
Source code in src/rdworks/view.py
def get_highlight_bonds(rdmol: Chem.Mol, atom_indices: list[int]) -> list[int] | None:
    """Get indices of the bonds whose two end atoms are both in ``atom_indices``.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        atom_indices (list[int]): atom indices.

    Returns:
        list[int] | None: bond indices in the molecule's bond order, or None
            when no bond connects two of the given atoms.
    """
    # Build the set once so each membership test is O(1) instead of a list scan.
    selected = set(atom_indices)
    bond_indices = [
        bond.GetIdx()
        for bond in rdmol.GetBonds()
        if bond.GetBeginAtomIdx() in selected and bond.GetEndAtomIdx() in selected
    ]
    return bond_indices or None
render_2D_mol(rdmol, moldrawer, redraw=False, coordgen=False, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None)
¶
Source code in src/rdworks/view.py
def render_2D_mol(
    rdmol: Chem.Mol,
    moldrawer: rdMolDraw2D,
    redraw: bool = False,
    coordgen: bool = False,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
) -> str:
    """Draw a molecule on the supplied drawer and return its drawing text.

    A copy of ``rdmol`` is drawn, so the input molecule is not modified.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        moldrawer: drawer object providing ``drawOptions``, ``DrawMolecule``,
            ``FinishDrawing`` and ``GetDrawingText``.
        redraw (bool, optional): whether to recompute 2D coordinates even if
            the molecule already has a conformer. Defaults to False.
        coordgen (bool, optional): value passed to
            ``rdDepictor.SetPreferCoordGen`` when coordinates are computed.
            Defaults to False.
        legend (str, optional): legend. Defaults to ''.
        atom_index (bool, optional): whether to show atom indices.
            Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight.
            Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight.
            When None and ``highlight_atoms`` is given, the bonds between the
            highlighted atoms are used. Defaults to None.

    Returns:
        str: whatever ``moldrawer.GetDrawingText()`` returns.
    """
    mol = Chem.Mol(rdmol)
    needs_layout = redraw or mol.GetNumConformers() == 0
    if needs_layout:
        # Hydrogens are removed before the 2D layout is computed.
        rdDepictor.SetPreferCoordGen(coordgen)
        mol = Chem.RemoveHs(mol)
        rdDepictor.Compute2DCoords(mol)
        rdDepictor.StraightenDepiction(mol)

    if highlight_atoms is not None and highlight_bonds is None:
        # Highlight the bonds between the highlighted atoms.
        highlight_bonds = get_highlight_bonds(mol, highlight_atoms)

    opts = moldrawer.drawOptions()
    opts.addAtomIndices = atom_index
    opts.atomLabelDeuteriumTritium = True  # D, T
    opts.singleColourWedgeBonds = True
    opts.addStereoAnnotation = True
    # Further draw options that were tried here and are currently left at
    # their defaults: setHighlightColour, addBondIndices, noAtomLabels,
    # explicitMethyl, fillHighlights, highlightRadius,
    # highlightBondWidthMultiplier, variableAtomRadius,
    # variableBondWidthMultiplier, setVariableAttachmentColour, baseFontSize,
    # annotationFontScale, rotate, padding.

    moldrawer.DrawMolecule(
        mol,
        legend=legend,
        highlightAtoms=highlight_atoms,
        highlightBonds=highlight_bonds,
    )
    moldrawer.FinishDrawing()
    return moldrawer.GetDrawingText()
render_matrix_grid(rdmol, legend, highlight_atoms=None, highlight_bonds=None, mols_per_row=5, width=200, height=200, atom_index=False, redraw=False, coordgen=False, svg=True)
¶
Rendering a grid image from a list of molecules.
Parameters:
-
rdmol(list[Mol]) –list of rdkit Chem.Mol objects.
-
legend(list[str]) –list of legends
-
highlight_atoms(list[list[int]] | None, default:None) –list of atom(s) to highlight. Defaults to None.
-
highlight_bonds(list[list[int]] | None, default:None) –list of bond(s) to highlight. Defaults to None.
-
mols_per_row(int, default:5) –molecules per row. Defaults to 5.
-
width(int, default:200) –width. Defaults to 200.
-
height(int, default:200) –height. Defaults to 200.
-
atom_index(bool, default:False) –whether to show atom index. Defaults to False.
-
redraw(bool, default:False) –whether to redraw 2D. Defaults to False.
-
coordgen(bool, default:False) –whether to use coordgen to depict. Defaults to False.
Returns:
-
str | Image–str | Image.Image: SVG string or PIL Image object.
Reference
https://greglandrum.github.io/rdkit-blog/posts/2023-10-25-molsmatrixtogridimage.html
Source code in src/rdworks/view.py
def render_matrix_grid(
    rdmol: list[Chem.Mol],
    legend: list[str] | None,
    highlight_atoms: list[list[int]] | None = None,
    highlight_bonds: list[list[int]] | None = None,
    mols_per_row: int = 5,
    width: int = 200,
    height: int = 200,
    atom_index: bool = False,
    redraw: bool = False,
    coordgen: bool = False,
    svg: bool = True,
) -> str | Image.Image:
    """Render a grid image from a list of molecules.

    The molecules and their per-molecule legends/highlights are split into
    rows of ``mols_per_row`` entries and passed to ``MolsMatrixToGridImage``.

    Args:
        rdmol (list[Chem.Mol]): list of rdkit Chem.Mol objects.
        legend (list[str] | None): list of legends; None means empty legends.
        highlight_atoms (list[list[int]] | None, optional): list of atom(s) to
            highlight. Defaults to None.
        highlight_bonds (list[list[int]] | None, optional): list of bond(s) to
            highlight. Defaults to None.
        mols_per_row (int, optional): molecules per row. Defaults to 5.
        width (int, optional): width of each sub-image. Defaults to 200.
        height (int, optional): height of each sub-image. Defaults to 200.
        atom_index (bool, optional): whether to show atom index. Currently not
            used by this function. Defaults to False.
        redraw (bool, optional): whether to redraw 2D. Currently not used by
            this function. Defaults to False.
        coordgen (bool, optional): whether to use coordgen to depict. Currently
            not used by this function. Defaults to False.
        svg (bool, optional): passed as ``useSVG``. Defaults to True.

    Returns:
        str | Image.Image: SVG string or PIL Image object.

    Reference:
        https://greglandrum.github.io/rdkit-blog/posts/2023-10-25-molsmatrixtogridimage.html
    """
    n = len(rdmol)

    if legend is None:
        legend = [""] * n
    elif isinstance(legend, list):
        assert len(legend) == n, "number of legends and molecules must be the same"

    if highlight_atoms is None:
        highlight_atoms = [()] * n
    elif isinstance(highlight_atoms, list):
        assert len(highlight_atoms) == n, (
            "number of highlights and molecules must be the same"
        )

    if highlight_bonds is None:
        highlight_bonds = [()] * n
    elif isinstance(highlight_bonds, list):
        assert len(highlight_bonds) == n, (
            "number of highlights and molecules must be the same"
        )

    row_starts = range(0, n, mols_per_row)

    def _rows(seq):
        # Cut a flat sequence into consecutive rows of mols_per_row items.
        return [seq[start : start + mols_per_row] for start in row_starts]

    return MolsMatrixToGridImage(
        molsMatrix=_rows(rdmol),
        subImgSize=(width, height),
        legendsMatrix=_rows(legend),
        highlightAtomListsMatrix=_rows(highlight_atoms),
        highlightBondListsMatrix=_rows(highlight_bonds),
        useSVG=svg,
        returnPNG=False,  # whether to return PNG data (True) or a PIL object (False)
    )
render_png(rdmol, width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, trim=True)
¶
Draw 2D molecule in PNG format.
Parameters:
-
rdmol(Mol) –rdkit Chem.Mol object.
-
width(int, default:300) –width. Defaults to 300.
-
height(int, default:300) –height. Defaults to 300.
-
legend(str, default:'') –legend. Defaults to ''.
-
atom_index(bool, default:False) –whether to show atom index. Defaults to False.
-
highlight_atoms(list[int] | None, default:None) –atom(s) to highlight. Defaults to None.
-
highlight_bonds(list[int] | None, default:None) –bond(s) to highlight. Defaults to None.
-
redraw(bool, default:False) –whether to redraw. Defaults to False.
-
coordgen(bool, default:False) –whether to use coordgen. Defaults to False.
Returns:
-
Image–Image.Image: output PIL Image object.
Source code in src/rdworks/view.py
def render_png(
    rdmol: Chem.Mol,
    width: int = 300,
    height: int = 300,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
    redraw: bool = False,
    coordgen: bool = False,
    trim: bool = True,
) -> Image.Image:
    """Draw a 2D molecule and return it as a PIL image.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        width (int, optional): width. Defaults to 300.
        height (int, optional): height. Defaults to 300.
        legend (str, optional): legend. Defaults to ''.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.
        trim (bool, optional): whether to pass the image through ``trim_png``
            before returning it. Defaults to True.

    Returns:
        Image.Image: output PIL Image object.
    """
    drawer = rdMolDraw2D.MolDraw2DCairo(width, height)
    png_data = render_2D_mol(
        rdmol,
        moldrawer=drawer,
        redraw=redraw,
        coordgen=coordgen,
        legend=legend,
        atom_index=atom_index,
        highlight_atoms=highlight_atoms,
        highlight_bonds=highlight_bonds,
    )
    image = Image.open(BytesIO(png_data))
    return trim_png(image) if trim else image
render_svg(rdmol, width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, optimize=True)
¶
Draw 2D molecule in SVG format.
Examples:
For Jupyter Notebook, wrap the output with SVG:
Parameters:
-
rdmol(Mol) –rdkit Chem.Mol object.
-
width(int, default:300) –width. Defaults to 300.
-
height(int, default:300) –height. Defaults to 300.
-
legend(str, default:'') –legend. Defaults to ''.
-
atom_index(bool, default:False) –whether to show atom index. Defaults to False.
-
highlight_atoms(list[int] | None, default:None) –atom(s) to highlight. Defaults to None.
-
highlight_bonds(list[int] | None, default:None) –bond(s) to highlight. Defaults to None.
-
redraw(bool, default:False) –whether to redraw. Defaults to False.
-
coordgen(bool, default:False) –whether to use coordgen. Defaults to False.
-
optimize(bool, default:True) –whether to optimize SVG string. Defaults to True.
Returns:
-
str(str) –SVG string
Source code in src/rdworks/view.py
def render_svg(
    rdmol: Chem.Mol,
    width: int = 300,
    height: int = 300,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
    redraw: bool = False,
    coordgen: bool = False,
    optimize: bool = True,
) -> str:
    """Draw 2D molecule in SVG format.

    Examples:
        For Jupyter Notebook, wrap the output with SVG:

        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        width (int, optional): width. Defaults to 300.
        height (int, optional): height. Defaults to 300.
        legend (str, optional): legend. Defaults to ''.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.
        optimize (bool, optional): whether to run the SVG string through
            scour. Defaults to True.

    Returns:
        str: SVG string
    """
    svg_string = render_2D_mol(
        rdmol,
        moldrawer=rdMolDraw2D.MolDraw2DSVG(width, height),
        redraw=redraw,
        coordgen=coordgen,
        legend=legend,
        atom_index=atom_index,
        highlight_atoms=highlight_atoms,
        highlight_bonds=highlight_bonds,
    )
    if optimize:
        import optparse

        # scour reads its options as attributes of an options object, so a
        # plain dict's keys are not picked up; wrap the settings in
        # optparse.Values so they are exposed as attributes.
        scour_options = optparse.Values(
            {
                "strip_comments": True,
                "strip_ids": True,
                "shorten_ids": True,
                "compact_paths": True,
                "indent_type": "none",
            }
        )
        svg_string = scourString(svg_string, options=scour_options)
    return svg_string
rescale(rdmol, factor=1.5)
¶
Returns a copy of rdmol rescaled by a factor.
Parameters:
-
rdmol(Mol) –input molecule.
-
factor(float, default:1.5) –scaling factor.
Returns:
-
Mol–Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.
Source code in src/rdworks/view.py
def rescale(rdmol: Chem.Mol, factor: float = 1.5) -> Chem.Mol:
    """Returns a copy of `rdmol` rescaled by a `factor`.

    The input molecule is left untouched; the transform is applied to a copy.

    Args:
        rdmol (Chem.Mol): input molecule.
        factor (float): scaling factor.

    Returns:
        Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.
    """
    transformed_rdmol = Chem.Mol(rdmol)
    center = AllChem.ComputeCentroid(transformed_rdmol.GetConformer())
    # The `np.float` alias was removed from NumPy; the builtin float is the
    # same dtype (float64).
    tf = np.identity(4, dtype=float)
    # Translation column: shift x and y by the negated centroid.
    tf[0][3] -= center[0]
    tf[1][3] -= center[1]
    # Uniform scaling on the diagonal.
    tf[0][0] = tf[1][1] = tf[2][2] = factor
    AllChem.TransformMol(transformed_rdmol, tf)
    return transformed_rdmol
rotate(rdmol, axis, degree)
¶
Rotate rdmol around given axis and degree.
Input rdmol will be modified.
Parameters:
-
rdmol(Mol) –input molecule.
-
axis(str) –axis of rotation, 'x' or 'y' or 'z'.
-
degree(float) –degree of rotation.
Source code in src/rdworks/view.py
def rotate(rdmol: Chem.Mol, axis: str, degree: float) -> None:
    """Rotate `rdmol` around given axis and degree.

    Input `rdmol` will be modified. If the molecule has no conformer yet,
    2D coordinates are computed first.

    Args:
        rdmol (Chem.Mol): input molecule.
        axis (str): axis of rotation, 'x' or 'y' or 'z'.
        degree (float): degree of rotation.
    """
    # Explicit check instead of a bare `except:`, which would also swallow
    # unrelated errors such as KeyboardInterrupt.
    if rdmol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(rdmol)
    conf = rdmol.GetConformer()
    R = rotation_matrix(axis, degree)
    rdMolTransforms.TransformConformer(conf, R)
rotation_matrix(axis, degree)
¶
Returns a numpy rotation matrix of shape (4,4).
Parameters:
-
axis(str) –'x' or 'y' or 'z'.
-
degree(float) –degree of rotation.
Returns:
-
ndarray–np.ndarray: a numpy array of shape (4,4).
Source code in src/rdworks/view.py
def rotation_matrix(axis: str, degree: float) -> np.ndarray:
    """Returns a numpy rotation matrix of shape (4,4).

    Args:
        axis (str): 'x' or 'y' or 'z' (case-insensitive).
        degree (float): degree of rotation.

    Returns:
        np.ndarray: a numpy array of shape (4,4).

    Raises:
        ValueError: if `axis` is not one of 'x', 'y' or 'z'.
    """
    rad = (np.pi / 180.0) * degree
    c = np.cos(rad)
    s = np.sin(rad)
    key = axis.lower()
    if key == "x":
        return np.array(
            [
                [1.0, 0.0, 0.0, 0.0],
                [0.0, c, -s, 0.0],
                [0.0, s, c, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    if key == "y":
        return np.array(
            [
                [c, 0.0, s, 0.0],
                [0.0, 1.0, 0.0, 0.0],
                [-s, 0.0, c, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    if key == "z":
        return np.array(
            [
                [c, -s, 0.0, 0.0],
                [s, c, 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    # Fail loudly instead of silently returning None for an unknown axis.
    raise ValueError(f"axis must be 'x', 'y' or 'z', got {axis!r}")
trim_png(img)
¶
Removes white margin around molecular drawing.
Parameters:
-
img(Image) –input PIL Image object.
Returns:
-
Image–Image.Image: output PIL Image object.
Source code in src/rdworks/view.py
def trim_png(img: Image.Image) -> Image.Image:
    """Crop away the uniform margin around a molecular drawing.

    The colour of the top-left pixel is taken as the background colour.

    Args:
        img (Image.Image): input PIL Image object.

    Returns:
        Image.Image: the cropped image, or the input image unchanged when no
            bounding box is found.
    """
    corner_colour = img.getpixel((0, 0))
    background = Image.new(img.mode, img.size, corner_colour)
    delta = ImageChops.difference(img, background)
    delta = ImageChops.add(delta, delta, 2.0, -100)
    box = delta.getbbox()
    return img.crop(box) if box else img
rdworks.descriptor
¶
Attributes¶
rd_descriptor = {'QED': 'Quantitative estimate of drug-likeness.', 'MolWt': 'Molecular weight', 'LogP': 'Predicted octanol/water partition coefficient', 'TPSA': 'Topological polar surface area', 'HBD': 'Number of hydrogen bonding donors', 'HBA': 'Number of hydrogen bonding acceptors', 'RotBonds': 'Number of rotatable bonds', 'RingCount': 'Number of rings', 'FCsp3': 'Fraction of SP3 carbons', 'HAC': 'Number of heavy atoms', 'Hetero': 'Number of hetero atoms (not H or C) [B,N,O,P,S,F,Cl,Br,I]', 'LipinskiHBA': 'Number of hydrogen bonding acceptors according to the Lipinski definition', 'LipinskiHBD': 'Number of hydrogen bonding donors according to the Lipinski definition'}
module-attribute
¶
rd_descriptor_f = {'QED': QED.qed, 'MolWt': Descriptors.MolWt, 'HAC': Descriptors.HeavyAtomCount, 'LogP': Descriptors.MolLogP, 'TPSA': Descriptors.TPSA, 'HBA': rdMolDescriptors.CalcNumHBA, 'HBD': rdMolDescriptors.CalcNumHBD, 'RotBonds': rdMolDescriptors.CalcNumRotatableBonds, 'RingCount': rdMolDescriptors.CalcNumRings, 'FCsp3': rdMolDescriptors.CalcFractionCSP3, 'Hetero': rdMolDescriptors.CalcNumHeteroatoms, 'LipinskiHBA': rdMolDescriptors.CalcNumLipinskiHBA, 'LipinskiHBD': rdMolDescriptors.CalcNumLipinskiHBD}
module-attribute
¶
rdworks.xml
¶
This module contains XML definitions for substructure and descriptor matching.
Available descriptors:
Name | Description | RDKit function
------------- | ----------------------------------------- | --------------------------------------
HAC | Num. of Non-H atoms | Descriptors.HeavyAtomCount
HBA | Num. of H-bond acceptors | Descriptors.NumHAcceptors
HBD | Num. of H-bond donors | Descriptors.NumHDonors
LipinskiHBA | Num. of Lipinski H-bond acceptors | rdMolDescriptors.CalcNumLipinskiHBA
LipinskiHBD | Num. of Lipinski H-bond donors | rdMolDescriptors.CalcNumLipinskiHBD
MolWt | Molecular weight | Descriptors.MolWt
TPSA | Topological polar surface area | Descriptors.TPSA
LogP | log(octanol/water partition coefficient) | Descriptors.MolLogP
RotBonds | Num. of rotatable bonds | Descriptors.NumRotatableBonds
RingCount | Num. of rings | Descriptors.RingCount
FCsp3 | fraction of C atoms that are Sp3 | Descriptors.FractionCSP3
rdHBD | Num. of H-bond donors | rdMolDescriptors.CalcNumHBD
rdHBA | Num. of H-bond acceptors | rdMolDescriptors.CalcNumHBA
rdRingCount | Num. of rings | rdMolDescriptors.CalcNumRings
rdRotBondst | Num. of rotatable bonds | rdMolDescriptors.CalcNumRotatableBonds
rdFCsp3 | fraction of C atoms that are Sp3 | rdMolDescriptors.CalcFractionCSP3
Hetero | Num. of non-H and non-C atoms | rdMolDescriptors.CalcNumHeteroatoms
ALogP | Wildman-Crippen LogP value | Crippen.MolLogP
QED | Quantitative estimation of drug-likeness | QED.qed
PSA | MOE-like molecular surface area | MolSurf.TPSA
StereoCenters | Num. of atom stereo centers | rdMolDescriptors.CalcNumAtomStereoCenters
References:
1. `alert_collection.csv` is copied from Patrick Walters' blog and github:
- http://practicalcheminformatics.blogspot.com/2018/08/filtering-chemical-libraries.html
- https://github.com/PatWalters/rd_filters
1. Jeroen Kazius, Ross McGuire, and Roberta Bursi.
Derivation and Validation of Toxicophores for Mutagenicity Prediction.
J. Med. Chem. 2005, 48, 312-320.
1. J. F. Blake.
Identification and Evaluation of Molecular Properties Related to Preclinical Optimization and Clinical Fate.
Med Chem. 2005, 1, 649-55.
1. Mike Hann, Brian Hudson, Xiao Lewell, Rob Lifely, Luke Miller, and Nigel Ramsden.
Strategic Pooling of Compounds for High-Throughput Screening.
J. Chem. Inf. Comput. Sci. 1999, 39, 897-902.
1. Jonathan B. Baell and Georgina A. Holloway. New Substructure Filters for Removal of Pan Assay Interference Compounds (PAINS)
from Screening Libraries and for Their Exclusion in Bioassays.
J. Med. Chem. 2010, 53, 2719-2740.
1. Bradley C. Pearce, Michael J. Sofia, Andrew C. Good, Dieter M. Drexler, and David A. Stock.
An Empirical Process for the Design of High-Throughput Screening Deck Filters.
J. Chem. Inf. Model. 2006, 46, 1060-1068.
1. Ruth Brenk, Alessandro Schipani, Daniel James, Agata Krasowski, Ian Hugh Gilbert, Julie Frearson and Paul Graham Wyatt.
Lessons learnt from assembling screening libraries for drug discovery for neglected diseases.
ChemMedChem. 2008, 3, 435-44.
1. Sivaraman Dandapani, Gerard Rosse, Noel Southall, Joseph M. Salvino, Craig J. Thomas.
Selecting, Acquiring, and Using Small Molecule Libraries for High‐Throughput Screening.
Curr Protoc Chem Biol. 2012, 4, 177–191.
1. Huth JR, Mendoza R, Olejniczak ET, Johnson RW, Cothron DA, Liu Y, Lerner CG, Chen J, Hajduk PJ.
ALARM NMR: a rapid and robust experimental method to detect reactive false positives in biochemical screens.
J Am Chem Soc. 2005, 127, 217-24.
- identification of thiol reactive compounds by monitoring DTT-dependent 13-C chemical shift changes
of the human La protein in the presence of a test compound
Attributes:
Attributes¶
predefined_xml = {'Zinc_fragment': {'Path': 'ZINC_fragment.xml', 'Description': "ZINC's fragment-like criteria", 'Reference': 'ZINC'}, 'Zinc_leadlike': {'Path': 'ZINC_leadlike.xml', 'Description': "ZINC's lead-like criteria", 'Reference': 'ZINC'}, 'Zinc_druglike': {'Path': 'ZINC_druglike.xml', 'Description': "ZINC's drug-like criteria", 'Reference': 'ZINC'}, 'fragment': {'Path': 'fragment.xml', 'Description': 'fragment', 'Reference': ''}, 'MLSMR': {'Path': 'ChEMBL_Walters/MLSMR.xml', 'Description': 'NIH Mol. Lib. Small Molecule Repository filters', 'Reference': 'Dandapani et al. (2012)'}, 'CNS': {'Path': 'CNS.xml', 'Description': 'CNS MPO descriptors', 'Reference': 'Wager et al. (2010)'}, 'PAINS': {'Path': 'Baell2010_PAINS/Baell2010A.xml', 'Description': 'Pan Assay Interference (>150 hits)', 'Reference': 'Baell et al. (2010)'}, 'Dundee': {'Path': 'ChEMBL_Walters/Dundee.xml', 'Description': 'Dundee NTD library filters', 'Reference': 'Brenk et al. (2008)'}, 'BMS': {'Path': 'ChEMBL_Walters/BMS.xml', 'Description': 'BMS HTS deck filters', 'Reference': 'Pearce et al. (2006)'}, 'LINT': {'Path': 'ChEMBL_Walters/LINT.xml', 'Description': 'Pfizer LINT filters', 'Reference': 'Blake (2005)'}, 'Toxicophore': {'Path': 'Kazius2005/Kazius2005.xml', 'Description': 'Toxicophores for mutagenicity', 'Reference': 'Kazius et al. (2005)'}, 'Glaxo': {'Path': 'ChEMBL_Walters/Glaxo.xml', 'Description': 'Glaxo hard filters', 'Reference': 'Hann et al. (1999)'}, 'Acid': {'Path': 'Hann1999_Glaxo/Hann1999Acid.xml', 'Description': 'acid', 'Reference': 'Hann et al. (1999)'}, 'Base': {'Path': 'Hann1999_Glaxo/Hann1999Base.xml', 'Description': 'base', 'Reference': 'Hann et al. (1999)'}, 'Nucleophile': {'Path': 'Hann1999_Glaxo/Hann1999NuPh.xml', 'Description': 'nucleophile', 'Reference': 'Hann et al. (1999)'}, 'Electrophile': {'Path': 'Hann1999_Glaxo/Hann1999ElPh.xml', 'Description': 'electrophile', 'Reference': 'Hann et al. 
(1999)'}, 'Inpharmatica': {'Path': 'ChEMBL_Walters/Inpharmatica.xml', 'Description': 'Inpharmatica unwanted fragments', 'Reference': 'ChEMBL'}, 'SureChEMBL': {'Path': 'ChEMBL_Walters/SureChEMBL.xml', 'Description': 'SureChEMBL filter', 'Reference': 'ChEMBL'}, 'Reactive': {'Path': 'misc/reactive.xml', 'Description': 'reactive functional groups', 'Reference': ''}, 'Astex_RO3': {'Path': 'Astex_RO3.xml', 'Description': 'Astex rule of 3', 'Reference': 'Astex'}, 'Asinex_fragment': {'Path': 'Asinex_fragment.xml', 'Description': "Asinex's fragment", 'Reference': 'Asinex'}}
module-attribute
¶
Functions¶
get_predefined_xml(name)
¶
Returns matched predefined xml file.
Parameters:
-
name(str) –name of predefined entry.
Returns:
-
PathLike–os.PathLike: path to the xml file.
Source code in src/rdworks/xml.py
def get_predefined_xml(name: str) -> os.PathLike:
    """Return the path of the predefined xml file matching ``name``.

    Matching is case-insensitive and by prefix: the first entry of
    ``predefined_xml`` (in insertion order) whose name starts with ``name``
    is selected.

    Args:
        name (str): name, or leading part of the name, of a predefined entry.

    Returns:
        os.PathLike: path to the xml file inside the ``rdworks.data``
            package resources.

    Raises:
        ValueError: if ``name`` is empty or no predefined entry matches.
    """
    prefix = name.upper()
    # An empty prefix would trivially match every key and silently return the
    # first entry, so it is treated as "not found" instead.
    if prefix:
        for key, entry in predefined_xml.items():
            if key.upper().startswith(prefix):
                datadir = importlib.resources.files("rdworks.data")
                return pathlib.Path(datadir / entry["Path"])
    raise ValueError(
        f"get_predefined_xml() cannot find the xml file for {name}"
    )
list_predefined_xml()
¶
Returns a text table listing the predefined xml entries.
Returns:
-
str(str) –text output of list of predefined xml
Source code in src/rdworks/xml.py
def list_predefined_xml() -> str:
    """Render the predefined xml registry as a text table.

    The table starts with a blank line, followed by a header row, a dashed
    separator row, and one row per entry of ``predefined_xml`` showing its
    name, description and reference.

    Returns:
        str: the formatted table, each row terminated by a newline.
    """
    header = f"\n| {'Name':<18} | {'Description':<48} | {'Reference':<23} |\n"
    separator = f"| {'-' * 18} | {'-' * 48} | {'-' * 23} |\n"
    rows = [
        f"| {entry_name:<18} | {info['Description']:<48} | {info['Reference']:<23} |\n"
        for entry_name, info in predefined_xml.items()
    ]
    return "".join([header, separator, *rows])
parse_xml(path)
¶
Parse an XML file.
Parameters:
-
path(PathLike) –filename of the xml.
Returns:
-
Tuple(Tuple) –parsed results.
Source code in src/rdworks/xml.py
def parse_xml(path: os.PathLike) -> Tuple:
    """Parse an XML file of substructure and descriptor terms.

    Each direct child of the root element is inspected:

    * ``<substructure name=...>`` contributes ``(name, smarts, 0.0, 0.0)``
      where ``smarts`` is the text of its ``<SMARTS>`` child.
    * ``<descriptor name=...>`` contributes ``(name, None, lb, ub)`` where
      ``lb``/``ub`` are the float values of its optional ``<min>``/``<max>``
      children, or ``None`` when the child is absent.
    * children with any other tag add no term.

    Args:
        path (os.PathLike): filename of the xml.

    Returns:
        Tuple: ``(terms, combine)`` where ``terms`` is the list of tuples
            described above, in document order, and ``combine`` is the
            upper-cased ``combine`` attribute of the root element
            (``"OR"`` when the attribute is missing).

    Raises:
        KeyError: if any direct child of the root has no ``name`` attribute.
        AttributeError: if a ``<substructure>`` has no ``<SMARTS>`` child.
    """
    root = ET.parse(path).getroot()
    # "OR" is the default combination rule when the root does not specify one.
    combine = root.attrib.get("combine", "OR").upper()
    terms = []
    for child in root:
        # The name is looked up for every child, even those with other tags.
        name = child.attrib["name"]
        if child.tag == "substructure":
            smarts = child.find("SMARTS").text
            terms.append((name, smarts, 0.0, 0.0))
        elif child.tag == "descriptor":
            lower = child.find("min")
            upper = child.find("max")
            lb = float(lower.text) if lower is not None else None
            ub = float(upper.text) if upper is not None else None
            terms.append((name, None, lb, ub))
    return (terms, combine)