Ternary Complex
mdscribe.ternary
Ligand
dataclass
Class for ligand information.
Source code in mdscribe/ternary/complex.py
@dataclass
class Ligand:
"""Class for ligand information."""
st: parmed.structure.Structure
idx0: List[int] = field(default_factory=list) # all atoms
idx: List[int] = field(default_factory=list) # non-hydrogens
imap: Dict[int,int] = field(default_factory=dict) # pdb atom idx -> st atom idx
mol: Chem.Mol | None = None
highlight: List[int] = field(default_factory=list)
def coor(self) -> np.ndarray:
return self.st.coordinates[self.idx,:]
def centroid(self) -> np.ndarray:
return np.mean(self.coor(), axis=0)
def name(self) -> List[str]:
return [self.st.atoms[i].name for i in self.idx ]
def atomic_number(self) -> List[int]:
return [self.st.atoms[i].atomic_number for i in self.idx ]
def residue_name(self) -> List[str]:
return [self.st.atoms[i].residue.name for i in self.idx ]
def residue_number(self) -> List[int]:
return [self.st.atoms[i].residue.number for i in self.idx ]
highlight = field(default_factory=list)
class-attribute
instance-attribute
idx = field(default_factory=list)
class-attribute
instance-attribute
idx0 = field(default_factory=list)
class-attribute
instance-attribute
imap = field(default_factory=dict)
class-attribute
instance-attribute
mol = None
class-attribute
instance-attribute
st
instance-attribute
__init__(st, idx0=list(), idx=list(), imap=dict(), mol=None, highlight=list())
atomic_number()
centroid()
coor()
name()
residue_name()
Molecule
dataclass
Class for protein information.
Source code in mdscribe/ternary/complex.py
idx = field(default_factory=list)
class-attribute
instance-attribute
st
instance-attribute
__init__(st, idx=list())
centroid()
Pocket
dataclass
Class for pocket information.
Source code in mdscribe/ternary/complex.py
@dataclass
class Pocket:
"""Class for pocket information."""
st: parmed.structure.Structure
idx: List[int] = field(default_factory=list)
residues: List = field(default_factory=list)
imap: Dict[int,int] = field(default_factory=dict) # pocket atom idx -> st atom idx
def coor(self) -> np.ndarray:
return self.st.coordinates[self.idx,:]
def centroid(self) -> np.ndarray:
return np.mean(self.coor(), axis=0)
def name(self) -> List[str]:
return [self.st.atoms[i].name for i in self.idx ]
def residue_number(self) -> List[int]:
return [self.st.atoms[i].residue.number for i in self.idx ]
# return [ r.number for r in self.residues ]
def residue_name(self) -> List[str]:
return [self.st.atoms[i].residue.name for i in self.idx ]
idx = field(default_factory=list)
class-attribute
instance-attribute
imap = field(default_factory=dict)
class-attribute
instance-attribute
residues = field(default_factory=list)
class-attribute
instance-attribute
st
instance-attribute
__init__(st, idx=list(), residues=list(), imap=dict())
centroid()
coor()
name()
residue_name()
Reference
dataclass
Class for reference information.
Source code in mdscribe/ternary/complex.py
ligand
instance-attribute
pocket
instance-attribute
st
instance-attribute
__init__(st, pocket, ligand)
centroid()
Ternary
Class for ternary complex modeling.
Source code in mdscribe/ternary/complex.py
class Ternary:
"""Class for ternary complex modeling."""
def __init__(self,
st: parmed.structure.Structure,
molecules: Dict,
references: Dict,
ligand_resname: str,
workdir: str | pathlib.Path = '.',
):
# parmed.struct.Structure objects
self.st = st # md system
self.imap = {(a.residue.number,a.name) : a.idx for a in st.atoms}
self.molecule = {}
for k,v in molecules.items():
idx = [self.imap[(b.residue.number, b.name)] for b in st[v].atoms]
self.molecule[k] = Molecule(st, idx)
self.references = references
if not isinstance(workdir, pathlib.Path):
workdir = pathlib.Path(workdir)
workdir.mkdir(parents=True, exist_ok=True)
self.workdir = workdir
self.map = {}
self.map_data = {}
self.ligand = {}
self.pocket = {}
self.ref = {}
self.rmsd_ref_coor = np.zeros((len(st.atoms), 3))
self.rmsd_idx_group = {}
self.centroid = {}
# reference coordinates for RMSDForce
# should have identical number of coordinates as the self.st
# even though all the coordinates are not used
self.ligand['0'] = self.create_ligand(st, ligand_resname, "structure-ligand")
for k in references:
self.map_to_reference(k)
self.rmsd_ref_coor[np.array(self.pocket[k].idx),:] = self.ref[k].pocket.coor()
self.rmsd_ref_coor[np.array(self.ligand[k].idx),:] = self.ref[k].ligand.coor()
self.rmsd_idx_group[k] = np.concatenate((self.pocket[k].idx, self.ligand[k].idx,))
self.map[k] = {
'ligand': pd.DataFrame(self.map_data[k]['ligand']),
'pocket': pd.DataFrame(self.map_data[k]['pocket']),
}
def create_ligand(self, st:parmed.structure.Structure, resname:str, prefix:str) -> Ligand:
"""Create a Ligand class object.
Args:
st (parmed.structure.Structure): input structure.
resname (str): residue name for ligand.
prefix (str): prefix for output pdb file.
Raises:
ValueError: when residue name does not exist.
Returns:
Ligand: object containing ligand information.
"""
outfile = (self.workdir / f"{prefix}-{resname}.pdb").as_posix()
try:
idx0 = [atom.idx for atom in st.atoms if (atom.residue.name == resname)]
assert len(idx0) > 0
except:
raise ValueError(f"{resname} does not exist")
try:
# write the ligand to a pdb file which then is read by Chem.MolFromPDBFile to create a rdkit.Chem.Mol
# non-hydrogen atoms
# Order of atoms in the pdb is same as that of the rdmol
indices = []
imap = {}
pdb_idx = 0
for atom in st.atoms:
if atom.residue.name == resname and atom.atomic_number > 1:
imap[pdb_idx] = atom.idx
indices.append(atom.idx)
pdb_idx += 1
st[f':{resname}&!@H='].save(outfile, overwrite=True)
mol = Chem.MolFromPDBFile(outfile)
assert mol
assert imap
except:
raise ValueError(f"cannot create molecule for {resname}")
return Ligand(st=st, idx0=idx0, idx=indices, imap=imap, mol=mol)
def create_pocket(self, st:parmed.structure.Structure, resname:str, dist:float, include:str, exclude:str) -> Pocket:
"""Create a Pocket class object.
Args:
st (parmed.structure.Structure): input structure.
resname (str): residue name for ligand.
dist (float): distance cutoff from ligand which is regarded as pocket.
include (str): residue or atom selection to include as pocket.
exclude (str): residue or atom selection to exlude from pocket.
Raises:
ValueError: when pocket cannot be defined.
Returns:
Pocket: object containing pocket information.
"""
try:
# select all `pocket_atom` atoms within `pocket_dist` from the `ref_resname` residues and save to a pdb.
# the whole residue is selected if any atom satisfies the distance criteria
pocket = st[f"(:{resname}<:{dist})&(!:{resname})&({include})&!({exclude})"]
imap = {(a.residue.number, a.name) : a.idx for a in st.atoms}
indices = [imap[(b.residue.number, b.name)] for b in pocket.atoms]
assert len(pocket.residues) > 0
assert len(pocket.atoms) > 0
except:
raise ValueError("cannot define reference ligand binding pocket")
return Pocket(st=st, idx=indices, residues=pocket.residues, imap=imap)
def create_map_data(self, ref_id:str) -> None:
"""Create a map/dictionary for atom indexes.
Args:
ref_id (str): id for reference.
"""
self.map_data[ref_id] = {
'ligand' : {
'ref_idx': self.ref[ref_id].ligand.idx,
'ref_name': self.ref[ref_id].ligand.name(),
'ref_atomic_number' : self.ref[ref_id].ligand.atomic_number(),
'ref_residue_name' : self.ref[ref_id].ligand.residue_name(),
'ref_residue_number' : self.ref[ref_id].ligand.residue_number(),
'idx': self.ligand[ref_id].idx,
'name' : self.ligand[ref_id].name(),
'atomic_number': self.ligand[ref_id].atomic_number(),
'residue_name' : self.ligand[ref_id].residue_name(),
'residue_number' : self.ligand[ref_id].residue_number(),
},
'pocket' : {
'ref_idx' : self.ref[ref_id].pocket.idx,
'ref_name' : self.ref[ref_id].pocket.name(),
'ref_residue_name' : self.ref[ref_id].pocket.residue_name(),
'ref_residue_number' : self.ref[ref_id].pocket.residue_number(),
'idx' : self.pocket[ref_id].idx,
'name' : self.pocket[ref_id].name(),
'residue_name' : self.pocket[ref_id].residue_name(),
'residue_number' : self.pocket[ref_id].residue_number(),
}
}
def map_to_reference(self, ref_id: str, quiet:bool=False) -> None:
"""Map structure to the given reference.
Args:
ref_id (str): id of reference structure.
quiet (bool, optional): whether to print details. Defaults to False.
Raises:
ValueError: when mapping fails.
"""
pdb = self.references[ref_id]["pdb"]
resname = self.references[ref_id]["resname"]
smarts = self.references[ref_id]["smarts"]
dist = self.references[ref_id]["pocket"]["distance"]
include = self.references[ref_id]["pocket"]["include"]
exclude = self.references[ref_id]["pocket"]["exclude"]
if not quiet:
print(f"Mapping to {ref_id}...")
try:
if isinstance(pdb, pathlib.Path):
ref_st = parmed.load_file(pdb.as_posix())
else:
ref_st = parmed.load_file(pdb)
assert ref_st
except:
raise ValueError(f"cannot load reference pdb file: {pdb}")
self.map_data[ref_id] = {'ligand': {}, 'pocket': {}}
# reference ligand
ref_ligand = self.create_ligand(ref_st, resname, "reference-ligand")
success = False
for _smarts in smarts:
query = Chem.MolFromSmarts(_smarts)
if not (self.ligand['0'].mol.HasSubstructMatch(query) and ref_ligand.mol.HasSubstructMatch(query)):
continue
ligand_match = self.ligand['0'].mol.GetSubstructMatch(query)
ref_match = ref_ligand.mol.GetSubstructMatch(query)
# GetSubstructMatch() returns the indices of the molecule’s atoms that match a substructure query.
# only a single match is returned
# the ordering of the indices corresponds to the atom ordering in the query.
# For example, the first index is for the atom in this molecule that matches the first atom in the query.
# create a ligand substructure that matches with the reference in SMARTS
indices = [self.ligand['0'].imap[i] for i in ligand_match]
self.ligand[ref_id] = Ligand(self.st, idx=indices, mol=self.ligand['0'].mol, highlight=ligand_match)
ref_ligand.idx = [ref_ligand.imap[i] for i in ref_match]
ref_ligand.highlight = ref_match
success = True
break
assert success, 'No match found for the reference ligand SMARTS'
# reference pocket
ref_pocket = self.create_pocket(ref_st, resname, dist, include, exclude)
# create a reference
self.ref[ref_id] = Reference(st=ref_st, ligand=ref_ligand, pocket=ref_pocket)
# mapping structure to the reference
# find matching residue numbers and names with reference
ref_pocket_residue_numbers = [r.number for r in ref_pocket.residues]
ref_pocket_residue_names = [r.name for r in ref_pocket.residues]
offset = [ x-ref_pocket_residue_numbers[0] for x in ref_pocket_residue_numbers ]
rmap = {} # ref_pocket_residue_number -> st residue.number
residues = []
for i in range(len(self.st.residues)):
if self.st.residues[i].name == ref_pocket_residue_names[0]:
found = True
for o, n in zip(offset, ref_pocket_residue_names):
st_resname = self.st.residues[i+o].name
if st_resname in ['HIE', 'HID']:
st_resname = 'HIS'
if st_resname != n:
found = False
break
if found: # sequence matches
for o, r, n in zip(offset, ref_pocket_residue_numbers, ref_pocket_residue_names):
st_residx = i + o
st_residue_name = self.st.residues[st_residx].name
st_residue_number = self.st.residues[st_residx].number
rmap[r] = st_residue_number
residues.append(self.st.residues[st_residx])
try:
assert len(rmap) > 0
except:
print("ref_pocket_residue_numbers", ref_pocket_residue_numbers)
print("ref_pocket_residue_nanes", ref_pocket_residue_names)
raise ValueError('No match found for the reference pocket.')
indices = []
for i in ref_pocket.idx:
ai = ref_pocket.st.atoms[i]
matching_residue_number = rmap[ai.residue.number]
for aj in self.st.atoms:
if aj.residue.number == matching_residue_number and aj.name == ai.name:
indices.append(aj.idx)
# create a pocket that matches the reference pocket
self.pocket[ref_id] = Pocket(self.st, idx=indices, residues=residues)
try:
assert len(self.ligand[ref_id].idx) == len(self.ref[ref_id].ligand.idx)
assert len(self.pocket[ref_id].idx) == len(self.ref[ref_id].pocket.idx)
except:
raise ValueError('Numbers of indices between reference and structure do not match.')
if not quiet:
print(f"number of ligand atoms: {len(self.ligand[ref_id].idx)}")
print(f"number of pocket atoms: {len(self.pocket[ref_id].idx)}")
self.create_map_data(ref_id)
def molview(self, ref_id:str | None = None, width:int=600, height:int=400) -> None:
"""Depict a ligand with substructure highlights.
Args:
ref_id (str | None, optional): id of reference structure. Defaults to None.
width (int, optional): width. Defaults to 600.
height (int, optional): height. Defaults to 400.
"""
if ref_id is None:
highlight = []
for k in self.references:
highlight.extend(self.ligand[k].highlight)
view_svg(self.ligand['0'].mol, highlight=highlight, width=width, height=height)
else:
view_svg(self.ref[ref_id].ligand.mol, highlight=self.ref[ref_id].ligand.highlight)
def distance(self) -> None:
"""Summary of centroid distances.
"""
for k in self.references:
self.centroid[k] = {
"ref_pocket" : self.ref[k].pocket.centroid(),
"ref_ligand" : self.ref[k].ligand.centroid(),
"pocket": self.pocket[k].centroid(),
"ligand": self.ligand[k].centroid(),
}
print(f"Centroid({k}):")
for kk, v in self.centroid[k].items():
print(f" {kk}: {v}")
dpls = np.linalg.norm(self.centroid[k]["pocket"] - self.centroid[k]["ligand"])
dplr = np.linalg.norm(self.centroid[k]["ref_pocket"] - self.centroid[k]["ref_ligand"])
print(f"Distance Centroid({k} pocket)-Centroid({k} ligand) {dpls:8.3f} (reference: {dplr:5.3f})")
[k1, k2] = [ k for k in self.references ]
dpp = np.linalg.norm(self.centroid[k1]["pocket"] - self.centroid[k2]["pocket"])
dll = np.linalg.norm(self.centroid[k1]["ligand"] - self.centroid[k2]["ligand"])
print(f"Distance Centroid({k1} pocket)-Centroid({k2} pocket) {dpp:8.3f}")
print(f"Distance Centroid({k1} ligand)-Centroid({k2} ligand) {dll:8.3f}")
def rmsd(self) -> None:
"""Summary of RMSD."""
for k in self.references:
indices = self.rmsd_idx_group[k]
(_rot, _trans, _centroid, _rmsd) = kabsch_algorithm(self.st.coordinates[indices,:], self.rmsd_ref_coor[indices,:])
print(f"RMSD ({k} pocket and ligand) {_rmsd:8.3f}")
def move_ligand_to(self, ref_id:str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Get transformations to move ligand coordinates to the supposed binding pocket.
Args:
ref_id (str): id of reference structure (pocket).
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: (transformed coordinates, centroid, rotation matrix, translation vector)
"""
# 1. align the reference to the structure using the reference pocket
# ref_pocket_pos = self.ref_st[ref_id][self.ref_pocket_idx_group[ref_id],:]
# pocket_pos = self.st.coordinates[self.pocket_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ref[ref_id].pocket.coor(),
self.pocket[ref_id].coor())
# aligned_ref_st_pos = np.dot(self.ref_st[ref_id].coordinates-centroid, rot.T) + centroid + trans
aligned_ref = np.dot(self.ref[ref_id].coor() - centroid, rot.T) + centroid + trans
# 2. align the structure ligand to the reference ligand
aligned_ref_ligand_pos = aligned_ref[self.ref[ref_id].ligand.idx,:]
# ligand_pos = self.st.coordinates[self.ligand_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ligand[ref_id].coor(),
aligned_ref_ligand_pos)
# ligand = self.st.coordinates[self.ligand_indexes,:]
# transform whole ligand
transformed = np.dot(self.ligand['0'].coor() - centroid, rot.T) + centroid + trans
return (transformed, centroid, rot, trans)
def move_molecule_to(self, molecule_id:str, ref_id:str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Get transformations to move protein coordinates to the supposed ligand.
Args:
molecule_id (str): id of molecule.
ref_id (str): id of reference structure(ligand)
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: _description_
"""
# 1. align the reference to the structure using the reference ligand
# ref_ligand_pos = self.ref_st[ref_id].coordinates[self.ref_ligand_idx_group[ref_id],:]
# ligand_pos = self.st.coordinates[self.ligand_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ref[ref_id].ligand.coor(),
self.ligand[ref_id].coor())
# aligned_ref_st_pos = np.dot(self.ref_st[ref_id].coordinates-centroid, rot.T) + centroid + trans
aligned_ref = np.dot(self.ref[ref_id].coor() - centroid, rot.T) + centroid + trans
# 2. align the structure pocket to the reference pocket
aligned_ref_pocket_pos = aligned_ref[self.ref[ref_id].pocket.idx,:]
# pocket_pos = self.st.coordinates[self.pocket_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.pocket[ref_id].coor(),
aligned_ref_pocket_pos)
transformed = np.dot(self.molecule[molecule_id].coor() -centroid, rot.T) + centroid + trans
return (transformed, centroid, rot, trans)
def save_molecule(self, molecule_id:str, path:str | pathlib.Path, overwrite:bool=True) -> None:
"""Save molecular coordinates to an output file.
Args:
molecule_id (str): id of molecule.
path (str | pathlib.Path): output file path.
overwrite (bool, optional): whether to overwrite. Defaults to True.
"""
if isinstance(path, pathlib.Path):
path = path.as_posix()
st = self.molecule[molecule_id].st
st[self.molecule[molecule_id].idx].save(path, overwrite=overwrite)
centroid = {}
instance-attribute
imap = {(a.residue.number, a.name): a.idxfor a in st.atoms}
instance-attribute
ligand = {}
instance-attribute
map = {}
instance-attribute
map_data = {}
instance-attribute
molecule = {}
instance-attribute
pocket = {}
instance-attribute
ref = {}
instance-attribute
references = references
instance-attribute
rmsd_idx_group = {}
instance-attribute
rmsd_ref_coor = np.zeros((len(st.atoms), 3))
instance-attribute
st = st
instance-attribute
workdir = workdir
instance-attribute
__init__(st, molecules, references, ligand_resname, workdir='.')
Source code in mdscribe/ternary/complex.py
def __init__(self,
st: parmed.structure.Structure,
molecules: Dict,
references: Dict,
ligand_resname: str,
workdir: str | pathlib.Path = '.',
):
# parmed.struct.Structure objects
self.st = st # md system
self.imap = {(a.residue.number,a.name) : a.idx for a in st.atoms}
self.molecule = {}
for k,v in molecules.items():
idx = [self.imap[(b.residue.number, b.name)] for b in st[v].atoms]
self.molecule[k] = Molecule(st, idx)
self.references = references
if not isinstance(workdir, pathlib.Path):
workdir = pathlib.Path(workdir)
workdir.mkdir(parents=True, exist_ok=True)
self.workdir = workdir
self.map = {}
self.map_data = {}
self.ligand = {}
self.pocket = {}
self.ref = {}
self.rmsd_ref_coor = np.zeros((len(st.atoms), 3))
self.rmsd_idx_group = {}
self.centroid = {}
# reference coordinates for RMSDForce
# should have identical number of coordinates as the self.st
# even though all the coordinates are not used
self.ligand['0'] = self.create_ligand(st, ligand_resname, "structure-ligand")
for k in references:
self.map_to_reference(k)
self.rmsd_ref_coor[np.array(self.pocket[k].idx),:] = self.ref[k].pocket.coor()
self.rmsd_ref_coor[np.array(self.ligand[k].idx),:] = self.ref[k].ligand.coor()
self.rmsd_idx_group[k] = np.concatenate((self.pocket[k].idx, self.ligand[k].idx,))
self.map[k] = {
'ligand': pd.DataFrame(self.map_data[k]['ligand']),
'pocket': pd.DataFrame(self.map_data[k]['pocket']),
}
create_ligand(st, resname, prefix)
Create a Ligand class object.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
st
|
Structure
|
input structure. |
required |
resname
|
str
|
residue name for ligand. |
required |
prefix
|
str
|
prefix for output pdb file. |
required |
Raises:
| Type | Description |
|---|---|
ValueError
|
when residue name does not exist. |
Returns:
| Name | Type | Description |
|---|---|---|
Ligand |
Ligand
|
object containing ligand information. |
Source code in mdscribe/ternary/complex.py
def create_ligand(self, st:parmed.structure.Structure, resname:str, prefix:str) -> Ligand:
"""Create a Ligand class object.
Args:
st (parmed.structure.Structure): input structure.
resname (str): residue name for ligand.
prefix (str): prefix for output pdb file.
Raises:
ValueError: when residue name does not exist.
Returns:
Ligand: object containing ligand information.
"""
outfile = (self.workdir / f"{prefix}-{resname}.pdb").as_posix()
try:
idx0 = [atom.idx for atom in st.atoms if (atom.residue.name == resname)]
assert len(idx0) > 0
except:
raise ValueError(f"{resname} does not exist")
try:
# write the ligand to a pdb file which then is read by Chem.MolFromPDBFile to create a rdkit.Chem.Mol
# non-hydrogen atoms
# Order of atoms in the pdb is same as that of the rdmol
indices = []
imap = {}
pdb_idx = 0
for atom in st.atoms:
if atom.residue.name == resname and atom.atomic_number > 1:
imap[pdb_idx] = atom.idx
indices.append(atom.idx)
pdb_idx += 1
st[f':{resname}&!@H='].save(outfile, overwrite=True)
mol = Chem.MolFromPDBFile(outfile)
assert mol
assert imap
except:
raise ValueError(f"cannot create molecule for {resname}")
return Ligand(st=st, idx0=idx0, idx=indices, imap=imap, mol=mol)
create_map_data(ref_id)
Create a map/dictionary for atom indexes.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ref_id
|
str
|
id for reference. |
required |
Source code in mdscribe/ternary/complex.py
def create_map_data(self, ref_id:str) -> None:
"""Create a map/dictionary for atom indexes.
Args:
ref_id (str): id for reference.
"""
self.map_data[ref_id] = {
'ligand' : {
'ref_idx': self.ref[ref_id].ligand.idx,
'ref_name': self.ref[ref_id].ligand.name(),
'ref_atomic_number' : self.ref[ref_id].ligand.atomic_number(),
'ref_residue_name' : self.ref[ref_id].ligand.residue_name(),
'ref_residue_number' : self.ref[ref_id].ligand.residue_number(),
'idx': self.ligand[ref_id].idx,
'name' : self.ligand[ref_id].name(),
'atomic_number': self.ligand[ref_id].atomic_number(),
'residue_name' : self.ligand[ref_id].residue_name(),
'residue_number' : self.ligand[ref_id].residue_number(),
},
'pocket' : {
'ref_idx' : self.ref[ref_id].pocket.idx,
'ref_name' : self.ref[ref_id].pocket.name(),
'ref_residue_name' : self.ref[ref_id].pocket.residue_name(),
'ref_residue_number' : self.ref[ref_id].pocket.residue_number(),
'idx' : self.pocket[ref_id].idx,
'name' : self.pocket[ref_id].name(),
'residue_name' : self.pocket[ref_id].residue_name(),
'residue_number' : self.pocket[ref_id].residue_number(),
}
}
create_pocket(st, resname, dist, include, exclude)
Create a Pocket class object.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
st
|
Structure
|
input structure. |
required |
resname
|
str
|
residue name for ligand. |
required |
dist
|
float
|
distance cutoff from ligand which is regarded as pocket. |
required |
include
|
str
|
residue or atom selection to include as pocket. |
required |
exclude
|
str
|
residue or atom selection to exlude from pocket. |
required |
Raises:
| Type | Description |
|---|---|
ValueError
|
when pocket cannot be defined. |
Returns:
| Name | Type | Description |
|---|---|---|
Pocket |
Pocket
|
object containing pocket information. |
Source code in mdscribe/ternary/complex.py
def create_pocket(self, st:parmed.structure.Structure, resname:str, dist:float, include:str, exclude:str) -> Pocket:
"""Create a Pocket class object.
Args:
st (parmed.structure.Structure): input structure.
resname (str): residue name for ligand.
dist (float): distance cutoff from ligand which is regarded as pocket.
include (str): residue or atom selection to include as pocket.
exclude (str): residue or atom selection to exlude from pocket.
Raises:
ValueError: when pocket cannot be defined.
Returns:
Pocket: object containing pocket information.
"""
try:
# select all `pocket_atom` atoms within `pocket_dist` from the `ref_resname` residues and save to a pdb.
# the whole residue is selected if any atom satisfies the distance criteria
pocket = st[f"(:{resname}<:{dist})&(!:{resname})&({include})&!({exclude})"]
imap = {(a.residue.number, a.name) : a.idx for a in st.atoms}
indices = [imap[(b.residue.number, b.name)] for b in pocket.atoms]
assert len(pocket.residues) > 0
assert len(pocket.atoms) > 0
except:
raise ValueError("cannot define reference ligand binding pocket")
return Pocket(st=st, idx=indices, residues=pocket.residues, imap=imap)
distance()
Summary of centroid distances.
Source code in mdscribe/ternary/complex.py
def distance(self) -> None:
"""Summary of centroid distances.
"""
for k in self.references:
self.centroid[k] = {
"ref_pocket" : self.ref[k].pocket.centroid(),
"ref_ligand" : self.ref[k].ligand.centroid(),
"pocket": self.pocket[k].centroid(),
"ligand": self.ligand[k].centroid(),
}
print(f"Centroid({k}):")
for kk, v in self.centroid[k].items():
print(f" {kk}: {v}")
dpls = np.linalg.norm(self.centroid[k]["pocket"] - self.centroid[k]["ligand"])
dplr = np.linalg.norm(self.centroid[k]["ref_pocket"] - self.centroid[k]["ref_ligand"])
print(f"Distance Centroid({k} pocket)-Centroid({k} ligand) {dpls:8.3f} (reference: {dplr:5.3f})")
[k1, k2] = [ k for k in self.references ]
dpp = np.linalg.norm(self.centroid[k1]["pocket"] - self.centroid[k2]["pocket"])
dll = np.linalg.norm(self.centroid[k1]["ligand"] - self.centroid[k2]["ligand"])
print(f"Distance Centroid({k1} pocket)-Centroid({k2} pocket) {dpp:8.3f}")
print(f"Distance Centroid({k1} ligand)-Centroid({k2} ligand) {dll:8.3f}")
map_to_reference(ref_id, quiet=False)
Map structure to the given reference.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ref_id
|
str
|
id of reference structure. |
required |
quiet
|
bool
|
whether to print details. Defaults to False. |
False
|
Raises:
| Type | Description |
|---|---|
ValueError
|
when mapping fails. |
Source code in mdscribe/ternary/complex.py
def map_to_reference(self, ref_id: str, quiet:bool=False) -> None:
"""Map structure to the given reference.
Args:
ref_id (str): id of reference structure.
quiet (bool, optional): whether to print details. Defaults to False.
Raises:
ValueError: when mapping fails.
"""
pdb = self.references[ref_id]["pdb"]
resname = self.references[ref_id]["resname"]
smarts = self.references[ref_id]["smarts"]
dist = self.references[ref_id]["pocket"]["distance"]
include = self.references[ref_id]["pocket"]["include"]
exclude = self.references[ref_id]["pocket"]["exclude"]
if not quiet:
print(f"Mapping to {ref_id}...")
try:
if isinstance(pdb, pathlib.Path):
ref_st = parmed.load_file(pdb.as_posix())
else:
ref_st = parmed.load_file(pdb)
assert ref_st
except:
raise ValueError(f"cannot load reference pdb file: {pdb}")
self.map_data[ref_id] = {'ligand': {}, 'pocket': {}}
# reference ligand
ref_ligand = self.create_ligand(ref_st, resname, "reference-ligand")
success = False
for _smarts in smarts:
query = Chem.MolFromSmarts(_smarts)
if not (self.ligand['0'].mol.HasSubstructMatch(query) and ref_ligand.mol.HasSubstructMatch(query)):
continue
ligand_match = self.ligand['0'].mol.GetSubstructMatch(query)
ref_match = ref_ligand.mol.GetSubstructMatch(query)
# GetSubstructMatch() returns the indices of the molecule’s atoms that match a substructure query.
# only a single match is returned
# the ordering of the indices corresponds to the atom ordering in the query.
# For example, the first index is for the atom in this molecule that matches the first atom in the query.
# create a ligand substructure that matches with the reference in SMARTS
indices = [self.ligand['0'].imap[i] for i in ligand_match]
self.ligand[ref_id] = Ligand(self.st, idx=indices, mol=self.ligand['0'].mol, highlight=ligand_match)
ref_ligand.idx = [ref_ligand.imap[i] for i in ref_match]
ref_ligand.highlight = ref_match
success = True
break
assert success, 'No match found for the reference ligand SMARTS'
# reference pocket
ref_pocket = self.create_pocket(ref_st, resname, dist, include, exclude)
# create a reference
self.ref[ref_id] = Reference(st=ref_st, ligand=ref_ligand, pocket=ref_pocket)
# mapping structure to the reference
# find matching residue numbers and names with reference
ref_pocket_residue_numbers = [r.number for r in ref_pocket.residues]
ref_pocket_residue_names = [r.name for r in ref_pocket.residues]
offset = [ x-ref_pocket_residue_numbers[0] for x in ref_pocket_residue_numbers ]
rmap = {} # ref_pocket_residue_number -> st residue.number
residues = []
for i in range(len(self.st.residues)):
if self.st.residues[i].name == ref_pocket_residue_names[0]:
found = True
for o, n in zip(offset, ref_pocket_residue_names):
st_resname = self.st.residues[i+o].name
if st_resname in ['HIE', 'HID']:
st_resname = 'HIS'
if st_resname != n:
found = False
break
if found: # sequence matches
for o, r, n in zip(offset, ref_pocket_residue_numbers, ref_pocket_residue_names):
st_residx = i + o
st_residue_name = self.st.residues[st_residx].name
st_residue_number = self.st.residues[st_residx].number
rmap[r] = st_residue_number
residues.append(self.st.residues[st_residx])
try:
assert len(rmap) > 0
except:
print("ref_pocket_residue_numbers", ref_pocket_residue_numbers)
print("ref_pocket_residue_nanes", ref_pocket_residue_names)
raise ValueError('No match found for the reference pocket.')
indices = []
for i in ref_pocket.idx:
ai = ref_pocket.st.atoms[i]
matching_residue_number = rmap[ai.residue.number]
for aj in self.st.atoms:
if aj.residue.number == matching_residue_number and aj.name == ai.name:
indices.append(aj.idx)
# create a pocket that matches the reference pocket
self.pocket[ref_id] = Pocket(self.st, idx=indices, residues=residues)
try:
assert len(self.ligand[ref_id].idx) == len(self.ref[ref_id].ligand.idx)
assert len(self.pocket[ref_id].idx) == len(self.ref[ref_id].pocket.idx)
except:
raise ValueError('Numbers of indices between reference and structure do not match.')
if not quiet:
print(f"number of ligand atoms: {len(self.ligand[ref_id].idx)}")
print(f"number of pocket atoms: {len(self.pocket[ref_id].idx)}")
self.create_map_data(ref_id)
molview(ref_id=None, width=600, height=400)
Depict a ligand with substructure highlights.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ref_id
|
str | None
|
id of reference structure. Defaults to None. |
None
|
width
|
int
|
width. Defaults to 600. |
600
|
height
|
int
|
height. Defaults to 400. |
400
|
Source code in mdscribe/ternary/complex.py
def molview(self, ref_id:str | None = None, width:int=600, height:int=400) -> None:
"""Depict a ligand with substructure highlights.
Args:
ref_id (str | None, optional): id of reference structure. Defaults to None.
width (int, optional): width. Defaults to 600.
height (int, optional): height. Defaults to 400.
"""
if ref_id is None:
highlight = []
for k in self.references:
highlight.extend(self.ligand[k].highlight)
view_svg(self.ligand['0'].mol, highlight=highlight, width=width, height=height)
else:
view_svg(self.ref[ref_id].ligand.mol, highlight=self.ref[ref_id].ligand.highlight)
move_ligand_to(ref_id)
Get transformations to move ligand coordinates to the supposed binding pocket.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ref_id
|
str
|
id of reference structure (pocket). |
required |
Returns:
| Type | Description |
|---|---|
Tuple[ndarray, ndarray, ndarray, ndarray]
|
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: (transformed coordinates, centroid, rotation matrix, translation vector) |
Source code in mdscribe/ternary/complex.py
def move_ligand_to(self, ref_id:str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Get transformations to move ligand coordinates to the supposed binding pocket.
Args:
ref_id (str): id of reference structure (pocket).
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: (transformed coordinates, centroid, rotation matrix, translation vector)
"""
# 1. align the reference to the structure using the reference pocket
# ref_pocket_pos = self.ref_st[ref_id][self.ref_pocket_idx_group[ref_id],:]
# pocket_pos = self.st.coordinates[self.pocket_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ref[ref_id].pocket.coor(),
self.pocket[ref_id].coor())
# aligned_ref_st_pos = np.dot(self.ref_st[ref_id].coordinates-centroid, rot.T) + centroid + trans
aligned_ref = np.dot(self.ref[ref_id].coor() - centroid, rot.T) + centroid + trans
# 2. align the structure ligand to the reference ligand
aligned_ref_ligand_pos = aligned_ref[self.ref[ref_id].ligand.idx,:]
# ligand_pos = self.st.coordinates[self.ligand_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ligand[ref_id].coor(),
aligned_ref_ligand_pos)
# ligand = self.st.coordinates[self.ligand_indexes,:]
# transform whole ligand
transformed = np.dot(self.ligand['0'].coor() - centroid, rot.T) + centroid + trans
return (transformed, centroid, rot, trans)
move_molecule_to(molecule_id, ref_id)
Get transformations to move protein coordinates to the supposed ligand.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
molecule_id
|
str
|
id of molecule. |
required |
ref_id
|
str
|
id of reference structure(ligand) |
required |
Returns:
| Type | Description |
|---|---|
Tuple[ndarray, ndarray, ndarray, ndarray]
|
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: description |
Source code in mdscribe/ternary/complex.py
def move_molecule_to(self, molecule_id:str, ref_id:str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Get transformations to move protein coordinates to the supposed ligand.
Args:
molecule_id (str): id of molecule.
ref_id (str): id of reference structure(ligand)
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: _description_
"""
# 1. align the reference to the structure using the reference ligand
# ref_ligand_pos = self.ref_st[ref_id].coordinates[self.ref_ligand_idx_group[ref_id],:]
# ligand_pos = self.st.coordinates[self.ligand_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.ref[ref_id].ligand.coor(),
self.ligand[ref_id].coor())
# aligned_ref_st_pos = np.dot(self.ref_st[ref_id].coordinates-centroid, rot.T) + centroid + trans
aligned_ref = np.dot(self.ref[ref_id].coor() - centroid, rot.T) + centroid + trans
# 2. align the structure pocket to the reference pocket
aligned_ref_pocket_pos = aligned_ref[self.ref[ref_id].pocket.idx,:]
# pocket_pos = self.st.coordinates[self.pocket_idx_group[ref_id],:]
(rot, trans, centroid, rmsd) = kabsch_algorithm(self.pocket[ref_id].coor(),
aligned_ref_pocket_pos)
transformed = np.dot(self.molecule[molecule_id].coor() -centroid, rot.T) + centroid + trans
return (transformed, centroid, rot, trans)
rmsd()
Summary of RMSD.
Source code in mdscribe/ternary/complex.py
save_molecule(molecule_id, path, overwrite=True)
Save molecular coordinates to an output file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
molecule_id
|
str
|
id of molecule. |
required |
path
|
str | Path
|
output file path. |
required |
overwrite
|
bool
|
whether to overwrite. Defaults to True. |
True
|
Source code in mdscribe/ternary/complex.py
def save_molecule(self, molecule_id:str, path:str | pathlib.Path, overwrite:bool=True) -> None:
"""Save molecular coordinates to an output file.
Args:
molecule_id (str): id of molecule.
path (str | pathlib.Path): output file path.
overwrite (bool, optional): whether to overwrite. Defaults to True.
"""
if isinstance(path, pathlib.Path):
path = path.as_posix()
st = self.molecule[molecule_id].st
st[self.molecule[molecule_id].idx].save(path, overwrite=overwrite)
kabsch_algorithm(P, Q)
Computes the optimal rotation and translation to align two sets of points (P -> Q), and their RMSD.
Examples:
>>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
>>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
>>> # Q is translated by (1,1,0)
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
>>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
>>> # Q is a 90-degree rotation of P around the z-axis
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
P
|
ndarray
|
subject coordinates. Not modified. |
required |
Q
|
ndarray
|
target coordinates. Not modified. |
required |
Returns:
| Type | Description |
|---|---|
Tuple[ndarray, ndarray, ndarray, float]
|
Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal translation vector, the centroid of P, and the RMSD. |
Source code in mdscribe/helper/coord.py
def kabsch_algorithm(P:np.ndarray, Q:np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]:
"""Computes the optimal rotation and translation to align two sets of points (P -> Q),
and their RMSD.
Examples:
>>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
>>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
>>> # Q is translated by (1,1,0)
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
>>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
>>> # Q is a 90-degree rotation of P around the z-axis
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
Args:
P (np.ndarray): subject coordinates. Not modified.
Q (np.ndarray): target coordinates. Not modified.
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal
translation vector, the centroid of P, and the RMSD.
"""
assert P.shape == Q.shape, "Matrix dimensions must match"
# Compute centroids
centroid_P = np.mean(P, axis=0)
centroid_Q = np.mean(Q, axis=0)
# Optimal translation
t = centroid_Q - centroid_P
# Center the points
p = P - centroid_P
q = Q - centroid_Q
# Compute the covariance matrix
H = np.dot(p.T, q)
# SVD
U, S, Vt = np.linalg.svd(H)
V = Vt.T
d = np.linalg.det(np.dot(V, U.T))
e = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, d]])
# Optimal rotation
R = np.dot(np.dot(V, e), U.T)
# RMSD
rmsd = np.sqrt(np.sum(np.square(np.dot(p, R.T) - q)) / P.shape[0])
return (R, t, centroid_P, rmsd)
view_svg(rdmol, highlight=[], width=300, height=300)
SVG depiction for the Jupyter Notebook.
Reference
https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
rdmol
|
Mol
|
rdkit molecule. |
required |
highlight
|
list
|
highlighted atom indexes. Defaults to []. |
[]
|
width
|
int
|
width. Defaults to 300. |
300
|
height
|
int
|
height. Defaults to 300. |
300
|
Source code in mdscribe/helper/svg.py
def view_svg(rdmol:Chem.Mol, highlight=[], width=300, height=300):
"""SVG depiction for the Jupyter Notebook.
Reference:
https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/
Args:
rdmol (Chem.Mol): rdkit molecule.
highlight (list, optional): highlighted atom indexes. Defaults to [].
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
"""
rdmol2d = Chem.Mol(rdmol)
AllChem.Compute2DCoords(rdmol2d)
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
drawer.DrawMolecule(rdmol2d, highlightAtoms=highlight)
drawer.FinishDrawing()
svg = drawer.GetDrawingText()
display(SVG(svg.replace("svg:","")))