Helper
mdscribe.helper
GlcNAC = {'HN2': 'H2N', 'C7': 'C2N', 'O7': 'O2N', 'C8': 'CME', 'H81': 'H1M', 'H82': 'H3M', 'H83': 'H2M', 'HO3': 'H3O', 'HO4': 'H4O', 'H61': 'H62', 'H62': 'H61', 'HO6': 'H6O'}
module-attribute
nucleic_1_3 = {'A': 'ADE', 'G': 'GUA', 'C': 'CYT', 'U': 'URA', 'T': 'THY'}
module-attribute
nucleic_3_1 = {'ADE': 'A', 'GUA': 'G', 'CYT': 'C', 'URA': 'U', 'THY': 'T'}
module-attribute
protein_1_3 = {'G': 'GLY', 'A': 'ALA', 'V': 'VAL', 'L': 'LEU', 'I': 'ILE', 'M': 'MET', 'P': 'PRO', 'F': 'PHE', 'W': 'TRP', 'S': 'SER', 'T': 'THR', 'N': 'ASN', 'Q': 'GLN', 'Y': 'TYR', 'C': 'CYS', 'K': 'LYS', 'R': 'ARG', 'H': 'HIS', 'D': 'ASP', 'E': 'GLU'}
module-attribute
protein_3_1 = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'}
module-attribute
MOL2file
Class for handling MOL2 file.
Source code in mdscribe/helper/mol2file.py
class MOL2file:
"""Class for handling MOL2 file."""
# class bound variable
name_syntax = re.compile(r'(?P<element>[a-zA-Z]+)(?P<serial>\d+)')
def __init__(self, filename:str | pathlib.Path):
self.n_atoms = 0
self.blocks = {}
self.coords = None # To be modified
self.pdblines = [] # reserved for PDB conversion
cursor = None
with open(filename, 'r') as f:
for i, line in enumerate(f):
if "@<TRIPOS>" in line:
cursor = line.split("@<TRIPOS>")[1].strip().lower()
self.blocks[cursor] = []
continue
elif line.startswith('#') or line == '\n':
continue
self.blocks[cursor].append(line)
try:
self.n_atoms = len(self.blocks['atom'])
assert self.n_atoms > 0
except:
print("MOL2 block has no atom")
sys.exit(0)
try:
assert len(self.blocks['bond']) > 0
except:
print("MOL2 block has no bond")
sys.exit(0)
self.coords = np.zeros((self.n_atoms, 3), dtype=np.float32)
altLoc = ' '
chainId = ' '
iCode = ' '
occupancy = 1.0
tempFactor = 0.0
segId = ' '
for i, a in enumerate(self.blocks['atom']):
aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
x, y, z = float(x), float(y), float(z)
self.coords[i, :] = x, y, z
m = MOL2file.name_syntax.match(name)
element = m.group("element")
self.pdblines.append(
f"HETATM {(i+1):>5} {name:<4}{altLoc}{resname:<3} {chainId}{resid:<4}{iCode} {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{tempFactor:6.2f} {segId:<4}{element:<2}{charge:<2}"
)
for i, b in enumerate(self.blocks['bond']):
bid, atom_1, atom_2, bond_type = b.split()[:4]
_pdb = f"CONECT {atom_1:>5} {atom_2:>5}"
self.pdblines.append(_pdb)
def write(self, filename:str | pathlib.Path) -> None:
"""Write to a file.
Args:
filename (str | pathlib.Path): output filename.
"""
with open(filename, 'w') as f:
i = 0
for cursor in ['molecule', 'atom', 'bond', 'substructure']:
if not cursor in self.blocks:
continue
f.write(f"@<TRIPOS>{cursor.upper()}\n")
i += 1
if cursor == 'atom':
k = 0
for line in self.blocks[cursor]:
if cursor != 'atom':
f.write(line)
else:
aid, name, x, y, z, atom_type, resid, resname, charge = line.split()[:9]
# override x,y,z with self.coords
f.write("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f} {5:>4} {6} {7} {8:>7.4f}\n".format(
aid,
name,
self.coords[k, 0],
self.coords[k, 1],
self.coords[k, 2],
atom_type,
resid,
resname,
float(charge),
))
k += 1
def transform(self, R:np.ndarray, T:np.ndarray) -> None:
"""Apply rotation/translation to the coordinates.
Args:
R (np.ndarray): 3x3 rotation matrix
T (np.ndarray): translation vector
"""
com = np.mean(self.coords, axis=0)
self.coords = np.dot(self.coords - com, R) + T
blocks = {}
instance-attribute
coords = np.zeros((self.n_atoms, 3), dtype=np.float32)
instance-attribute
n_atoms = len(self.blocks['atom'])
instance-attribute
name_syntax = re.compile('(?P<element>[a-zA-Z]+)(?P<serial>\\d+)')
class-attribute
instance-attribute
pdblines = []
instance-attribute
__init__(filename)
Source code in mdscribe/helper/mol2file.py
def __init__(self, filename:str | pathlib.Path):
self.n_atoms = 0
self.blocks = {}
self.coords = None # To be modified
self.pdblines = [] # reserved for PDB conversion
cursor = None
with open(filename, 'r') as f:
for i, line in enumerate(f):
if "@<TRIPOS>" in line:
cursor = line.split("@<TRIPOS>")[1].strip().lower()
self.blocks[cursor] = []
continue
elif line.startswith('#') or line == '\n':
continue
self.blocks[cursor].append(line)
try:
self.n_atoms = len(self.blocks['atom'])
assert self.n_atoms > 0
except:
print("MOL2 block has no atom")
sys.exit(0)
try:
assert len(self.blocks['bond']) > 0
except:
print("MOL2 block has no bond")
sys.exit(0)
self.coords = np.zeros((self.n_atoms, 3), dtype=np.float32)
altLoc = ' '
chainId = ' '
iCode = ' '
occupancy = 1.0
tempFactor = 0.0
segId = ' '
for i, a in enumerate(self.blocks['atom']):
aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
x, y, z = float(x), float(y), float(z)
self.coords[i, :] = x, y, z
m = MOL2file.name_syntax.match(name)
element = m.group("element")
self.pdblines.append(
f"HETATM {(i+1):>5} {name:<4}{altLoc}{resname:<3} {chainId}{resid:<4}{iCode} {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{tempFactor:6.2f} {segId:<4}{element:<2}{charge:<2}"
)
for i, b in enumerate(self.blocks['bond']):
bid, atom_1, atom_2, bond_type = b.split()[:4]
_pdb = f"CONECT {atom_1:>5} {atom_2:>5}"
self.pdblines.append(_pdb)
transform(R, T)
Apply rotation/translation to the coordinates.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
R
|
ndarray
|
3x3 rotation matrix |
required |
T
|
ndarray
|
translation vector |
required |
Source code in mdscribe/helper/mol2file.py
write(filename)
Write to a file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
filename
|
str | Path
|
output filename. |
required |
Source code in mdscribe/helper/mol2file.py
def write(self, filename:str | pathlib.Path) -> None:
"""Write to a file.
Args:
filename (str | pathlib.Path): output filename.
"""
with open(filename, 'w') as f:
i = 0
for cursor in ['molecule', 'atom', 'bond', 'substructure']:
if not cursor in self.blocks:
continue
f.write(f"@<TRIPOS>{cursor.upper()}\n")
i += 1
if cursor == 'atom':
k = 0
for line in self.blocks[cursor]:
if cursor != 'atom':
f.write(line)
else:
aid, name, x, y, z, atom_type, resid, resname, charge = line.split()[:9]
# override x,y,z with self.coords
f.write("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f} {5:>4} {6} {7} {8:>7.4f}\n".format(
aid,
name,
self.coords[k, 0],
self.coords[k, 1],
self.coords[k, 2],
atom_type,
resid,
resname,
float(charge),
))
k += 1
PDBfile
Source code in mdscribe/helper/pdbfile.py
class PDBfile:
def __init__(self, pdbfile:str | Path) -> None:
self.parser = PDBParser()
self.st = self.parser.get_structure("USER_PDB", pdbfile)
# atomic coordinates
coords = []
for model in self.st:
for chain in model:
for residue in chain:
for atom in residue:
coords.append(atom.get_coord())
self.coords = np.array(coords)
self.shape = self.coords.shape
def center_of_geometry(self) -> np.ndarray:
return np.mean(self.coords, axis=0)
def update_coord(self, coords:np.ndarray) -> None:
assert self.coords.shape == coords.shape, "Matrix dimensions must match"
# update atomic coordinates
atom_idx = 0
for model in self.st:
for chain in model:
for residue in chain:
for atom in residue:
atom.coord = coords[atom_idx,:]
atom_idx += 1
def transform(self,
centroid: np.ndarray = np.array([0., 0., 0.]),
rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]),
trans:np.ndarray = np.array([0., 0., 0.])) -> None:
try:
assert rot.shape == (3,3)
except:
raise ValueError('shape of rotation matrix should be (3,3)')
try:
assert trans.shape == (3,)
except:
raise ValueError('shape of translation matrix should be (3,)')
# rotation
self.coords = np.dot(self.coords - centroid, rot.T) + centroid
# translation
self.coords = self.coords + trans
# update atomic coordinates
atom_idx = 0
for model in self.st:
for chain in model:
for residue in chain:
for atom in residue:
atom.coord = self.coords[atom_idx,:]
atom_idx += 1
def get_seqeunce(self) -> None:
wrap= 50
for model in self.st:
prev_residue_number = None
header = "Model "+ str(model.id)
sequence = ""
for chain in model:
print("chain=", chain)
for residue in chain:
resname = residue.get_resname()
if not resname in aminoacids:
continue
hetflag,resseq,iCode = residue.get_id()
resSeq = int(resseq)
if prev_residue_number is None:
print("(first residue) %4d" % resSeq)
else:
num_missing_residues = resSeq-prev_residue_number-1
if num_missing_residues > 0:
sequence += "-" * num_missing_residues
print("(missing residues) %4d - %4d (%d)" %
(prev_residue_number+1,resSeq-1,num_missing_residues))
sequence += aminoacids[resname]
prev_residue_number = resSeq
print("(last residue) %4d" % resSeq)
# output
print(header)
l= len(sequence)//wrap
for i in range(0,l):
start = wrap*i
end = max(wrap*(i+1),l)
print(sequence[start:end])
@staticmethod
def readlines(pdbfile:str):
"""(Not Used) Read PDB Lines.
Args:
pdbfile (str): filename.
"""
pdblines = {1: []}
with open(pdbfile, "r") as f:
for line in f:
if line.startswith("MODEL"):
model_number = int(line.strip().split()[1])
pdblines[model_number] = []
if line.startswith("ATOM") or line.startswith("HETATM") or line.startswith("TER"):
# PDB format version 2.3
serial = line[6:12]
name = line[12:16].strip()
altLoc = line[16:17]
if altLoc=="":
altLoc=" "
resName = line[17:21].strip()
chainId = line[21:22]
if chainId=="":
chainId=" "
resSeq = int(line[22:26])
iCode = line[26:27]
if iCode=="":
iCode=" "
x = float(line[30:38])
y = float(line[38:46])
z = float(line[46:54])
occupancy = line[54:60]
tempFactor = float(line[60:66])
segId = line[72:76]
element = line[76:78]
charge = line[78:80]
def write(self, filename: str | Path,
model:Optional[List[int]] = None,
chain:Optional[List[str]] = None,
resname:Optional[List[str]] = None,
resnumber:Optional[str] = None,
) -> None:
"""Write to PDB with selections.
Examples:
>>> pdb.write(model=[0], chain=['A'])
write chain A and residues first-10,22-50,130-200,550,600-last
>>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")
Args:
filename (str | Path): output filename or path
model (Optional[List[int]], optional): list of model numbers. Defaults to None.
chain (Optional[List[str]], optional): list of chain ids. Defaults to None.
resname (Optional[List[str]], optional): list of residue names. Defaults to None.
resnumber (Optional[str], optional): residue number ranges. Defaults to None.
"""
io = PDBIO()
if (model is None) and (chain is None) and (resname is None) and (resnumber is None):
# write structure as it as
io.set_structure(self.st)
else:
# write only select model(s), chain(s) or residue(s)
# select residues by numbers
if resnumber is not None:
select_resseqs = [tuple(map(lambda s: int(s) if s else -1, r.split("-"))) for r in resnumber.split(",")]
# for resnumber="-10,22-50,130-200,550,600-"
# [(-1, 10), (22, 50), (130, 200), (550,), (600, -1)]
builder = StructureBuilder()
builder.init_structure(self.st.id)
for _model in self.st:
if (model is not None) and (_model.id not in model):
continue # reject model
builder.init_model(_model.id, serial_num=None)
for _chain in _model:
if (chain is not None) and (_chain.id not in chain):
continue # reject chain
builder.init_chain(_chain.id)
for _residue in _chain:
hetflag, resseq, iCode = _residue.get_id()
if (resname is not None) and (_residue.resname not in resname):
continue # reject residue by name
# select residue numbers
include_flags = []
for lu in select_resseqs:
if len(lu) == 1:
if lu == resseq:
include_flags.append(True)
else:
include_flags.append(False)
else:
(l,u) = lu
if (l == -1 or l <= resseq) and (u ==-1 or u >= resseq):
include_flags.append(True)
else:
include_flags.append(False)
if not any(include_flags):
continue # reject residue by number
builder.init_residue(_residue.resname, hetflag, resseq, iCode)
for atom in _residue:
builder.init_atom(atom.name,
atom.coord,
atom.bfactor,
atom.occupancy,
atom.altloc,
atom.fullname,
None, # serial_number
atom.element,
)
io.set_structure(builder.get_structure())
with open(filename, "w") as f:
io.save(f)
def reorder(self) -> None:
builder = StructureBuilder()
builder.init_structure(self.st.id)
for model in self.st:
builder.init_model(model.id, serial_num=None)
for chain in sorted(model, key=lambda x:x.id):
builder.init_chain(chain.id)
for residue in sorted(chain, key=lambda x: x.get_id()[1]):
hetflag, resseq, iCode = residue.get_id()
builder.init_residue(residue.resname, hetflag, resseq, iCode)
for atom in residue:
builder.init_atom(atom.name,
atom.coord,
atom.bfactor,
atom.occupancy,
atom.altloc,
atom.fullname,
None, # serial_number
atom.element,
)
self.st = builder.get_structure()
def rename(self,
chain:Optional[dict] = None,
resname:Optional[dict] = None,
atom:Optional[dict] = None) -> None:
"""Rename PDB chains/residues/atoms.
Examples:
Rename chain 'C' to 'A' and 'D' to 'B'.
>>> rename(chain={'C':'A', 'D':'B'})
Rename residue 'UNL' to 'LIG' for all chains.
>>> rename(resname={'UNL' : 'LIG'})
Rename residue 'UNL' to 'LIG' only for chain B.
>>> rename(chain={"B":"B"}, resname={'UNL' : 'LIG'})
Rename atom '2H2' to 'H22' for all residues and chains.
>>> rename(atom={"2H2": "H22"})
Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.
>>> rename(chain={"C:C"}, resname={"UNL":"UNL"}, atom={"H1" : "H11", "H2": "H12"})
Args:
chain (Optional[dict], optional): map of chain ids {old:new}. Defaults to None.
resname (Optional[dict], optional): map of residue names {old:new}. Defaults to None.
atom (Optional[dict], optional): map of atom names {old:new}. Defaults to None.
"""
for model in self.st:
for _chain in model:
if chain is None:
subject_chains = [c.id for c in model.get_chains()]
else:
subject_chains = [k for k in chain]
if _chain.id not in subject_chains:
continue
if chain:
old_chain_id = _chain.id
new_chain_id = chain[old_chain_id]
_chain.id = new_chain_id
if old_chain_id != new_chain_id:
print(f"renamed chain id : ", end=" ")
print(f"{old_chain_id}", end=" -> ")
print(f"{new_chain_id}")
for _residue in _chain:
if resname is None:
subject_resnames = [r.resname for r in _chain.get_residues()]
else:
subject_resnames = [k for k in resname]
if _residue.resname not in subject_resnames:
continue
hetflag, resseq, iCode = _residue.get_id()
if resname:
old_resname = _residue.resname
new_resname = resname[old_resname]
_residue.resname = new_resname
if old_resname != new_resname:
print(f"renamed residue {_chain.id}", end=" : ")
print(f"{old_resname}({resseq})", end=" ->")
print(f"{new_resname}({resseq})")
else:
old_resname = None
new_resname = None
for _atom in _residue:
if atom is None:
subject_atoms = [a.name for a in _residue.get_atoms()]
else:
subject_atoms = [k for k in atom]
if _atom.name not in subject_atoms:
continue
if atom:
old_atom_name = _atom.name
new_atom_name = atom[old_atom_name]
_atom.name = new_atom_name
if old_atom_name != new_atom_name:
print(f"renamed atom {_chain.id}.{_residue.resname}({resseq})", end=" : ")
print(f"{old_atom_name}", end=" -> ")
print(f"{new_atom_name}")
def reorient(self,
residue_selection:str="",
invert_Z:bool=False,
offset:np.ndarray=np.array([0.,0.,0.])) -> None:
"""Reorient coordinates according to the principal axis.
Examples:
Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.
>>> pdb.reorient("A:12-50,B:100-200")
Args:
residue_selection (str, optional): residues for principal axis calculation. Defaults to "".
invert_Z (bool, optional): whether to invert Z axis. Defaults to False.
offset (np.ndarray, optional): translate coordinates. Defaults to np.array([0.,0.,0.]).
"""
coords = []
if residue_selection:
subset_residues = {}
# subset of atoms for Eigenvector/Eigenvalue calculations
for chain in residue_selection.split(","):
(chainId, resSeq_range) = chain.split(":")
resSeq_range_tuple = tuple(sorted(map(int, resSeq_range.split("-"))))
if chainId in subset_residues:
subset_residues[chainId].append(resSeq_range_tuple)
else:
subset_residues[chainId] = [resSeq_range_tuple]
for model in self.st:
for chain in model:
if chain.id not in subset_residues:
continue
for residue in chain:
hetflag, resseq, iCode = residue.get_id()
flag = []
for (lb, ub) in subset_residues[chain.id]:
if resseq >= lb and resseq <= ub:
flag.append(True)
else:
flag.append(False)
if any(flag):
coords.extend([atom.coord for atom in residue])
else:
for model in self.st:
for chain in model:
for residue in chain:
coords.extend([atom.coord for atom in residue])
coords = np.array(coords, float)
centroid = np.mean(coords, axis=0)
box_min = np.min(coords, axis=0)
box_max = np.max(coords, axis=0)
print("REMARK geometric center:", centroid)
print("REMARK coordinate range:", box_min, box_max)
print("REMARK box size:", box_max-box_min)
coords = coords - centroid
# import numpy as np
# data = np.array([[1, 2], [3, 4], [5, 6]]) # Example data
# cov_matrix = np.cov(data, rowvar=False)
# eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
# rotation_matrix = eigenvectors
# rotated_data = np.dot(data, rotation_matrix.T)
inertia = np.dot(coords.transpose(), coords)
w,v = np.linalg.eig(inertia)
# warning eigen values are not necessary ordered!
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
#--------------------------------------------------------------------------
# order eigen values (and eigen vectors)
#
# axis1 is the principal axis with the biggest eigen value (eval1)
# axis2 is the principal axis with the second biggest eigen value (eval2)
# axis3 is the principal axis with the smallest eigen value (eval3)
#--------------------------------------------------------------------------
# axis1 --> Z
# axis2 --> Y
# axis3 --> X
order = np.argsort(w)
eval3, eval2, eval1 = w[order]
axis3, axis2, axis1 = v[:, order]
print("REMARK x-axis",axis3,"eval=",eval3)
print("REMARK y-axis",axis2,"eval=",eval2)
print("REMARK z-axis",axis1,"eval=",eval1)
R = np.array([axis3, axis2, axis1]) # decreasing order
# R_inv = np.linalg.inv(R)
self.transform(rot=R, trans=-centroid)
# xyz = np.array([x,y,z])
# xyz = xyz - center
# xyz = np.dot(R_inv, xyz)
# x, y, z = xyz
# if args.inverse_z_axis:
# z = -z
# x += args.offset_x
# y += args.offset_y
# z += args.offset_z
@staticmethod
def expand_residues(txt):
residues = []
step1 = txt.split(",")
for item in step1:
step2 = item.split("-")
if len(step2) == 2:
residues += range(int(step2[0]), int(step2[1])+1)
elif len(step2) == 1:
residues += [ int(step2[0]) ]
return sorted(residues)
def contacts(self,
residue_selection_1:str,
residue_selection_2:str,
cutoff:float=4.0) -> None:
residues_1 = PDBfile.expand_residues(residue_selection_1)
residues_2 = PDBfile.expand_residues(residue_selection_2)
source_atoms = []
target_atoms = []
for model in self.st:
for chain in model:
for residue in chain:
hetflag, resSeq, iCode = residue.get_id()
resName = residue.get_resname()
if resSeq in residues_1:
source_atoms += [ atom for atom in residue ]
elif resSeq in residues_2:
target_atoms += [ atom for atom in residue ]
close_contacts = []
for ai in source_atoms:
for aj in target_atoms:
d = ai-aj
if d < cutoff:
close_contacts.append((ai, aj, d))
close_contacts = sorted(close_contacts, key=lambda x: (x[0], x[1], x[2]))
for (ai, aj, d) in close_contacts:
i_hetflag, i_resSeq, i_iCode = ai.get_parent().get_id()
j_hetflag, j_resSeq, j_iCode = aj.get_parent().get_id()
print("%4d %4s %4d %4s %3.1f" % (i_resSeq, ai.id, j_resSeq, aj.id, d))
coords = np.array(coords)
instance-attribute
parser = PDBParser()
instance-attribute
shape = self.coords.shape
instance-attribute
st = self.parser.get_structure('USER_PDB', pdbfile)
instance-attribute
__init__(pdbfile)
Source code in mdscribe/helper/pdbfile.py
def __init__(self, pdbfile:str | Path) -> None:
self.parser = PDBParser()
self.st = self.parser.get_structure("USER_PDB", pdbfile)
# atomic coordinates
coords = []
for model in self.st:
for chain in model:
for residue in chain:
for atom in residue:
coords.append(atom.get_coord())
self.coords = np.array(coords)
self.shape = self.coords.shape
center_of_geometry()
contacts(residue_selection_1, residue_selection_2, cutoff=4.0)
Source code in mdscribe/helper/pdbfile.py
def contacts(self,
residue_selection_1:str,
residue_selection_2:str,
cutoff:float=4.0) -> None:
residues_1 = PDBfile.expand_residues(residue_selection_1)
residues_2 = PDBfile.expand_residues(residue_selection_2)
source_atoms = []
target_atoms = []
for model in self.st:
for chain in model:
for residue in chain:
hetflag, resSeq, iCode = residue.get_id()
resName = residue.get_resname()
if resSeq in residues_1:
source_atoms += [ atom for atom in residue ]
elif resSeq in residues_2:
target_atoms += [ atom for atom in residue ]
close_contacts = []
for ai in source_atoms:
for aj in target_atoms:
d = ai-aj
if d < cutoff:
close_contacts.append((ai, aj, d))
close_contacts = sorted(close_contacts, key=lambda x: (x[0], x[1], x[2]))
for (ai, aj, d) in close_contacts:
i_hetflag, i_resSeq, i_iCode = ai.get_parent().get_id()
j_hetflag, j_resSeq, j_iCode = aj.get_parent().get_id()
print("%4d %4s %4d %4s %3.1f" % (i_resSeq, ai.id, j_resSeq, aj.id, d))
expand_residues(txt)
staticmethod
Source code in mdscribe/helper/pdbfile.py
get_seqeunce()
Source code in mdscribe/helper/pdbfile.py
def get_seqeunce(self) -> None:
wrap= 50
for model in self.st:
prev_residue_number = None
header = "Model "+ str(model.id)
sequence = ""
for chain in model:
print("chain=", chain)
for residue in chain:
resname = residue.get_resname()
if not resname in aminoacids:
continue
hetflag,resseq,iCode = residue.get_id()
resSeq = int(resseq)
if prev_residue_number is None:
print("(first residue) %4d" % resSeq)
else:
num_missing_residues = resSeq-prev_residue_number-1
if num_missing_residues > 0:
sequence += "-" * num_missing_residues
print("(missing residues) %4d - %4d (%d)" %
(prev_residue_number+1,resSeq-1,num_missing_residues))
sequence += aminoacids[resname]
prev_residue_number = resSeq
print("(last residue) %4d" % resSeq)
# output
print(header)
l= len(sequence)//wrap
for i in range(0,l):
start = wrap*i
end = max(wrap*(i+1),l)
print(sequence[start:end])
readlines(pdbfile)
staticmethod
(Not Used) Read PDB Lines.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
pdbfile
|
str
|
filename. |
required |
Source code in mdscribe/helper/pdbfile.py
@staticmethod
def readlines(pdbfile:str):
"""(Not Used) Read PDB Lines.
Args:
pdbfile (str): filename.
"""
pdblines = {1: []}
with open(pdbfile, "r") as f:
for line in f:
if line.startswith("MODEL"):
model_number = int(line.strip().split()[1])
pdblines[model_number] = []
if line.startswith("ATOM") or line.startswith("HETATM") or line.startswith("TER"):
# PDB format version 2.3
serial = line[6:12]
name = line[12:16].strip()
altLoc = line[16:17]
if altLoc=="":
altLoc=" "
resName = line[17:21].strip()
chainId = line[21:22]
if chainId=="":
chainId=" "
resSeq = int(line[22:26])
iCode = line[26:27]
if iCode=="":
iCode=" "
x = float(line[30:38])
y = float(line[38:46])
z = float(line[46:54])
occupancy = line[54:60]
tempFactor = float(line[60:66])
segId = line[72:76]
element = line[76:78]
charge = line[78:80]
rename(chain=None, resname=None, atom=None)
Rename PDB chains/residues/atoms.
Examples:
Rename chain 'C' to 'A' and 'D' to 'B'.
Rename residue 'UNL' to 'LIG' for all chains.
Rename residue 'UNL' to 'LIG' only for chain B.
Rename atom '2H2' to 'H22' for all residues and chains.
Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chain
|
Optional[dict]
|
map of chain ids {old:new}. Defaults to None. |
None
|
resname
|
Optional[dict]
|
map of residue names {old:new}. Defaults to None. |
None
|
atom
|
Optional[dict]
|
map of atom names {old:new}. Defaults to None. |
None
|
Source code in mdscribe/helper/pdbfile.py
def rename(self,
chain:Optional[dict] = None,
resname:Optional[dict] = None,
atom:Optional[dict] = None) -> None:
"""Rename PDB chains/residues/atoms.
Examples:
Rename chain 'C' to 'A' and 'D' to 'B'.
>>> rename(chain={'C':'A', 'D':'B'})
Rename residue 'UNL' to 'LIG' for all chains.
>>> rename(resname={'UNL' : 'LIG'})
Rename residue 'UNL' to 'LIG' only for chain B.
>>> rename(chain={"B":"B"}, resname={'UNL' : 'LIG'})
Rename atom '2H2' to 'H22' for all residues and chains.
>>> rename(atom={"2H2": "H22"})
Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.
>>> rename(chain={"C:C"}, resname={"UNL":"UNL"}, atom={"H1" : "H11", "H2": "H12"})
Args:
chain (Optional[dict], optional): map of chain ids {old:new}. Defaults to None.
resname (Optional[dict], optional): map of residue names {old:new}. Defaults to None.
atom (Optional[dict], optional): map of atom names {old:new}. Defaults to None.
"""
for model in self.st:
for _chain in model:
if chain is None:
subject_chains = [c.id for c in model.get_chains()]
else:
subject_chains = [k for k in chain]
if _chain.id not in subject_chains:
continue
if chain:
old_chain_id = _chain.id
new_chain_id = chain[old_chain_id]
_chain.id = new_chain_id
if old_chain_id != new_chain_id:
print(f"renamed chain id : ", end=" ")
print(f"{old_chain_id}", end=" -> ")
print(f"{new_chain_id}")
for _residue in _chain:
if resname is None:
subject_resnames = [r.resname for r in _chain.get_residues()]
else:
subject_resnames = [k for k in resname]
if _residue.resname not in subject_resnames:
continue
hetflag, resseq, iCode = _residue.get_id()
if resname:
old_resname = _residue.resname
new_resname = resname[old_resname]
_residue.resname = new_resname
if old_resname != new_resname:
print(f"renamed residue {_chain.id}", end=" : ")
print(f"{old_resname}({resseq})", end=" ->")
print(f"{new_resname}({resseq})")
else:
old_resname = None
new_resname = None
for _atom in _residue:
if atom is None:
subject_atoms = [a.name for a in _residue.get_atoms()]
else:
subject_atoms = [k for k in atom]
if _atom.name not in subject_atoms:
continue
if atom:
old_atom_name = _atom.name
new_atom_name = atom[old_atom_name]
_atom.name = new_atom_name
if old_atom_name != new_atom_name:
print(f"renamed atom {_chain.id}.{_residue.resname}({resseq})", end=" : ")
print(f"{old_atom_name}", end=" -> ")
print(f"{new_atom_name}")
reorder()
Source code in mdscribe/helper/pdbfile.py
def reorder(self) -> None:
builder = StructureBuilder()
builder.init_structure(self.st.id)
for model in self.st:
builder.init_model(model.id, serial_num=None)
for chain in sorted(model, key=lambda x:x.id):
builder.init_chain(chain.id)
for residue in sorted(chain, key=lambda x: x.get_id()[1]):
hetflag, resseq, iCode = residue.get_id()
builder.init_residue(residue.resname, hetflag, resseq, iCode)
for atom in residue:
builder.init_atom(atom.name,
atom.coord,
atom.bfactor,
atom.occupancy,
atom.altloc,
atom.fullname,
None, # serial_number
atom.element,
)
self.st = builder.get_structure()
reorient(residue_selection='', invert_Z=False, offset=np.array([0.0, 0.0, 0.0]))
Reorient coordinates according to the principal axis.
Examples:
Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
residue_selection
|
str
|
residues for principal axis calculation. Defaults to "". |
''
|
invert_Z
|
bool
|
whether to invert Z axis. Defaults to False. |
False
|
offset
|
ndarray
|
translate coordinates. Defaults to np.array([0.,0.,0.]). |
array([0.0, 0.0, 0.0])
|
Source code in mdscribe/helper/pdbfile.py
def reorient(self,
residue_selection:str="",
invert_Z:bool=False,
offset:np.ndarray=np.array([0.,0.,0.])) -> None:
"""Reorient coordinates according to the principal axis.
Examples:
Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.
>>> pdb.reorient("A:12-50,B:100-200")
Args:
residue_selection (str, optional): residues for principal axis calculation. Defaults to "".
invert_Z (bool, optional): whether to invert Z axis. Defaults to False.
offset (np.ndarray, optional): translate coordinates. Defaults to np.array([0.,0.,0.]).
"""
coords = []
if residue_selection:
subset_residues = {}
# subset of atoms for Eigenvector/Eigenvalue calculations
for chain in residue_selection.split(","):
(chainId, resSeq_range) = chain.split(":")
resSeq_range_tuple = tuple(sorted(map(int, resSeq_range.split("-"))))
if chainId in subset_residues:
subset_residues[chainId].append(resSeq_range_tuple)
else:
subset_residues[chainId] = [resSeq_range_tuple]
for model in self.st:
for chain in model:
if chain.id not in subset_residues:
continue
for residue in chain:
hetflag, resseq, iCode = residue.get_id()
flag = []
for (lb, ub) in subset_residues[chain.id]:
if resseq >= lb and resseq <= ub:
flag.append(True)
else:
flag.append(False)
if any(flag):
coords.extend([atom.coord for atom in residue])
else:
for model in self.st:
for chain in model:
for residue in chain:
coords.extend([atom.coord for atom in residue])
coords = np.array(coords, float)
centroid = np.mean(coords, axis=0)
box_min = np.min(coords, axis=0)
box_max = np.max(coords, axis=0)
print("REMARK geometric center:", centroid)
print("REMARK coordinate range:", box_min, box_max)
print("REMARK box size:", box_max-box_min)
coords = coords - centroid
# import numpy as np
# data = np.array([[1, 2], [3, 4], [5, 6]]) # Example data
# cov_matrix = np.cov(data, rowvar=False)
# eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
# rotation_matrix = eigenvectors
# rotated_data = np.dot(data, rotation_matrix.T)
inertia = np.dot(coords.transpose(), coords)
w,v = np.linalg.eig(inertia)
# warning eigen values are not necessary ordered!
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
#--------------------------------------------------------------------------
# order eigen values (and eigen vectors)
#
# axis1 is the principal axis with the biggest eigen value (eval1)
# axis2 is the principal axis with the second biggest eigen value (eval2)
# axis3 is the principal axis with the smallest eigen value (eval3)
#--------------------------------------------------------------------------
# axis1 --> Z
# axis2 --> Y
# axis3 --> X
order = np.argsort(w)
eval3, eval2, eval1 = w[order]
axis3, axis2, axis1 = v[:, order]
print("REMARK x-axis",axis3,"eval=",eval3)
print("REMARK y-axis",axis2,"eval=",eval2)
print("REMARK z-axis",axis1,"eval=",eval1)
R = np.array([axis3, axis2, axis1]) # decreasing order
# R_inv = np.linalg.inv(R)
self.transform(rot=R, trans=-centroid)
transform(centroid=np.array([0.0, 0.0, 0.0]), rot=np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), trans=np.array([0.0, 0.0, 0.0]))
Source code in mdscribe/helper/pdbfile.py
def transform(self,
centroid: np.ndarray = np.array([0., 0., 0.]),
rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]),
trans:np.ndarray = np.array([0., 0., 0.])) -> None:
try:
assert rot.shape == (3,3)
except:
raise ValueError('shape of rotation matrix should be (3,3)')
try:
assert trans.shape == (3,)
except:
raise ValueError('shape of translation matrix should be (3,)')
# rotation
self.coords = np.dot(self.coords - centroid, rot.T) + centroid
# translation
self.coords = self.coords + trans
# update atomic coordinates
atom_idx = 0
for model in self.st:
for chain in model:
for residue in chain:
for atom in residue:
atom.coord = self.coords[atom_idx,:]
atom_idx += 1
update_coord(coords)
Source code in mdscribe/helper/pdbfile.py
write(filename, model=None, chain=None, resname=None, resnumber=None)
Write to PDB with selections.
Examples:
>>> pdb.write(model=[0], chain=['A'])
write chain A and residues first-10,22-50,130-200,550,600-last
>>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
filename
|
str | Path
|
output filename or path |
required |
model
|
Optional[List[int]]
|
list of model numbers. Defaults to None. |
None
|
chain
|
Optional[List[str]]
|
list of chain ids. Defaults to None. |
None
|
resname
|
Optional[List[str]]
|
list of residue names. Defaults to None. |
None
|
resnumber
|
Optional[str]
|
residue number ranges. Defaults to None. |
None
|
Source code in mdscribe/helper/pdbfile.py
def write(self, filename: str | Path,
model:Optional[List[int]] = None,
chain:Optional[List[str]] = None,
resname:Optional[List[str]] = None,
resnumber:Optional[str] = None,
) -> None:
"""Write to PDB with selections.
Examples:
>>> pdb.write(model=[0], chain=['A'])
write chain A and residues first-10,22-50,130-200,550,600-last
>>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")
Args:
filename (str | Path): output filename or path
model (Optional[List[int]], optional): list of model numbers. Defaults to None.
chain (Optional[List[str]], optional): list of chain ids. Defaults to None.
resname (Optional[List[str]], optional): list of residue names. Defaults to None.
resnumber (Optional[str], optional): residue number ranges. Defaults to None.
"""
io = PDBIO()
if (model is None) and (chain is None) and (resname is None) and (resnumber is None):
# write structure as it as
io.set_structure(self.st)
else:
# write only select model(s), chain(s) or residue(s)
# select residues by numbers
if resnumber is not None:
select_resseqs = [tuple(map(lambda s: int(s) if s else -1, r.split("-"))) for r in resnumber.split(",")]
# for resnumber="-10,22-50,130-200,550,600-"
# [(-1, 10), (22, 50), (130, 200), (550,), (600, -1)]
builder = StructureBuilder()
builder.init_structure(self.st.id)
for _model in self.st:
if (model is not None) and (_model.id not in model):
continue # reject model
builder.init_model(_model.id, serial_num=None)
for _chain in _model:
if (chain is not None) and (_chain.id not in chain):
continue # reject chain
builder.init_chain(_chain.id)
for _residue in _chain:
hetflag, resseq, iCode = _residue.get_id()
if (resname is not None) and (_residue.resname not in resname):
continue # reject residue by name
# select residue numbers
include_flags = []
for lu in select_resseqs:
if len(lu) == 1:
if lu == resseq:
include_flags.append(True)
else:
include_flags.append(False)
else:
(l,u) = lu
if (l == -1 or l <= resseq) and (u ==-1 or u >= resseq):
include_flags.append(True)
else:
include_flags.append(False)
if not any(include_flags):
continue # reject residue by number
builder.init_residue(_residue.resname, hetflag, resseq, iCode)
for atom in _residue:
builder.init_atom(atom.name,
atom.coord,
atom.bfactor,
atom.occupancy,
atom.altloc,
atom.fullname,
None, # serial_number
atom.element,
)
io.set_structure(builder.get_structure())
with open(filename, "w") as f:
io.save(f)
angle_between(v1, v2)
Returns the angle in radians between vectors 'v1' and 'v2'::
angle_between((1, 0, 0), (0, 1, 0)) 1.5707963267948966 angle_between((1, 0, 0), (1, 0, 0)) 0.0 angle_between((1, 0, 0), (-1, 0, 0)) 3.141592653589793
Source code in mdscribe/helper/coord.py
def angle_between(v1, v2):
""" Returns the angle in radians between vectors 'v1' and 'v2'::
>>> angle_between((1, 0, 0), (0, 1, 0))
1.5707963267948966
>>> angle_between((1, 0, 0), (1, 0, 0))
0.0
>>> angle_between((1, 0, 0), (-1, 0, 0))
3.141592653589793
"""
v1_u = unit_vector(v1)
v2_u = unit_vector(v2)
rad = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
deg = rad*180./np.pi
if deg > 90.0 :
deg = 180.0 - deg
return deg
avoid_clash(fixed, movable, max_move=25.0, clash_cutoff=5.0, max_trials=1000)
Find a translation vector for movable coordinates to avoid steric clashes.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
fixed
|
ndarray
|
coordinates to be fixed. |
required |
movable
|
ndarry
|
coordinates to be moved. |
required |
max_move
|
float
|
maximum translation. Defaults to 25.0. |
25.0
|
clash_cutoff
|
float
|
distance considered as potential clash. Defaults to 5.0. |
5.0
|
max_trials
|
int
|
maximum number of trials. Defaults to 1000. |
1000
|
Returns:
| Type | Description |
|---|---|
(int, int, float, ndarray)
|
(num_trials, clash_count, closest_dist, trans) |
Source code in mdscribe/helper/coord.py
def avoid_clash(fixed:np.ndarray, movable:np.ndarray, max_move:float=25.0, clash_cutoff:float=5.0, max_trials:int=1000) -> Tuple[int, int, float, np.ndarray]:
"""Find a translation vector for `movable` coordinates to avoid steric clashes.
Args:
fixed (np.ndarray): coordinates to be fixed.
movable (np.ndarry): coordinates to be moved.
max_move (float, optional): maximum translation. Defaults to 25.0.
clash_cutoff (float, optional): distance considered as potential clash. Defaults to 5.0.
max_trials (int, optional): maximum number of trials. Defaults to 1000.
Returns:
(int,int,float,np.ndarray): (num_trials, clash_count, closest_dist, trans)
"""
kd = KDTree(fixed)
num_trials = 0
clash_count = 1
while clash_count > 0 and num_trials < max_trials:
num_trials += 1
trans = max_move * (2.0*np.random.rand(3) -1.0)
# numpy.random.rand() generates [0,1) so 2x-1 -> [-1,+1)
coords = movable + trans
distances, indices = kd.query(coords)
clash_count = np.sum(distances < clash_cutoff)
closest_dist = np.min(distances)
return (num_trials, clash_count, closest_dist, trans)
download(PDBID, verbose=True)
get_ligands(PDBID)
Examples:
@structureId : 4KEB
@chemicalID : 1QZ
@type : non-polymer
@molecularWeight : 423.51
chemicalName : 6-ethyl-5-{(3S)-3-[3-(isoquinolin-5-yl)-5-methoxyphenyl]but-1-yn-1-yl}pyrimidine-2,4-diamine
formula : C26 H25 N5 O
InChI : InChI=1S/C26H25N5O/c1-4-24-23(25(27)31-26(28)30-24)9-8-16(2)18-12-19(14-20(13-18)32-3)21-7-5-6-17-15-29-11-10-22(17)21/h5-7,10-16H,4H2,1-3H3,(H4,27,28,30,31)/t16-/m1/s1
InChIKey : MGLLCDAARSVGLO-MRXNPFEDSA-N
smiles : CCc1c(c(nc(n1)N)N)C#C[C@@H](C)c2cc(cc(c2)OC)c3cccc4c3ccnc4
@structureId : 4KEB
@chemicalID : FOL
@type : non-polymer
@molecularWeight : 441.397
chemicalName : FOLIC ACID
formula : C19 H19 N7 O6
InChI : InChI=1S/C19H19N7O6/c20-19-25-15-14(17(30)26-19)23-11(8-22-15)7-21-10-3-1-9(2-4-10)16(29)24-12(18(31)32)5-6-13(27)28/h1-4,8,12,21H,5-7H2,(H,24,29)(H,27,28)(H,31,32)(H3,20,22,25,26,30)/t12-/m0/s1
InChIKey : OVBPIULPVIDEAO-LBPRGKRZSA-N
smiles : c1cc(ccc1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCc2cnc3c(n2)C(=O)N=C(N3)N
@structureId : 4KEB
@chemicalID : NDP
@type : non-polymer
@molecularWeight : 745.421
chemicalName : NADPH DIHYDRO-NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE
formula : C21 H30 N7 O17 P3
InChIKey : ACFIXJIJDZMPPO-NNYOXOHSSA-N
InChI : InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1
smiles : c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H]4[C@H]([C@H]([C@@H](O4)N5C=CCC(=C5)C(=O)N)O)O)O)OP(=O)(O)O)N
Source code in mdscribe/helper/pdbfile.py
def get_ligands(PDBID:str) -> None:
"""
Examples:
@structureId : 4KEB
@chemicalID : 1QZ
@type : non-polymer
@molecularWeight : 423.51
chemicalName : 6-ethyl-5-{(3S)-3-[3-(isoquinolin-5-yl)-5-methoxyphenyl]but-1-yn-1-yl}pyrimidine-2,4-diamine
formula : C26 H25 N5 O
InChI : InChI=1S/C26H25N5O/c1-4-24-23(25(27)31-26(28)30-24)9-8-16(2)18-12-19(14-20(13-18)32-3)21-7-5-6-17-15-29-11-10-22(17)21/h5-7,10-16H,4H2,1-3H3,(H4,27,28,30,31)/t16-/m1/s1
InChIKey : MGLLCDAARSVGLO-MRXNPFEDSA-N
smiles : CCc1c(c(nc(n1)N)N)C#C[C@@H](C)c2cc(cc(c2)OC)c3cccc4c3ccnc4
@structureId : 4KEB
@chemicalID : FOL
@type : non-polymer
@molecularWeight : 441.397
chemicalName : FOLIC ACID
formula : C19 H19 N7 O6
InChI : InChI=1S/C19H19N7O6/c20-19-25-15-14(17(30)26-19)23-11(8-22-15)7-21-10-3-1-9(2-4-10)16(29)24-12(18(31)32)5-6-13(27)28/h1-4,8,12,21H,5-7H2,(H,24,29)(H,27,28)(H,31,32)(H3,20,22,25,26,30)/t12-/m0/s1
InChIKey : OVBPIULPVIDEAO-LBPRGKRZSA-N
smiles : c1cc(ccc1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCc2cnc3c(n2)C(=O)N=C(N3)N
@structureId : 4KEB
@chemicalID : NDP
@type : non-polymer
@molecularWeight : 745.421
chemicalName : NADPH DIHYDRO-NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE
formula : C21 H30 N7 O17 P3
InChIKey : ACFIXJIJDZMPPO-NNYOXOHSSA-N
InChI : InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1
smiles : c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H]4[C@H]([C@H]([C@@H](O4)N5C=CCC(=C5)C(=O)N)O)O)O)OP(=O)(O)O)N
"""
ligandInfo = pypdb.get_ligands(PDBID)
try:
ligands = ligandInfo["ligandInfo"]["ligand"]
except:
ligands = []
for ligand_dict in ligands:
for k in ligand_dict:
print("%20s : %s" % (k, ligand_dict[k]))
# chem_desc= pypdb.describe_chemical(PDBID)
# chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
# pdb_desc= pypdb.describe_pdb("5cgc")
# pdb content as list of strings
data = {
"name": [],
"smiles": [],
"molwt": [],
"chemical name": [],
"InChiKey": [],
"InChi": [],
}
if "," in PDBID:
PDBID_list = PDBID.split(",")
for PDBID in PDBID_list:
for d in ligands:
try:
data["name"].append(d['@chemicalID'])
data["smiles"].append(d['smiles'])
data["molwt"].append(float(d['@molecularWeight']))
data["chemical name"].append(d['chemicalName'])
data["InChiKey"].append(d['InChIKey'])
data["InChi"].append(d['InChi'])
except:
continue
else:
for d in ligands:
try:
data["name"].append(d['@chemicalID'])
data["smiles"].append(d['smiles'])
data["molwt"].append(float(d['@molecularWeight']))
data["chemical name"].append(d['chemicalName'])
data["InChiKey"].append(d['InChIKey'])
data["InChi"].append(d['InChi'])
except:
continue
hexagonal_grid()
Generate XY coordinates of hexagonal grid for membrane MD simulations.
Returns:
| Type | Description |
|---|---|
ndarray
|
np.ndarray: (x,y) coordinates |
Source code in mdscribe/helper/coord.py
def hexagonal_grid() -> np.ndarray:
"""Generate XY coordinates of hexagonal grid for membrane MD simulations.
Returns:
np.ndarray: (x,y) coordinates
"""
theta = np.radians(60)
c, s = np.cos(theta), np.sin(theta)
R = np.array([[c, -s], [s, c]])
d = 50.0
grid = {0: [ np.array([0,0]) ] }
for shell in [1, 2]:
v = np.array([0.0, d*shell])
grid[shell] = [v]
for i in range(0, 5):
v = np.dot(R, v)
if shell > 1:
delta = (grid[shell][-1] - v)/shell
for j in range(1, shell):
grid[shell].append(j*delta + v)
grid[shell].append(v)
if shell > 1:
v = np.dot(R, v)
delta = (grid[shell][-1] - v)/shell
for j in range(1, shell):
grid[shell].append(j*delta + v)
grid[shell].append(v)
xy = []
for shell in grid:
for item in grid[shell] :
x_, y_ = item
xy.append([x_, y_])
return np.array(xy)
kabsch_algorithm(P, Q)
Computes the optimal rotation and translation to align two sets of points (P -> Q), and their RMSD.
Examples:
>>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
>>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
>>> # Q is translated by (1,1,0)
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
>>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
>>> # Q is a 90-degree rotation of P around the z-axis
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
P
|
ndarray
|
subject coordinates. Not modified. |
required |
Q
|
ndarray
|
target coordinates. Not modified. |
required |
Returns:
| Type | Description |
|---|---|
Tuple[ndarray, ndarray, ndarray, float]
|
Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal translation vector, the centroid of P, and the RMSD. |
Source code in mdscribe/helper/coord.py
def kabsch_algorithm(P:np.ndarray, Q:np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]:
"""Computes the optimal rotation and translation to align two sets of points (P -> Q),
and their RMSD.
Examples:
>>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
>>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
>>> # Q is translated by (1,1,0)
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
>>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
>>> # Q is a 90-degree rotation of P around the z-axis
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
Args:
P (np.ndarray): subject coordinates. Not modified.
Q (np.ndarray): target coordinates. Not modified.
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal
translation vector, the centroid of P, and the RMSD.
"""
assert P.shape == Q.shape, "Matrix dimensions must match"
# Compute centroids
centroid_P = np.mean(P, axis=0)
centroid_Q = np.mean(Q, axis=0)
# Optimal translation
t = centroid_Q - centroid_P
# Center the points
p = P - centroid_P
q = Q - centroid_Q
# Compute the covariance matrix
H = np.dot(p.T, q)
# SVD
U, S, Vt = np.linalg.svd(H)
V = Vt.T
d = np.linalg.det(np.dot(V, U.T))
e = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, d]])
# Optimal rotation
R = np.dot(np.dot(V, e), U.T)
# RMSD
rmsd = np.sqrt(np.sum(np.square(np.dot(p, R.T) - q)) / P.shape[0])
return (R, t, centroid_P, rmsd)
pca(X)
Source code in mdscribe/helper/coord.py
def pca(X):
# Data matrix X
n, m = X.shape
centroid = X.mean(axis=0)
X = X - centroid # make it 0-centered
assert np.allclose(X.mean(axis=0), np.zeros(m), atol=1e-5)
# Compute covariance matrix
C = np.dot(X.T, X) / (n-1)
# Eigen decomposition
eigen_vals, eigen_vecs = np.linalg.eig(C)
# Project X onto PC space
# X_pca = np.dot(X, eigen_vecs)
return eigen_vecs
quaternion_to_rotation_matrix(Q)
Covert a quaternion into a full three-dimensional rotation matrix.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
Q
|
List[float]
|
A 4 element array representing the quaternion (q0,q1,q2,q3) |
required |
Returns:
| Type | Description |
|---|---|
ndarray
|
np.ndarray: A 3x3 element matrix representing the full 3D rotation matrix. This rotation matrix converts a point in the local reference frame to a point in the global reference frame. |
Source code in mdscribe/helper/coord.py
def quaternion_to_rotation_matrix(Q:List[float]) -> np.ndarray:
"""Covert a quaternion into a full three-dimensional rotation matrix.
Args:
Q (List[float]): A 4 element array representing the quaternion (q0,q1,q2,q3)
Returns:
np.ndarray: A 3x3 element matrix representing the full 3D rotation matrix.
This rotation matrix converts a point in the local reference frame
to a point in the global reference frame.
"""
# Extract the values from Q
q0 = Q[0]
q1 = Q[1]
q2 = Q[2]
q3 = Q[3]
# First row of the rotation matrix
r00 = 2 * (q0 * q0 + q1 * q1) - 1
r01 = 2 * (q1 * q2 - q0 * q3)
r02 = 2 * (q1 * q3 + q0 * q2)
# Second row of the rotation matrix
r10 = 2 * (q1 * q2 + q0 * q3)
r11 = 2 * (q0 * q0 + q2 * q2) - 1
r12 = 2 * (q2 * q3 - q0 * q1)
# Third row of the rotation matrix
r20 = 2 * (q1 * q3 - q0 * q2)
r21 = 2 * (q2 * q3 + q0 * q1)
r22 = 2 * (q0 * q0 + q3 * q3) - 1
# 3x3 rotation matrix, R
R = np.array([[r00, r01, r02],
[r10, r11, r12],
[r20, r21, r22]])
return R
random_rotation_matrix()
Generate a random rotational matrix.
Returns:
| Name | Type | Description |
|---|---|---|
ndarray |
ndarray
|
random (3,3) matrix |
Source code in mdscribe/helper/coord.py
def random_rotation_matrix() -> np.ndarray:
"""Generate a random rotational matrix.
Returns:
ndarray : random (3,3) matrix
"""
axis = np.random.randn(3)
axis /= np.linalg.norm(axis)
angle = random.uniform(0, 2 * np.pi)
# Build the rotation matrix using Rodrigues' formula
K = np.array([[0, -axis[2], axis[1]],
[axis[2], 0, -axis[0]],
[-axis[1], axis[0], 0]])
R = np.eye(3) + np.sin(angle) * K + (1 - np.cos(angle)) * np.dot(K, K)
return R
random_translation_vector(lower=0.0, upper=1.0)
Generate a random translational vector.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
lower
|
float)
|
lower bound. Defaults to 0.0 |
0.0
|
upper
|
float)
|
upper bound. Defaults to 1.0 |
1.0
|
Returns:
| Type | Description |
|---|---|
ndarray
|
np.ndarray: random (3,) vector |
Source code in mdscribe/helper/coord.py
def random_translation_vector(lower:float=0.0, upper:float=1.0) -> np.ndarray:
"""Generate a random translational vector.
Args:
lower (float) : lower bound. Defaults to 0.0
upper (float) : upper bound. Defaults to 1.0
Returns:
np.ndarray: random (3,) vector
"""
rng = np.random.default_rng()
return rng.uniform(lower, upper, size=3)
renumber(residue)
reorient()
Source code in mdscribe/helper/pdbfile.py
def reorient():
argparser = argparse.ArgumentParser(description='Reorient PDB')
argparser.add_argument("filename", help="pdb filename")
argparser.add_argument("--inverse-z", dest="inverse_z_axis", help="inverse z axis", default=False, action="store_true")
argparser.add_argument("--residue", help="residues for principal axes calc. ex. A:23-50,B:1-90", default="")
argparser.add_argument("--offset-x", dest="offset_x", help="translate x coordinates", type=float, default=0.0)
argparser.add_argument("--offset-y", dest="offset_y", help="translate y coordinates", type=float, default=0.0)
argparser.add_argument("--offset-z", dest="offset_z", help="translate z coordinates", type=float, default=0.0)
argparser.add_argument("--segid", dest="segId", help="override segment id", default="")
argparser.add_argument("--chainid", dest="chainId", help="override chain id", default="")
args = argparser.parse_args()
# subset of atoms for Eigenvector/Eigenvalue calculations
subset_atoms = []
if args.residue:
for chain in args.residue.split(","):
(chainId, resSeq_range) = chain.split(":")
(resSeq_begin, resSeq_end) = resSeq_range.split("-")
subset_atoms.append((chainId, int(resSeq_begin), int(resSeq_end)))
with open(args.filename, "r") as pdb_file:
xyz = []
for line in pdb_file:
if line.startswith('ATOM') or line.startswith('HETATM'):
chainId = line[21:22]
resSeq = int(line[22:26])
if subset_atoms:
for chainId_, resSeq_begin, resSeq_end in subset_atoms:
if not (chainId == chainId_ and resSeq >= resSeq_begin and resSeq <= resSeq_end):
continue
x = float(line[30:38].strip())
y = float(line[38:46].strip())
z = float(line[46:54].strip())
xyz.append([x,y,z])
print("REMARK reoriented according to its principal axes")
if args.residue:
print(f"REMARK principal axes are calculated using --residue {args.residue}")
print(f"REMARK coordinates from {args.filename} ({len(xyz)} atoms)")
if args.inverse_z_axis:
print("REMARK Z axis inverted")
print(f"REMARK coordinates offset X {args.offset_x:8.3f} Y {args.offset_x:8.3f} Z {args.offset_x:8.3f}")
coord = np.array(xyz, float)
# geometric center
center = np.mean(coord, 0)
box_min = np.min(coord, 0)
box_max = np.max(coord, 0)
print("REMARK geometric center:", center)
print("REMARK coordinate range:", box_min, box_max)
print("REMARK box size:", box_max-box_min)
coord = coord - center
# compute principal axis matrix
inertia = np.dot(coord.transpose(), coord)
w,v = np.linalg.eig(inertia)
# warning eigen values are not necessary ordered!
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
#--------------------------------------------------------------------------
# order eigen values (and eigen vectors)
#
# axis1 is the principal axis with the biggest eigen value (eval1)
# axis2 is the principal axis with the second biggest eigen value (eval2)
# axis3 is the principal axis with the smallest eigen value (eval3)
#--------------------------------------------------------------------------
# axis1 --> Z
# axis2 --> Y
# axis3 --> X
order = np.argsort(w)
eval3, eval2, eval1 = w[order]
axis3, axis2, axis1 = v[:, order]
print("REMARK x-axis",axis3,"eval=",eval3)
print("REMARK y-axis",axis2,"eval=",eval2)
print("REMARK z-axis",axis1,"eval=",eval1)
R = np.array([axis3, axis2, axis1]) # decreasing order
R_inv = np.linalg.inv(R)
pdb_file.seek(0)
for line in pdb_file:
if not line: continue
if line.startswith('ATOM') or line.startswith('HETATM'):
x = float(line[30:38].strip())
y = float(line[38:46].strip())
z = float(line[46:54].strip())
xyz = np.array([x,y,z])
xyz = xyz - center
xyz = np.dot(R_inv, xyz)
x, y, z = xyz
if args.inverse_z_axis:
z = -z
x += args.offset_x
y += args.offset_y
z += args.offset_z
# keep the rest of information as they are
line = line[:30] + "%8.3f%8.3f%8.3f" % (x, y, z) + line[54:]
# override chain id
if args.chainId:
line = line[:21] + "%1s" % args.chainId + line[22:]
# override segment id
if args.segId:
line = line[:72] + "%4s" % args.segId + line[76:]
print(line, end='')
else:
print(line, end='')
test_kabsch_algorithm(trials=100, rtol=1e-05, atol=1e-08)
Source code in mdscribe/helper/coord.py
def test_kabsch_algorithm(trials:int=100, rtol:float=1e-5, atol:float=1e-8) -> None:
results = []
for i in range(trials):
rot = random_rotation_matrix()
trans = random_translation_vector(0.0, 50.0)
# generate a 10x3 array with random floats between 0 and 1
P = np.random.rand(10, 3) * 100.0
Q = transform(P, rot, trans)
(rot_, trans_, centroid_, rmsd_) = kabsch_algorithm(P, Q)
res = np.allclose(rot, rot_, rtol=rtol, atol=atol) and np.allclose(trans, trans_, rtol=rtol, atol=atol)
results.append(res)
if all(results):
print("pass")
else:
print("failed")
transform(P, centroid=np.array([0.0, 0.0, 0.0]), rot=np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), trans=np.array([0.0, 0.0, 0.0]))
Rotate and translate input coordinates.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
P
|
ndarray
|
input coordinates. |
required |
centroid
|
ndarray
|
centroid. Defaults to np.array([0., 0., 0.]) |
array([0.0, 0.0, 0.0])
|
rot
|
ndarray
|
rotation matrix. Defaults to np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]). |
array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
|
trans
|
ndarray
|
translation vector. Defaults to np.array([0., 0., 0.]). |
array([0.0, 0.0, 0.0])
|
Returns:
| Type | Description |
|---|---|
ndarray
|
np.ndarray: transformed output coordinates. |
Source code in mdscribe/helper/coord.py
def transform(P:np.ndarray,
centroid:np.ndarray = np.array([0., 0., 0.]),
rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]),
trans:np.ndarray = np.array([0., 0., 0.])) -> np.ndarray:
"""Rotate and translate input coordinates.
Args:
P (np.ndarray): input coordinates.
centroid (np.ndarray, optional): centroid. Defaults to np.array([0., 0., 0.])
rot (np.ndarray, optional): rotation matrix. Defaults to np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]).
trans (np.ndarray, optional): translation vector. Defaults to np.array([0., 0., 0.]).
Returns:
np.ndarray: transformed output coordinates.
"""
Q = np.dot(P-centroid, rot.T) + centroid + trans
return Q
unit_vector(vector)
view_svg(rdmol, highlight=[], width=300, height=300)
SVG depiction for the Jupyter Notebook.
Reference
https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
rdmol
|
Mol
|
rdkit molecule. |
required |
highlight
|
list
|
highlighted atom indexes. Defaults to []. |
[]
|
width
|
int
|
width. Defaults to 300. |
300
|
height
|
int
|
height. Defaults to 300. |
300
|
Source code in mdscribe/helper/svg.py
def view_svg(rdmol:Chem.Mol, highlight=[], width=300, height=300):
"""SVG depiction for the Jupyter Notebook.
Reference:
https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/
Args:
rdmol (Chem.Mol): rdkit molecule.
highlight (list, optional): highlighted atom indexes. Defaults to [].
width (int, optional): width. Defaults to 300.
height (int, optional): height. Defaults to 300.
"""
rdmol2d = Chem.Mol(rdmol)
AllChem.Compute2DCoords(rdmol2d)
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
drawer.DrawMolecule(rdmol2d, highlightAtoms=highlight)
drawer.FinishDrawing()
svg = drawer.GetDrawingText()
display(SVG(svg.replace("svg:","")))