Skip to content

Helper

mdscribe.helper

GlcNAC = {'HN2': 'H2N', 'C7': 'C2N', 'O7': 'O2N', 'C8': 'CME', 'H81': 'H1M', 'H82': 'H3M', 'H83': 'H2M', 'HO3': 'H3O', 'HO4': 'H4O', 'H61': 'H62', 'H62': 'H61', 'HO6': 'H6O'} module-attribute

nucleic_1_3 = {'A': 'ADE', 'G': 'GUA', 'C': 'CYT', 'U': 'URA', 'T': 'THY'} module-attribute

nucleic_3_1 = {'ADE': 'A', 'GUA': 'G', 'CYT': 'C', 'URA': 'U', 'THY': 'T'} module-attribute

protein_1_3 = {'G': 'GLY', 'A': 'ALA', 'V': 'VAL', 'L': 'LEU', 'I': 'ILE', 'M': 'MET', 'P': 'PRO', 'F': 'PHE', 'W': 'TRP', 'S': 'SER', 'T': 'THR', 'N': 'ASN', 'Q': 'GLN', 'Y': 'TYR', 'C': 'CYS', 'K': 'LYS', 'R': 'ARG', 'H': 'HIS', 'D': 'ASP', 'E': 'GLU'} module-attribute

protein_3_1 = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'} module-attribute

MOL2file

Class for handling MOL2 file.

Source code in mdscribe/helper/mol2file.py
class MOL2file:
    """Class for handling MOL2 file."""

    # class bound variable
    name_syntax = re.compile(r'(?P<element>[a-zA-Z]+)(?P<serial>\d+)')

    def __init__(self, filename:str | pathlib.Path):
        self.n_atoms = 0
        self.blocks = {}
        self.coords = None # To be modified
        self.pdblines = [] # reserved for PDB conversion

        cursor = None
        with open(filename, 'r') as f:
            for i, line in enumerate(f):
                if "@<TRIPOS>" in line:
                    cursor = line.split("@<TRIPOS>")[1].strip().lower()
                    self.blocks[cursor] = []
                    continue
                elif line.startswith('#') or line == '\n':
                    continue
                self.blocks[cursor].append(line)

        try:
            self.n_atoms = len(self.blocks['atom'])
            assert self.n_atoms > 0
        except:
            print("MOL2 block has no atom")
            sys.exit(0)

        try:
            assert len(self.blocks['bond']) > 0
        except:
            print("MOL2 block has no bond")
            sys.exit(0)

        self.coords = np.zeros((self.n_atoms, 3), dtype=np.float32)

        altLoc = ' '
        chainId = ' '
        iCode = ' '
        occupancy = 1.0
        tempFactor = 0.0
        segId = ' '
        for i, a in enumerate(self.blocks['atom']):
            aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
            x, y, z = float(x), float(y), float(z)
            self.coords[i, :] = x, y, z
            m = MOL2file.name_syntax.match(name)
            element = m.group("element")
            self.pdblines.append(
                f"HETATM {(i+1):>5} {name:<4}{altLoc}{resname:<3} {chainId}{resid:<4}{iCode}   {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{tempFactor:6.2f}      {segId:<4}{element:<2}{charge:<2}"
            )

        for i, b in enumerate(self.blocks['bond']):
            bid, atom_1, atom_2, bond_type = b.split()[:4]
            _pdb  = f"CONECT {atom_1:>5} {atom_2:>5}"
            self.pdblines.append(_pdb)


    def write(self, filename:str | pathlib.Path) -> None:
        """Write to a file.

        Args:
            filename (str | pathlib.Path): output filename.
        """
        with open(filename, 'w') as f:
            i = 0
            for cursor in ['molecule', 'atom', 'bond', 'substructure']:
                if not cursor in self.blocks:
                    continue
                f.write(f"@<TRIPOS>{cursor.upper()}\n")
                i += 1
                if cursor == 'atom':
                    k = 0
                for line in self.blocks[cursor]:
                    if cursor != 'atom':
                        f.write(line)
                    else:    
                        aid, name, x, y, z, atom_type, resid, resname, charge = line.split()[:9]
                        # override x,y,z with self.coords
                        f.write("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f} {5:>4} {6} {7} {8:>7.4f}\n".format(
                            aid, 
                            name, 
                            self.coords[k, 0], 
                            self.coords[k, 1], 
                            self.coords[k, 2],
                            atom_type,
                            resid,
                            resname,
                            float(charge),
                        ))
                        k += 1


    def transform(self, R:np.ndarray, T:np.ndarray) -> None:
        """Apply rotation/translation to the coordinates.

        Args:
            R (np.ndarray): 3x3 rotation matrix
            T (np.ndarray): translation vector
        """
        com = np.mean(self.coords, axis=0)
        self.coords = np.dot(self.coords - com, R) + T

blocks = {} instance-attribute

coords = np.zeros((self.n_atoms, 3), dtype=np.float32) instance-attribute

n_atoms = len(self.blocks['atom']) instance-attribute

name_syntax = re.compile('(?P<element>[a-zA-Z]+)(?P<serial>\\d+)') class-attribute instance-attribute

pdblines = [] instance-attribute

__init__(filename)

Source code in mdscribe/helper/mol2file.py
def __init__(self, filename:str | pathlib.Path):
    self.n_atoms = 0
    self.blocks = {}
    self.coords = None # To be modified
    self.pdblines = [] # reserved for PDB conversion

    cursor = None
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if "@<TRIPOS>" in line:
                cursor = line.split("@<TRIPOS>")[1].strip().lower()
                self.blocks[cursor] = []
                continue
            elif line.startswith('#') or line == '\n':
                continue
            self.blocks[cursor].append(line)

    try:
        self.n_atoms = len(self.blocks['atom'])
        assert self.n_atoms > 0
    except:
        print("MOL2 block has no atom")
        sys.exit(0)

    try:
        assert len(self.blocks['bond']) > 0
    except:
        print("MOL2 block has no bond")
        sys.exit(0)

    self.coords = np.zeros((self.n_atoms, 3), dtype=np.float32)

    altLoc = ' '
    chainId = ' '
    iCode = ' '
    occupancy = 1.0
    tempFactor = 0.0
    segId = ' '
    for i, a in enumerate(self.blocks['atom']):
        aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
        x, y, z = float(x), float(y), float(z)
        self.coords[i, :] = x, y, z
        m = MOL2file.name_syntax.match(name)
        element = m.group("element")
        self.pdblines.append(
            f"HETATM {(i+1):>5} {name:<4}{altLoc}{resname:<3} {chainId}{resid:<4}{iCode}   {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{tempFactor:6.2f}      {segId:<4}{element:<2}{charge:<2}"
        )

    for i, b in enumerate(self.blocks['bond']):
        bid, atom_1, atom_2, bond_type = b.split()[:4]
        _pdb  = f"CONECT {atom_1:>5} {atom_2:>5}"
        self.pdblines.append(_pdb)

transform(R, T)

Apply rotation/translation to the coordinates.

Parameters:

Name Type Description Default
R ndarray

3x3 rotation matrix

required
T ndarray

translation vector

required
Source code in mdscribe/helper/mol2file.py
def transform(self, R:np.ndarray, T:np.ndarray) -> None:
    """Apply rotation/translation to the coordinates.

    Args:
        R (np.ndarray): 3x3 rotation matrix
        T (np.ndarray): translation vector
    """
    com = np.mean(self.coords, axis=0)
    self.coords = np.dot(self.coords - com, R) + T

write(filename)

Write to a file.

Parameters:

Name Type Description Default
filename str | Path

output filename.

required
Source code in mdscribe/helper/mol2file.py
def write(self, filename:str | pathlib.Path) -> None:
    """Write to a file.

    Args:
        filename (str | pathlib.Path): output filename.
    """
    with open(filename, 'w') as f:
        i = 0
        for cursor in ['molecule', 'atom', 'bond', 'substructure']:
            if not cursor in self.blocks:
                continue
            f.write(f"@<TRIPOS>{cursor.upper()}\n")
            i += 1
            if cursor == 'atom':
                k = 0
            for line in self.blocks[cursor]:
                if cursor != 'atom':
                    f.write(line)
                else:    
                    aid, name, x, y, z, atom_type, resid, resname, charge = line.split()[:9]
                    # override x,y,z with self.coords
                    f.write("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f} {5:>4} {6} {7} {8:>7.4f}\n".format(
                        aid, 
                        name, 
                        self.coords[k, 0], 
                        self.coords[k, 1], 
                        self.coords[k, 2],
                        atom_type,
                        resid,
                        resname,
                        float(charge),
                    ))
                    k += 1

PDBfile

Source code in mdscribe/helper/pdbfile.py
class PDBfile:
    def __init__(self, pdbfile:str | Path) -> None:
        self.parser = PDBParser()
        self.st = self.parser.get_structure("USER_PDB", pdbfile)
        # atomic coordinates
        coords = []
        for model in self.st:
            for chain in model:
                for residue in chain:
                    for atom in residue:
                        coords.append(atom.get_coord())
        self.coords = np.array(coords)
        self.shape = self.coords.shape


    def center_of_geometry(self) -> np.ndarray:
        return np.mean(self.coords, axis=0)


    def update_coord(self, coords:np.ndarray) -> None:
        assert self.coords.shape == coords.shape, "Matrix dimensions must match"
        # update atomic coordinates
        atom_idx = 0
        for model in self.st:
            for chain in model:
                for residue in chain:
                    for atom in residue:
                        atom.coord = coords[atom_idx,:]
                        atom_idx += 1


    def transform(self,
                  centroid: np.ndarray = np.array([0., 0., 0.]),
                  rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]),
                  trans:np.ndarray = np.array([0., 0., 0.])) -> None:
        try:
            assert rot.shape == (3,3)
        except:
            raise ValueError('shape of rotation matrix should be (3,3)')
        try:
            assert trans.shape == (3,)
        except:
            raise ValueError('shape of translation matrix should be (3,)')

        # rotation
        self.coords = np.dot(self.coords - centroid, rot.T) + centroid

        # translation
        self.coords = self.coords + trans

        # update atomic coordinates
        atom_idx = 0
        for model in self.st:
            for chain in model:
                for residue in chain:
                    for atom in residue:
                        atom.coord = self.coords[atom_idx,:]
                        atom_idx += 1


    def get_seqeunce(self) -> None:
        wrap= 50
        for model in self.st:
            prev_residue_number = None
            header = "Model "+ str(model.id)
            sequence = ""
            for chain in model:
                print("chain=", chain)
                for residue in chain:
                    resname = residue.get_resname()
                    if not resname in aminoacids:
                        continue
                    hetflag,resseq,iCode = residue.get_id()
                    resSeq = int(resseq)
                    if prev_residue_number is None:
                        print("(first residue) %4d" % resSeq)
                    else:
                        num_missing_residues = resSeq-prev_residue_number-1
                        if num_missing_residues > 0:
                            sequence += "-" * num_missing_residues
                            print("(missing residues) %4d - %4d (%d)" %
                                (prev_residue_number+1,resSeq-1,num_missing_residues))
                    sequence += aminoacids[resname]
                    prev_residue_number = resSeq
                print("(last residue) %4d" % resSeq)
                # output
                print(header)
                l= len(sequence)//wrap
                for i in range(0,l):
                    start = wrap*i
                    end = max(wrap*(i+1),l)
                    print(sequence[start:end])


    @staticmethod
    def readlines(pdbfile:str):
        """(Not Used) Read PDB Lines.

        Args:
            pdbfile (str): filename.
        """
        pdblines = {1: []}
        with open(pdbfile, "r") as f:
            for line in f:
                if line.startswith("MODEL"):
                    model_number = int(line.strip().split()[1])
                    pdblines[model_number] = []

                if line.startswith("ATOM") or line.startswith("HETATM") or line.startswith("TER"):
                    # PDB format version 2.3
                    serial = line[6:12]
                    name = line[12:16].strip()
                    altLoc = line[16:17]
                    if altLoc=="":
                        altLoc=" "
                    resName = line[17:21].strip()
                    chainId = line[21:22]
                    if chainId=="":
                        chainId=" "
                    resSeq = int(line[22:26])
                    iCode = line[26:27]
                    if iCode=="":
                        iCode=" "
                    x = float(line[30:38])
                    y = float(line[38:46])
                    z = float(line[46:54])
                    occupancy = line[54:60]
                    tempFactor = float(line[60:66])
                    segId = line[72:76]
                    element = line[76:78]
                    charge = line[78:80]


    def write(self, filename: str | Path, 
              model:Optional[List[int]] = None,
              chain:Optional[List[str]] = None, 
              resname:Optional[List[str]] = None,
              resnumber:Optional[str] = None,
              ) -> None:
        """Write to PDB with selections.

        Examples:
            >>> pdb.write(model=[0], chain=['A'])
            write chain A and residues first-10,22-50,130-200,550,600-last
            >>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")

        Args:
            filename (str | Path): output filename or path
            model (Optional[List[int]], optional): list of model numbers. Defaults to None.
            chain (Optional[List[str]], optional): list of chain ids. Defaults to None.
            resname (Optional[List[str]], optional): list of residue names. Defaults to None.
            resnumber (Optional[str], optional): residue number ranges. Defaults to None.
        """

        io = PDBIO()

        if (model is None) and (chain is None) and (resname is None) and (resnumber is None):
            # write structure as it as
            io.set_structure(self.st)
        else: 
            # write only select model(s), chain(s) or residue(s)
            # select residues by numbers
            if resnumber is not None:
                select_resseqs = [tuple(map(lambda s: int(s) if s else -1, r.split("-"))) for r in resnumber.split(",")]
                # for resnumber="-10,22-50,130-200,550,600-"
                # [(-1, 10), (22, 50), (130, 200), (550,), (600, -1)]
            builder = StructureBuilder()
            builder.init_structure(self.st.id)
            for _model in self.st:
                if (model is not None) and (_model.id not in model):
                    continue # reject model
                builder.init_model(_model.id, serial_num=None)
                for _chain in _model:
                    if (chain is not None) and (_chain.id not in chain):
                        continue # reject chain
                    builder.init_chain(_chain.id)
                    for _residue in _chain:
                        hetflag, resseq, iCode = _residue.get_id()
                        if (resname is not None) and (_residue.resname not in resname):
                            continue # reject residue by name
                        # select residue numbers
                        include_flags = []
                        for lu in select_resseqs:
                            if len(lu) == 1:
                                if lu == resseq:
                                    include_flags.append(True)
                                else:
                                    include_flags.append(False)
                            else:
                                (l,u) = lu
                                if (l == -1 or l <= resseq) and (u ==-1 or u >= resseq):
                                    include_flags.append(True)
                                else:
                                    include_flags.append(False)
                        if not any(include_flags):
                            continue # reject residue by number
                        builder.init_residue(_residue.resname, hetflag, resseq, iCode)
                        for atom in _residue:
                            builder.init_atom(atom.name, 
                                            atom.coord, 
                                            atom.bfactor, 
                                            atom.occupancy,
                                            atom.altloc,
                                            atom.fullname,
                                            None, # serial_number
                                            atom.element,
                                            )
            io.set_structure(builder.get_structure())
        with open(filename, "w") as f:
            io.save(f)



    def reorder(self) -> None:
        builder = StructureBuilder()
        builder.init_structure(self.st.id)
        for model in self.st:
            builder.init_model(model.id, serial_num=None)
            for chain in sorted(model, key=lambda x:x.id):
                builder.init_chain(chain.id)
                for residue in sorted(chain, key=lambda x: x.get_id()[1]):
                    hetflag, resseq, iCode = residue.get_id()
                    builder.init_residue(residue.resname, hetflag, resseq, iCode)
                    for atom in residue:
                        builder.init_atom(atom.name, 
                                        atom.coord, 
                                        atom.bfactor, 
                                        atom.occupancy,
                                        atom.altloc,
                                        atom.fullname,
                                        None, # serial_number
                                        atom.element,
                                        )
        self.st = builder.get_structure()



    def rename(self, 
               chain:Optional[dict] = None, 
               resname:Optional[dict] = None, 
               atom:Optional[dict] = None) -> None:
        """Rename PDB chains/residues/atoms.

        Examples:
            Rename chain 'C' to 'A' and 'D' to 'B'.
            >>> rename(chain={'C':'A', 'D':'B'})

            Rename residue 'UNL' to 'LIG' for all chains.
            >>> rename(resname={'UNL' : 'LIG'})

            Rename residue 'UNL' to 'LIG' only for chain B.
            >>> rename(chain={"B":"B"}, resname={'UNL' : 'LIG'})

            Rename atom '2H2' to 'H22' for all residues and chains.
            >>> rename(atom={"2H2": "H22"})

            Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.
            >>> rename(chain={"C:C"}, resname={"UNL":"UNL"}, atom={"H1" : "H11", "H2": "H12"})

        Args:
            chain (Optional[dict], optional): map of chain ids {old:new}. Defaults to None.
            resname (Optional[dict], optional): map of residue names {old:new}. Defaults to None.
            atom (Optional[dict], optional): map of atom names {old:new}. Defaults to None.
        """
        for model in self.st:
            for _chain in model:
                if chain is None:
                    subject_chains = [c.id for c in model.get_chains()]
                else:
                    subject_chains = [k for k in chain]

                if _chain.id not in subject_chains:
                    continue

                if chain:
                    old_chain_id = _chain.id
                    new_chain_id = chain[old_chain_id]
                    _chain.id = new_chain_id
                    if old_chain_id != new_chain_id:
                        print(f"renamed chain id : ", end=" ")
                        print(f"{old_chain_id}", end=" -> ")
                        print(f"{new_chain_id}")

                for _residue in _chain:
                    if resname is None:
                        subject_resnames = [r.resname for r in _chain.get_residues()]
                    else:
                        subject_resnames = [k for k in resname]

                    if _residue.resname not in subject_resnames:
                        continue

                    hetflag, resseq, iCode = _residue.get_id()

                    if resname:
                        old_resname = _residue.resname
                        new_resname = resname[old_resname]
                        _residue.resname = new_resname
                        if old_resname != new_resname:
                            print(f"renamed residue {_chain.id}", end=" : ")
                            print(f"{old_resname}({resseq})", end=" ->")
                            print(f"{new_resname}({resseq})")
                    else:
                        old_resname = None
                        new_resname = None

                    for _atom in _residue:
                        if atom is None:
                            subject_atoms = [a.name for a in _residue.get_atoms()]
                        else:
                            subject_atoms = [k for k in atom]

                        if _atom.name not in subject_atoms:
                            continue

                        if atom:
                            old_atom_name = _atom.name
                            new_atom_name = atom[old_atom_name]
                            _atom.name = new_atom_name
                            if old_atom_name != new_atom_name:
                                print(f"renamed atom {_chain.id}.{_residue.resname}({resseq})", end=" : ")
                                print(f"{old_atom_name}", end=" -> ")
                                print(f"{new_atom_name}")


    def reorient(self, 
                 residue_selection:str="", 
                 invert_Z:bool=False, 
                 offset:np.ndarray=np.array([0.,0.,0.])) -> None:
        """Reorient coordinates according to the principal axis.

        Examples:
            Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.
            >>> pdb.reorient("A:12-50,B:100-200") 

        Args:
            residue_selection (str, optional): residues for principal axis calculation. Defaults to "".
            invert_Z (bool, optional): whether to invert Z axis. Defaults to False.
            offset (np.ndarray, optional): translate coordinates. Defaults to np.array([0.,0.,0.]).
        """
        coords = []
        if residue_selection:
            subset_residues = {}
            # subset of atoms for Eigenvector/Eigenvalue calculations
            for chain in residue_selection.split(","):
                (chainId, resSeq_range) = chain.split(":")
                resSeq_range_tuple = tuple(sorted(map(int, resSeq_range.split("-"))))
                if chainId in subset_residues:
                    subset_residues[chainId].append(resSeq_range_tuple)
                else:
                    subset_residues[chainId] = [resSeq_range_tuple]
            for model in self.st:
                for chain in model:
                    if chain.id not in subset_residues:
                        continue
                    for residue in chain:
                        hetflag, resseq, iCode = residue.get_id()
                        flag = []
                        for (lb, ub) in subset_residues[chain.id]:
                            if resseq >= lb and resseq <= ub:
                                flag.append(True)
                            else:
                                flag.append(False)
                        if any(flag):
                            coords.extend([atom.coord for atom in residue])
        else:
            for model in self.st:
                for chain in model:
                    for residue in chain:
                        coords.extend([atom.coord for atom in residue])

        coords = np.array(coords, float)
        centroid = np.mean(coords, axis=0)
        box_min = np.min(coords, axis=0)
        box_max = np.max(coords, axis=0)
        print("REMARK geometric center:", centroid)
        print("REMARK coordinate range:", box_min, box_max)
        print("REMARK box size:", box_max-box_min)

        coords = coords - centroid

        # import numpy as np
        # data = np.array([[1, 2], [3, 4], [5, 6]])  # Example data
        # cov_matrix = np.cov(data, rowvar=False)
        # eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
        # rotation_matrix = eigenvectors
        # rotated_data = np.dot(data, rotation_matrix.T)

        inertia = np.dot(coords.transpose(), coords)
        w,v = np.linalg.eig(inertia)

        # warning eigen values are not necessary ordered!
        # http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
        #--------------------------------------------------------------------------
        # order eigen values (and eigen vectors)
        #
        # axis1 is the principal axis with the biggest eigen value (eval1)
        # axis2 is the principal axis with the second biggest eigen value (eval2)
        # axis3 is the principal axis with the smallest eigen value (eval3)
        #--------------------------------------------------------------------------
        # axis1 --> Z
        # axis2 --> Y
        # axis3 --> X
        order = np.argsort(w)
        eval3, eval2, eval1 = w[order]
        axis3, axis2, axis1 = v[:, order]

        print("REMARK x-axis",axis3,"eval=",eval3)
        print("REMARK y-axis",axis2,"eval=",eval2)
        print("REMARK z-axis",axis1,"eval=",eval1)

        R = np.array([axis3, axis2, axis1]) # decreasing order
        # R_inv = np.linalg.inv(R)

        self.transform(rot=R, trans=-centroid)
        # xyz = np.array([x,y,z])
        # xyz = xyz - center
        # xyz = np.dot(R_inv, xyz)
        # x, y, z = xyz
        # if args.inverse_z_axis:
        #     z = -z
        # x += args.offset_x
        # y += args.offset_y
        # z += args.offset_z


    @staticmethod
    def expand_residues(txt):
        residues = []
        step1 = txt.split(",")
        for item in step1:
            step2 = item.split("-")
            if len(step2) == 2:
                residues += range(int(step2[0]), int(step2[1])+1)
            elif len(step2) == 1:
                residues += [ int(step2[0]) ]
        return sorted(residues)


    def contacts(self, 
                       residue_selection_1:str, 
                       residue_selection_2:str,
                       cutoff:float=4.0) -> None:
        residues_1 = PDBfile.expand_residues(residue_selection_1)
        residues_2 = PDBfile.expand_residues(residue_selection_2)

        source_atoms = []
        target_atoms = []
        for model in self.st:
            for chain in model:
                for residue in chain:
                    hetflag, resSeq, iCode = residue.get_id()
                    resName = residue.get_resname()
                    if resSeq in residues_1:
                        source_atoms += [ atom for atom in residue ]
                    elif resSeq in residues_2:
                        target_atoms += [ atom for atom in residue ]

        close_contacts = []
        for ai in source_atoms:
            for aj in target_atoms:
                d = ai-aj
                if d < cutoff:
                    close_contacts.append((ai, aj, d))
        close_contacts = sorted(close_contacts, key=lambda x: (x[0], x[1], x[2]))
        for (ai, aj, d) in close_contacts:
            i_hetflag, i_resSeq, i_iCode = ai.get_parent().get_id()
            j_hetflag, j_resSeq, j_iCode = aj.get_parent().get_id()
            print("%4d %4s %4d %4s %3.1f" % (i_resSeq, ai.id, j_resSeq, aj.id, d))

coords = np.array(coords) instance-attribute

parser = PDBParser() instance-attribute

shape = self.coords.shape instance-attribute

st = self.parser.get_structure('USER_PDB', pdbfile) instance-attribute

__init__(pdbfile)

Source code in mdscribe/helper/pdbfile.py
def __init__(self, pdbfile:str | Path) -> None:
    self.parser = PDBParser()
    self.st = self.parser.get_structure("USER_PDB", pdbfile)
    # atomic coordinates
    coords = []
    for model in self.st:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    coords.append(atom.get_coord())
    self.coords = np.array(coords)
    self.shape = self.coords.shape

center_of_geometry()

Source code in mdscribe/helper/pdbfile.py
def center_of_geometry(self) -> np.ndarray:
    return np.mean(self.coords, axis=0)

contacts(residue_selection_1, residue_selection_2, cutoff=4.0)

Source code in mdscribe/helper/pdbfile.py
def contacts(self, 
                   residue_selection_1:str, 
                   residue_selection_2:str,
                   cutoff:float=4.0) -> None:
    residues_1 = PDBfile.expand_residues(residue_selection_1)
    residues_2 = PDBfile.expand_residues(residue_selection_2)

    source_atoms = []
    target_atoms = []
    for model in self.st:
        for chain in model:
            for residue in chain:
                hetflag, resSeq, iCode = residue.get_id()
                resName = residue.get_resname()
                if resSeq in residues_1:
                    source_atoms += [ atom for atom in residue ]
                elif resSeq in residues_2:
                    target_atoms += [ atom for atom in residue ]

    close_contacts = []
    for ai in source_atoms:
        for aj in target_atoms:
            d = ai-aj
            if d < cutoff:
                close_contacts.append((ai, aj, d))
    close_contacts = sorted(close_contacts, key=lambda x: (x[0], x[1], x[2]))
    for (ai, aj, d) in close_contacts:
        i_hetflag, i_resSeq, i_iCode = ai.get_parent().get_id()
        j_hetflag, j_resSeq, j_iCode = aj.get_parent().get_id()
        print("%4d %4s %4d %4s %3.1f" % (i_resSeq, ai.id, j_resSeq, aj.id, d))

expand_residues(txt) staticmethod

Source code in mdscribe/helper/pdbfile.py
@staticmethod
def expand_residues(txt):
    residues = []
    step1 = txt.split(",")
    for item in step1:
        step2 = item.split("-")
        if len(step2) == 2:
            residues += range(int(step2[0]), int(step2[1])+1)
        elif len(step2) == 1:
            residues += [ int(step2[0]) ]
    return sorted(residues)

get_seqeunce()

Source code in mdscribe/helper/pdbfile.py
def get_seqeunce(self) -> None:
    wrap= 50
    for model in self.st:
        prev_residue_number = None
        header = "Model "+ str(model.id)
        sequence = ""
        for chain in model:
            print("chain=", chain)
            for residue in chain:
                resname = residue.get_resname()
                if not resname in aminoacids:
                    continue
                hetflag,resseq,iCode = residue.get_id()
                resSeq = int(resseq)
                if prev_residue_number is None:
                    print("(first residue) %4d" % resSeq)
                else:
                    num_missing_residues = resSeq-prev_residue_number-1
                    if num_missing_residues > 0:
                        sequence += "-" * num_missing_residues
                        print("(missing residues) %4d - %4d (%d)" %
                            (prev_residue_number+1,resSeq-1,num_missing_residues))
                sequence += aminoacids[resname]
                prev_residue_number = resSeq
            print("(last residue) %4d" % resSeq)
            # output
            print(header)
            l= len(sequence)//wrap
            for i in range(0,l):
                start = wrap*i
                end = max(wrap*(i+1),l)
                print(sequence[start:end])

readlines(pdbfile) staticmethod

(Not Used) Read PDB Lines.

Parameters:

Name Type Description Default
pdbfile str

filename.

required
Source code in mdscribe/helper/pdbfile.py
@staticmethod
def readlines(pdbfile:str):
    """(Not Used) Read PDB Lines.

    Args:
        pdbfile (str): filename.
    """
    pdblines = {1: []}
    with open(pdbfile, "r") as f:
        for line in f:
            if line.startswith("MODEL"):
                model_number = int(line.strip().split()[1])
                pdblines[model_number] = []

            if line.startswith("ATOM") or line.startswith("HETATM") or line.startswith("TER"):
                # PDB format version 2.3
                serial = line[6:12]
                name = line[12:16].strip()
                altLoc = line[16:17]
                if altLoc=="":
                    altLoc=" "
                resName = line[17:21].strip()
                chainId = line[21:22]
                if chainId=="":
                    chainId=" "
                resSeq = int(line[22:26])
                iCode = line[26:27]
                if iCode=="":
                    iCode=" "
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                occupancy = line[54:60]
                tempFactor = float(line[60:66])
                segId = line[72:76]
                element = line[76:78]
                charge = line[78:80]

rename(chain=None, resname=None, atom=None)

Rename PDB chains/residues/atoms.

Examples:

Rename chain 'C' to 'A' and 'D' to 'B'.

>>> rename(chain={'C':'A', 'D':'B'})

Rename residue 'UNL' to 'LIG' for all chains.

>>> rename(resname={'UNL' : 'LIG'})

Rename residue 'UNL' to 'LIG' only for chain B.

>>> rename(chain={"B":"B"}, resname={'UNL' : 'LIG'})

Rename atom '2H2' to 'H22' for all residues and chains.

>>> rename(atom={"2H2": "H22"})

Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.

>>> rename(chain={"C:C"}, resname={"UNL":"UNL"}, atom={"H1" : "H11", "H2": "H12"})

Parameters:

Name Type Description Default
chain Optional[dict]

map of chain ids {old:new}. Defaults to None.

None
resname Optional[dict]

map of residue names {old:new}. Defaults to None.

None
atom Optional[dict]

map of atom names {old:new}. Defaults to None.

None
Source code in mdscribe/helper/pdbfile.py
def rename(self, 
           chain:Optional[dict] = None, 
           resname:Optional[dict] = None, 
           atom:Optional[dict] = None) -> None:
    """Rename PDB chains/residues/atoms.

    Examples:
        Rename chain 'C' to 'A' and 'D' to 'B'.
        >>> rename(chain={'C':'A', 'D':'B'})

        Rename residue 'UNL' to 'LIG' for all chains.
        >>> rename(resname={'UNL' : 'LIG'})

        Rename residue 'UNL' to 'LIG' only for chain B.
        >>> rename(chain={"B":"B"}, resname={'UNL' : 'LIG'})

        Rename atom '2H2' to 'H22' for all residues and chains.
        >>> rename(atom={"2H2": "H22"})

        Rename atoms 'H1' and 'H2' to 'H11' and 'H12' for chain C and residue UNL.
        >>> rename(chain={"C:C"}, resname={"UNL":"UNL"}, atom={"H1" : "H11", "H2": "H12"})

    Args:
        chain (Optional[dict], optional): map of chain ids {old:new}. Defaults to None.
        resname (Optional[dict], optional): map of residue names {old:new}. Defaults to None.
        atom (Optional[dict], optional): map of atom names {old:new}. Defaults to None.
    """
    for model in self.st:
        for _chain in model:
            if chain is None:
                subject_chains = [c.id for c in model.get_chains()]
            else:
                subject_chains = [k for k in chain]

            if _chain.id not in subject_chains:
                continue

            if chain:
                old_chain_id = _chain.id
                new_chain_id = chain[old_chain_id]
                _chain.id = new_chain_id
                if old_chain_id != new_chain_id:
                    print(f"renamed chain id : ", end=" ")
                    print(f"{old_chain_id}", end=" -> ")
                    print(f"{new_chain_id}")

            for _residue in _chain:
                if resname is None:
                    subject_resnames = [r.resname for r in _chain.get_residues()]
                else:
                    subject_resnames = [k for k in resname]

                if _residue.resname not in subject_resnames:
                    continue

                hetflag, resseq, iCode = _residue.get_id()

                if resname:
                    old_resname = _residue.resname
                    new_resname = resname[old_resname]
                    _residue.resname = new_resname
                    if old_resname != new_resname:
                        print(f"renamed residue {_chain.id}", end=" : ")
                        print(f"{old_resname}({resseq})", end=" ->")
                        print(f"{new_resname}({resseq})")
                else:
                    old_resname = None
                    new_resname = None

                for _atom in _residue:
                    if atom is None:
                        subject_atoms = [a.name for a in _residue.get_atoms()]
                    else:
                        subject_atoms = [k for k in atom]

                    if _atom.name not in subject_atoms:
                        continue

                    if atom:
                        old_atom_name = _atom.name
                        new_atom_name = atom[old_atom_name]
                        _atom.name = new_atom_name
                        if old_atom_name != new_atom_name:
                            print(f"renamed atom {_chain.id}.{_residue.resname}({resseq})", end=" : ")
                            print(f"{old_atom_name}", end=" -> ")
                            print(f"{new_atom_name}")

reorder()

Source code in mdscribe/helper/pdbfile.py
def reorder(self) -> None:
    builder = StructureBuilder()
    builder.init_structure(self.st.id)
    for model in self.st:
        builder.init_model(model.id, serial_num=None)
        for chain in sorted(model, key=lambda x:x.id):
            builder.init_chain(chain.id)
            for residue in sorted(chain, key=lambda x: x.get_id()[1]):
                hetflag, resseq, iCode = residue.get_id()
                builder.init_residue(residue.resname, hetflag, resseq, iCode)
                for atom in residue:
                    builder.init_atom(atom.name, 
                                    atom.coord, 
                                    atom.bfactor, 
                                    atom.occupancy,
                                    atom.altloc,
                                    atom.fullname,
                                    None, # serial_number
                                    atom.element,
                                    )
    self.st = builder.get_structure()

reorient(residue_selection='', invert_Z=False, offset=np.array([0.0, 0.0, 0.0]))

Reorient coordinates according to the principal axis.

Examples:

Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.

>>> pdb.reorient("A:12-50,B:100-200") 

Parameters:

Name Type Description Default
residue_selection str

residues for principal axis calculation. Defaults to "".

''
invert_Z bool

whether to invert Z axis. Defaults to False.

False
offset ndarray

translate coordinates. Defaults to np.array([0.,0.,0.]).

array([0.0, 0.0, 0.0])
Source code in mdscribe/helper/pdbfile.py
def reorient(self, 
             residue_selection:str="", 
             invert_Z:bool=False, 
             offset:np.ndarray=np.array([0.,0.,0.])) -> None:
    """Reorient coordinates according to the principal axis.

    Examples:
        Chain A residues 12-50 and chain B residues 100-200 will be used for principal axis calculation.
        >>> pdb.reorient("A:12-50,B:100-200") 

    Args:
        residue_selection (str, optional): residues for principal axis calculation. Defaults to "".
        invert_Z (bool, optional): whether to invert Z axis. Defaults to False.
        offset (np.ndarray, optional): translate coordinates. Defaults to np.array([0.,0.,0.]).
    """
    coords = []
    if residue_selection:
        subset_residues = {}
        # subset of atoms for Eigenvector/Eigenvalue calculations
        for chain in residue_selection.split(","):
            (chainId, resSeq_range) = chain.split(":")
            resSeq_range_tuple = tuple(sorted(map(int, resSeq_range.split("-"))))
            if chainId in subset_residues:
                subset_residues[chainId].append(resSeq_range_tuple)
            else:
                subset_residues[chainId] = [resSeq_range_tuple]
        for model in self.st:
            for chain in model:
                if chain.id not in subset_residues:
                    continue
                for residue in chain:
                    hetflag, resseq, iCode = residue.get_id()
                    flag = []
                    for (lb, ub) in subset_residues[chain.id]:
                        if resseq >= lb and resseq <= ub:
                            flag.append(True)
                        else:
                            flag.append(False)
                    if any(flag):
                        coords.extend([atom.coord for atom in residue])
    else:
        for model in self.st:
            for chain in model:
                for residue in chain:
                    coords.extend([atom.coord for atom in residue])

    coords = np.array(coords, float)
    centroid = np.mean(coords, axis=0)
    box_min = np.min(coords, axis=0)
    box_max = np.max(coords, axis=0)
    print("REMARK geometric center:", centroid)
    print("REMARK coordinate range:", box_min, box_max)
    print("REMARK box size:", box_max-box_min)

    coords = coords - centroid

    # import numpy as np
    # data = np.array([[1, 2], [3, 4], [5, 6]])  # Example data
    # cov_matrix = np.cov(data, rowvar=False)
    # eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    # rotation_matrix = eigenvectors
    # rotated_data = np.dot(data, rotation_matrix.T)

    inertia = np.dot(coords.transpose(), coords)
    w,v = np.linalg.eig(inertia)

    # warning eigen values are not necessary ordered!
    # http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
    #--------------------------------------------------------------------------
    # order eigen values (and eigen vectors)
    #
    # axis1 is the principal axis with the biggest eigen value (eval1)
    # axis2 is the principal axis with the second biggest eigen value (eval2)
    # axis3 is the principal axis with the smallest eigen value (eval3)
    #--------------------------------------------------------------------------
    # axis1 --> Z
    # axis2 --> Y
    # axis3 --> X
    order = np.argsort(w)
    eval3, eval2, eval1 = w[order]
    axis3, axis2, axis1 = v[:, order]

    print("REMARK x-axis",axis3,"eval=",eval3)
    print("REMARK y-axis",axis2,"eval=",eval2)
    print("REMARK z-axis",axis1,"eval=",eval1)

    R = np.array([axis3, axis2, axis1]) # decreasing order
    # R_inv = np.linalg.inv(R)

    self.transform(rot=R, trans=-centroid)

transform(centroid=np.array([0.0, 0.0, 0.0]), rot=np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), trans=np.array([0.0, 0.0, 0.0]))

Source code in mdscribe/helper/pdbfile.py
def transform(self,
              centroid: np.ndarray = np.array([0., 0., 0.]),
              rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]),
              trans:np.ndarray = np.array([0., 0., 0.])) -> None:
    try:
        assert rot.shape == (3,3)
    except:
        raise ValueError('shape of rotation matrix should be (3,3)')
    try:
        assert trans.shape == (3,)
    except:
        raise ValueError('shape of translation matrix should be (3,)')

    # rotation
    self.coords = np.dot(self.coords - centroid, rot.T) + centroid

    # translation
    self.coords = self.coords + trans

    # update atomic coordinates
    atom_idx = 0
    for model in self.st:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atom.coord = self.coords[atom_idx,:]
                    atom_idx += 1

update_coord(coords)

Source code in mdscribe/helper/pdbfile.py
def update_coord(self, coords:np.ndarray) -> None:
    assert self.coords.shape == coords.shape, "Matrix dimensions must match"
    # update atomic coordinates
    atom_idx = 0
    for model in self.st:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atom.coord = coords[atom_idx,:]
                    atom_idx += 1

write(filename, model=None, chain=None, resname=None, resnumber=None)

Write to PDB with selections.

Examples:

>>> pdb.write(model=[0], chain=['A'])
write chain A and residues first-10,22-50,130-200,550,600-last
>>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")

Parameters:

Name Type Description Default
filename str | Path

output filename or path

required
model Optional[List[int]]

list of model numbers. Defaults to None.

None
chain Optional[List[str]]

list of chain ids. Defaults to None.

None
resname Optional[List[str]]

list of residue names. Defaults to None.

None
resnumber Optional[str]

residue number ranges. Defaults to None.

None
Source code in mdscribe/helper/pdbfile.py
def write(self, filename: str | Path, 
          model:Optional[List[int]] = None,
          chain:Optional[List[str]] = None, 
          resname:Optional[List[str]] = None,
          resnumber:Optional[str] = None,
          ) -> None:
    """Write to PDB with selections.

    Examples:
        >>> pdb.write(model=[0], chain=['A'])
        write chain A and residues first-10,22-50,130-200,550,600-last
        >>> pdb.write(chain=['A'], resnumber="-10,22-50,130-200,550,600-")

    Args:
        filename (str | Path): output filename or path
        model (Optional[List[int]], optional): list of model numbers. Defaults to None.
        chain (Optional[List[str]], optional): list of chain ids. Defaults to None.
        resname (Optional[List[str]], optional): list of residue names. Defaults to None.
        resnumber (Optional[str], optional): residue number ranges. Defaults to None.
    """

    io = PDBIO()

    if (model is None) and (chain is None) and (resname is None) and (resnumber is None):
        # write structure as it as
        io.set_structure(self.st)
    else: 
        # write only select model(s), chain(s) or residue(s)
        # select residues by numbers
        if resnumber is not None:
            select_resseqs = [tuple(map(lambda s: int(s) if s else -1, r.split("-"))) for r in resnumber.split(",")]
            # for resnumber="-10,22-50,130-200,550,600-"
            # [(-1, 10), (22, 50), (130, 200), (550,), (600, -1)]
        builder = StructureBuilder()
        builder.init_structure(self.st.id)
        for _model in self.st:
            if (model is not None) and (_model.id not in model):
                continue # reject model
            builder.init_model(_model.id, serial_num=None)
            for _chain in _model:
                if (chain is not None) and (_chain.id not in chain):
                    continue # reject chain
                builder.init_chain(_chain.id)
                for _residue in _chain:
                    hetflag, resseq, iCode = _residue.get_id()
                    if (resname is not None) and (_residue.resname not in resname):
                        continue # reject residue by name
                    # select residue numbers
                    include_flags = []
                    for lu in select_resseqs:
                        if len(lu) == 1:
                            if lu == resseq:
                                include_flags.append(True)
                            else:
                                include_flags.append(False)
                        else:
                            (l,u) = lu
                            if (l == -1 or l <= resseq) and (u ==-1 or u >= resseq):
                                include_flags.append(True)
                            else:
                                include_flags.append(False)
                    if not any(include_flags):
                        continue # reject residue by number
                    builder.init_residue(_residue.resname, hetflag, resseq, iCode)
                    for atom in _residue:
                        builder.init_atom(atom.name, 
                                        atom.coord, 
                                        atom.bfactor, 
                                        atom.occupancy,
                                        atom.altloc,
                                        atom.fullname,
                                        None, # serial_number
                                        atom.element,
                                        )
        io.set_structure(builder.get_structure())
    with open(filename, "w") as f:
        io.save(f)

angle_between(v1, v2)

Returns the angle in radians between vectors 'v1' and 'v2'::

angle_between((1, 0, 0), (0, 1, 0)) 1.5707963267948966 angle_between((1, 0, 0), (1, 0, 0)) 0.0 angle_between((1, 0, 0), (-1, 0, 0)) 3.141592653589793

Source code in mdscribe/helper/coord.py
def angle_between(v1, v2):
    """ Returns the angle in radians between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    rad = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
    deg = rad*180./np.pi
    if deg > 90.0 :
        deg = 180.0 - deg
    return deg

avoid_clash(fixed, movable, max_move=25.0, clash_cutoff=5.0, max_trials=1000)

Find a translation vector for movable coordinates to avoid steric clashes.

Parameters:

Name Type Description Default
fixed ndarray

coordinates to be fixed.

required
movable ndarry

coordinates to be moved.

required
max_move float

maximum translation. Defaults to 25.0.

25.0
clash_cutoff float

distance considered as potential clash. Defaults to 5.0.

5.0
max_trials int

maximum number of trials. Defaults to 1000.

1000

Returns:

Type Description
(int, int, float, ndarray)

(num_trials, clash_count, closest_dist, trans)

Source code in mdscribe/helper/coord.py
def avoid_clash(fixed:np.ndarray, movable:np.ndarray, max_move:float=25.0, clash_cutoff:float=5.0, max_trials:int=1000) -> Tuple[int, int, float, np.ndarray]:
    """Find a translation vector for `movable` coordinates to avoid steric clashes.

    Args:
        fixed (np.ndarray): coordinates to be fixed.
        movable (np.ndarry): coordinates to be moved.
        max_move (float, optional): maximum translation. Defaults to 25.0.
        clash_cutoff (float, optional): distance considered as potential clash. Defaults to 5.0.
        max_trials (int, optional): maximum number of trials. Defaults to 1000.

    Returns:
        (int,int,float,np.ndarray): (num_trials, clash_count, closest_dist, trans)
    """
    kd = KDTree(fixed)
    num_trials = 0
    clash_count = 1
    while clash_count > 0 and num_trials < max_trials:
        num_trials += 1
        trans = max_move * (2.0*np.random.rand(3) -1.0) 
        # numpy.random.rand() generates [0,1) so 2x-1 -> [-1,+1)
        coords = movable + trans
        distances, indices = kd.query(coords)
        clash_count = np.sum(distances < clash_cutoff)
        closest_dist = np.min(distances)
    return (num_trials, clash_count, closest_dist, trans)

download(PDBID, verbose=True)

Source code in mdscribe/helper/pdbfile.py
def download(PDBID:str, verbose:bool=True) -> None:
    pdb = pypdb.get_pdb_file(PDBID, filetype="pdb")
    filename = PDBID+".pdb"
    if verbose:
        print("downloading ",PDBID,"as", filename, end="")
    with open(filename,"w") as f:
        f.write(pdb)
        if verbose:
            print("done")

get_ligands(PDBID)

Examples:

@structureId : 4KEB
@chemicalID : 1QZ
@type : non-polymer
@molecularWeight : 423.51
chemicalName : 6-ethyl-5-{(3S)-3-[3-(isoquinolin-5-yl)-5-methoxyphenyl]but-1-yn-1-yl}pyrimidine-2,4-diamine
formula : C26 H25 N5 O
InChI : InChI=1S/C26H25N5O/c1-4-24-23(25(27)31-26(28)30-24)9-8-16(2)18-12-19(14-20(13-18)32-3)21-7-5-6-17-15-29-11-10-22(17)21/h5-7,10-16H,4H2,1-3H3,(H4,27,28,30,31)/t16-/m1/s1
InChIKey : MGLLCDAARSVGLO-MRXNPFEDSA-N
smiles : CCc1c(c(nc(n1)N)N)C#C[C@@H](C)c2cc(cc(c2)OC)c3cccc4c3ccnc4

@structureId : 4KEB
@chemicalID : FOL
@type : non-polymer
@molecularWeight : 441.397
chemicalName : FOLIC ACID
formula : C19 H19 N7 O6
InChI : InChI=1S/C19H19N7O6/c20-19-25-15-14(17(30)26-19)23-11(8-22-15)7-21-10-3-1-9(2-4-10)16(29)24-12(18(31)32)5-6-13(27)28/h1-4,8,12,21H,5-7H2,(H,24,29)(H,27,28)(H,31,32)(H3,20,22,25,26,30)/t12-/m0/s1
InChIKey : OVBPIULPVIDEAO-LBPRGKRZSA-N
smiles : c1cc(ccc1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCc2cnc3c(n2)C(=O)N=C(N3)N

@structureId : 4KEB
@chemicalID : NDP
@type : non-polymer
@molecularWeight : 745.421
chemicalName : NADPH DIHYDRO-NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE
formula : C21 H30 N7 O17 P3
InChIKey : ACFIXJIJDZMPPO-NNYOXOHSSA-N
InChI : InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1
smiles : c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H]4[C@H]([C@H]([C@@H](O4)N5C=CCC(=C5)C(=O)N)O)O)O)OP(=O)(O)O)N
Source code in mdscribe/helper/pdbfile.py
def get_ligands(PDBID:str) -> None:
    """
    Examples:

        @structureId : 4KEB
        @chemicalID : 1QZ
        @type : non-polymer
        @molecularWeight : 423.51
        chemicalName : 6-ethyl-5-{(3S)-3-[3-(isoquinolin-5-yl)-5-methoxyphenyl]but-1-yn-1-yl}pyrimidine-2,4-diamine
        formula : C26 H25 N5 O
        InChI : InChI=1S/C26H25N5O/c1-4-24-23(25(27)31-26(28)30-24)9-8-16(2)18-12-19(14-20(13-18)32-3)21-7-5-6-17-15-29-11-10-22(17)21/h5-7,10-16H,4H2,1-3H3,(H4,27,28,30,31)/t16-/m1/s1
        InChIKey : MGLLCDAARSVGLO-MRXNPFEDSA-N
        smiles : CCc1c(c(nc(n1)N)N)C#C[C@@H](C)c2cc(cc(c2)OC)c3cccc4c3ccnc4

        @structureId : 4KEB
        @chemicalID : FOL
        @type : non-polymer
        @molecularWeight : 441.397
        chemicalName : FOLIC ACID
        formula : C19 H19 N7 O6
        InChI : InChI=1S/C19H19N7O6/c20-19-25-15-14(17(30)26-19)23-11(8-22-15)7-21-10-3-1-9(2-4-10)16(29)24-12(18(31)32)5-6-13(27)28/h1-4,8,12,21H,5-7H2,(H,24,29)(H,27,28)(H,31,32)(H3,20,22,25,26,30)/t12-/m0/s1
        InChIKey : OVBPIULPVIDEAO-LBPRGKRZSA-N
        smiles : c1cc(ccc1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCc2cnc3c(n2)C(=O)N=C(N3)N

        @structureId : 4KEB
        @chemicalID : NDP
        @type : non-polymer
        @molecularWeight : 745.421
        chemicalName : NADPH DIHYDRO-NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE
        formula : C21 H30 N7 O17 P3
        InChIKey : ACFIXJIJDZMPPO-NNYOXOHSSA-N
        InChI : InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1
        smiles : c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H]4[C@H]([C@H]([C@@H](O4)N5C=CCC(=C5)C(=O)N)O)O)O)OP(=O)(O)O)N
    """

    ligandInfo = pypdb.get_ligands(PDBID)

    try:    
        ligands = ligandInfo["ligandInfo"]["ligand"]
    except:
        ligands = []

    for ligand_dict in ligands:
        for k in ligand_dict:
            print("%20s : %s" % (k, ligand_dict[k]))

    # chem_desc= pypdb.describe_chemical(PDBID)
    # chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
    # pdb_desc= pypdb.describe_pdb("5cgc")
    # pdb content as list of strings
    data = {
        "name": [],
        "smiles": [],
        "molwt": [],
        "chemical name": [],
        "InChiKey": [],
        "InChi": [],
    }
    if "," in PDBID:
        PDBID_list = PDBID.split(",")
        for PDBID in PDBID_list:
            for d in ligands:
                try:
                    data["name"].append(d['@chemicalID'])
                    data["smiles"].append(d['smiles'])
                    data["molwt"].append(float(d['@molecularWeight']))
                    data["chemical name"].append(d['chemicalName'])
                    data["InChiKey"].append(d['InChIKey'])
                    data["InChi"].append(d['InChi'])
                except:
                    continue
    else:
        for d in ligands:
            try:
                data["name"].append(d['@chemicalID'])
                data["smiles"].append(d['smiles'])
                data["molwt"].append(float(d['@molecularWeight']))
                data["chemical name"].append(d['chemicalName'])
                data["InChiKey"].append(d['InChIKey'])
                data["InChi"].append(d['InChi'])
            except:
                continue

hexagonal_grid()

Generate XY coordinates of hexagonal grid for membrane MD simulations.

Returns:

Type Description
ndarray

np.ndarray: (x,y) coordinates

Source code in mdscribe/helper/coord.py
def hexagonal_grid() -> np.ndarray:
    """Generate XY coordinates of hexagonal grid for membrane MD simulations.

    Returns:
        np.ndarray: (x,y) coordinates
    """
    theta = np.radians(60)
    c, s = np.cos(theta), np.sin(theta)
    R = np.array([[c, -s], [s, c]])

    d = 50.0
    grid = {0: [ np.array([0,0]) ] }
    for shell in [1, 2]:
        v = np.array([0.0, d*shell])
        grid[shell] = [v]
        for i in range(0, 5):
            v = np.dot(R, v)
            if shell > 1:
                delta = (grid[shell][-1] - v)/shell
                for j in range(1, shell):
                    grid[shell].append(j*delta + v)
            grid[shell].append(v)
        if shell > 1:
            v = np.dot(R, v)
            delta = (grid[shell][-1] - v)/shell
            for j in range(1, shell):
                grid[shell].append(j*delta + v)
        grid[shell].append(v)

    xy = []
    for shell in grid:
        for item in grid[shell] :
            x_, y_ = item
            xy.append([x_, y_])
    return np.array(xy)

kabsch_algorithm(P, Q)

Computes the optimal rotation and translation to align two sets of points (P -> Q), and their RMSD.

Examples:

>>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
>>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
>>> # Q is translated by (1,1,0) 
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)
>>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
>>> # Q is a 90-degree rotation of P around the z-axis
>>> (rot, trans, rmsd) = kabsch_transform(P, Q)
>>> transform(P, rot, trans)

Parameters:

Name Type Description Default
P ndarray

subject coordinates. Not modified.

required
Q ndarray

target coordinates. Not modified.

required

Returns:

Type Description
Tuple[ndarray, ndarray, ndarray, float]

Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal translation vector, the centroid of P, and the RMSD.

Source code in mdscribe/helper/coord.py
def kabsch_algorithm(P:np.ndarray, Q:np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]:
    """Computes the optimal rotation and translation to align two sets of points (P -> Q),
    and their RMSD.

    Examples:
        >>> P = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
        >>> Q = np.array([[1, 1, 0], [2, 1, 0], [1, 2, 0]])
        >>> # Q is translated by (1,1,0) 
        >>> (rot, trans, rmsd) = kabsch_transform(P, Q)
        >>> transform(P, rot, trans)

        >>> P = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        >>> Q = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
        >>> # Q is a 90-degree rotation of P around the z-axis
        >>> (rot, trans, rmsd) = kabsch_transform(P, Q)
        >>> transform(P, rot, trans)

    Args:
        P (np.ndarray): subject coordinates. Not modified.
        Q (np.ndarray): target coordinates. Not modified.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, float]: A tuple containing the optimal rotation matrix, the optimal
            translation vector, the centroid of P, and the RMSD.
    """

    assert P.shape == Q.shape, "Matrix dimensions must match"

    # Compute centroids
    centroid_P = np.mean(P, axis=0)
    centroid_Q = np.mean(Q, axis=0)

    # Optimal translation
    t = centroid_Q - centroid_P

    # Center the points
    p = P - centroid_P
    q = Q - centroid_Q

    # Compute the covariance matrix
    H = np.dot(p.T, q)

    # SVD
    U, S, Vt = np.linalg.svd(H)
    V = Vt.T
    d = np.linalg.det(np.dot(V, U.T))
    e = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, d]])

    # Optimal rotation
    R = np.dot(np.dot(V, e), U.T)

    # RMSD
    rmsd = np.sqrt(np.sum(np.square(np.dot(p, R.T) - q)) / P.shape[0])

    return (R, t, centroid_P, rmsd)

pca(X)

Source code in mdscribe/helper/coord.py
def pca(X):
    # Data matrix X
    n, m = X.shape
    centroid = X.mean(axis=0)
    X = X - centroid # make it 0-centered
    assert np.allclose(X.mean(axis=0), np.zeros(m), atol=1e-5)
    # Compute covariance matrix
    C = np.dot(X.T, X) / (n-1)
    # Eigen decomposition
    eigen_vals, eigen_vecs = np.linalg.eig(C)
    # Project X onto PC space
    # X_pca = np.dot(X, eigen_vecs)
    return eigen_vecs

quaternion_to_rotation_matrix(Q)

Covert a quaternion into a full three-dimensional rotation matrix.

Parameters:

Name Type Description Default
Q List[float]

A 4 element array representing the quaternion (q0,q1,q2,q3)

required

Returns:

Type Description
ndarray

np.ndarray: A 3x3 element matrix representing the full 3D rotation matrix. This rotation matrix converts a point in the local reference frame to a point in the global reference frame.

Source code in mdscribe/helper/coord.py
def quaternion_to_rotation_matrix(Q:List[float]) -> np.ndarray:
    """Covert a quaternion into a full three-dimensional rotation matrix.

    Args:
        Q (List[float]): A 4 element array representing the quaternion (q0,q1,q2,q3)

    Returns:
        np.ndarray: A 3x3 element matrix representing the full 3D rotation matrix. 
            This rotation matrix converts a point in the local reference frame 
            to a point in the global reference frame.
    """

    # Extract the values from Q
    q0 = Q[0]
    q1 = Q[1]
    q2 = Q[2]
    q3 = Q[3]

    # First row of the rotation matrix
    r00 = 2 * (q0 * q0 + q1 * q1) - 1
    r01 = 2 * (q1 * q2 - q0 * q3)
    r02 = 2 * (q1 * q3 + q0 * q2)

    # Second row of the rotation matrix
    r10 = 2 * (q1 * q2 + q0 * q3)
    r11 = 2 * (q0 * q0 + q2 * q2) - 1
    r12 = 2 * (q2 * q3 - q0 * q1)

    # Third row of the rotation matrix
    r20 = 2 * (q1 * q3 - q0 * q2)
    r21 = 2 * (q2 * q3 + q0 * q1)
    r22 = 2 * (q0 * q0 + q3 * q3) - 1

    # 3x3 rotation matrix, R
    R = np.array([[r00, r01, r02],
                           [r10, r11, r12],
                           [r20, r21, r22]])

    return R

random_rotation_matrix()

Generate a random rotational matrix.

Returns:

Name Type Description
ndarray ndarray

random (3,3) matrix

Source code in mdscribe/helper/coord.py
def random_rotation_matrix() -> np.ndarray:
    """Generate a random rotational matrix.

    Returns:
        ndarray : random (3,3) matrix
    """
    axis = np.random.randn(3)
    axis /= np.linalg.norm(axis)

    angle = random.uniform(0, 2 * np.pi)
    # Build the rotation matrix using Rodrigues' formula
    K = np.array([[0, -axis[2], axis[1]],
                  [axis[2], 0, -axis[0]],
                  [-axis[1], axis[0], 0]])
    R = np.eye(3) + np.sin(angle) * K + (1 - np.cos(angle)) * np.dot(K, K)

    return R

random_translation_vector(lower=0.0, upper=1.0)

Generate a random translational vector.

Parameters:

Name Type Description Default
lower float)

lower bound. Defaults to 0.0

0.0
upper float)

upper bound. Defaults to 1.0

1.0

Returns:

Type Description
ndarray

np.ndarray: random (3,) vector

Source code in mdscribe/helper/coord.py
def random_translation_vector(lower:float=0.0, upper:float=1.0) -> np.ndarray:
    """Generate a random translational vector.

    Args:
        lower (float) : lower bound. Defaults to 0.0
        upper (float) : upper bound. Defaults to 1.0

    Returns:
        np.ndarray: random (3,) vector
    """
    rng = np.random.default_rng()
    return rng.uniform(lower, upper, size=3)

renumber(residue)

Source code in mdscribe/helper/pdbfile.py
def renumber(residue:Optional[dict]):
    pass

reorient()

Source code in mdscribe/helper/pdbfile.py
def reorient():
    argparser = argparse.ArgumentParser(description='Reorient PDB')
    argparser.add_argument("filename", help="pdb filename")
    argparser.add_argument("--inverse-z", dest="inverse_z_axis", help="inverse z axis", default=False, action="store_true")
    argparser.add_argument("--residue", help="residues for principal axes calc. ex. A:23-50,B:1-90", default="")
    argparser.add_argument("--offset-x", dest="offset_x", help="translate x coordinates", type=float, default=0.0)
    argparser.add_argument("--offset-y", dest="offset_y", help="translate y coordinates", type=float, default=0.0)
    argparser.add_argument("--offset-z", dest="offset_z", help="translate z coordinates", type=float, default=0.0)
    argparser.add_argument("--segid", dest="segId", help="override segment id", default="")
    argparser.add_argument("--chainid", dest="chainId", help="override chain id", default="")
    args = argparser.parse_args()

    # subset of atoms for Eigenvector/Eigenvalue calculations
    subset_atoms = []
    if args.residue:
        for chain in args.residue.split(","):
            (chainId, resSeq_range) = chain.split(":")
            (resSeq_begin, resSeq_end) = resSeq_range.split("-")
            subset_atoms.append((chainId, int(resSeq_begin), int(resSeq_end)))

    with open(args.filename, "r") as pdb_file:
        xyz = []
        for line in pdb_file:
            if line.startswith('ATOM') or line.startswith('HETATM'):
                chainId = line[21:22]
                resSeq = int(line[22:26])
                if subset_atoms:
                    for chainId_, resSeq_begin, resSeq_end in subset_atoms:
                        if not (chainId == chainId_ and resSeq >= resSeq_begin and resSeq <= resSeq_end):
                            continue
                x = float(line[30:38].strip())
                y = float(line[38:46].strip())
                z = float(line[46:54].strip())
                xyz.append([x,y,z])

        print("REMARK reoriented according to its principal axes")
        if args.residue:
            print(f"REMARK principal axes are calculated using --residue {args.residue}")
        print(f"REMARK coordinates from {args.filename} ({len(xyz)} atoms)")
        if args.inverse_z_axis:
            print("REMARK Z axis inverted")
        print(f"REMARK coordinates offset X {args.offset_x:8.3f} Y {args.offset_x:8.3f} Z {args.offset_x:8.3f}")

        coord = np.array(xyz, float)

        # geometric center
        center = np.mean(coord, 0)
        box_min = np.min(coord, 0)
        box_max = np.max(coord, 0)
        print("REMARK geometric center:", center)
        print("REMARK coordinate range:", box_min, box_max)
        print("REMARK box size:", box_max-box_min)

        coord = coord - center

        # compute principal axis matrix
        inertia = np.dot(coord.transpose(), coord)
        w,v = np.linalg.eig(inertia)

        # warning eigen values are not necessary ordered!
        # http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
        #--------------------------------------------------------------------------
        # order eigen values (and eigen vectors)
        #
        # axis1 is the principal axis with the biggest eigen value (eval1)
        # axis2 is the principal axis with the second biggest eigen value (eval2)
        # axis3 is the principal axis with the smallest eigen value (eval3)
        #--------------------------------------------------------------------------
        # axis1 --> Z
        # axis2 --> Y
        # axis3 --> X
        order = np.argsort(w)
        eval3, eval2, eval1 = w[order]
        axis3, axis2, axis1 = v[:, order]

        print("REMARK x-axis",axis3,"eval=",eval3)
        print("REMARK y-axis",axis2,"eval=",eval2)
        print("REMARK z-axis",axis1,"eval=",eval1)

        R = np.array([axis3, axis2, axis1]) # decreasing order
        R_inv = np.linalg.inv(R)

        pdb_file.seek(0)
        for line in pdb_file:
            if not line: continue
            if line.startswith('ATOM') or line.startswith('HETATM'):
                x = float(line[30:38].strip())
                y = float(line[38:46].strip())
                z = float(line[46:54].strip())
                xyz = np.array([x,y,z])
                xyz = xyz - center
                xyz = np.dot(R_inv, xyz)
                x, y, z = xyz
                if args.inverse_z_axis:
                    z = -z
                x += args.offset_x
                y += args.offset_y
                z += args.offset_z

                # keep the rest of information as they are
                line = line[:30] + "%8.3f%8.3f%8.3f" % (x, y, z) + line[54:]

                # override chain id
                if args.chainId:
                    line = line[:21] + "%1s" % args.chainId + line[22:]

                # override segment id
                if args.segId:
                    line = line[:72] + "%4s" % args.segId + line[76:]

                print(line, end='')
            else:
                print(line, end='')

test_kabsch_algorithm(trials=100, rtol=1e-05, atol=1e-08)

Source code in mdscribe/helper/coord.py
def test_kabsch_algorithm(trials:int=100, rtol:float=1e-5, atol:float=1e-8) -> None:
    results = []
    for i in range(trials):
        rot = random_rotation_matrix()
        trans = random_translation_vector(0.0, 50.0)
        # generate a 10x3 array with random floats between 0 and 1
        P = np.random.rand(10, 3) * 100.0
        Q = transform(P, rot, trans)
        (rot_, trans_, centroid_, rmsd_) = kabsch_algorithm(P, Q)
        res = np.allclose(rot, rot_, rtol=rtol, atol=atol) and np.allclose(trans, trans_, rtol=rtol, atol=atol)
        results.append(res)
    if all(results):
        print("pass")
    else:
        print("failed")

transform(P, centroid=np.array([0.0, 0.0, 0.0]), rot=np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), trans=np.array([0.0, 0.0, 0.0]))

Rotate and translate input coordinates.

Parameters:

Name Type Description Default
P ndarray

input coordinates.

required
centroid ndarray

centroid. Defaults to np.array([0., 0., 0.])

array([0.0, 0.0, 0.0])
rot ndarray

rotation matrix. Defaults to np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]).

array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
trans ndarray

translation vector. Defaults to np.array([0., 0., 0.]).

array([0.0, 0.0, 0.0])

Returns:

Type Description
ndarray

np.ndarray: transformed output coordinates.

Source code in mdscribe/helper/coord.py
def transform(P:np.ndarray,
              centroid:np.ndarray = np.array([0., 0., 0.]),
              rot:np.ndarray = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]), 
              trans:np.ndarray = np.array([0., 0., 0.])) -> np.ndarray:
    """Rotate and translate input coordinates.

    Args:
        P (np.ndarray): input coordinates.
        centroid (np.ndarray, optional): centroid. Defaults to np.array([0., 0., 0.])
        rot (np.ndarray, optional): rotation matrix. Defaults to np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]]).
        trans (np.ndarray, optional): translation vector. Defaults to np.array([0., 0., 0.]).

    Returns:
        np.ndarray: transformed output coordinates.
    """
    Q = np.dot(P-centroid, rot.T) + centroid + trans
    return Q

unit_vector(vector)

Returns the unit vector of the vector.

Source code in mdscribe/helper/coord.py
def unit_vector(vector):
    """ Returns the unit vector of the vector.  """
    return vector / np.linalg.norm(vector)

view_svg(rdmol, highlight=[], width=300, height=300)

SVG depiction for the Jupyter Notebook.

Reference

https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/

Parameters:

Name Type Description Default
rdmol Mol

rdkit molecule.

required
highlight list

highlighted atom indexes. Defaults to [].

[]
width int

width. Defaults to 300.

300
height int

height. Defaults to 300.

300
Source code in mdscribe/helper/svg.py
def view_svg(rdmol:Chem.Mol, highlight=[], width=300, height=300):
    """SVG depiction for the Jupyter Notebook.

    Reference:
        https://www.linkedin.com/pulse/using-rdkit-jupyter-notebooks-lee-davies/

    Args:
        rdmol (Chem.Mol): rdkit molecule.
        highlight (list, optional): highlighted atom indexes. Defaults to [].
        width (int, optional): width. Defaults to 300.
        height (int, optional): height. Defaults to 300.
    """
    rdmol2d = Chem.Mol(rdmol)
    AllChem.Compute2DCoords(rdmol2d)
    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(rdmol2d, highlightAtoms=highlight)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    display(SVG(svg.replace("svg:","")))