Introduction
DesmondTools have a set of command-line scripts and a python library written to make setting up the molecular dynamics simulations easier for Desmond.
Install
What is Multisim Class?
The following code examples from test/test_multisim.py
demonstrate the functionality of the Multisim class.
from desmondtools import Multisim
def test_Multisim_variable():
# parse_string returns ['barostat.tau']
o1 = Multisim.variable.parse_string("barostat.tau")[0]
o2 = Multisim.variable.parse_string("barostat .tau")[0]
assert o1 == o2
def test_Multisim_expression_1():
i = "task {} simulate { meta = [{a=1 b=3 c=[7 8 9]}] f = {} } simulate {n=2} simulate {n = 3}"
D = Multisim(string=i).to_dot()
assert D.simulate[0].meta[0].b == str(3)
assert D.simulate[0].meta[0].c[1] == str(8)
assert D.simulate[-1].n == str(3)
def test_Multisim_expression_2():
i = "simulate { effect_if = [[ a ] b ] }"
D = Multisim(string=i).to_dot()
assert D.simulate.effect_if[0] == ['a']
assert D.simulate.effect_if[1] == 'b'
def test_Multisim_expression_3():
i = """ensemble = {
brownie = {
delta_max = 0.1
}
class = NVT
method = Brownie
thermostat = {
tau = 1.0
}
}"""
D = Multisim(string=i)
assert D.dot.ensemble.brownie.delta_max == str(0.1)
# assert D.dot.ensemble.class == 'NVT'
# ^^^^^
# SyntaxError: invalid syntax
# 'class' is reserved word in python and raises SyntaxError
# use instead .['class']
assert D.dot.ensemble['class'] == 'NVT' # <--- it does not raise SyntaxError
assert D.dot.ensemble.method == 'Brownie'
assert D.dot.ensemble.thermostat.tau == str(1.0)
D.write()
def test_Multisim_expression_4():
i = """task {
task = "desmond:auto"
}
simulate {
title = "NPT and no restraints, 24ps"
effect_if = [[ "@*.*.annealing" ] 'annealing = off temperature = "@*.*.temperature[0][0]"' ]
time = 24
ensemble = {
class = NPT
method = Langevin
thermostat.tau = 0.1
barostat.tau = 2.0
}
eneseq.interval = 0.3
trajectory.center = solute
}
simulate {
meta = {
cv = [
{
atom = [0 0 0 0]
type = dihedral
wall = 0
width = 5.0
}
]
cv_name = "$JOBNAME$[_replica$REPLICA$].cvseq"
first = 0.0
height = 0.03
interval = 0.09
name = "$JOBNAME$[_replica$REPLICA$].kerseq"
}
}
"""
D = Multisim(string=i)
assert len(D.dot.simulate[0].effect_if) == 2
assert len(D.dot.simulate[0].effect_if[0]) == 1
assert D.dot.simulate[0].ensemble.method == 'Langevin'
assert D.dot.simulate[0].ensemble.barostat.tau == str(2.0)
assert D.dot.simulate[0].eneseq.interval == str(0.3)
assert D.dot.simulate[0].trajectory.center == 'solute' # <---
assert len(D.dot.simulate[1].meta.cv) == 1
assert len(D.dot.simulate[1].meta.cv[0].atom) == 4
assert D.dot.simulate[1].meta.height == str(0.03)
assert D.dot.simulate[-1].meta.cv[0].wall == str(0)
D.dot.simulate[0].ensemble.barostat.tau = 2.1
assert D.dot.simulate[0].ensemble.barostat.tau == 2.1
D.dot.simulate[1].meta.height = 0.06
assert D.dot.simulate[1].meta.height == 0.06
D.dot.simulate[-1].meta.cv[0].wall = 50.0
assert D.dot.simulate[-1].meta.cv[0].wall == 50.0
def test_Multisim_expression_5():
D= Multisim(template="desmond-md.msj")
D.write()
Caution
Some keywords in the .msj
or .cfg
files may conflict with the reserved python syntax. In the below example, accessing the class
variable by directly using the .dot
expression such as D.dot.ensemble.class = 'NVT'
would raise a SyntaxError
because class
is a reserved word in Python. However, you can avoid the error by using a dictionary type access such as D.dot.ensemble['class'] = 'NVT'
.
ensemble = {
brownie = {
delta_max = 0.1
}
class = NVT
method = Brownie
thermostat = {
tau = 1.0
}
}
Use of Multisim Class
The desmondtools.Multisim
class facilitates reading and modifying Schrodinger Desmond .msj
and .cfg
files, which are typically generated by Schrodinger Maestro GUI task panels. By using a simple Python script with desmondtools
, you can efficiently prepare and set up multiple MD simulations without repeatedly launching Maestro or manually editing files. The example below demonstrates how to modify or create the contents of .msj
and .cfg
files using the .dot
attribute of a Multisim
instance.
from desmondtools import Multisim
# read template .msj and .cfg
md_msj = Multisim(template="desmond-md.msj")
md_cfg = Multisim(template="desmond-md.cfg")
with open(msj_file,"w") as msj:
# modify desmond msj template
md_msj.dot.simulate[-1].cfg_file = cfg_file_basename
# Setting up restraints using the restraints keyword:
# https://www.schrodinger.com/kb/332119
if args.posres_force > 0.0:
# print the restraints in the multisim log file
md_msj.dot.simulate[-1].print_restraint = 'true'
# add the new terms defined in "restraints.new" to existing restraints.
# The default is restraints.existing = ignore which will
# delete existing terms before adding any new ones.
# md_msj.dot.simulate[-1].restraints.existing = 'retain'
md_msj.dot.simulate[-1].restraints.new = [
{
'name' : 'posre_harm',
'atoms' : [ f'"{args.posres}"' ],
'force_constants' : [ args.posres_force, ] * 3,
}
]
# force constants in the x, y, and z direction
# writing modified msj
md_msj.write(msj)
with open(cfg_file,"w") as cfg:
# read and modify desmond cfg template
md_cfg.dot.randomize_velocity.seed = random.randint(1000, 9999)
md_cfg.dot.time = total_simulation_time
md_cfg.dot.temperature = t_schedule
md_cfg.dot.trajectory.interval = args.interval
# wring modified cfg
md_cfg.write(cfg)
Below are examples of .msj
and .cfg
files:
# .msj file example (part)
task {
task = "desmond:auto"
set_family = {
desmond = {
checkpt.write_last_step = no
}
}
}
simulate {
title = "Brownian Dynamics NVT, T = 10 K, small timesteps, ..."
annealing = off
time = 100
timestep = [0.001 0.001 0.003 ]
temperature = 10.0
ensemble = {
class = "NVT"
method = "Brownie"
brownie = {
delta_max = 0.1
}
}
restrain = {
atom = "solute_heavy_atom"
force_constant = 50.0
}
}
simulate {
effect_if = [["==" "-gpu" "@*.*.jlaunch_opt[-1]"] 'ensemble.method = Langevin']
title = "NVT, T = 10 K, small timesteps, ..."
annealing = off
time = 12
timestep = [0.001 0.001 0.003]
temperature = 10.0
restrain = { atom = solute_heavy_atom force_constant = 50.0 }
ensemble = {
class = NVT
method = Berendsen
thermostat.tau = 0.1
}
randomize_velocity.interval = 1.0
eneseq.interval = 0.3
trajectory.center = []
}
# .cfg file example (part)
randomize_velocity = {
first = 0.0
interval = inf
seed = 2967
temperature = "@*.temperature"
}
restrain = none
simbox = {
first = 0.0
interval = 1.2
name = "$JOBNAME$[_replica$REPLICA$]_simbox.dat"
}
surface_tension = 0.0
taper = false
temperature = [
[300.0 0 ]
]
time = 100000.0
timestep = [0.002 0.002 0.006 ]
Event and Interaction Classes
Schrodinger event analysis format or .eaf
files use the same syntax as the .cfg
or .msj
files. So, the desmondtools.Multisim
class is reused in the desmondtools.Event
and desmondtools.Interaction
classes to process Schrodinger provided trajectory analysis output ...-out.eaf
files. For example, below codes from desmondtools.Interaction
class shows how the class uses the .dot
attributes to process .eaf
output files.
for section in self.dot.Keywords:
try:
assert section.ProtLigInter.HBondResult
self.num_frames = len(section.ProtLigInter.HBondResult)
for frame in section.ProtLigInter.HBondResult:
# [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
for (frameno, prot, hbond_type, lig) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.HBond:
self.HBond[resSeq]['count'] += 1
else:
self.HBond[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.HBond):
fraction = float(self.HBond[resSeq]['count'])/self.num_frames
if verbose:
print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
Command-Line Interface
Command-Line Interfaces (CLI) are built on the Multisim
class. Some CLIs require Schrodinger Python API.
Command-line interface | Description |
---|---|
batch-desmond-cif | Convert .mae/.maegz to .cif |
batch-desmond-mdinfo | Check running MD simulations |
batch-desmond-maeinfo | Show info of .mae/.maegz file(s) |
batch-desmond-cmsinfo | Show info of .cms file(s) |
batch-desmond-setup | Batch Prepare MD systems |
batch-desmond-min | Batch Setup energy minimizations |
batch-desmond-md | Batch Setup MD simulations |
batch-desmond-metad | Batch Setup metadynamics |
batch-desmond-report | Batch Generate reports |
batch-desmond-pli | Batch Analyze protein-ligand interactions |
batch-desmond-distance | Batch Analyze distance |
batch-desmond-dihedral | Batch Analyze dihedral angles |
batch-desmond-ligrmsd | Batch Analyze ligand rmsd |
batch-desmond-rg | Batch Analyze radius of gyration |
batch-desmond-rmsx | Batch Analyze RMSD and ligand RMSF |
batch-desmond-mmgbsa | Batch Analyze trajectory MMGBSA |
For more helps, $ command --help
.
Metadynamics
Types of Collective Variables
type | Description | Default Width | Wall | Floor | Atom sites |
---|---|---|---|---|---|
dist | Distance | 0.05 Å | yes | yes | 2 |
angle | Angle | 2.5° | yes | yes | 3 |
dihedral | Dihedral | 5.0° | no | no | 4 |
rgyr | Radius of gyration | 0.1 Å | no | no | 1 |
rgyr_mass | Mass-weighted radius of gyration | 0.1 Å | no | no | 1 |
rmsd | RMSD from aligned starting structure | 0.1 Å | no | no | 1 |
rmsd_symm | Symmetry aware RMSD | 0.1 Å | no | no | 1 |
zdist | Distance along the z axis | 0.05 Å | yes | yes | 1 |
zdist0 | Absolute distance along the z axis | 0.1 Å | yes | yes | 1 |
whim1 | WHIM1 - first principal moment [35] | 0.5 Å2 | no | no | 1 |
whim2 | WHIM2 - second principal moment [35] | 0.25 Å2 | no | no | 1 |
Examples of Collective Variables
distance:
cv = [
{atom = ["res. UNK" "res. 849" ]
type = dist
wall = 40
floor = 10
width = 0.05
}
]
dihedral:
cv = [
{atom = [404 406 407 415 ]
type = dihedral
width = 5.0
}
{atom = [406 407 415 417 ]
type = dihedral
width = 5.0
}
]
zdist(membrane):
cv = [
{atom = ["res. UNK"]
type = zdist
width = 0.05
wall = 20
floor = 5
}
]
rmsd:
cv = [
{atom = ["res. UNK" "res. 849" ]
type = rmsd
width = 0.1
}
]
Well-Tempered Metadynamics
In well-tempered metadynamics, the height of the deployed Gaussians are rescaled (decreased) during simulation time by:
omega_0 * exp(-V/(kB * ΔT))
Where omega_0 is the initial hill height, V is the bias potential and the denominator in the exponential, kB * ΔT,
is the bias factor (kTemp). During well-tempered metadynamics, the dynamics of the system are effectively accelerated
(without heating up the system) up to T + ΔT, where T is the chosen MD simulation temperature.
The choice of the bias factor value is guided by the highest barrier in the simulation system
which the well-tempered metadynamics run should overcome. Here are some suggestions,
assuming that the initial hill height ω0 (height parameter in the Metadynamics panel) has been set to 0.3 kcal/mol:
Max. barrier height (kcal/mol) | kTemp (kcal/mol) |
---|---|
3 | 1.7 |
6 | 3.4 |
10 | 5.6 |
15 | 8.4 |
20 | 11.2 |
Extending Simulations
See https://www.schrodinger.com/kb/788642 for extending metadynamics simulation.
Python Classes
desmondtools.Multisim
Parsing Desmond multisim .cfg and .msj expressions
Source code in src/desmondtools/multisim.py
class Multisim:
"""Parsing Desmond multisim .cfg and .msj expressions"""
# variable, value, array, expr
# pyparsing module’s default behavior is to ignore whitespace.
# +: AND, |: MatchFirst, left-to-right, ^: Or(longest match)
# Group: --> list
# Dict: --> dict
# Forward: --> recursive
EQ = pp.Suppress('=')
LBRACKET, RBRACKET, LBRACE, RBRACE = map(pp.Literal, "[]{}")
variable = (pp.Word(pp.alphanums + "._/?-@") +
pp.Opt("." + pp.Word(pp.alphanums))).set_parse_action(''.join)
_string1 = pp.Word(pp.alphanums + "._/?-@*")
_string2 = pp.quoted_string()
_number = ppc.number()
value = (_string1 | _string2 | _number)
array = pp.Forward()
array <<= pp.Group(LBRACKET + (pp.ZeroOrMore(value | array)) + RBRACKET)
expr = pp.Forward()
_expr_0 = (variable + EQ + value)
_expr_1 = (variable + EQ + array)
_expr_2 = (variable + EQ + pp.Group(
LBRACE + pp.ZeroOrMore(expr) + RBRACE ))
_expr_3 = (variable + EQ + pp.Group(
LBRACKET + pp.ZeroOrMore(pp.Group(LBRACE + expr + RBRACE)) + RBRACKET))
_expr_4 = pp.Group(variable + pp.Group(
LBRACE + pp.ZeroOrMore( expr ) + RBRACE))
expr <<= pp.OneOrMore(pp.Dict(pp.Group(
_expr_0 | _expr_1 | _expr_2 | _expr_3 | _expr_4)))
expr.ignore("#" + pp.restOfLine)
def __init__(self, **kwargs):
self.template_path = None
self.ParseResults = None
self.dict = {}
self.dot = DotMap()
self.output = sys.stdout # do not attempt to close
self.indent = 4
if 'template' in kwargs:
template = kwargs['template']
template_path = pathlib.Path(template)
if template_path.is_file():
self.template_path = template_path
else:
template_path = importlib.resources.files('desmondtools')
self.template_path = template_path / template
if self.template_path is None:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), template)
self.ParseResults = Multisim.expr.parse_file(self.template_path)
self.decode()
elif 'string' in kwargs:
string = kwargs['string']
try:
self.ParseResults = Multisim.expr.parse_string(string)
self.decode()
except:
raise RuntimeError("Multisim: cannot parse the input string.")
else:
raise RuntimeError("Multisim: template filename or string is required.")
@staticmethod
def unfold_dict_key(d) -> None:
"""Unfold '.' in the keys of a dictionary
Args:
d (dict): dictionary (to be modified)
"""
ks = [ k for k in d ]
for k in ks:
v = d[k]
if '.' in k:
# d[k] = v --> d[kk[0]] = { kk[1] : v }
_u = None
_k = k.split('.')[0]
for kk in k.split('.')[-1:0:-1]:
if _u is None:
_u = {kk : v }
else:
_u = {kk : _u}
d[_k] = _u
del d[k]
else:
pass
if isinstance(v, dict):
Multisim.unfold_dict_key(v)
@staticmethod
def traverse_dict(d) -> None:
"""Recursively traverse a nested dictionary/list
Args:
d (dict): dictionary (to be modified)
"""
if isinstance(d, dict):
for k,v in d.items():
if isinstance(v, dict):
Multisim.traverse_dict(v)
elif isinstance(v, list):
if v == ['{','}']:
d[k] = {}
elif v == ['[',']']:
d[k] = []
elif v[0] == '[' and v[-1] == ']':
d[k] = v[1:-1]
Multisim.traverse_dict(d[k])
elif v[0] == '{' and v[-1] == '}':
d[k] = dict(v[1:-1])
Multisim.traverse_dict(d[k])
elif isinstance(d, list):
for k,v in enumerate(d):
if isinstance(v, dict):
Multisim.traverse_dict(v)
elif isinstance(v, list):
if v == ['{','}']:
d[k] = {}
elif v == ['[',']']:
d[k] = []
elif v[0] == '[' and v[-1] == ']':
d[k] = v[1:-1]
Multisim.traverse_dict(d[k])
elif v[0] == '{' and v[-1] == '}':
d[k] = dict(v[1:-1])
Multisim.traverse_dict(d[k])
def decode(self) -> None:
"""decode the parsed results into a dictionary and its dotmap"""
# create .dict
if isinstance(self.ParseResults.as_list()[0][0], str): # key
self.dict = self.ParseResults.as_dict()
Multisim.traverse_dict(self.dict)
# handle the case where key has '.'
Multisim.unfold_dict_key(self.dict)
elif isinstance(self.ParseResults.as_list()[0][0], list):
self.dict = [] # now self.dict is a list of dictionary
for section in self.ParseResults:
dict_ = dict(section.as_list())
Multisim.traverse_dict(dict_)
# handle the case where key has '.'
Multisim.unfold_dict_key(dict_)
self.dict.append(dict_)
# create .dot
if isinstance(self.dict, list):
self.dot = {}
for d in self.dict:
for k,v in d.items():
if k in self.dot:
if isinstance(self.dot[k], list):
self.dot[k].append(v)
else:
self.dot[k] = [self.dot[k], v]
else:
self.dot[k] = v
self.dot = DotMap(self.dot)
else:
self.dot = DotMap(self.dict)
def write(self, output=None):
"""Writes DOT object"""
if isinstance(output, io.IOBase):
self.output = output
elif isinstance(output, str):
self.output = open(output, "w")
if isinstance(self.dict, list):
blocks = []
for k, v in self.dot.items():
if isinstance(v, list):
for vv in v:
blocks.append({k:vv})
else:
blocks.append({k:v})
for block in blocks:
self._write_dict(block, block=True)
self.output.write("\n")
else:
self._write_dict(self.dot)
def _write_dict(self, d, block=False, depth=0):
"""subroutine of .write() method"""
spc = ' ' * self.indent * depth
if isinstance(d, dict) or isinstance(d, DotMap):
for k, v in d.items():
k = str(k)
if v:
if isinstance(v, dict) or isinstance(v, DotMap):
if depth == 0 and block:
self.output.write(spc + k + " {\n")
else:
self.output.write(spc + k + " = {\n")
self._write_dict(v, depth=depth+1)
self.output.write(spc + "}\n")
elif isinstance(v, list):
self.output.write(spc + k + " = [")
for vv in v:
if isinstance(vv, dict) or isinstance(vv, DotMap):
self.output.write("{\n")
elif isinstance(vv, list):
self.output.write("[")
self._write_dict(vv, depth=depth+1)
if isinstance(vv, dict) or isinstance(vv, DotMap):
self.output.write(spc + "}")
elif isinstance(vv, list):
self.output.write("]")
self.output.write("]\n")
else:
self.output.write(spc + k + " = " + str(v) +"\n")
else:
if isinstance(v, list) and (not bool(v)):
self.output.write(spc + k + " = []\n")
elif (isinstance(v, dict) or isinstance(v, DotMap))and (not bool(v)):
self.output.write(spc + k + " = {\n}\n")
else:
self.output.write(spc + k + " = \n")
elif isinstance(d, list):
for v in d:
self._write_dict(v, depth=depth+1)
else:
self.output.write(" " + str(d) + " ")
def to_dot(self) -> DotMap:
"""Returns parsed results as a DotMap object
Returns:
DotMap : DotMap object
"""
return self.dot
def to_list(self) -> list:
"""Returns parsed results as a list
Returns:
list : list
"""
return self.ParseResults.as_list()
def to_dict(self) -> dict:
"""Returns parsed results as a dictionary
Returns:
dict : dictionary
"""
return self.dict
Attributes
EQ = pp.Suppress('=')
class-attribute
instance-attribute
ParseResults = Multisim.expr.parse_string(string)
instance-attribute
array = pp.Forward()
class-attribute
instance-attribute
dict = {}
instance-attribute
dot = DotMap()
instance-attribute
expr = pp.Forward()
class-attribute
instance-attribute
indent = 4
instance-attribute
output = sys.stdout
instance-attribute
template_path = None
instance-attribute
value = _string1 | _string2 | _number
class-attribute
instance-attribute
variable = pp.Word(pp.alphanums + '._/?-@') + pp.Opt('.' + pp.Word(pp.alphanums)).set_parse_action(''.join)
class-attribute
instance-attribute
Functions
__init__(**kwargs)
Source code in src/desmondtools/multisim.py
def __init__(self, **kwargs):
self.template_path = None
self.ParseResults = None
self.dict = {}
self.dot = DotMap()
self.output = sys.stdout # do not attempt to close
self.indent = 4
if 'template' in kwargs:
template = kwargs['template']
template_path = pathlib.Path(template)
if template_path.is_file():
self.template_path = template_path
else:
template_path = importlib.resources.files('desmondtools')
self.template_path = template_path / template
if self.template_path is None:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), template)
self.ParseResults = Multisim.expr.parse_file(self.template_path)
self.decode()
elif 'string' in kwargs:
string = kwargs['string']
try:
self.ParseResults = Multisim.expr.parse_string(string)
self.decode()
except:
raise RuntimeError("Multisim: cannot parse the input string.")
else:
raise RuntimeError("Multisim: template filename or string is required.")
decode()
decode the parsed results into a dictionary and its dotmap
Source code in src/desmondtools/multisim.py
def decode(self) -> None:
"""decode the parsed results into a dictionary and its dotmap"""
# create .dict
if isinstance(self.ParseResults.as_list()[0][0], str): # key
self.dict = self.ParseResults.as_dict()
Multisim.traverse_dict(self.dict)
# handle the case where key has '.'
Multisim.unfold_dict_key(self.dict)
elif isinstance(self.ParseResults.as_list()[0][0], list):
self.dict = [] # now self.dict is a list of dictionary
for section in self.ParseResults:
dict_ = dict(section.as_list())
Multisim.traverse_dict(dict_)
# handle the case where key has '.'
Multisim.unfold_dict_key(dict_)
self.dict.append(dict_)
# create .dot
if isinstance(self.dict, list):
self.dot = {}
for d in self.dict:
for k,v in d.items():
if k in self.dot:
if isinstance(self.dot[k], list):
self.dot[k].append(v)
else:
self.dot[k] = [self.dot[k], v]
else:
self.dot[k] = v
self.dot = DotMap(self.dot)
else:
self.dot = DotMap(self.dict)
to_dict()
to_dot()
to_list()
traverse_dict(d)
staticmethod
Recursively traverse a nested dictionary/list
Parameters:
Name | Type | Description | Default |
---|---|---|---|
|
dict
|
dictionary (to be modified) |
required |
Source code in src/desmondtools/multisim.py
@staticmethod
def traverse_dict(d) -> None:
"""Recursively traverse a nested dictionary/list
Args:
d (dict): dictionary (to be modified)
"""
if isinstance(d, dict):
for k,v in d.items():
if isinstance(v, dict):
Multisim.traverse_dict(v)
elif isinstance(v, list):
if v == ['{','}']:
d[k] = {}
elif v == ['[',']']:
d[k] = []
elif v[0] == '[' and v[-1] == ']':
d[k] = v[1:-1]
Multisim.traverse_dict(d[k])
elif v[0] == '{' and v[-1] == '}':
d[k] = dict(v[1:-1])
Multisim.traverse_dict(d[k])
elif isinstance(d, list):
for k,v in enumerate(d):
if isinstance(v, dict):
Multisim.traverse_dict(v)
elif isinstance(v, list):
if v == ['{','}']:
d[k] = {}
elif v == ['[',']']:
d[k] = []
elif v[0] == '[' and v[-1] == ']':
d[k] = v[1:-1]
Multisim.traverse_dict(d[k])
elif v[0] == '{' and v[-1] == '}':
d[k] = dict(v[1:-1])
Multisim.traverse_dict(d[k])
unfold_dict_key(d)
staticmethod
Unfold '.' in the keys of a dictionary
Parameters:
Name | Type | Description | Default |
---|---|---|---|
|
dict
|
dictionary (to be modified) |
required |
Source code in src/desmondtools/multisim.py
@staticmethod
def unfold_dict_key(d) -> None:
"""Unfold '.' in the keys of a dictionary
Args:
d (dict): dictionary (to be modified)
"""
ks = [ k for k in d ]
for k in ks:
v = d[k]
if '.' in k:
# d[k] = v --> d[kk[0]] = { kk[1] : v }
_u = None
_k = k.split('.')[0]
for kk in k.split('.')[-1:0:-1]:
if _u is None:
_u = {kk : v }
else:
_u = {kk : _u}
d[_k] = _u
del d[k]
else:
pass
if isinstance(v, dict):
Multisim.unfold_dict_key(v)
write(output=None)
Writes DOT object
Source code in src/desmondtools/multisim.py
def write(self, output=None):
"""Writes DOT object"""
if isinstance(output, io.IOBase):
self.output = output
elif isinstance(output, str):
self.output = open(output, "w")
if isinstance(self.dict, list):
blocks = []
for k, v in self.dot.items():
if isinstance(v, list):
for vv in v:
blocks.append({k:vv})
else:
blocks.append({k:v})
for block in blocks:
self._write_dict(block, block=True)
self.output.write("\n")
else:
self._write_dict(self.dot)
desmondtools.Event
Source code in src/desmondtools/event.py
Attributes
dot = DotMap(d)
instance-attribute
Functions
__init__(filename)
desmondtools.Interaction
Bases: Event
Source code in src/desmondtools/event.py
class Interaction(Event):
def __init__(self, filename:Path | str, verbose:bool=False):
super().__init__(filename)
self.num_frames = 0
self.HBond = {}
self.Hydrophobic = {}
self.WaterBridge = {}
self.Polar = {}
self.HalogenBond = {}
self.LigWat = {}
self.Metal = {}
self.PiCat = {}
self.PiPi = {}
for section in self.dot.Keywords:
try:
assert section.ProtLigInter.HBondResult
self.num_frames = len(section.ProtLigInter.HBondResult)
for frame in section.ProtLigInter.HBondResult:
# [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
for (frameno, prot, hbond_type, lig) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.HBond:
self.HBond[resSeq]['count'] += 1
else:
self.HBond[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.HBond):
fraction = float(self.HBond[resSeq]['count'])/self.num_frames
if verbose:
print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.HydrophobicResult
self.num_frames = len(section.ProtLigInter.HydrophobicResult)
for frame in section.ProtLigInter.HydrophobicResult:
# [[0 "_:PHE_223" L-FRAG_0 ] [0 "_:ALA_241" L-FRAG_0 ]]
for (frameno, prot, lig) in frame:
prot = prot.strip('\"')
(_, resid) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.Hydrophobic:
self.Hydrophobic[resSeq]['count'] += 1
else:
self.Hydrophobic[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.Hydrophobic):
fraction = float(self.Hydrophobic[resSeq]['count'])/self.num_frames
if verbose:
print(f"Hydrophobic {self.Hydrophobic[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.PolarResult
self.num_frames = len(section.ProtLigInter.PolarResult)
for frame in section.ProtLigInter.PolarResult:
# [[1 "_:GLU_216:OE2" b "L-FRAG_1:N3" 4.45 ]]
for (frameno, prot, _, lig, _) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.Polar:
self.Polar[resSeq]['count'] += 1
else:
self.Polar[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.Polar):
fraction = float(self.Polar[resSeq]['count'])/self.num_frames
if verbose:
print(f"Polar {self.Polar[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.WaterBridgeResult
self.num_frames = len(section.ProtLigInter.WaterBridgeResult)
for frame in section.ProtLigInter.WaterBridgeResult:
# [[3 "_:GLU_216:OE2" a "L-FRAG_0:N2" a 2431 ]]
for (frameno, prot, _, lig, _, _) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.WaterBridge:
self.WaterBridge[resSeq]['count'] += 1
else:
self.WaterBridge[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.WaterBridge):
fraction = float(self.WaterBridge[resSeq]['count'])/self.num_frames
if verbose:
print(f"WaterBridge {self.WaterBridge[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
Attributes
HBond = {}
instance-attribute
HalogenBond = {}
instance-attribute
Hydrophobic = {}
instance-attribute
LigWat = {}
instance-attribute
Metal = {}
instance-attribute
PiCat = {}
instance-attribute
PiPi = {}
instance-attribute
Polar = {}
instance-attribute
WaterBridge = {}
instance-attribute
num_frames = len(section.ProtLigInter.WaterBridgeResult)
instance-attribute
Functions
__init__(filename, verbose=False)
Source code in src/desmondtools/event.py
def __init__(self, filename:Path | str, verbose:bool=False):
super().__init__(filename)
self.num_frames = 0
self.HBond = {}
self.Hydrophobic = {}
self.WaterBridge = {}
self.Polar = {}
self.HalogenBond = {}
self.LigWat = {}
self.Metal = {}
self.PiCat = {}
self.PiPi = {}
for section in self.dot.Keywords:
try:
assert section.ProtLigInter.HBondResult
self.num_frames = len(section.ProtLigInter.HBondResult)
for frame in section.ProtLigInter.HBondResult:
# [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
for (frameno, prot, hbond_type, lig) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.HBond:
self.HBond[resSeq]['count'] += 1
else:
self.HBond[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.HBond):
fraction = float(self.HBond[resSeq]['count'])/self.num_frames
if verbose:
print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.HydrophobicResult
self.num_frames = len(section.ProtLigInter.HydrophobicResult)
for frame in section.ProtLigInter.HydrophobicResult:
# [[0 "_:PHE_223" L-FRAG_0 ] [0 "_:ALA_241" L-FRAG_0 ]]
for (frameno, prot, lig) in frame:
prot = prot.strip('\"')
(_, resid) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.Hydrophobic:
self.Hydrophobic[resSeq]['count'] += 1
else:
self.Hydrophobic[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.Hydrophobic):
fraction = float(self.Hydrophobic[resSeq]['count'])/self.num_frames
if verbose:
print(f"Hydrophobic {self.Hydrophobic[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.PolarResult
self.num_frames = len(section.ProtLigInter.PolarResult)
for frame in section.ProtLigInter.PolarResult:
# [[1 "_:GLU_216:OE2" b "L-FRAG_1:N3" 4.45 ]]
for (frameno, prot, _, lig, _) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.Polar:
self.Polar[resSeq]['count'] += 1
else:
self.Polar[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.Polar):
fraction = float(self.Polar[resSeq]['count'])/self.num_frames
if verbose:
print(f"Polar {self.Polar[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
try:
assert section.ProtLigInter.WaterBridgeResult
self.num_frames = len(section.ProtLigInter.WaterBridgeResult)
for frame in section.ProtLigInter.WaterBridgeResult:
# [[3 "_:GLU_216:OE2" a "L-FRAG_0:N2" a 2431 ]]
for (frameno, prot, _, lig, _, _) in frame:
prot = prot.strip('\"')
(_, resid, atom) = prot.split(":")
(resName, resSeq) = resid.split("_")
resSeq = int(resSeq)
if resSeq in self.WaterBridge:
self.WaterBridge[resSeq]['count'] += 1
else:
self.WaterBridge[resSeq] = {'resName': resName, 'count':1 }
for resSeq in sorted(self.WaterBridge):
fraction = float(self.WaterBridge[resSeq]['count'])/self.num_frames
if verbose:
print(f"WaterBridge {self.WaterBridge[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
except:
pass
desmondtools.Maestro
Export maestro file
m_atom block from PDB
1 atom_index
2 i_m_mmod_type
3 r_m_x_coord
4 r_m_y_coord
5 r_m_z_coord
6 i_m_residue_number
7 s_m_mmod_res
8 s_m_chain_name
9 i_m_color
10 r_m_charge1
11 r_m_charge2
12 s_m_pdb_residue_name
13 s_m_pdb_atom_name
14 i_m_atomic_number
15 i_m_formal_charge
16 s_m_color_rgb
17 s_m_atom_name
18 i_m_secondary_structure
19 r_glide_flexr_altx1
20 r_glide_flexr_altx2
21 r_glide_flexr_alty1
22 r_glide_flexr_alty2
23 r_glide_flexr_altz1
24 r_glide_flexr_altz2
25 r_m_pdb_occupancy
26 r_m_pdb_tfactor
27 i_glide_flexr_naltpos
28 i_i_internal_atom_index
29 i_pdb_PDB_serial
30 i_pdb_seqres_index
31 b_glide_flexr_movable
32 r_m_alt_pdb_occupancy
33 r_m_alt_pdb_tfactor
34 r_m_alt_x_coord
35 r_m_alt_y_coord
36 r_m_alt_z_coord
37 r_pdb_alt_occupancy_A
38 r_pdb_alt_occupancy_B
39 r_pdb_alt_tfactor_A
40 r_pdb_alt_tfactor_B
41 r_pdb_alt_x_coord_A
42 r_pdb_alt_x_coord_B
43 r_pdb_alt_y_coord_A
44 r_pdb_alt_y_coord_B
45 r_pdb_alt_z_coord_A
46 r_pdb_alt_z_coord_B
47 i_m_pdb_convert_problem
48 i_pdb_alt_PDB_serial
49 i_pdb_alt_PDB_serial_A
50 i_pdb_alt_PDB_serial_B
51 s_pdb_altloc_chars
m_atom block from ligand
i_m_mmod_type
r_m_x_coord
r_m_y_coord
r_m_z_coord
i_m_residue_number
i_m_color
r_m_charge1
r_m_charge2
i_m_atomic_number
i_m_formal_charge
s_m_color_rgb
i_sd_original_parity
b_st_SpecifiedChirality
r_epik_H2O_pKa
r_epik_H2O_pKa_uncertainty
m_bond block
# First column is bond index #
i_m_from
i_m_to
i_m_order
i_sd_original_parity
r_glide_torcontrol_penalty
s_glide_torcontrol_name
Source code in src/desmondtools/maestro.py
class Maestro:
"""
Export maestro file
m_atom block from PDB
=====================
1 atom_index
2 i_m_mmod_type
3 r_m_x_coord
4 r_m_y_coord
5 r_m_z_coord
6 i_m_residue_number
7 s_m_mmod_res
8 s_m_chain_name
9 i_m_color
10 r_m_charge1
11 r_m_charge2
12 s_m_pdb_residue_name
13 s_m_pdb_atom_name
14 i_m_atomic_number
15 i_m_formal_charge
16 s_m_color_rgb
17 s_m_atom_name
18 i_m_secondary_structure
19 r_glide_flexr_altx1
20 r_glide_flexr_altx2
21 r_glide_flexr_alty1
22 r_glide_flexr_alty2
23 r_glide_flexr_altz1
24 r_glide_flexr_altz2
25 r_m_pdb_occupancy
26 r_m_pdb_tfactor
27 i_glide_flexr_naltpos
28 i_i_internal_atom_index
29 i_pdb_PDB_serial
30 i_pdb_seqres_index
31 b_glide_flexr_movable
32 r_m_alt_pdb_occupancy
33 r_m_alt_pdb_tfactor
34 r_m_alt_x_coord
35 r_m_alt_y_coord
36 r_m_alt_z_coord
37 r_pdb_alt_occupancy_A
38 r_pdb_alt_occupancy_B
39 r_pdb_alt_tfactor_A
40 r_pdb_alt_tfactor_B
41 r_pdb_alt_x_coord_A
42 r_pdb_alt_x_coord_B
43 r_pdb_alt_y_coord_A
44 r_pdb_alt_y_coord_B
45 r_pdb_alt_z_coord_A
46 r_pdb_alt_z_coord_B
47 i_m_pdb_convert_problem
48 i_pdb_alt_PDB_serial
49 i_pdb_alt_PDB_serial_A
50 i_pdb_alt_PDB_serial_B
51 s_pdb_altloc_chars
m_atom block from ligand
========================
i_m_mmod_type
r_m_x_coord
r_m_y_coord
r_m_z_coord
i_m_residue_number
i_m_color
r_m_charge1
r_m_charge2
i_m_atomic_number
i_m_formal_charge
s_m_color_rgb
i_sd_original_parity
b_st_SpecifiedChirality
r_epik_H2O_pKa
r_epik_H2O_pKa_uncertainty
m_bond block
============
# First column is bond index #
i_m_from
i_m_to
i_m_order
i_sd_original_parity
r_glide_torcontrol_penalty
s_glide_torcontrol_name
"""
def __init__(self, filename:str, max_two_entries:bool=True) -> None:
""" initialize """
if not Path(filename).exists:
print(f"Error file not found: {filename}")
sys.exit(1)
self.filename = filename # ex: ./dir/a.maegz ./dir/a.mae.gz
self.filename_prefix = None # ex: ./dir/a ./dir/a
self.filename_stem = None # ex: a a
self.lines = None
self.entries = None
self.max_two_entries = max_two_entries
self.title = ""
self.serial = 0
self.usedId = {}
self.mmcif = {"_entry": None, "_chem_comp_bond": None, "_atom_site": None}
self.iamap = {}
@staticmethod
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
@staticmethod
def setInDict(dataDict, mapList, value):
if mapList:
Maestro.getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
else:
for k,v in value.items():
dataDict[k] = v
def parse_impl(self, f:TextIO) -> None:
"""Parse Maestro file to a list of dictionaries"""
self.entries = []
count = re.compile(r'(\w+)\[(\d+)\]')
tokens = shlex.split(f.read())
level = []
data = {}
previous_token = None
header = False
extra_column = 0
num_repeat = 1
skip = False
for token in tokens :
if token == "#" :
skip = not skip # invert
continue
elif skip:
continue
elif token == "{" :
header = True
key = []
val = []
arr = []
if previous_token:
if previous_token == "f_m_ct" and data:
self.entries.append(data)
data = {}
try:
(block, num_repeat) = count.findall(previous_token)[0]
num_repeat = int(num_repeat)
extra_column = 1
except:
block = previous_token
num_repeat = 1
extra_column = 0
level.append(block)
elif token == "}":
if level:
level.pop()
elif token == ":::":
header = False
elif header:
key.append(token)
else:
val.append(token)
# only store f_m_ct blocks (level != [])
if len(val) == (len(key)+extra_column) and level :
arr.append(val[extra_column:])
val = []
if len(arr) == num_repeat:
if len(arr) == 1:
Maestro.setInDict(data, level, dict(zip(key, arr[0])))
else:
T = list(zip(*arr)) # transpose
Maestro.setInDict(data, level, dict(zip(key, T)))
previous_token = token
if data:
self.entries.append(data)
def parse(self) -> None:
"""Parse a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
"""
if self.filename.endswith('.maegz'):
self.filename_prefix = self.filename.replace(".maegz", "")
with gzip.open(self.filename, 'rt') as f:
self.parse_impl(f)
elif self.filename.endswith('.mae.gz'):
self.filename_prefix = self.filename.replace(".mae.gz", "")
with gzip.open(self.filename, "rt") as f:
self.parse_impl(f)
elif self.filename.endswith('.mae'):
self.filename_prefix = self.filename.replace(".mae", "")
with open(self.filename, "rt") as f:
self.parse_impl(f)
else:
print("Error : .mae, .mae.gz, or .maegz file is expected")
sys.exit(1)
self.filename_stem = Path(self.filename_prefix).stem
def read_lines(self) -> None:
"""Read a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
"""
if self.filename.endswith('.maegz'):
self.filename_prefix = self.filename.replace(".maegz", "")
with gzip.open(self.filename, 'rt') as f:
self.lines = f.readlines()
elif self.filename.endswith('.mae.gz'):
self.filename_prefix = self.filename.replace(".mae.gz", "")
with gzip.open(self.filename, "rt") as f:
self.lines = f.readlines()
elif self.filename.endswith('.mae'):
self.filename_prefix = self.filename.replace(".mae", "")
with open(self.filename, "rt") as f:
self.lines = f.readlines()
else:
print("Error : .mae, .mae.gz, or .maegz file is expected")
sys.exit(1)
self.filename_stem = Path(self.filename_prefix).stem
def append_entry(self, title):
"""Start a new entry."""
if self.mmcif["_entry"]:
if isinstance(self.mmcif["_entry"]["id"], list):
# you got here as the third and later entry
if self.max_two_entries:
# write out last two entries
self.write_mmcif()
# return to the state in which only the first entry was defined
self.mmcif = copy.deepcopy(self.mmcif_first)
self.usedId = copy.deepcopy(self.mmcif_first_usedId)
self.serial = self.mmcif_first_serial
# regardless of its appearance, current entry now is regarded as the second entry
self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"][0], title ]
else:
# otherwise continue to add entry
self.mmcif["_entry"]["id"].append(title)
else:
# you got here as the second entry
# save the first entry before adding the second entry
self.mmcif_first = copy.deepcopy(self.mmcif)
self.mmcif_first_usedId = copy.deepcopy(self.usedId)
self.mmcif_first_serial = self.serial
# change to a list type
self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"], title ]
else:
# regular string type
self.mmcif["_entry"] = { "id" : title }
def append_atom_site(self,
group_PDB, id,
type_symbol,
label_atom_id,
label_alt_id,
label_comp_id,
label_asym_id,
label_entity_id,
label_seq_id,
pdbx_PDB_ins_code,
Cartn_x,
Cartn_y,
Cartn_z,
occupancy,
B_iso_or_equiv,
pdbx_formal_charge,
auth_seq_id,
auth_comp_id,
auth_asym_id,
auth_atom_id,
pdbx_PDB_model_num):
"""Add mmcif _atom_site """
if self.mmcif["_atom_site"] :
self.mmcif["_atom_site"]["group_PDB"].append(group_PDB)
self.mmcif["_atom_site"]["id"].append(id)
self.mmcif["_atom_site"]["type_symbol"].append(type_symbol)
self.mmcif["_atom_site"]["label_atom_id"].append(label_atom_id)
self.mmcif["_atom_site"]["label_alt_id"].append(label_alt_id)
self.mmcif["_atom_site"]["label_comp_id"].append(label_comp_id)
self.mmcif["_atom_site"]["label_asym_id"].append(label_asym_id)
self.mmcif["_atom_site"]["label_entity_id"].append(label_entity_id)
self.mmcif["_atom_site"]["label_seq_id"].append(label_seq_id)
self.mmcif["_atom_site"]["pdbx_PDB_ins_code"].append(pdbx_PDB_ins_code)
self.mmcif["_atom_site"]["Cartn_x"].append(Cartn_x)
self.mmcif["_atom_site"]["Cartn_y"].append(Cartn_y)
self.mmcif["_atom_site"]["Cartn_z"].append(Cartn_z)
self.mmcif["_atom_site"]["occupancy"].append(occupancy)
self.mmcif["_atom_site"]["B_iso_or_equiv"].append(B_iso_or_equiv)
self.mmcif["_atom_site"]["pdbx_formal_charge"].append(pdbx_formal_charge)
self.mmcif["_atom_site"]["auth_seq_id"].append(auth_seq_id)
self.mmcif["_atom_site"]["auth_comp_id"].append(auth_comp_id)
self.mmcif["_atom_site"]["auth_asym_id"].append(auth_asym_id)
self.mmcif["_atom_site"]["auth_atom_id"].append(auth_atom_id)
self.mmcif["_atom_site"]["pdbx_PDB_model_num"].append(pdbx_PDB_model_num)
else:
self.mmcif["_atom_site"] = {
"group_PDB" : [ group_PDB ],
"id" : [ id ],
"type_symbol" : [ type_symbol ],
"label_atom_id" : [ label_atom_id ],
"label_alt_id" : [ label_alt_id ],
"label_comp_id" : [ label_comp_id ],
"label_asym_id" : [ label_asym_id ],
"label_entity_id" : [ label_entity_id ],
"label_seq_id" : [ label_seq_id ],
"pdbx_PDB_ins_code" : [ pdbx_PDB_ins_code ],
"Cartn_x" : [ Cartn_x ],
"Cartn_y" : [ Cartn_y ],
"Cartn_z" : [ Cartn_z ],
"occupancy" : [ occupancy ],
"B_iso_or_equiv" : [ B_iso_or_equiv ],
"pdbx_formal_charge" : [ pdbx_formal_charge ],
"auth_seq_id" : [ auth_seq_id ],
"auth_comp_id" : [ auth_comp_id ],
"auth_asym_id" : [ auth_asym_id ],
"auth_atom_id" : [ auth_atom_id ],
"pdbx_PDB_model_num" : [ pdbx_PDB_model_num ],
}
def append_chem_comp_bond(self, chem_comp_id, i, j, k):
""" add chem_comp_bond """
p = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(i)]
q = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(j)]
if self.mmcif["_chem_comp_bond"]:
self.mmcif["_chem_comp_bond"]["comp_id"].append(chem_comp_id)
self.mmcif["_chem_comp_bond"]["atom_id_1"].append(p)
self.mmcif["_chem_comp_bond"]["atom_id_2"].append(q)
self.mmcif["_chem_comp_bond"]["value_order"].append(k)
else:
self.mmcif["_chem_comp_bond"] = {
"comp_id" : [ chem_comp_id ],
"atom_id_1" : [ p ],
"atom_id_2" : [ q ],
"value_order" : [ k ],
}
def new_chem_comp(self):
""" start a new chem_comp entry """
if not type(self.mmcif["_entry"]["id"]) == list or len(self.mmcif["_entry"]["id"]) <= 2:
self.chem_comp_id = "LIG"
else:
self.chem_comp_id = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(3))
self.chem_comp_chainId = [ c for c in string.ascii_uppercase if not (c in self.usedId)][0]
self.chem_comp_resSeq = 100
self.usedId[self.chem_comp_chainId] = []
return self.chem_comp_chainId, self.chem_comp_resSeq, self.chem_comp_id
def new_chem_comp_atom(self, obj, element):
""" create and return unique atom name """
if "s_m_pdb_atom_name" in obj:
atom_name = obj["s_m_pdb_atom_name"].strip()
else:
atom_name = "{}{}".format(element,1)
if not (atom_name in self.usedId[self.chem_comp_chainId]):
self.usedId[self.chem_comp_chainId].append(atom_name)
return atom_name
else:
for i in range(1,10000):
new_name = "{}{}".format(element,i)
if not (new_name in self.usedId[self.chem_comp_chainId]):
self.usedId[self.chem_comp_chainId].append(new_name)
return new_name
def to_mmcif(self, **kwargs):
token = None
entity_id = 0
for line in self.lines:
line = line.strip()
if not line or line.startswith("#"):
continue
# f_m_ct block
if line.startswith("f_m_ct {"):
token = 'f_m_ct_key'
natoms = 0
nbonds = 0
entity_id += 1
k = []
continue
if token == 'f_m_ct_key':
if line.startswith(":::"): # end of f_m_ct key block
n = len(k)
token = 'f_m_ct_val'
v = []
continue
k.append(line)
if token == 'f_m_ct_val':
v.append(line)
if len(v) == n:
data = dict(zip(k,v))
if "s_lp_Variant" in data:
self.title = data['s_lp_Variant'].replace('"','').replace("'","")
else:
self.title = data['s_m_title'].replace('"','').replace("'","")
self.append_entry(self.title)
token = None
# m_atom block
if line.startswith("m_atom") :
natoms = int(Maestro.count.findall(line)[0])
token = 'm_atom_key'
k = ['i_atom_index'] # 1st column is atom_index
continue
if token == 'm_atom_key':
if line.startswith(":::"):
token = 'm_atom_val'
nv = 0
m_atom = {}
m_atom_resName = {}
continue
k.append(line)
if token == 'm_atom_val':
v = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,v))
# build hierarchical structure of chain/ resSeq/ resName
chainId = get_value_or_default(data,"s_m_chain_name", "A")
resSeq = int(get_value_or_default(data, "i_m_residue_number", "1"))
resName = get_value_or_default(data, "s_m_pdb_residue_name", "?")
if resName in std_rename:
resName = std_rename[resName]
if not chainId in m_atom:
m_atom[chainId] = {}
if not resSeq in m_atom[chainId]:
m_atom[chainId][resSeq] = {}
if not resName in m_atom[chainId][resSeq]:
m_atom[chainId][resSeq][resName] = []
m_atom[chainId][resSeq][resName].append(data)
m_atom_resName[int(data["i_atom_index"])] = resName
nv += 1
if nv == natoms: # end of m_atom block
token = None
# m_bond block
if line.startswith("m_bond") :
nbonds = int(Maestro.count.findall(line)[0])
token = 'm_bond_key'
k = ['i_bond_index']
continue
if token == 'm_bond_key':
if line.startswith(":::"): # end of m_bond key block
token = 'm_bond_val'
nv = 0
m_bond = {}
continue
k.append(line)
if token == 'm_bond_val':
v = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,v))
# build bond connectivity
p = int(data["i_m_from"])
q = int(data["i_m_to"])
r = int(data["i_m_order"])
if p in m_bond:
m_bond[p][q] = r
else:
m_bond[p] = {q:r}
if q in m_bond:
m_bond[q][p] = r
else:
m_bond[q] = {p:r}
nv += 1
if nv == nbonds: # m_bond block ends
token = None
# create mmcif _atom_site
for chainId_ in sorted(m_atom):
for resSeq_ in sorted(m_atom[chainId_]):
for resName_ in sorted(m_atom[chainId_][resSeq_]):
if resName_ in std_residues:
group_PDB = "ATOM"
chainId, resSeq, resName = chainId_, resSeq_, resName_
else:
group_PDB = "HETATM"
# check if new_chem_comp is necessary
this_residue = [ int(d["i_atom_index"]) for d in m_atom[chainId_][resSeq_][resName_] ]
need_new_chem_comp = True
for p_ in this_residue:
for q_, bond_order in m_bond[p_].items():
if (not (q_ in this_residue)) and \
(not (m_atom_resName[q_] in std_residues)) and \
(q_ in self.iamap):
# do not create another chem_comp
need_new_chem_comp = False
q, chainId, resSeq, resName = self.iamap[q_]
break
if need_new_chem_comp:
chainId, resSeq, resName = self.new_chem_comp()
# go through all atoms within the same residue
for d in m_atom[chainId_][resSeq_][resName_]:
self.serial += 1
self.iamap[int(d["i_atom_index"])] = (self.serial, chainId, resSeq, resName)
element = atomic_symbol[int(d["i_m_atomic_number"])]
if group_PDB == "ATOM":
name = get_value_or_default(d,"s_m_pdb_atom_name", "?")
else:
name = self.new_chem_comp_atom(d, element)
x = float(d["r_m_x_coord"])
y = float(d["r_m_y_coord"])
z = float(d["r_m_z_coord"])
occupancy = float(get_value_or_default(d,"r_m_pdb_occupancy","1"))
bfactor = float(get_value_or_default(d,"r_m_pdb_tfactor", "0"))
formal_charge = int(get_value_or_default(d,"i_m_formal_charge", "0"))
self.append_atom_site(
group_PDB = group_PDB,
id = self.serial,
type_symbol = element,
label_atom_id = name,
label_alt_id = ".",
label_comp_id = resName,
label_asym_id = chainId,
label_entity_id = entity_id,
label_seq_id = resSeq,
pdbx_PDB_ins_code = "?",
Cartn_x = x,
Cartn_y = y,
Cartn_z = z,
occupancy = occupancy,
B_iso_or_equiv = bfactor,
pdbx_formal_charge = formal_charge,
auth_seq_id = resSeq,
auth_comp_id = resName,
auth_asym_id = chainId,
auth_atom_id = name,
pdbx_PDB_model_num = 1,
)
# create mmcif _chem_comp_bond
for chainId_ in sorted(m_atom):
for resSeq_ in sorted(m_atom[chainId_]):
for resName_ in sorted(m_atom[chainId_][resSeq_]):
if not (resName_ in std_residues): # HETATM
for d in m_atom[chainId_][resSeq_][resName_]:
p_ = int(d["i_atom_index"])
p, p_chainId, p_resSeq, p_resName = self.iamap[p_]
if p_ in m_bond:
for q_, bond_order in m_bond[p_].items():
q, q_chainId, q_resSeq, q_resName = self.iamap[q_]
self.append_chem_comp_bond(p_resName, p, q, bond_order)
cifo = CifFileWriter("{}.cif".format(self.filename_stem))
# clean up undefined dictionary
# force to copy a list to avoid
# RuntimeError: dictionary changed size during iteration
for k in list(self.mmcif):
if not self.mmcif[k]:
del self.mmcif[k]
cifo.write({ "desmondtools" : self.mmcif })
def export2(self, **kwargs) -> None:
"""Export to PDB/mmCIF"""
pdb_format = kwargs.get("pdb", False)
cif_format = kwargs.get("cif", False)
skip_first = kwargs.get("skip_first", False)
only_first = kwargs.get("only_first", False)
as_complex = kwargs.get("as_complex", False)
separately = kwargs.get("separately", False)
names = kwargs.get("names", [])
if names:
# if `--names` is used, they are separately saved.
separately = True
if as_complex:
# if `--as-complex` is used, `skip_first` and `only_first` is disabled.
skip_first = False
only_first = False
self.parse()
exported = []
for i, data in enumerate(self.entries, start=1):
if ((i == 1) and (not skip_first)) or ((i > 1) and (not only_first)):
entry = dict_to_simplenamespace(data)
if (not names) or (names and entry.f_m_ct.s_m_title in names):
exported.append(SimpleNamespace(number=i, title=entry.f_m_ct.s_m_title, PDB=to_pdb_str(entry)))
if as_complex:
for x in exported:
title = safe_filename(x.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(exported[0].PDB)
f.write(x.PDB)
f.write("END\n")
elif separately:
for x in exported:
title = safe_filename(x.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(x.PDB)
f.write("END\n")
else:
with open(f"{self.filename_stem}.pdb", "w") as f:
for x in exported:
f.write(PDB_MODEL_FORMAT(x.number) + "\n")
f.write(x.PDB)
f.write("ENDMDL\n")
f.write("END\n")
def export(self, **kwargs) -> None:
"""Export to PDB/mmCIF"""
pdb_format = kwargs.get("pdb", False)
cif_format = kwargs.get("cif", False)
skip_first = kwargs.get("skip_first", False)
only_first = kwargs.get("only_first", False)
as_complex = kwargs.get("as_complex", False)
separately = kwargs.get("separately", False)
names = kwargs.get("names", [])
if names:
# if `--names` is used, they are separately saved.
separately = True
if as_complex:
# if `--as-complex` is used, `skip_first` and `only_first` is disabled.
skip_first = False
only_first = False
entries = []
entry_number = 0
q = None
count = re.compile(r'\[(\d+)\]')
self.read_lines()
for line in self.lines:
line = line.strip()
if not line or line.startswith("#"):
continue
if line.startswith("f_m_ct {"): # new entry
q = 'f_m_ct_key'
k = []
if entry_number > 0 and entry.PDB:
if (not names) or (names and entry.title in names):
entries.append(entry)
entry_number += 1
entry = SimpleNamespace(number=entry_number, title="", PDB="", natoms=0, nbonds=0, data=None)
continue
if q == 'f_m_ct_key' and line.startswith(":::"):
n = len(k)
q = 'f_m_ct_val'
vm = []
va = []
vb = []
continue
if q == 'f_m_ct_key':
k.append(line)
if q == 'f_m_ct_val':
vm.append(line)
if len(vm) == n:
data = dict(zip(k, vm))
q = None
entry.title = data['s_m_title'].replace('"','').replace("'","")
entry.data = data
if line.startswith("m_atom") :
entry.natoms = int(count.findall(line)[0])
q = 'm_atom_key'
k = ['atom_index']
continue
if q == 'm_atom_key' and line.startswith(":::"):
q = 'm_atom_val'
nv = 0
continue
if q == 'm_atom_key':
k.append(line)
if q == 'm_atom_val':
c = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k, c))
nv += 1
va.append(data)
if nv == entry.natoms:
q = None
if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
entry.PDB = PDB_ATOM_FORMAT(va)
if line.startswith("m_bond") :
entry.nbonds = int(count.findall(line)[0])
q = 'm_bond_key'
k = ['bond_index']
continue
if q == 'm_bond_key' and line.startswith(":::"):
q = 'm_bond_val'
nv = 0
continue
if q == 'm_bond_key':
k.append(line)
if q == 'm_bond_val':
c = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,c))
# conect information only for ligands
if not 'i_glide_grid_version' in entry.data :
vb.append(data)
nv += 1
if nv == entry.nbonds:
q = None
if vb:
if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
entry.PDB += PDB_CONECT_FORMAT(vb)
if cif_format:
for entry in entries:
pdb_strings = entry.PDB
with StringIO(pdb_strings) as pdb:
parser = PDBParser()
structure = parser.get_structure(entry.title, pdb)
mmcif_io = MMCIFIO()
mmcif_io.set_structure(structure)
mmcif_io.save(f"{self.filename_stem}.cif")
elif pdb_format:
if as_complex:
for entry in entries:
title = safe_filename(entry.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(PDB_MODEL_FORMAT(entries[0].number) + "\n")
f.write(entries[0].PDB)
f.write("ENDMDL\n")
f.write(entry.PDB)
f.write("ENDMDL\n")
f.write("END\n")
elif separately:
for entry in entries:
title = safe_filename(entry.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(entry.PDB)
f.write("END\n")
else:
with open(f"{self.filename_stem}.pdb", "w") as f:
for entry in entries:
f.write(PDB_MODEL_FORMAT(entry.number) + "\n")
f.write(entry.PDB)
f.write("ENDMDL\n")
f.write("END\n")
Attributes
entries = None
instance-attribute
filename = filename
instance-attribute
filename_prefix = None
instance-attribute
filename_stem = None
instance-attribute
iamap = {}
instance-attribute
lines = None
instance-attribute
max_two_entries = max_two_entries
instance-attribute
mmcif = {'_entry': None, '_chem_comp_bond': None, '_atom_site': None}
instance-attribute
serial = 0
instance-attribute
title = ''
instance-attribute
usedId = {}
instance-attribute
Functions
__init__(filename, max_two_entries=True)
initialize
Source code in src/desmondtools/maestro.py
def __init__(self, filename:str, max_two_entries:bool=True) -> None:
""" initialize """
if not Path(filename).exists:
print(f"Error file not found: {filename}")
sys.exit(1)
self.filename = filename # ex: ./dir/a.maegz ./dir/a.mae.gz
self.filename_prefix = None # ex: ./dir/a ./dir/a
self.filename_stem = None # ex: a a
self.lines = None
self.entries = None
self.max_two_entries = max_two_entries
self.title = ""
self.serial = 0
self.usedId = {}
self.mmcif = {"_entry": None, "_chem_comp_bond": None, "_atom_site": None}
self.iamap = {}
append_atom_site(group_PDB, id, type_symbol, label_atom_id, label_alt_id, label_comp_id, label_asym_id, label_entity_id, label_seq_id, pdbx_PDB_ins_code, Cartn_x, Cartn_y, Cartn_z, occupancy, B_iso_or_equiv, pdbx_formal_charge, auth_seq_id, auth_comp_id, auth_asym_id, auth_atom_id, pdbx_PDB_model_num)
Add mmcif _atom_site
Source code in src/desmondtools/maestro.py
def append_atom_site(self,
group_PDB, id,
type_symbol,
label_atom_id,
label_alt_id,
label_comp_id,
label_asym_id,
label_entity_id,
label_seq_id,
pdbx_PDB_ins_code,
Cartn_x,
Cartn_y,
Cartn_z,
occupancy,
B_iso_or_equiv,
pdbx_formal_charge,
auth_seq_id,
auth_comp_id,
auth_asym_id,
auth_atom_id,
pdbx_PDB_model_num):
"""Add mmcif _atom_site """
if self.mmcif["_atom_site"] :
self.mmcif["_atom_site"]["group_PDB"].append(group_PDB)
self.mmcif["_atom_site"]["id"].append(id)
self.mmcif["_atom_site"]["type_symbol"].append(type_symbol)
self.mmcif["_atom_site"]["label_atom_id"].append(label_atom_id)
self.mmcif["_atom_site"]["label_alt_id"].append(label_alt_id)
self.mmcif["_atom_site"]["label_comp_id"].append(label_comp_id)
self.mmcif["_atom_site"]["label_asym_id"].append(label_asym_id)
self.mmcif["_atom_site"]["label_entity_id"].append(label_entity_id)
self.mmcif["_atom_site"]["label_seq_id"].append(label_seq_id)
self.mmcif["_atom_site"]["pdbx_PDB_ins_code"].append(pdbx_PDB_ins_code)
self.mmcif["_atom_site"]["Cartn_x"].append(Cartn_x)
self.mmcif["_atom_site"]["Cartn_y"].append(Cartn_y)
self.mmcif["_atom_site"]["Cartn_z"].append(Cartn_z)
self.mmcif["_atom_site"]["occupancy"].append(occupancy)
self.mmcif["_atom_site"]["B_iso_or_equiv"].append(B_iso_or_equiv)
self.mmcif["_atom_site"]["pdbx_formal_charge"].append(pdbx_formal_charge)
self.mmcif["_atom_site"]["auth_seq_id"].append(auth_seq_id)
self.mmcif["_atom_site"]["auth_comp_id"].append(auth_comp_id)
self.mmcif["_atom_site"]["auth_asym_id"].append(auth_asym_id)
self.mmcif["_atom_site"]["auth_atom_id"].append(auth_atom_id)
self.mmcif["_atom_site"]["pdbx_PDB_model_num"].append(pdbx_PDB_model_num)
else:
self.mmcif["_atom_site"] = {
"group_PDB" : [ group_PDB ],
"id" : [ id ],
"type_symbol" : [ type_symbol ],
"label_atom_id" : [ label_atom_id ],
"label_alt_id" : [ label_alt_id ],
"label_comp_id" : [ label_comp_id ],
"label_asym_id" : [ label_asym_id ],
"label_entity_id" : [ label_entity_id ],
"label_seq_id" : [ label_seq_id ],
"pdbx_PDB_ins_code" : [ pdbx_PDB_ins_code ],
"Cartn_x" : [ Cartn_x ],
"Cartn_y" : [ Cartn_y ],
"Cartn_z" : [ Cartn_z ],
"occupancy" : [ occupancy ],
"B_iso_or_equiv" : [ B_iso_or_equiv ],
"pdbx_formal_charge" : [ pdbx_formal_charge ],
"auth_seq_id" : [ auth_seq_id ],
"auth_comp_id" : [ auth_comp_id ],
"auth_asym_id" : [ auth_asym_id ],
"auth_atom_id" : [ auth_atom_id ],
"pdbx_PDB_model_num" : [ pdbx_PDB_model_num ],
}
append_chem_comp_bond(chem_comp_id, i, j, k)
add chem_comp_bond
Source code in src/desmondtools/maestro.py
def append_chem_comp_bond(self, chem_comp_id, i, j, k):
""" add chem_comp_bond """
p = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(i)]
q = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(j)]
if self.mmcif["_chem_comp_bond"]:
self.mmcif["_chem_comp_bond"]["comp_id"].append(chem_comp_id)
self.mmcif["_chem_comp_bond"]["atom_id_1"].append(p)
self.mmcif["_chem_comp_bond"]["atom_id_2"].append(q)
self.mmcif["_chem_comp_bond"]["value_order"].append(k)
else:
self.mmcif["_chem_comp_bond"] = {
"comp_id" : [ chem_comp_id ],
"atom_id_1" : [ p ],
"atom_id_2" : [ q ],
"value_order" : [ k ],
}
append_entry(title)
Start a new entry.
Source code in src/desmondtools/maestro.py
def append_entry(self, title):
"""Start a new entry."""
if self.mmcif["_entry"]:
if isinstance(self.mmcif["_entry"]["id"], list):
# you got here as the third and later entry
if self.max_two_entries:
# write out last two entries
self.write_mmcif()
# return to the state in which only the first entry was defined
self.mmcif = copy.deepcopy(self.mmcif_first)
self.usedId = copy.deepcopy(self.mmcif_first_usedId)
self.serial = self.mmcif_first_serial
# regardless of its appearance, current entry now is regarded as the second entry
self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"][0], title ]
else:
# otherwise continue to add entry
self.mmcif["_entry"]["id"].append(title)
else:
# you got here as the second entry
# save the first entry before adding the second entry
self.mmcif_first = copy.deepcopy(self.mmcif)
self.mmcif_first_usedId = copy.deepcopy(self.usedId)
self.mmcif_first_serial = self.serial
# change to a list type
self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"], title ]
else:
# regular string type
self.mmcif["_entry"] = { "id" : title }
export(**kwargs)
Export to PDB/mmCIF
Source code in src/desmondtools/maestro.py
def export(self, **kwargs) -> None:
"""Export to PDB/mmCIF"""
pdb_format = kwargs.get("pdb", False)
cif_format = kwargs.get("cif", False)
skip_first = kwargs.get("skip_first", False)
only_first = kwargs.get("only_first", False)
as_complex = kwargs.get("as_complex", False)
separately = kwargs.get("separately", False)
names = kwargs.get("names", [])
if names:
# if `--names` is used, they are separately saved.
separately = True
if as_complex:
# if `--as-complex` is used, `skip_first` and `only_first` is disabled.
skip_first = False
only_first = False
entries = []
entry_number = 0
q = None
count = re.compile(r'\[(\d+)\]')
self.read_lines()
for line in self.lines:
line = line.strip()
if not line or line.startswith("#"):
continue
if line.startswith("f_m_ct {"): # new entry
q = 'f_m_ct_key'
k = []
if entry_number > 0 and entry.PDB:
if (not names) or (names and entry.title in names):
entries.append(entry)
entry_number += 1
entry = SimpleNamespace(number=entry_number, title="", PDB="", natoms=0, nbonds=0, data=None)
continue
if q == 'f_m_ct_key' and line.startswith(":::"):
n = len(k)
q = 'f_m_ct_val'
vm = []
va = []
vb = []
continue
if q == 'f_m_ct_key':
k.append(line)
if q == 'f_m_ct_val':
vm.append(line)
if len(vm) == n:
data = dict(zip(k, vm))
q = None
entry.title = data['s_m_title'].replace('"','').replace("'","")
entry.data = data
if line.startswith("m_atom") :
entry.natoms = int(count.findall(line)[0])
q = 'm_atom_key'
k = ['atom_index']
continue
if q == 'm_atom_key' and line.startswith(":::"):
q = 'm_atom_val'
nv = 0
continue
if q == 'm_atom_key':
k.append(line)
if q == 'm_atom_val':
c = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k, c))
nv += 1
va.append(data)
if nv == entry.natoms:
q = None
if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
entry.PDB = PDB_ATOM_FORMAT(va)
if line.startswith("m_bond") :
entry.nbonds = int(count.findall(line)[0])
q = 'm_bond_key'
k = ['bond_index']
continue
if q == 'm_bond_key' and line.startswith(":::"):
q = 'm_bond_val'
nv = 0
continue
if q == 'm_bond_key':
k.append(line)
if q == 'm_bond_val':
c = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,c))
# conect information only for ligands
if not 'i_glide_grid_version' in entry.data :
vb.append(data)
nv += 1
if nv == entry.nbonds:
q = None
if vb:
if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
entry.PDB += PDB_CONECT_FORMAT(vb)
if cif_format:
for entry in entries:
pdb_strings = entry.PDB
with StringIO(pdb_strings) as pdb:
parser = PDBParser()
structure = parser.get_structure(entry.title, pdb)
mmcif_io = MMCIFIO()
mmcif_io.set_structure(structure)
mmcif_io.save(f"{self.filename_stem}.cif")
elif pdb_format:
if as_complex:
for entry in entries:
title = safe_filename(entry.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(PDB_MODEL_FORMAT(entries[0].number) + "\n")
f.write(entries[0].PDB)
f.write("ENDMDL\n")
f.write(entry.PDB)
f.write("ENDMDL\n")
f.write("END\n")
elif separately:
for entry in entries:
title = safe_filename(entry.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(entry.PDB)
f.write("END\n")
else:
with open(f"{self.filename_stem}.pdb", "w") as f:
for entry in entries:
f.write(PDB_MODEL_FORMAT(entry.number) + "\n")
f.write(entry.PDB)
f.write("ENDMDL\n")
f.write("END\n")
export2(**kwargs)
Export to PDB/mmCIF
Source code in src/desmondtools/maestro.py
def export2(self, **kwargs) -> None:
"""Export to PDB/mmCIF"""
pdb_format = kwargs.get("pdb", False)
cif_format = kwargs.get("cif", False)
skip_first = kwargs.get("skip_first", False)
only_first = kwargs.get("only_first", False)
as_complex = kwargs.get("as_complex", False)
separately = kwargs.get("separately", False)
names = kwargs.get("names", [])
if names:
# if `--names` is used, they are separately saved.
separately = True
if as_complex:
# if `--as-complex` is used, `skip_first` and `only_first` is disabled.
skip_first = False
only_first = False
self.parse()
exported = []
for i, data in enumerate(self.entries, start=1):
if ((i == 1) and (not skip_first)) or ((i > 1) and (not only_first)):
entry = dict_to_simplenamespace(data)
if (not names) or (names and entry.f_m_ct.s_m_title in names):
exported.append(SimpleNamespace(number=i, title=entry.f_m_ct.s_m_title, PDB=to_pdb_str(entry)))
if as_complex:
for x in exported:
title = safe_filename(x.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(exported[0].PDB)
f.write(x.PDB)
f.write("END\n")
elif separately:
for x in exported:
title = safe_filename(x.title)
with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
f.write(x.PDB)
f.write("END\n")
else:
with open(f"{self.filename_stem}.pdb", "w") as f:
for x in exported:
f.write(PDB_MODEL_FORMAT(x.number) + "\n")
f.write(x.PDB)
f.write("ENDMDL\n")
f.write("END\n")
getFromDict(dataDict, mapList)
staticmethod
new_chem_comp()
start a new chem_comp entry
Source code in src/desmondtools/maestro.py
def new_chem_comp(self):
""" start a new chem_comp entry """
if not type(self.mmcif["_entry"]["id"]) == list or len(self.mmcif["_entry"]["id"]) <= 2:
self.chem_comp_id = "LIG"
else:
self.chem_comp_id = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(3))
self.chem_comp_chainId = [ c for c in string.ascii_uppercase if not (c in self.usedId)][0]
self.chem_comp_resSeq = 100
self.usedId[self.chem_comp_chainId] = []
return self.chem_comp_chainId, self.chem_comp_resSeq, self.chem_comp_id
new_chem_comp_atom(obj, element)
create and return unique atom name
Source code in src/desmondtools/maestro.py
def new_chem_comp_atom(self, obj, element):
""" create and return unique atom name """
if "s_m_pdb_atom_name" in obj:
atom_name = obj["s_m_pdb_atom_name"].strip()
else:
atom_name = "{}{}".format(element,1)
if not (atom_name in self.usedId[self.chem_comp_chainId]):
self.usedId[self.chem_comp_chainId].append(atom_name)
return atom_name
else:
for i in range(1,10000):
new_name = "{}{}".format(element,i)
if not (new_name in self.usedId[self.chem_comp_chainId]):
self.usedId[self.chem_comp_chainId].append(new_name)
return new_name
parse()
Parse a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
Source code in src/desmondtools/maestro.py
def parse(self) -> None:
"""Parse a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
"""
if self.filename.endswith('.maegz'):
self.filename_prefix = self.filename.replace(".maegz", "")
with gzip.open(self.filename, 'rt') as f:
self.parse_impl(f)
elif self.filename.endswith('.mae.gz'):
self.filename_prefix = self.filename.replace(".mae.gz", "")
with gzip.open(self.filename, "rt") as f:
self.parse_impl(f)
elif self.filename.endswith('.mae'):
self.filename_prefix = self.filename.replace(".mae", "")
with open(self.filename, "rt") as f:
self.parse_impl(f)
else:
print("Error : .mae, .mae.gz, or .maegz file is expected")
sys.exit(1)
self.filename_stem = Path(self.filename_prefix).stem
parse_impl(f)
Parse Maestro file to a list of dictionaries
Source code in src/desmondtools/maestro.py
def parse_impl(self, f:TextIO) -> None:
"""Parse Maestro file to a list of dictionaries"""
self.entries = []
count = re.compile(r'(\w+)\[(\d+)\]')
tokens = shlex.split(f.read())
level = []
data = {}
previous_token = None
header = False
extra_column = 0
num_repeat = 1
skip = False
for token in tokens :
if token == "#" :
skip = not skip # invert
continue
elif skip:
continue
elif token == "{" :
header = True
key = []
val = []
arr = []
if previous_token:
if previous_token == "f_m_ct" and data:
self.entries.append(data)
data = {}
try:
(block, num_repeat) = count.findall(previous_token)[0]
num_repeat = int(num_repeat)
extra_column = 1
except:
block = previous_token
num_repeat = 1
extra_column = 0
level.append(block)
elif token == "}":
if level:
level.pop()
elif token == ":::":
header = False
elif header:
key.append(token)
else:
val.append(token)
# only store f_m_ct blocks (level != [])
if len(val) == (len(key)+extra_column) and level :
arr.append(val[extra_column:])
val = []
if len(arr) == num_repeat:
if len(arr) == 1:
Maestro.setInDict(data, level, dict(zip(key, arr[0])))
else:
T = list(zip(*arr)) # transpose
Maestro.setInDict(data, level, dict(zip(key, T)))
previous_token = token
if data:
self.entries.append(data)
read_lines()
Read a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
Source code in src/desmondtools/maestro.py
def read_lines(self) -> None:
"""Read a Maestro file
set self.lines, self.filename_prefix, self.filename_stem
"""
if self.filename.endswith('.maegz'):
self.filename_prefix = self.filename.replace(".maegz", "")
with gzip.open(self.filename, 'rt') as f:
self.lines = f.readlines()
elif self.filename.endswith('.mae.gz'):
self.filename_prefix = self.filename.replace(".mae.gz", "")
with gzip.open(self.filename, "rt") as f:
self.lines = f.readlines()
elif self.filename.endswith('.mae'):
self.filename_prefix = self.filename.replace(".mae", "")
with open(self.filename, "rt") as f:
self.lines = f.readlines()
else:
print("Error : .mae, .mae.gz, or .maegz file is expected")
sys.exit(1)
self.filename_stem = Path(self.filename_prefix).stem
setInDict(dataDict, mapList, value)
staticmethod
to_mmcif(**kwargs)
Source code in src/desmondtools/maestro.py
def to_mmcif(self, **kwargs):
token = None
entity_id = 0
for line in self.lines:
line = line.strip()
if not line or line.startswith("#"):
continue
# f_m_ct block
if line.startswith("f_m_ct {"):
token = 'f_m_ct_key'
natoms = 0
nbonds = 0
entity_id += 1
k = []
continue
if token == 'f_m_ct_key':
if line.startswith(":::"): # end of f_m_ct key block
n = len(k)
token = 'f_m_ct_val'
v = []
continue
k.append(line)
if token == 'f_m_ct_val':
v.append(line)
if len(v) == n:
data = dict(zip(k,v))
if "s_lp_Variant" in data:
self.title = data['s_lp_Variant'].replace('"','').replace("'","")
else:
self.title = data['s_m_title'].replace('"','').replace("'","")
self.append_entry(self.title)
token = None
# m_atom block
if line.startswith("m_atom") :
natoms = int(Maestro.count.findall(line)[0])
token = 'm_atom_key'
k = ['i_atom_index'] # 1st column is atom_index
continue
if token == 'm_atom_key':
if line.startswith(":::"):
token = 'm_atom_val'
nv = 0
m_atom = {}
m_atom_resName = {}
continue
k.append(line)
if token == 'm_atom_val':
v = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,v))
# build hierarchical structure of chain/ resSeq/ resName
chainId = get_value_or_default(data,"s_m_chain_name", "A")
resSeq = int(get_value_or_default(data, "i_m_residue_number", "1"))
resName = get_value_or_default(data, "s_m_pdb_residue_name", "?")
if resName in std_rename:
resName = std_rename[resName]
if not chainId in m_atom:
m_atom[chainId] = {}
if not resSeq in m_atom[chainId]:
m_atom[chainId][resSeq] = {}
if not resName in m_atom[chainId][resSeq]:
m_atom[chainId][resSeq][resName] = []
m_atom[chainId][resSeq][resName].append(data)
m_atom_resName[int(data["i_atom_index"])] = resName
nv += 1
if nv == natoms: # end of m_atom block
token = None
# m_bond block
if line.startswith("m_bond") :
nbonds = int(Maestro.count.findall(line)[0])
token = 'm_bond_key'
k = ['i_bond_index']
continue
if token == 'm_bond_key':
if line.startswith(":::"): # end of m_bond key block
token = 'm_bond_val'
nv = 0
m_bond = {}
continue
k.append(line)
if token == 'm_bond_val':
v = ['' if x=="<>" else x for x in shlex.split(line)]
data = dict(zip(k,v))
# build bond connectivity
p = int(data["i_m_from"])
q = int(data["i_m_to"])
r = int(data["i_m_order"])
if p in m_bond:
m_bond[p][q] = r
else:
m_bond[p] = {q:r}
if q in m_bond:
m_bond[q][p] = r
else:
m_bond[q] = {p:r}
nv += 1
if nv == nbonds: # m_bond block ends
token = None
# create mmcif _atom_site
for chainId_ in sorted(m_atom):
for resSeq_ in sorted(m_atom[chainId_]):
for resName_ in sorted(m_atom[chainId_][resSeq_]):
if resName_ in std_residues:
group_PDB = "ATOM"
chainId, resSeq, resName = chainId_, resSeq_, resName_
else:
group_PDB = "HETATM"
# check if new_chem_comp is necessary
this_residue = [ int(d["i_atom_index"]) for d in m_atom[chainId_][resSeq_][resName_] ]
need_new_chem_comp = True
for p_ in this_residue:
for q_, bond_order in m_bond[p_].items():
if (not (q_ in this_residue)) and \
(not (m_atom_resName[q_] in std_residues)) and \
(q_ in self.iamap):
# do not create another chem_comp
need_new_chem_comp = False
q, chainId, resSeq, resName = self.iamap[q_]
break
if need_new_chem_comp:
chainId, resSeq, resName = self.new_chem_comp()
# go through all atoms within the same residue
for d in m_atom[chainId_][resSeq_][resName_]:
self.serial += 1
self.iamap[int(d["i_atom_index"])] = (self.serial, chainId, resSeq, resName)
element = atomic_symbol[int(d["i_m_atomic_number"])]
if group_PDB == "ATOM":
name = get_value_or_default(d,"s_m_pdb_atom_name", "?")
else:
name = self.new_chem_comp_atom(d, element)
x = float(d["r_m_x_coord"])
y = float(d["r_m_y_coord"])
z = float(d["r_m_z_coord"])
occupancy = float(get_value_or_default(d,"r_m_pdb_occupancy","1"))
bfactor = float(get_value_or_default(d,"r_m_pdb_tfactor", "0"))
formal_charge = int(get_value_or_default(d,"i_m_formal_charge", "0"))
self.append_atom_site(
group_PDB = group_PDB,
id = self.serial,
type_symbol = element,
label_atom_id = name,
label_alt_id = ".",
label_comp_id = resName,
label_asym_id = chainId,
label_entity_id = entity_id,
label_seq_id = resSeq,
pdbx_PDB_ins_code = "?",
Cartn_x = x,
Cartn_y = y,
Cartn_z = z,
occupancy = occupancy,
B_iso_or_equiv = bfactor,
pdbx_formal_charge = formal_charge,
auth_seq_id = resSeq,
auth_comp_id = resName,
auth_asym_id = chainId,
auth_atom_id = name,
pdbx_PDB_model_num = 1,
)
# create mmcif _chem_comp_bond
for chainId_ in sorted(m_atom):
for resSeq_ in sorted(m_atom[chainId_]):
for resName_ in sorted(m_atom[chainId_][resSeq_]):
if not (resName_ in std_residues): # HETATM
for d in m_atom[chainId_][resSeq_][resName_]:
p_ = int(d["i_atom_index"])
p, p_chainId, p_resSeq, p_resName = self.iamap[p_]
if p_ in m_bond:
for q_, bond_order in m_bond[p_].items():
q, q_chainId, q_resSeq, q_resName = self.iamap[q_]
self.append_chem_comp_bond(p_resName, p, q, bond_order)
cifo = CifFileWriter("{}.cif".format(self.filename_stem))
# clean up undefined dictionary
# force to copy a list to avoid
# RuntimeError: dictionary changed size during iteration
for k in list(self.mmcif):
if not self.mmcif[k]:
del self.mmcif[k]
cifo.write({ "desmondtools" : self.mmcif })